diff options
author | cypherpunks <cypherpunks@torproject.org> | 2018-08-29 03:22:30 +0000 |
---|---|---|
committer | cypherpunks <cypherpunks@torproject.org> | 2018-09-03 13:54:43 +0000 |
commit | d32b08af6f38d76d609edfddd44159446b5f25b6 (patch) | |
tree | a2fee3b622a53b557b4acdc299b962d179c3f509 /src/test/test_util.c | |
parent | 1c62adb65baa99c92f937318c452955306301643 (diff) | |
download | tor-d32b08af6f38d76d609edfddd44159446b5f25b6.tar.gz tor-d32b08af6f38d76d609edfddd44159446b5f25b6.zip |
string: add string_is_utf8() helper
Ticket #27373.
Diffstat (limited to 'src/test/test_util.c')
-rw-r--r-- | src/test/test_util.c | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/src/test/test_util.c b/src/test/test_util.c index 6cbd504e34..bdf8b5dafa 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -4012,6 +4012,53 @@ test_util_string_is_C_identifier(void *ptr) } static void +test_util_string_is_utf8(void *ptr) +{ + (void)ptr; + + tt_int_op(1, OP_EQ, string_is_utf8(NULL, 0)); + tt_int_op(1, OP_EQ, string_is_utf8("", 1)); + tt_int_op(1, OP_EQ, string_is_utf8("\uFEFF", 3)); + tt_int_op(1, OP_EQ, string_is_utf8("\uFFFE", 3)); + tt_int_op(1, OP_EQ, string_is_utf8("ascii\x7f\n", 7)); + tt_int_op(1, OP_EQ, string_is_utf8("Risqu\u00e9=1", 9)); + + // Validate exactly 'len' bytes. + tt_int_op(0, OP_EQ, string_is_utf8("\0\x80", 2)); + tt_int_op(0, OP_EQ, string_is_utf8("Risqu\u00e9=1", 6)); + + // Reject sequences with missing bytes. + tt_int_op(0, OP_EQ, string_is_utf8("\x80", 1)); + tt_int_op(0, OP_EQ, string_is_utf8("\xc2", 1)); + tt_int_op(0, OP_EQ, string_is_utf8("\xc2 ", 2)); + tt_int_op(0, OP_EQ, string_is_utf8("\xe1\x80", 2)); + tt_int_op(0, OP_EQ, string_is_utf8("\xe1\x80 ", 3)); + tt_int_op(0, OP_EQ, string_is_utf8("\xf1\x80\x80", 3)); + tt_int_op(0, OP_EQ, string_is_utf8("\xf1\x80\x80 ", 4)); + + // Reject encodings that are overly long. + tt_int_op(0, OP_EQ, string_is_utf8("\xc1\xbf", 2)); + tt_int_op(1, OP_EQ, string_is_utf8("\xc2\x80", 2)); + tt_int_op(0, OP_EQ, string_is_utf8("\xe0\x9f\xbf", 3)); + tt_int_op(1, OP_EQ, string_is_utf8("\xe0\xa0\x80", 3)); + tt_int_op(0, OP_EQ, string_is_utf8("\xf0\x8f\xbf\xbf", 4)); + tt_int_op(1, OP_EQ, string_is_utf8("\xf0\x90\x80\x80", 4)); + + // Reject UTF-16 surrogate halves. + tt_int_op(1, OP_EQ, string_is_utf8("\xed\x9f\xbf", 3)); + tt_int_op(0, OP_EQ, string_is_utf8("\xed\xa0\x80", 3)); + tt_int_op(0, OP_EQ, string_is_utf8("\xed\xbf\xbf", 3)); + tt_int_op(1, OP_EQ, string_is_utf8("\xee\x80\x80", 3)); + + // The maximum legal codepoint, 10FFFF. + tt_int_op(1, OP_EQ, string_is_utf8("\xf4\x8f\xbf\xbf", 4)); + tt_int_op(0, OP_EQ, string_is_utf8("\xf4\x90\x80\x80", 4)); + + done: + ; +} + +static void test_util_asprintf(void *ptr) { #define LOREMIPSUM \ @@ -6398,6 +6445,7 @@ struct testcase_t util_tests[] = { UTIL_TEST(clamp_double_to_int64, 0), UTIL_TEST(find_str_at_start_of_line, 0), UTIL_TEST(string_is_C_identifier, 0), + UTIL_TEST(string_is_utf8, 0), UTIL_TEST(asprintf, 0), UTIL_TEST(listdir, 0), UTIL_TEST(parent_dir, 0), |