diff options
author | Nick Mathewson <nickm@torproject.org> | 2018-09-12 08:12:19 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2018-09-12 08:12:19 -0400 |
commit | 8294c40c96a9ec59ff3e0a3ce2f4926c275ab114 (patch) | |
tree | 633a6fa2622ce0babfc9f73f17f1d734402c02bb /src/test/test_util.c | |
parent | 5a2374b0740ce662e4b323f104c8ff58164d996a (diff) | |
parent | f8c0f694b7f335358f612b0a26aae8b48110fa50 (diff) | |
download | tor-8294c40c96a9ec59ff3e0a3ce2f4926c275ab114.tar.gz tor-8294c40c96a9ec59ff3e0a3ce2f4926c275ab114.zip |
Merge remote-tracking branch 'tor-github/pr/318'
Diffstat (limited to 'src/test/test_util.c')
-rw-r--r-- | src/test/test_util.c | 48 |
1 files changed, 48 insertions, 0 deletions
```diff
diff --git a/src/test/test_util.c b/src/test/test_util.c
index b5a231dac6..7bc1b7921a 100644
--- a/src/test/test_util.c
+++ b/src/test/test_util.c
@@ -4013,6 +4013,53 @@ test_util_string_is_C_identifier(void *ptr)
 }
 
 static void
+test_util_string_is_utf8(void *ptr)
+{
+  (void)ptr;
+
+  tt_int_op(1, OP_EQ, string_is_utf8(NULL, 0));
+  tt_int_op(1, OP_EQ, string_is_utf8("", 1));
+  tt_int_op(1, OP_EQ, string_is_utf8("\uFEFF", 3));
+  tt_int_op(1, OP_EQ, string_is_utf8("\uFFFE", 3));
+  tt_int_op(1, OP_EQ, string_is_utf8("ascii\x7f\n", 7));
+  tt_int_op(1, OP_EQ, string_is_utf8("Risqu\u00e9=1", 9));
+
+  // Validate exactly 'len' bytes.
+  tt_int_op(0, OP_EQ, string_is_utf8("\0\x80", 2));
+  tt_int_op(0, OP_EQ, string_is_utf8("Risqu\u00e9=1", 6));
+
+  // Reject sequences with missing bytes.
+  tt_int_op(0, OP_EQ, string_is_utf8("\x80", 1));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xc2", 1));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xc2 ", 2));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xe1\x80", 2));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xe1\x80 ", 3));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xf1\x80\x80", 3));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xf1\x80\x80 ", 4));
+
+  // Reject encodings that are overly long.
+  tt_int_op(0, OP_EQ, string_is_utf8("\xc1\xbf", 2));
+  tt_int_op(1, OP_EQ, string_is_utf8("\xc2\x80", 2));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xe0\x9f\xbf", 3));
+  tt_int_op(1, OP_EQ, string_is_utf8("\xe0\xa0\x80", 3));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xf0\x8f\xbf\xbf", 4));
+  tt_int_op(1, OP_EQ, string_is_utf8("\xf0\x90\x80\x80", 4));
+
+  // Reject UTF-16 surrogate halves.
+  tt_int_op(1, OP_EQ, string_is_utf8("\xed\x9f\xbf", 3));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xed\xa0\x80", 3));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xed\xbf\xbf", 3));
+  tt_int_op(1, OP_EQ, string_is_utf8("\xee\x80\x80", 3));
+
+  // The maximum legal codepoint, 10FFFF.
+  tt_int_op(1, OP_EQ, string_is_utf8("\xf4\x8f\xbf\xbf", 4));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xf4\x90\x80\x80", 4));
+
+ done:
+  ;
+}
+
+static void
 test_util_asprintf(void *ptr)
 {
 #define LOREMIPSUM \
@@ -6409,6 +6456,7 @@ struct testcase_t util_tests[] = {
   UTIL_TEST(clamp_double_to_int64, 0),
   UTIL_TEST(find_str_at_start_of_line, 0),
   UTIL_TEST(string_is_C_identifier, 0),
+  UTIL_TEST(string_is_utf8, 0),
   UTIL_TEST(asprintf, 0),
   UTIL_TEST(listdir, 0),
   UTIL_TEST(parent_dir, 0),
```