Merge remote-tracking branch 'tor-github/pr/318'

author: Nick Mathewson <nickm@torproject.org> 2018-09-12 08:12:19 -0400
committer: Nick Mathewson <nickm@torproject.org> 2018-09-12 08:12:19 -0400
commit: 8294c40c96a9ec59ff3e0a3ce2f4926c275ab114 (patch)
tree: 633a6fa2622ce0babfc9f73f17f1d734402c02bb /src/test/test_util.c
parent: 5a2374b0740ce662e4b323f104c8ff58164d996a (diff)
parent: f8c0f694b7f335358f612b0a26aae8b48110fa50 (diff)
download: tor-8294c40c96a9ec59ff3e0a3ce2f4926c275ab114.tar.gz
tor-8294c40c96a9ec59ff3e0a3ce2f4926c275ab114.zip
1 file changed, 48 insertions, 0 deletions
diff --git a/src/test/test_util.c b/src/test/test_util.c
index b5a231dac6..7bc1b7921a 100644
--- a/src/test/test_util.c
+++ b/src/test/test_util.c
@@ -4013,6 +4013,53 @@ test_util_string_is_C_identifier(void *ptr)
 }
 
 static void
+test_util_string_is_utf8(void *ptr)
+{ // Exercise string_is_utf8() on well-formed and malformed byte strings.
+  (void)ptr; // Unused by this test.
+  // Baseline valid inputs: NULL/0, a lone NUL, U+FEFF, U+FFFE, ASCII, 2-byte.
+  tt_int_op(1, OP_EQ, string_is_utf8(NULL, 0));
+  tt_int_op(1, OP_EQ, string_is_utf8("", 1));
+  tt_int_op(1, OP_EQ, string_is_utf8("\uFEFF", 3)); // assumes UTF-8 literals
+  tt_int_op(1, OP_EQ, string_is_utf8("\uFFFE", 3));
+  tt_int_op(1, OP_EQ, string_is_utf8("ascii\x7f\n", 7));
+  tt_int_op(1, OP_EQ, string_is_utf8("Risqu\u00e9=1", 9));
+
+  // Validate exactly 'len' bytes: continue past NUL; fail on a cut sequence.
+  tt_int_op(0, OP_EQ, string_is_utf8("\0\x80", 2));
+  tt_int_op(0, OP_EQ, string_is_utf8("Risqu\u00e9=1", 6));
+
+  // Reject a lone continuation byte and lead bytes missing continuations.
+  tt_int_op(0, OP_EQ, string_is_utf8("\x80", 1));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xc2", 1));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xc2 ", 2));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xe1\x80", 2));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xe1\x80 ", 3));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xf1\x80\x80", 3));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xf1\x80\x80 ", 4));
+
+  // Reject overlong forms; accept U+0080, U+0800, U+10000 (shortest forms).
+  tt_int_op(0, OP_EQ, string_is_utf8("\xc1\xbf", 2));
+  tt_int_op(1, OP_EQ, string_is_utf8("\xc2\x80", 2));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xe0\x9f\xbf", 3));
+  tt_int_op(1, OP_EQ, string_is_utf8("\xe0\xa0\x80", 3));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xf0\x8f\xbf\xbf", 4));
+  tt_int_op(1, OP_EQ, string_is_utf8("\xf0\x90\x80\x80", 4));
+
+  // Reject surrogates U+D800 and U+DFFF; accept neighbors U+D7FF and U+E000.
+  tt_int_op(1, OP_EQ, string_is_utf8("\xed\x9f\xbf", 3));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xed\xa0\x80", 3));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xed\xbf\xbf", 3));
+  tt_int_op(1, OP_EQ, string_is_utf8("\xee\x80\x80", 3));
+
+  // Accept the maximum code point U+10FFFF; reject U+110000 just above.
+  tt_int_op(1, OP_EQ, string_is_utf8("\xf4\x8f\xbf\xbf", 4));
+  tt_int_op(0, OP_EQ, string_is_utf8("\xf4\x90\x80\x80", 4));
+
+ done:
+  ; // Nothing to clean up; the empty statement keeps the label valid C.
+}
+
+static void
+
+static void
 test_util_asprintf(void *ptr)
 {
 #define LOREMIPSUM                                              \
@@ -6409,6 +6456,7 @@ struct testcase_t util_tests[] = {
   UTIL_TEST(clamp_double_to_int64, 0),
   UTIL_TEST(find_str_at_start_of_line, 0),
   UTIL_TEST(string_is_C_identifier, 0),
+  UTIL_TEST(string_is_utf8, 0),
   UTIL_TEST(asprintf, 0),
   UTIL_TEST(listdir, 0),
   UTIL_TEST(parent_dir, 0),
author	Nick Mathewson <nickm@torproject.org>	2018-09-12 08:12:19 -0400
committer	Nick Mathewson <nickm@torproject.org>	2018-09-12 08:12:19 -0400
commit	8294c40c96a9ec59ff3e0a3ce2f4926c275ab114 (patch)
tree	633a6fa2622ce0babfc9f73f17f1d734402c02bb /src/test/test_util.c
parent	5a2374b0740ce662e4b323f104c8ff58164d996a (diff)
parent	f8c0f694b7f335358f612b0a26aae8b48110fa50 (diff)
download	tor-8294c40c96a9ec59ff3e0a3ce2f4926c275ab114.tar.gz tor-8294c40c96a9ec59ff3e0a3ce2f4926c275ab114.zip