diff options
author | teor <teor@torproject.org> | 2020-01-07 17:05:48 +1000 |
---|---|---|
committer | teor <teor@torproject.org> | 2020-01-07 17:05:48 +1000 |
commit | d62dbb676242dbdfdd121828c97e12e737aa596c (patch) | |
tree | aa1c6c42fe523d29eb8a33f64f15b6309331c207 /src | |
parent | 1b63eea66cbb8793a3cff05de8d856ce3b93fc17 (diff) | |
download | tor-d62dbb676242dbdfdd121828c97e12e737aa596c.tar.gz tor-d62dbb676242dbdfdd121828c97e12e737aa596c.zip |
string: Add extra UTF-8 test cases
These test cases are validated differently by some
programming languages, because those languages have
incorrect UTF-8 implementations.
We want to make sure that tor validates them correctly.
Closes ticket 32845.
Diffstat (limited to 'src')
-rw-r--r-- | src/test/test_util.c | 35 |
1 file changed, 34 insertions, 1 deletion
diff --git a/src/test/test_util.c b/src/test/test_util.c index 7f7e157c17..92623ea0cd 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -4104,10 +4104,43 @@ test_util_string_is_utf8(void *ptr) tt_int_op(0, OP_EQ, string_is_utf8("\xed\xbf\xbf", 3)); tt_int_op(1, OP_EQ, string_is_utf8("\xee\x80\x80", 3)); - // The maximum legal codepoint, 10FFFF. + // The minimum legal codepoint, 0x00. + tt_int_op(1, OP_EQ, string_is_utf8("\0", 1)); + + // The maximum legal codepoint, 0x10FFFF. tt_int_op(1, OP_EQ, string_is_utf8("\xf4\x8f\xbf\xbf", 4)); tt_int_op(0, OP_EQ, string_is_utf8("\xf4\x90\x80\x80", 4)); + /* Test cases that vary between programming languages / + * UTF-8 implementations. + * Source: POC||GTFO 19, page 43 + * https://www.alchemistowl.org/pocorgtfo/ + */ + + // Invalid (in most implementations) + // surrogate + tt_int_op(0, OP_EQ, string_is_utf8("\xed\xa0\x81", 3)); + // nullsurrog + tt_int_op(0, OP_EQ, string_is_utf8("\x30\x00\xed\xa0\x81", 5)); + // threehigh + tt_int_op(0, OP_EQ, string_is_utf8("\xed\xbf\xbf", 3)); + // fourhigh + tt_int_op(0, OP_EQ, string_is_utf8("\xf4\x90\xbf\xbf", 4)); + // fivebyte + tt_int_op(0, OP_EQ, string_is_utf8("\xfb\x80\x80\x80\x80", 5)); + // sixbyte + tt_int_op(0, OP_EQ, string_is_utf8("\xfd\x80\x80\x80\x80", 5)); + // sixhigh + tt_int_op(0, OP_EQ, string_is_utf8("\xfd\xbf\xbf\xbf\xbf", 5)); + + // Valid (in most implementations) + // fourbyte + tt_int_op(1, OP_EQ, string_is_utf8("\xf0\x90\x8d\x88", 4)); + // fourbyte2 + tt_int_op(1, OP_EQ, string_is_utf8("\xf0\xbf\xbf\xbf", 4)); + // nullbyte + tt_int_op(1, OP_EQ, string_is_utf8("\x30\x31\x32\x00\x33", 5)); + done: ; } |