diff options
author | teor <teor@torproject.org> | 2020-01-07 17:09:25 +1000 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2020-01-07 10:16:08 -0500 |
commit | 0e3fd4df9990b240cab3754a7c3e809d081b56d9 (patch) | |
tree | e45c51407cb70f70c1396ace5cbb5b862b964495 /src/lib/string | |
parent | d62dbb676242dbdfdd121828c97e12e737aa596c (diff) | |
download | tor-0e3fd4df9990b240cab3754a7c3e809d081b56d9.tar.gz tor-0e3fd4df9990b240cab3754a7c3e809d081b56d9.zip |
string: Check UTF-8 string pointer and length
If they are inconsistent, output a raw bug log.
Part of 32845.
Diffstat (limited to 'src/lib/string')
-rw-r--r-- | src/lib/string/util_string.c | 21 |
1 file changed, 19 insertions, 2 deletions
diff --git a/src/lib/string/util_string.c b/src/lib/string/util_string.c
index f5061a11d2..93b3eb09f2 100644
--- a/src/lib/string/util_string.c
+++ b/src/lib/string/util_string.c
@@ -506,6 +506,23 @@ validate_char(const uint8_t *c, uint8_t len)
 int
 string_is_utf8(const char *str, size_t len)
 {
+  // If str is NULL, don't try to read it
+  if (!str) {
+    // We could test for this case, but the low-level logs would produce
+    // confusing test output.
+    // LCOV_EXCL_START
+    if (len) {
+      // Use the low-level logging function, so that the log module can
+      // validate UTF-8 (if needed in future code)
+      tor_log_err_sigsafe(
+        "BUG: string_is_utf8() called with NULL str but non-zero len.");
+      // Since it's a bug, we should probably reject this string
+      return false;
+    }
+    // LCOV_EXCL_STOP
+    return true;
+  }
+
   for (size_t i = 0; i < len;) {
     uint8_t num_bytes = bytes_in_char(str[i]);
     if (num_bytes == 0) // Invalid leading byte found.
@@ -530,8 +547,8 @@ string_is_utf8(const char *str, size_t len)
 int
 string_is_utf8_no_bom(const char *str, size_t len)
 {
-  if (len >= 3 && (!strcmpstart(str, "\uFEFF") ||
-                   !strcmpstart(str, "\uFFFE"))) {
+  if (str && len >= 3 && (!strcmpstart(str, "\uFEFF") ||
+                          !strcmpstart(str, "\uFFFE"))) {
     return false;
   }
   return string_is_utf8(str, len);