summary | refs | log | tree | commit | diff
path: root/src/lib/string/util_string.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/string/util_string.c')
-rw-r--r-- src/lib/string/util_string.c 23
1 files changed, 20 insertions, 3 deletions
diff --git a/src/lib/string/util_string.c b/src/lib/string/util_string.c
index f5061a11d2..c8f12d780e 100644
--- a/src/lib/string/util_string.c
+++ b/src/lib/string/util_string.c
@@ -1,6 +1,6 @@
/* Copyright (c) 2003-2004, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
@@ -506,6 +506,23 @@ validate_char(const uint8_t *c, uint8_t len)
int
string_is_utf8(const char *str, size_t len)
{
+ // If str is NULL, don't try to read it
+ if (!str) {
+ // We could test for this case, but the low-level logs would produce
+ // confusing test output.
+ // LCOV_EXCL_START
+ if (len) {
+ // Use the low-level logging function, so that the log module can
+ // validate UTF-8 (if needed in future code)
+ tor_log_err_sigsafe(
+ "BUG: string_is_utf8() called with NULL str but non-zero len.");
+ // Since it's a bug, we should probably reject this string
+ return false;
+ }
+ // LCOV_EXCL_STOP
+ return true;
+ }
+
for (size_t i = 0; i < len;) {
uint8_t num_bytes = bytes_in_char(str[i]);
if (num_bytes == 0) // Invalid leading byte found.
@@ -530,8 +547,8 @@ string_is_utf8(const char *str, size_t len)
int
string_is_utf8_no_bom(const char *str, size_t len)
{
- if (len >= 3 && (!strcmpstart(str, "\uFEFF") ||
- !strcmpstart(str, "\uFFFE"))) {
+ if (str && len >= 3 && (!strcmpstart(str, "\uFEFF") ||
+ !strcmpstart(str, "\uFFFE"))) {
return false;
}
return string_is_utf8(str, len);