1 files changed, 34 insertions, 22 deletions
diff --git a/src/lib/string/util_string.c b/src/lib/string/util_string.c
index f934f66f02..c8f12d780e 100644
--- a/src/lib/string/util_string.c
+++ b/src/lib/string/util_string.c
@@ -1,6 +1,6 @@
 /* Copyright (c) 2003-2004, Roger Dingledine
  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
 /* See LICENSE for licensing information */
 
 /**
@@ -71,7 +71,7 @@ tor_memstr(const void *haystack, size_t hlen, const char *needle)
 
 /** Return true iff the 'len' bytes at 'mem' are all zero. */
 int
-tor_mem_is_zero(const char *mem, size_t len)
+fast_mem_is_zero(const char *mem, size_t len)
 {
   static const char ZERO[] = {
     0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
@@ -95,17 +95,14 @@ tor_mem_is_zero(const char *mem, size_t len)
 int
 tor_digest_is_zero(const char *digest)
 {
-  static const uint8_t ZERO_DIGEST[] = {
-    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
-  };
-  return tor_memeq(digest, ZERO_DIGEST, DIGEST_LEN);
+  return safe_mem_is_zero(digest, DIGEST_LEN);
 }
 
 /** Return true iff the DIGEST256_LEN bytes in digest are all zero. */
 int
 tor_digest256_is_zero(const char *digest)
 {
-  return tor_mem_is_zero(digest, DIGEST256_LEN);
+  return safe_mem_is_zero(digest, DIGEST256_LEN);
 }
 
 /** Remove from the string <b>s</b> every character which appears in
@@ -212,21 +209,6 @@ strcmpstart(const char *s1, const char *s2)
   return strncmp(s1, s2, n);
 }
 
-/** Compare the s1_len-byte string <b>s1</b> with <b>s2</b>,
- * without depending on a terminating nul in s1.  Sorting order is first by
- * length, then lexically; return values are as for strcmp.
- */
-int
-strcmp_len(const char *s1, const char *s2, size_t s1_len)
-{
-  size_t s2_len = strlen(s2);
-  if (s1_len < s2_len)
-    return -1;
-  if (s1_len > s2_len)
-    return 1;
-  return fast_memcmp(s1, s2, s2_len);
-}
-
 /** Compares the first strlen(s2) characters of s1 with s2.  Returns as for
  * strcasecmp.
  */
@@ -524,6 +506,23 @@ validate_char(const uint8_t *c, uint8_t len)
 int
 string_is_utf8(const char *str, size_t len)
 {
+  // If str is NULL, don't try to read it
+  if (!str) {
+    // We could test for this case, but the low-level logs would produce
+    // confusing test output.
+    // LCOV_EXCL_START
+    if (len) {
+      // Use the low-level logging function, so that the log module can
+      // validate UTF-8 (if needed in future code)
+      tor_log_err_sigsafe(
+        "BUG: string_is_utf8() called with NULL str but non-zero len.");
+      // Since it's a bug, we should probably reject this string
+      return false;
+    }
+    // LCOV_EXCL_STOP
+    return true;
+  }
+
   for (size_t i = 0; i < len;) {
     uint8_t num_bytes = bytes_in_char(str[i]);
     if (num_bytes == 0) // Invalid leading byte found.
@@ -541,3 +540,16 @@ string_is_utf8(const char *str, size_t len)
   }
   return true;
 }
+
+/** As string_is_utf8(), but also returns false if the string begins with the
+ * UTF-8 encoding of U+FEFF (byte order mark) or of the noncharacter U+FFFE.
+ * The bytes are spelled out so they don't depend on the execution charset. */
+int
+string_is_utf8_no_bom(const char *str, size_t len)
+{
+  if (str && len >= 3 && (!strcmpstart(str, "\xEF\xBB\xBF") ||
+                          !strcmpstart(str, "\xEF\xBF\xBE"))) {
+    return false;
+  }
+  return string_is_utf8(str, len);
+}