summaryrefslogtreecommitdiff
path: root/src/lib/string
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/string')
-rw-r--r--src/lib/string/.may_include10
-rw-r--r--src/lib/string/compat_ctype.c72
-rw-r--r--src/lib/string/compat_ctype.h67
-rw-r--r--src/lib/string/compat_string.c74
-rw-r--r--src/lib/string/compat_string.h62
-rw-r--r--src/lib/string/include.am27
-rw-r--r--src/lib/string/parse_int.c131
-rw-r--r--src/lib/string/parse_int.h25
-rw-r--r--src/lib/string/printf.c167
-rw-r--r--src/lib/string/printf.h30
-rw-r--r--src/lib/string/scanf.c317
-rw-r--r--src/lib/string/scanf.h24
-rw-r--r--src/lib/string/util_string.c543
-rw-r--r--src/lib/string/util_string.h57
14 files changed, 1606 insertions, 0 deletions
diff --git a/src/lib/string/.may_include b/src/lib/string/.may_include
new file mode 100644
index 0000000000..ec5c769831
--- /dev/null
+++ b/src/lib/string/.may_include
@@ -0,0 +1,10 @@
+orconfig.h
+lib/cc/*.h
+lib/defs/*.h
+lib/err/*.h
+lib/malloc/*.h
+lib/ctime/*.h
+lib/string/*.h
+
+strlcat.c
+strlcpy.c
diff --git a/src/lib/string/compat_ctype.c b/src/lib/string/compat_ctype.c
new file mode 100644
index 0000000000..f5d82be3ae
--- /dev/null
+++ b/src/lib/string/compat_ctype.c
@@ -0,0 +1,72 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file compat_ctype.c
+ * \brief Locale-independent character-type inspection (backend)
+ **/
+
+#include "lib/string/compat_ctype.h"
+
+/**
+ * Tables to implement ctypes-replacement TOR_IS*() functions. Each table
+ * has 256 bits to look up whether a character is in some set or not. This
+ * fails on non-ASCII platforms, but it is hard to find a platform whose
+ * character set is not a superset of ASCII nowadays. */
+
+/**@{*/
+const uint32_t TOR_ISALPHA_TABLE[8] =
+ { 0, 0, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 };
+const uint32_t TOR_ISALNUM_TABLE[8] =
+ { 0, 0x3ff0000, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 };
+const uint32_t TOR_ISSPACE_TABLE[8] = { 0x3e00, 0x1, 0, 0, 0, 0, 0, 0 };
+const uint32_t TOR_ISXDIGIT_TABLE[8] =
+ { 0, 0x3ff0000, 0x7e, 0x7e, 0, 0, 0, 0 };
+const uint32_t TOR_ISDIGIT_TABLE[8] = { 0, 0x3ff0000, 0, 0, 0, 0, 0, 0 };
+const uint32_t TOR_ISPRINT_TABLE[8] =
+ { 0, 0xffffffff, 0xffffffff, 0x7fffffff, 0, 0, 0, 0x0 };
+const uint32_t TOR_ISUPPER_TABLE[8] = { 0, 0, 0x7fffffe, 0, 0, 0, 0, 0 };
+const uint32_t TOR_ISLOWER_TABLE[8] = { 0, 0, 0, 0x7fffffe, 0, 0, 0, 0 };
+
+/** Upper-casing and lowercasing tables to map characters to upper/lowercase
+ * equivalents. Used by tor_toupper() and tor_tolower(). */
+/**@{*/
+const uint8_t TOR_TOUPPER_TABLE[256] = {
+ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+ 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
+ 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
+ 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
+ 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
+ 96,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
+ 80,81,82,83,84,85,86,87,88,89,90,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
+};
+const uint8_t TOR_TOLOWER_TABLE[256] = {
+ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+ 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
+ 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
+ 64,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122,91,92,93,94,95,
+ 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
+};
+/**@}*/
diff --git a/src/lib/string/compat_ctype.h b/src/lib/string/compat_ctype.h
new file mode 100644
index 0000000000..dbddd356c1
--- /dev/null
+++ b/src/lib/string/compat_ctype.h
@@ -0,0 +1,67 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file compat_ctype.h
+ * \brief Locale-independent character-type inspection (header)
+ **/
+
+#ifndef TOR_COMPAT_CTYPE_H
+#define TOR_COMPAT_CTYPE_H
+
+#include "orconfig.h"
+#include "lib/cc/torint.h"
+
+/* Much of the time when we're checking ctypes, we're doing spec compliance,
+ * which all assumes we're doing ASCII. */
+#define DECLARE_CTYPE_FN(name) \
+ static int TOR_##name(char c); \
+ extern const uint32_t TOR_##name##_TABLE[]; \
+ static inline int TOR_##name(char c) { \
+ uint8_t u = c; \
+ return !!(TOR_##name##_TABLE[(u >> 5) & 7] & (1u << (u & 31))); \
+ }
+DECLARE_CTYPE_FN(ISALPHA)
+DECLARE_CTYPE_FN(ISALNUM)
+DECLARE_CTYPE_FN(ISSPACE)
+DECLARE_CTYPE_FN(ISDIGIT)
+DECLARE_CTYPE_FN(ISXDIGIT)
+DECLARE_CTYPE_FN(ISPRINT)
+DECLARE_CTYPE_FN(ISLOWER)
+DECLARE_CTYPE_FN(ISUPPER)
+extern const uint8_t TOR_TOUPPER_TABLE[];
+extern const uint8_t TOR_TOLOWER_TABLE[];
+#define TOR_TOLOWER(c) (TOR_TOLOWER_TABLE[(uint8_t)c])
+#define TOR_TOUPPER(c) (TOR_TOUPPER_TABLE[(uint8_t)c])
+
+static inline int hex_decode_digit(char c);
+
+/** Helper: given a hex digit, return its value, or -1 if it isn't hex. */
+static inline int
+hex_decode_digit(char c)
+{
+ switch (c) {
+ case '0': return 0;
+ case '1': return 1;
+ case '2': return 2;
+ case '3': return 3;
+ case '4': return 4;
+ case '5': return 5;
+ case '6': return 6;
+ case '7': return 7;
+ case '8': return 8;
+ case '9': return 9;
+ case 'A': case 'a': return 10;
+ case 'B': case 'b': return 11;
+ case 'C': case 'c': return 12;
+ case 'D': case 'd': return 13;
+ case 'E': case 'e': return 14;
+ case 'F': case 'f': return 15;
+ default:
+ return -1;
+ }
+}
+
+#endif /* !defined(TOR_COMPAT_CTYPE_H) */
diff --git a/src/lib/string/compat_string.c b/src/lib/string/compat_string.c
new file mode 100644
index 0000000000..e125c921a4
--- /dev/null
+++ b/src/lib/string/compat_string.c
@@ -0,0 +1,74 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file compat_string.c
+ * \brief Useful string-processing functions that some platforms don't
+ * provide.
+ **/
+
+#include "lib/string/compat_string.h"
+#include "lib/err/torerr.h"
+
+/* Inline the strl functions if the platform doesn't have them. */
+#ifndef HAVE_STRLCPY
+#include "strlcpy.c"
+#endif
+#ifndef HAVE_STRLCAT
+#include "strlcat.c"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+/** Helper for tor_strtok_r_impl: Advances cp past all characters in
+ * <b>sep</b>, and returns its new value. */
+static char *
+strtok_helper(char *cp, const char *sep)
+{
+ if (sep[1]) {
+ while (*cp && strchr(sep, *cp))
+ ++cp;
+ } else {
+ while (*cp && *cp == *sep)
+ ++cp;
+ }
+ return cp;
+}
+
+/** Implementation of strtok_r for platforms whose coders haven't figured out
+ * how to write one. Hey, retrograde libc developers! You can use this code
+ * here for free! */
+char *
+tor_strtok_r_impl(char *str, const char *sep, char **lasts)
+{
+ char *cp, *start;
+ raw_assert(*sep);
+ if (str) {
+ str = strtok_helper(str, sep);
+ if (!*str)
+ return NULL;
+ start = cp = *lasts = str;
+ } else if (!*lasts || !**lasts) {
+ return NULL;
+ } else {
+ start = cp = *lasts;
+ }
+
+ if (sep[1]) {
+ while (*cp && !strchr(sep, *cp))
+ ++cp;
+ } else {
+ cp = strchr(cp, *sep);
+ }
+
+ if (!cp || !*cp) {
+ *lasts = NULL;
+ } else {
+ *cp++ = '\0';
+ *lasts = strtok_helper(cp, sep);
+ }
+ return start;
+}
diff --git a/src/lib/string/compat_string.h b/src/lib/string/compat_string.h
new file mode 100644
index 0000000000..a0e37bb6dc
--- /dev/null
+++ b/src/lib/string/compat_string.h
@@ -0,0 +1,62 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file compat_string.h
+ * \brief Header for compat_string.c
+ **/
+
+#ifndef TOR_COMPAT_STRING_H
+#define TOR_COMPAT_STRING_H
+
+#include "orconfig.h"
+#include "lib/cc/compat_compiler.h"
+
+#include <stddef.h>
+
+/* ===== String compatibility */
+#ifdef _WIN32
+/* Windows doesn't have str(n)casecmp, but mingw defines it: only define it
+ * ourselves if it's missing. */
+#ifndef HAVE_STRNCASECMP
+static inline int strncasecmp(const char *a, const char *b, size_t n);
+static inline int strncasecmp(const char *a, const char *b, size_t n) {
+ return _strnicmp(a,b,n);
+}
+#endif
+#ifndef HAVE_STRCASECMP
+static inline int strcasecmp(const char *a, const char *b);
+static inline int strcasecmp(const char *a, const char *b) {
+ return _stricmp(a,b);
+}
+#endif
+#endif
+
+#if defined __APPLE__
+/* On OSX 10.9 and later, the overlap-checking code for strlcat would
+ * appear to have a severe bug that can sometimes cause aborts in Tor.
+ * Instead, use the non-checking variants. This is sad.
+ *
+ * See https://trac.torproject.org/projects/tor/ticket/15205
+ */
+#undef strlcat
+#undef strlcpy
+#endif /* defined __APPLE__ */
+
+#ifndef HAVE_STRLCAT
+size_t strlcat(char *dst, const char *src, size_t siz);
+#endif
+#ifndef HAVE_STRLCPY
+size_t strlcpy(char *dst, const char *src, size_t siz);
+#endif
+
+char *tor_strtok_r_impl(char *str, const char *sep, char **lasts);
+#ifdef HAVE_STRTOK_R
+#define tor_strtok_r(str, sep, lasts) strtok_r(str, sep, lasts)
+#else
+#define tor_strtok_r(str, sep, lasts) tor_strtok_r_impl(str, sep, lasts)
+#endif
+
+#endif
diff --git a/src/lib/string/include.am b/src/lib/string/include.am
new file mode 100644
index 0000000000..edd74b8a3e
--- /dev/null
+++ b/src/lib/string/include.am
@@ -0,0 +1,27 @@
+
+noinst_LIBRARIES += src/lib/libtor-string.a
+
+if UNITTESTS_ENABLED
+noinst_LIBRARIES += src/lib/libtor-string-testing.a
+endif
+
+src_lib_libtor_string_a_SOURCES = \
+ src/lib/string/compat_ctype.c \
+ src/lib/string/compat_string.c \
+ src/lib/string/util_string.c \
+ src/lib/string/parse_int.c \
+ src/lib/string/printf.c \
+ src/lib/string/scanf.c
+
+src_lib_libtor_string_testing_a_SOURCES = \
+ $(src_lib_libtor_string_a_SOURCES)
+src_lib_libtor_string_testing_a_CPPFLAGS = $(AM_CPPFLAGS) $(TEST_CPPFLAGS)
+src_lib_libtor_string_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS)
+
+noinst_HEADERS += \
+ src/lib/string/compat_ctype.h \
+ src/lib/string/compat_string.h \
+ src/lib/string/util_string.h \
+ src/lib/string/parse_int.h \
+ src/lib/string/printf.h \
+ src/lib/string/scanf.h
diff --git a/src/lib/string/parse_int.c b/src/lib/string/parse_int.c
new file mode 100644
index 0000000000..fbdd554a47
--- /dev/null
+++ b/src/lib/string/parse_int.c
@@ -0,0 +1,131 @@
+/* Copyright (c) 2003, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file parse_int.c
+ * \brief Convert strings into the integers they encode, with bounds checking.
+ **/
+
+#include "lib/string/parse_int.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Helper: common code to check whether the result of a strtol or strtoul or
+ * strtoll is correct. */
+#define CHECK_STRTOX_RESULT() \
+ /* Did an overflow occur? */ \
+ if (errno == ERANGE) \
+ goto err; \
+ /* Was at least one character converted? */ \
+ if (endptr == s) \
+ goto err; \
+ /* Were there unexpected unconverted characters? */ \
+ if (!next && *endptr) \
+ goto err; \
+ /* Illogical (max, min) inputs? */ \
+ if (max < min) \
+ goto err; \
+ /* Is r within limits? */ \
+ if (r < min || r > max) \
+ goto err; \
+ if (ok) *ok = 1; \
+ if (next) *next = endptr; \
+ return r; \
+ err: \
+ if (ok) *ok = 0; \
+ if (next) *next = endptr; \
+ return 0
+
+/** Extract a long from the start of <b>s</b>, in the given numeric
+ * <b>base</b>. If <b>base</b> is 0, <b>s</b> is parsed as a decimal,
+ * octal, or hex number in the syntax of a C integer literal. If
+ * there is unconverted data and <b>next</b> is provided, set
+ * *<b>next</b> to the first unconverted character. An error has
+ * occurred if no characters are converted; or if there are
+ * unconverted characters and <b>next</b> is NULL; or if the parsed
+ * value is not between <b>min</b> and <b>max</b>. When no error
+ * occurs, return the parsed value and set *<b>ok</b> (if provided) to
+ * 1. When an error occurs, return 0 and set *<b>ok</b> (if provided)
+ * to 0.
+ */
+long
+tor_parse_long(const char *s, int base, long min, long max,
+ int *ok, char **next)
+{
+ char *endptr;
+ long r;
+
+ if (base < 0) {
+ if (ok)
+ *ok = 0;
+ return 0;
+ }
+
+ errno = 0;
+ r = strtol(s, &endptr, base);
+ CHECK_STRTOX_RESULT();
+}
+
+/** As tor_parse_long(), but return an unsigned long. */
+unsigned long
+tor_parse_ulong(const char *s, int base, unsigned long min,
+ unsigned long max, int *ok, char **next)
+{
+ char *endptr;
+ unsigned long r;
+
+ if (base < 0) {
+ if (ok)
+ *ok = 0;
+ return 0;
+ }
+
+ errno = 0;
+ r = strtoul(s, &endptr, base);
+ CHECK_STRTOX_RESULT();
+}
+
+/** As tor_parse_long(), but return a double. */
+double
+tor_parse_double(const char *s, double min, double max, int *ok, char **next)
+{
+ char *endptr;
+ double r;
+
+ errno = 0;
+ r = strtod(s, &endptr);
+ CHECK_STRTOX_RESULT();
+}
+
+/** As tor_parse_long, but return a uint64_t. Only base 10 is guaranteed to
+ * work for now. */
+uint64_t
+tor_parse_uint64(const char *s, int base, uint64_t min,
+ uint64_t max, int *ok, char **next)
+{
+ char *endptr;
+ uint64_t r;
+
+ if (base < 0) {
+ if (ok)
+ *ok = 0;
+ return 0;
+ }
+
+ errno = 0;
+#ifdef HAVE_STRTOULL
+ r = (uint64_t)strtoull(s, &endptr, base);
+#elif defined(_WIN32)
+ r = (uint64_t)_strtoui64(s, &endptr, base);
+#elif SIZEOF_LONG == 8
+ r = (uint64_t)strtoul(s, &endptr, base);
+#else
+#error "I don't know how to parse 64-bit numbers."
+#endif /* defined(HAVE_STRTOULL) || ... */
+
+ CHECK_STRTOX_RESULT();
+}
diff --git a/src/lib/string/parse_int.h b/src/lib/string/parse_int.h
new file mode 100644
index 0000000000..925547942e
--- /dev/null
+++ b/src/lib/string/parse_int.h
@@ -0,0 +1,25 @@
+/* Copyright (c) 2003, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file parse_int.h
+ * \brief Header for parse_int.c
+ **/
+
+#ifndef TOR_PARSE_INT_H
+#define TOR_PARSE_INT_H
+
+#include "lib/cc/torint.h"
+
+long tor_parse_long(const char *s, int base, long min,
+ long max, int *ok, char **next);
+unsigned long tor_parse_ulong(const char *s, int base, unsigned long min,
+ unsigned long max, int *ok, char **next);
+double tor_parse_double(const char *s, double min, double max, int *ok,
+ char **next);
+uint64_t tor_parse_uint64(const char *s, int base, uint64_t min,
+ uint64_t max, int *ok, char **next);
+
+#endif
diff --git a/src/lib/string/printf.c b/src/lib/string/printf.c
new file mode 100644
index 0000000000..a5cb71ce09
--- /dev/null
+++ b/src/lib/string/printf.c
@@ -0,0 +1,167 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file printf.c
+ * \brief Compatibility wrappers around snprintf and its friends
+ **/
+
+#include "lib/string/printf.h"
+#include "lib/err/torerr.h"
+#include "lib/cc/torint.h"
+#include "lib/malloc/malloc.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/** Replacement for snprintf. Differs from platform snprintf in two
+ * ways: First, always NUL-terminates its output. Second, always
+ * returns -1 if the result is truncated. (Note that this return
+ * behavior does <i>not</i> conform to C99; it just happens to be
+ * easier to emulate "return -1" with conformant implementations than
+ * it is to emulate "return number that would be written" with
+ * non-conformant implementations.) */
+int
+tor_snprintf(char *str, size_t size, const char *format, ...)
+{
+ va_list ap;
+ int r;
+ va_start(ap,format);
+ r = tor_vsnprintf(str,size,format,ap);
+ va_end(ap);
+ return r;
+}
+
+/** Replacement for vsnprintf; behavior differs as tor_snprintf differs from
+ * snprintf.
+ */
+int
+tor_vsnprintf(char *str, size_t size, const char *format, va_list args)
+{
+ int r;
+ if (size == 0)
+ return -1; /* no place for the NUL */
+ if (size > SIZE_T_CEILING)
+ return -1;
+#ifdef _WIN32
+ r = _vsnprintf(str, size, format, args);
+#else
+ r = vsnprintf(str, size, format, args);
+#endif
+ str[size-1] = '\0';
+ if (r < 0 || r >= (ssize_t)size)
+ return -1;
+ return r;
+}
+
+/**
+ * Portable asprintf implementation. Does a printf() into a newly malloc'd
+ * string. Sets *<b>strp</b> to this string, and returns its length (not
+ * including the terminating NUL character).
+ *
+ * You can treat this function as if its implementation were something like
+ <pre>
+ char buf[_INFINITY_];
+ tor_snprintf(buf, sizeof(buf), fmt, args);
+ *strp = tor_strdup(buf);
+ return strlen(*strp):
+ </pre>
+ * Where _INFINITY_ is an imaginary constant so big that any string can fit
+ * into it.
+ */
+int
+tor_asprintf(char **strp, const char *fmt, ...)
+{
+ int r;
+ va_list args;
+ va_start(args, fmt);
+ r = tor_vasprintf(strp, fmt, args);
+ va_end(args);
+ if (!*strp || r < 0) {
+ /* LCOV_EXCL_START */
+ raw_assert_unreached_msg("Internal error in asprintf");
+ /* LCOV_EXCL_STOP */
+ }
+ return r;
+}
+
+/**
+ * Portable vasprintf implementation. Does a printf() into a newly malloc'd
+ * string. Differs from regular vasprintf in the same ways that
+ * tor_asprintf() differs from regular asprintf.
+ */
+int
+tor_vasprintf(char **strp, const char *fmt, va_list args)
+{
+ /* use a temporary variable in case *strp is in args. */
+ char *strp_tmp=NULL;
+#ifdef HAVE_VASPRINTF
+ /* If the platform gives us one, use it. */
+ int r = vasprintf(&strp_tmp, fmt, args);
+ if (r < 0)
+ *strp = NULL; // LCOV_EXCL_LINE -- no cross-platform way to force this
+ else
+ *strp = strp_tmp;
+ return r;
+#elif defined(HAVE__VSCPRINTF)
+ /* On Windows, _vsnprintf won't tell us the length of the string if it
+ * overflows, so we need to use _vcsprintf to tell how much to allocate */
+ int len, r;
+ va_list tmp_args;
+ va_copy(tmp_args, args);
+ len = _vscprintf(fmt, tmp_args);
+ va_end(tmp_args);
+ if (len < 0) {
+ *strp = NULL;
+ return -1;
+ }
+ strp_tmp = tor_malloc(len + 1);
+ r = _vsnprintf(strp_tmp, len+1, fmt, args);
+ if (r != len) {
+ tor_free(strp_tmp);
+ *strp = NULL;
+ return -1;
+ }
+ *strp = strp_tmp;
+ return len;
+#else
+ /* Everywhere else, we have a decent vsnprintf that tells us how many
+ * characters we need. We give it a try on a short buffer first, since
+ * it might be nice to avoid the second vsnprintf call.
+ */
+ /* XXXX This code spent a number of years broken (see bug 30651). It is
+ * possible that no Tor users actually run on systems without vasprintf() or
+ * _vscprintf(). If so, we should consider removing this code. */
+ char buf[128];
+ int len, r;
+ va_list tmp_args;
+ va_copy(tmp_args, args);
+ /* Use vsnprintf to retrieve needed length. tor_vsnprintf() is not an
+ * option here because it will simply return -1 if buf is not large enough
+ * to hold the complete string.
+ */
+ len = vsnprintf(buf, sizeof(buf), fmt, tmp_args);
+ va_end(tmp_args);
+ buf[sizeof(buf) - 1] = '\0';
+ if (len < 0) {
+ *strp = NULL;
+ return -1;
+ }
+ if (len < (int)sizeof(buf)) {
+ *strp = tor_strdup(buf);
+ return len;
+ }
+ strp_tmp = tor_malloc(len+1);
+ /* use of tor_vsnprintf() will ensure string is null terminated */
+ r = tor_vsnprintf(strp_tmp, len+1, fmt, args);
+ if (r != len) {
+ tor_free(strp_tmp);
+ *strp = NULL;
+ return -1;
+ }
+ *strp = strp_tmp;
+ return len;
+#endif /* defined(HAVE_VASPRINTF) || ... */
+}
diff --git a/src/lib/string/printf.h b/src/lib/string/printf.h
new file mode 100644
index 0000000000..2cc13d6bee
--- /dev/null
+++ b/src/lib/string/printf.h
@@ -0,0 +1,30 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file printf.h
+ * \brief Header for printf.c
+ **/
+
+#ifndef TOR_UTIL_PRINTF_H
+#define TOR_UTIL_PRINTF_H
+
+#include "orconfig.h"
+#include "lib/cc/compat_compiler.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+
+int tor_snprintf(char *str, size_t size, const char *format, ...)
+ CHECK_PRINTF(3,4);
+int tor_vsnprintf(char *str, size_t size, const char *format, va_list args)
+ CHECK_PRINTF(3,0);
+
+int tor_asprintf(char **strp, const char *fmt, ...)
+ CHECK_PRINTF(2,3);
+int tor_vasprintf(char **strp, const char *fmt, va_list args)
+ CHECK_PRINTF(2,0);
+
+#endif /* !defined(TOR_UTIL_STRING_H) */
diff --git a/src/lib/string/scanf.c b/src/lib/string/scanf.c
new file mode 100644
index 0000000000..1bc39b5182
--- /dev/null
+++ b/src/lib/string/scanf.c
@@ -0,0 +1,317 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file scanf.c
+ * \brief Locale-independent minimal implementation of sscanf().
+ **/
+
+#include "lib/string/scanf.h"
+#include "lib/string/compat_ctype.h"
+#include "lib/cc/torint.h"
+#include "lib/err/torerr.h"
+
+#include <stdlib.h>
+
+#define MAX_SCANF_WIDTH 9999
+
+/** Helper: given an ASCII-encoded decimal digit, return its numeric value.
+ * NOTE: requires that its input be in-bounds. */
+static int
+digit_to_num(char d)
+{
+ int num = ((int)d) - (int)'0';
+ raw_assert(num <= 9 && num >= 0);
+ return num;
+}
+
+/** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b>
+ * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On
+ * success, store the result in <b>out</b>, advance bufp to the next
+ * character, and return 0. On failure, return -1. */
+static int
+scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
+{
+ unsigned long result = 0;
+ int scanned_so_far = 0;
+ const int hex = base==16;
+ raw_assert(base == 10 || base == 16);
+ if (!bufp || !*bufp || !out)
+ return -1;
+ if (width<0)
+ width=MAX_SCANF_WIDTH;
+
+ while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp))
+ && scanned_so_far < width) {
+ unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++);
+ // Check for overflow beforehand, without actually causing any overflow
+ // This preserves functionality on compilers that don't wrap overflow
+ // (i.e. that trap or optimise away overflow)
+ // result * base + digit > ULONG_MAX
+ // result * base > ULONG_MAX - digit
+ if (result > (ULONG_MAX - digit)/base)
+ return -1; /* Processing this digit would overflow */
+ result = result * base + digit;
+ ++scanned_so_far;
+ }
+
+ if (!scanned_so_far) /* No actual digits scanned */
+ return -1;
+
+ *out = result;
+ return 0;
+}
+
+/** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b>
+ * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On
+ * success, store the result in <b>out</b>, advance bufp to the next
+ * character, and return 0. On failure, return -1. */
+static int
+scan_signed(const char **bufp, long *out, int width)
+{
+ int neg = 0;
+ unsigned long result = 0;
+
+ if (!bufp || !*bufp || !out)
+ return -1;
+ if (width<0)
+ width=MAX_SCANF_WIDTH;
+
+ if (**bufp == '-') {
+ neg = 1;
+ ++*bufp;
+ --width;
+ }
+
+ if (scan_unsigned(bufp, &result, width, 10) < 0)
+ return -1;
+
+ if (neg && result > 0) {
+ if (result > ((unsigned long)LONG_MAX) + 1)
+ return -1; /* Underflow */
+ else if (result == ((unsigned long)LONG_MAX) + 1)
+ *out = LONG_MIN;
+ else {
+ /* We once had a far more clever no-overflow conversion here, but
+ * some versions of GCC apparently ran it into the ground. Now
+ * we just check for LONG_MIN explicitly.
+ */
+ *out = -(long)result;
+ }
+ } else {
+ if (result > LONG_MAX)
+ return -1; /* Overflow */
+ *out = (long)result;
+ }
+
+ return 0;
+}
+
+/** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to
+ * <b>width</b> characters. (Handle arbitrary width if <b>width</b> is less
+ * than 0.) On success, store the result in <b>out</b>, advance bufp to the
+ * next character, and return 0. On failure, return -1. */
+static int
+scan_double(const char **bufp, double *out, int width)
+{
+ int neg = 0;
+ double result = 0;
+ int scanned_so_far = 0;
+
+ if (!bufp || !*bufp || !out)
+ return -1;
+ if (width<0)
+ width=MAX_SCANF_WIDTH;
+
+ if (**bufp == '-') {
+ neg = 1;
+ ++*bufp;
+ }
+
+ while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
+ const int digit = digit_to_num(*(*bufp)++);
+ result = result * 10 + digit;
+ ++scanned_so_far;
+ }
+ if (**bufp == '.') {
+ double fracval = 0, denominator = 1;
+ ++*bufp;
+ ++scanned_so_far;
+ while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
+ const int digit = digit_to_num(*(*bufp)++);
+ fracval = fracval * 10 + digit;
+ denominator *= 10;
+ ++scanned_so_far;
+ }
+ result += fracval / denominator;
+ }
+
+ if (!scanned_so_far) /* No actual digits scanned */
+ return -1;
+
+ *out = neg ? -result : result;
+ return 0;
+}
+
+/** Helper: copy up to <b>width</b> non-space characters from <b>bufp</b> to
+ * <b>out</b>. Make sure <b>out</b> is nul-terminated. Advance <b>bufp</b>
+ * to the next non-space character or the EOS. */
+static int
+scan_string(const char **bufp, char *out, int width)
+{
+ int scanned_so_far = 0;
+ if (!bufp || !out || width < 0)
+ return -1;
+ while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) {
+ *out++ = *(*bufp)++;
+ ++scanned_so_far;
+ }
+ *out = '\0';
+ return 0;
+}
+
+/** Locale-independent, minimal, no-surprises scanf variant, accepting only a
+ * restricted pattern format. For more info on what it supports, see
+ * tor_sscanf() documentation. */
+int
+tor_vsscanf(const char *buf, const char *pattern, va_list ap)
+{
+ int n_matched = 0;
+
+ while (*pattern) {
+ if (*pattern != '%') {
+ if (*buf == *pattern) {
+ ++buf;
+ ++pattern;
+ continue;
+ } else {
+ return n_matched;
+ }
+ } else {
+ int width = -1;
+ int longmod = 0;
+ ++pattern;
+ if (TOR_ISDIGIT(*pattern)) {
+ width = digit_to_num(*pattern++);
+ while (TOR_ISDIGIT(*pattern)) {
+ width *= 10;
+ width += digit_to_num(*pattern++);
+ if (width > MAX_SCANF_WIDTH)
+ return -1;
+ }
+ if (!width) /* No zero-width things. */
+ return -1;
+ }
+ if (*pattern == 'l') {
+ longmod = 1;
+ ++pattern;
+ }
+ if (*pattern == 'u' || *pattern == 'x') {
+ unsigned long u;
+ const int base = (*pattern == 'u') ? 10 : 16;
+ if (!*buf)
+ return n_matched;
+ if (scan_unsigned(&buf, &u, width, base)<0)
+ return n_matched;
+ if (longmod) {
+ unsigned long *out = va_arg(ap, unsigned long *);
+ *out = u;
+ } else {
+ unsigned *out = va_arg(ap, unsigned *);
+ if (u > UINT_MAX)
+ return n_matched;
+ *out = (unsigned) u;
+ }
+ ++pattern;
+ ++n_matched;
+ } else if (*pattern == 'f') {
+ double *d = va_arg(ap, double *);
+ if (!longmod)
+ return -1; /* float not supported */
+ if (!*buf)
+ return n_matched;
+ if (scan_double(&buf, d, width)<0)
+ return n_matched;
+ ++pattern;
+ ++n_matched;
+ } else if (*pattern == 'd') {
+ long lng=0;
+ if (scan_signed(&buf, &lng, width)<0)
+ return n_matched;
+ if (longmod) {
+ long *out = va_arg(ap, long *);
+ *out = lng;
+ } else {
+ int *out = va_arg(ap, int *);
+#if LONG_MAX > INT_MAX
+ if (lng < INT_MIN || lng > INT_MAX)
+ return n_matched;
+#endif
+ *out = (int)lng;
+ }
+ ++pattern;
+ ++n_matched;
+ } else if (*pattern == 's') {
+ char *s = va_arg(ap, char *);
+ if (longmod)
+ return -1;
+ if (width < 0)
+ return -1;
+ if (scan_string(&buf, s, width)<0)
+ return n_matched;
+ ++pattern;
+ ++n_matched;
+ } else if (*pattern == 'c') {
+ char *ch = va_arg(ap, char *);
+ if (longmod)
+ return -1;
+ if (width != -1)
+ return -1;
+ if (!*buf)
+ return n_matched;
+ *ch = *buf++;
+ ++pattern;
+ ++n_matched;
+ } else if (*pattern == '%') {
+ if (*buf != '%')
+ return n_matched;
+ if (longmod)
+ return -1;
+ ++buf;
+ ++pattern;
+ } else {
+ return -1; /* Unrecognized pattern component. */
+ }
+ }
+ }
+
+ return n_matched;
+}
+
+/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>
+ * and store the results in the corresponding argument fields. Differs from
+ * sscanf in that:
+ * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, and %c.
+ * <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1)
+ * <li>It does not handle arbitrarily long widths.
+ * <li>Numbers do not consume any space characters.
+ * <li>It is locale-independent.
+ * <li>%u and %x do not consume any space.
+ * <li>It returns -1 on malformed patterns.</ul>
+ *
+ * (As with other locale-independent functions, we need this to parse data that
+ * is in ASCII without worrying that the C library's locale-handling will make
+ * miscellaneous characters look like numbers, spaces, and so on.)
+ */
+int
+tor_sscanf(const char *buf, const char *pattern, ...)
+{
+ int r;
+ va_list ap;
+ va_start(ap, pattern);
+ r = tor_vsscanf(buf, pattern, ap);
+ va_end(ap);
+ return r;
+}
diff --git a/src/lib/string/scanf.h b/src/lib/string/scanf.h
new file mode 100644
index 0000000000..6673173de5
--- /dev/null
+++ b/src/lib/string/scanf.h
@@ -0,0 +1,24 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file scanf.h
+ * \brief Header for scanf.c
+ **/
+
+#ifndef TOR_UTIL_SCANF_H
+#define TOR_UTIL_SCANF_H
+
+#include "orconfig.h"
+#include "lib/cc/compat_compiler.h"
+
+#include <stdarg.h>
+
+int tor_vsscanf(const char *buf, const char *pattern, va_list ap) \
+ CHECK_SCANF(2, 0);
+int tor_sscanf(const char *buf, const char *pattern, ...)
+ CHECK_SCANF(2, 3);
+
+#endif /* !defined(TOR_UTIL_STRING_H) */
diff --git a/src/lib/string/util_string.c b/src/lib/string/util_string.c
new file mode 100644
index 0000000000..f934f66f02
--- /dev/null
+++ b/src/lib/string/util_string.c
@@ -0,0 +1,543 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file util_string.c
+ * \brief Non-standard string functions used throughout Tor.
+ **/
+
+#include "lib/string/util_string.h"
+#include "lib/string/compat_ctype.h"
+#include "lib/err/torerr.h"
+#include "lib/ctime/di_ops.h"
+#include "lib/defs/digest_sizes.h"
+
+#include <string.h>
+#include <stdlib.h>
+
+/** Given <b>hlen</b> bytes at <b>haystack</b> and <b>nlen</b> bytes at
+ * <b>needle</b>, return a pointer to the first occurrence of the needle
+ * within the haystack, or NULL if there is no such occurrence.
+ *
+ * This function is <em>not</em> timing-safe.
+ *
+ * Requires that <b>nlen</b> be greater than zero.
+ */
+const void *
+tor_memmem(const void *_haystack, size_t hlen,
+ const void *_needle, size_t nlen)
+{
+#if defined(HAVE_MEMMEM) && (!defined(__GNUC__) || __GNUC__ >= 2)
+ raw_assert(nlen);
+ return memmem(_haystack, hlen, _needle, nlen);
+#else
+ /* This isn't as fast as the GLIBC implementation, but it doesn't need to
+ * be. */
+ const char *p, *last_possible_start;
+ const char *haystack = (const char*)_haystack;
+ const char *needle = (const char*)_needle;
+ char first;
+ raw_assert(nlen);
+
+ if (nlen > hlen)
+ return NULL;
+
+ p = haystack;
+ /* Last position at which the needle could start. */
+ last_possible_start = haystack + hlen - nlen;
+ first = *(const char*)needle;
+ while ((p = memchr(p, first, last_possible_start + 1 - p))) {
+ if (fast_memeq(p, needle, nlen))
+ return p;
+ if (++p > last_possible_start) {
+ /* This comparison shouldn't be necessary, since if p was previously
+ * equal to last_possible_start, the next memchr call would be
+ * "memchr(p, first, 0)", which will return NULL. But it clarifies the
+ * logic. */
+ return NULL;
+ }
+ }
+ return NULL;
+#endif /* defined(HAVE_MEMMEM) && (!defined(__GNUC__) || __GNUC__ >= 2) */
+}
+
+const void *
+tor_memstr(const void *haystack, size_t hlen, const char *needle)
+{
+ return tor_memmem(haystack, hlen, needle, strlen(needle));
+}
+
+/** Return true iff the 'len' bytes at 'mem' are all zero. */
+int
+tor_mem_is_zero(const char *mem, size_t len)
+{
+ static const char ZERO[] = {
+ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+ };
+ while (len >= sizeof(ZERO)) {
+ /* It's safe to use fast_memcmp here, since the very worst thing an
+ * attacker could learn is how many initial bytes of a secret were zero */
+ if (fast_memcmp(mem, ZERO, sizeof(ZERO)))
+ return 0;
+ len -= sizeof(ZERO);
+ mem += sizeof(ZERO);
+ }
+ /* Deal with leftover bytes. */
+ if (len)
+ return fast_memeq(mem, ZERO, len);
+
+ return 1;
+}
+
+/** Return true iff the DIGEST_LEN bytes in digest are all zero. */
+int
+tor_digest_is_zero(const char *digest)
+{
+ static const uint8_t ZERO_DIGEST[] = {
+ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
+ };
+ return tor_memeq(digest, ZERO_DIGEST, DIGEST_LEN);
+}
+
+/** Return true iff the DIGEST256_LEN bytes in digest are all zero. */
+int
+tor_digest256_is_zero(const char *digest)
+{
+ return tor_mem_is_zero(digest, DIGEST256_LEN);
+}
+
+/** Remove from the string <b>s</b> every character which appears in
+ * <b>strip</b>. */
+void
+tor_strstrip(char *s, const char *strip)
+{
+ char *readp = s;
+ while (*readp) {
+ if (strchr(strip, *readp)) {
+ ++readp;
+ } else {
+ *s++ = *readp++;
+ }
+ }
+ *s = '\0';
+}
+
+/** Convert all alphabetic characters in the nul-terminated string <b>s</b> to
+ * lowercase. */
+void
+tor_strlower(char *s)
+{
+ while (*s) {
+ *s = TOR_TOLOWER(*s);
+ ++s;
+ }
+}
+
+/** Convert all alphabetic characters in the nul-terminated string <b>s</b> to
+ * lowercase. */
+void
+tor_strupper(char *s)
+{
+ while (*s) {
+ *s = TOR_TOUPPER(*s);
+ ++s;
+ }
+}
+
+/** Return 1 if every character in <b>s</b> is printable, else return 0.
+ */
+int
+tor_strisprint(const char *s)
+{
+ while (*s) {
+ if (!TOR_ISPRINT(*s))
+ return 0;
+ s++;
+ }
+ return 1;
+}
+
+/** Return 1 if no character in <b>s</b> is uppercase, else return 0.
+ */
+int
+tor_strisnonupper(const char *s)
+{
+ while (*s) {
+ if (TOR_ISUPPER(*s))
+ return 0;
+ s++;
+ }
+ return 1;
+}
+
+/** Return true iff every character in <b>s</b> is whitespace space; else
+ * return false. */
+int
+tor_strisspace(const char *s)
+{
+ while (*s) {
+ if (!TOR_ISSPACE(*s))
+ return 0;
+ s++;
+ }
+ return 1;
+}
+
+/** As strcmp, except that either string may be NULL. The NULL string is
+ * considered to be before any non-NULL string. */
+int
+strcmp_opt(const char *s1, const char *s2)
+{
+ if (!s1) {
+ if (!s2)
+ return 0;
+ else
+ return -1;
+ } else if (!s2) {
+ return 1;
+ } else {
+ return strcmp(s1, s2);
+ }
+}
+
+/** Compares the first strlen(s2) characters of s1 with s2. Returns as for
+ * strcmp.
+ */
+int
+strcmpstart(const char *s1, const char *s2)
+{
+ size_t n = strlen(s2);
+ return strncmp(s1, s2, n);
+}
+
+/** Compare the s1_len-byte string <b>s1</b> with <b>s2</b>,
+ * without depending on a terminating nul in s1. Sorting order is first by
+ * length, then lexically; return values are as for strcmp.
+ */
+int
+strcmp_len(const char *s1, const char *s2, size_t s1_len)
+{
+ size_t s2_len = strlen(s2);
+ if (s1_len < s2_len)
+ return -1;
+ if (s1_len > s2_len)
+ return 1;
+ return fast_memcmp(s1, s2, s2_len);
+}
+
+/** Compares the first strlen(s2) characters of s1 with s2. Returns as for
+ * strcasecmp.
+ */
+int
+strcasecmpstart(const char *s1, const char *s2)
+{
+ size_t n = strlen(s2);
+ return strncasecmp(s1, s2, n);
+}
+
+/** Compare the value of the string <b>prefix</b> with the start of the
+ * <b>memlen</b>-byte memory chunk at <b>mem</b>. Return as for strcmp.
+ *
+ * [As fast_memcmp(mem, prefix, strlen(prefix)) but returns -1 if memlen is
+ * less than strlen(prefix).]
+ */
+int
+fast_memcmpstart(const void *mem, size_t memlen,
+ const char *prefix)
+{
+ size_t plen = strlen(prefix);
+ if (memlen < plen)
+ return -1;
+ return fast_memcmp(mem, prefix, plen);
+}
+
+/** Compares the last strlen(s2) characters of s1 with s2. Returns as for
+ * strcmp.
+ */
+int
+strcmpend(const char *s1, const char *s2)
+{
+ size_t n1 = strlen(s1), n2 = strlen(s2);
+ if (n2>n1)
+ return strcmp(s1,s2);
+ else
+ return strncmp(s1+(n1-n2), s2, n2);
+}
+
+/** Compares the last strlen(s2) characters of s1 with s2. Returns as for
+ * strcasecmp.
+ */
+int
+strcasecmpend(const char *s1, const char *s2)
+{
+ size_t n1 = strlen(s1), n2 = strlen(s2);
+ if (n2>n1) /* then they can't be the same; figure out which is bigger */
+ return strcasecmp(s1,s2);
+ else
+ return strncasecmp(s1+(n1-n2), s2, n2);
+}
+
+/** Return a pointer to the first char of s that is not whitespace and
+ * not a comment, or to the terminating NUL if no such character exists.
+ */
+const char *
+eat_whitespace(const char *s)
+{
+ raw_assert(s);
+
+ while (1) {
+ switch (*s) {
+ case '\0':
+ default:
+ return s;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ ++s;
+ break;
+ case '#':
+ ++s;
+ while (*s && *s != '\n')
+ ++s;
+ }
+ }
+}
+
+/** Return a pointer to the first char of s that is not whitespace and
+ * not a comment, or to the terminating NUL if no such character exists.
+ */
+const char *
+eat_whitespace_eos(const char *s, const char *eos)
+{
+ raw_assert(s);
+ raw_assert(eos && s <= eos);
+
+ while (s < eos) {
+ switch (*s) {
+ case '\0':
+ default:
+ return s;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ ++s;
+ break;
+ case '#':
+ ++s;
+ while (s < eos && *s && *s != '\n')
+ ++s;
+ }
+ }
+ return s;
+}
+
+/** Return a pointer to the first char of s that is not a space or a tab
+ * or a \\r, or to the terminating NUL if no such character exists. */
+const char *
+eat_whitespace_no_nl(const char *s)
+{
+ while (*s == ' ' || *s == '\t' || *s == '\r')
+ ++s;
+ return s;
+}
+
+/** As eat_whitespace_no_nl, but stop at <b>eos</b> whether we have
+ * found a non-whitespace character or not. */
+const char *
+eat_whitespace_eos_no_nl(const char *s, const char *eos)
+{
+ while (s < eos && (*s == ' ' || *s == '\t' || *s == '\r'))
+ ++s;
+ return s;
+}
+
+/** Return a pointer to the first char of s that is whitespace or <b>#</b>,
+ * or to the terminating NUL if no such character exists.
+ */
+const char *
+find_whitespace(const char *s)
+{
+ /* tor_assert(s); */
+ while (1) {
+ switch (*s)
+ {
+ case '\0':
+ case '#':
+ case ' ':
+ case '\r':
+ case '\n':
+ case '\t':
+ return s;
+ default:
+ ++s;
+ }
+ }
+}
+
+/** As find_whitespace, but stop at <b>eos</b> whether we have found a
+ * whitespace or not. */
+const char *
+find_whitespace_eos(const char *s, const char *eos)
+{
+ /* tor_assert(s); */
+ while (s < eos) {
+ switch (*s)
+ {
+ case '\0':
+ case '#':
+ case ' ':
+ case '\r':
+ case '\n':
+ case '\t':
+ return s;
+ default:
+ ++s;
+ }
+ }
+ return s;
+}
+
+/** Return the first occurrence of <b>needle</b> in <b>haystack</b> that
+ * occurs at the start of a line (that is, at the beginning of <b>haystack</b>
+ * or immediately after a newline). Return NULL if no such string is found.
+ */
+const char *
+find_str_at_start_of_line(const char *haystack, const char *needle)
+{
+ size_t needle_len = strlen(needle);
+
+ do {
+ if (!strncmp(haystack, needle, needle_len))
+ return haystack;
+
+ haystack = strchr(haystack, '\n');
+ if (!haystack)
+ return NULL;
+ else
+ ++haystack;
+ } while (*haystack);
+
+ return NULL;
+}
+
+/** Returns true if <b>string</b> could be a C identifier.
+ A C identifier must begin with a letter or an underscore and the
+ rest of its characters can be letters, numbers or underscores. No
+ length limit is imposed. */
+int
+string_is_C_identifier(const char *string)
+{
+ size_t iter;
+ size_t length = strlen(string);
+ if (!length)
+ return 0;
+
+ for (iter = 0; iter < length ; iter++) {
+ if (iter == 0) {
+ if (!(TOR_ISALPHA(string[iter]) ||
+ string[iter] == '_'))
+ return 0;
+ } else {
+ if (!(TOR_ISALPHA(string[iter]) ||
+ TOR_ISDIGIT(string[iter]) ||
+ string[iter] == '_'))
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/** A byte with the top <b>x</b> bits set. */
+#define TOP_BITS(x) ((uint8_t)(0xFF << (8 - (x))))
+/** A byte with the lowest <b>x</b> bits set. */
+#define LOW_BITS(x) ((uint8_t)(0xFF >> (8 - (x))))
+
+/** Given the leading byte <b>b</b>, return the total number of bytes in the
+ * UTF-8 character. Returns 0 if it's an invalid leading byte.
+ */
+static uint8_t
+bytes_in_char(uint8_t b)
+{
+ if ((TOP_BITS(1) & b) == 0x00)
+ return 1; // a 1-byte UTF-8 char, aka ASCII
+ if ((TOP_BITS(3) & b) == TOP_BITS(2))
+ return 2; // a 2-byte UTF-8 char
+ if ((TOP_BITS(4) & b) == TOP_BITS(3))
+ return 3; // a 3-byte UTF-8 char
+ if ((TOP_BITS(5) & b) == TOP_BITS(4))
+ return 4; // a 4-byte UTF-8 char
+
+ // Invalid: either the top 2 bits are 10, or the top 5 bits are 11111.
+ return 0;
+}
+
+/** Returns true iff <b>b</b> is a UTF-8 continuation byte. */
+static bool
+is_continuation_byte(uint8_t b)
+{
+ uint8_t top2bits = b & TOP_BITS(2);
+ return top2bits == TOP_BITS(1);
+}
+
+/** Returns true iff the <b>len</b> bytes in <b>c</b> are a valid UTF-8
+ * character.
+ */
+static bool
+validate_char(const uint8_t *c, uint8_t len)
+{
+ if (len == 1)
+ return true; // already validated this is an ASCII char earlier.
+
+ uint8_t mask = LOW_BITS(7 - len); // bitmask for the leading byte.
+ uint32_t codepoint = c[0] & mask;
+
+ mask = LOW_BITS(6); // bitmask for continuation bytes.
+ for (uint8_t i = 1; i < len; i++) {
+ if (!is_continuation_byte(c[i]))
+ return false;
+ codepoint <<= 6;
+ codepoint |= (c[i] & mask);
+ }
+
+ if (len == 2 && codepoint <= 0x7f)
+ return false; // Invalid, overly long encoding, should have fit in 1 byte.
+
+ if (len == 3 && codepoint <= 0x7ff)
+ return false; // Invalid, overly long encoding, should have fit in 2 bytes.
+
+ if (len == 4 && codepoint <= 0xffff)
+ return false; // Invalid, overly long encoding, should have fit in 3 bytes.
+
+ if (codepoint >= 0xd800 && codepoint <= 0xdfff)
+ return false; // Invalid, reserved for UTF-16 surrogate pairs.
+
+ return codepoint <= 0x10ffff; // Check if within maximum.
+}
+
+/** Returns true iff the first <b>len</b> bytes in <b>str</b> are a
+ valid UTF-8 string. */
+int
+string_is_utf8(const char *str, size_t len)
+{
+ for (size_t i = 0; i < len;) {
+ uint8_t num_bytes = bytes_in_char(str[i]);
+ if (num_bytes == 0) // Invalid leading byte found.
+ return false;
+
+ size_t next_char = i + num_bytes;
+ if (next_char > len)
+ return false;
+
+ // Validate the continuation bytes in this multi-byte character,
+ // and advance to the next character in the string.
+ if (!validate_char((const uint8_t*)&str[i], num_bytes))
+ return false;
+ i = next_char;
+ }
+ return true;
+}
diff --git a/src/lib/string/util_string.h b/src/lib/string/util_string.h
new file mode 100644
index 0000000000..d9fbf8c61e
--- /dev/null
+++ b/src/lib/string/util_string.h
@@ -0,0 +1,57 @@
+/* Copyright (c) 2003-2004, Roger Dingledine
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2019, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file util_string.h
+ * \brief Header for util_string.c
+ **/
+
+#ifndef TOR_UTIL_STRING_H
+#define TOR_UTIL_STRING_H
+
+#include "orconfig.h"
+#include "lib/cc/compat_compiler.h"
+
+#include <stddef.h>
+
+const void *tor_memmem(const void *haystack, size_t hlen, const void *needle,
+ size_t nlen);
+const void *tor_memstr(const void *haystack, size_t hlen,
+ const char *needle);
+int tor_mem_is_zero(const char *mem, size_t len);
+int tor_digest_is_zero(const char *digest);
+int tor_digest256_is_zero(const char *digest);
+
+/** Allowable characters in a hexadecimal string. */
+#define HEX_CHARACTERS "0123456789ABCDEFabcdef"
+void tor_strlower(char *s);
+void tor_strupper(char *s);
+int tor_strisprint(const char *s);
+int tor_strisnonupper(const char *s);
+int tor_strisspace(const char *s);
+int strcmp_opt(const char *s1, const char *s2);
+int strcmpstart(const char *s1, const char *s2);
+int strcmp_len(const char *s1, const char *s2, size_t len);
+int strcasecmpstart(const char *s1, const char *s2);
+int strcmpend(const char *s1, const char *s2);
+int strcasecmpend(const char *s1, const char *s2);
+int fast_memcmpstart(const void *mem, size_t memlen, const char *prefix);
+
+void tor_strstrip(char *s, const char *strip);
+
+const char *eat_whitespace(const char *s);
+const char *eat_whitespace_eos(const char *s, const char *eos);
+const char *eat_whitespace_no_nl(const char *s);
+const char *eat_whitespace_eos_no_nl(const char *s, const char *eos);
+const char *find_whitespace(const char *s);
+const char *find_whitespace_eos(const char *s, const char *eos);
+const char *find_str_at_start_of_line(const char *haystack,
+ const char *needle);
+
+int string_is_C_identifier(const char *string);
+
+int string_is_utf8(const char *str, size_t len);
+
+#endif /* !defined(TOR_UTIL_STRING_H) */