aboutsummaryrefslogtreecommitdiff
path: root/src/lib/string
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/string')
-rw-r--r--src/lib/string/.may_include4
-rw-r--r--src/lib/string/compat_ctype.c3
-rw-r--r--src/lib/string/compat_ctype.h2
-rw-r--r--src/lib/string/compat_string.c6
-rw-r--r--src/lib/string/compat_string.h13
-rw-r--r--src/lib/string/include.am2
-rw-r--r--src/lib/string/lib_string.md13
-rw-r--r--src/lib/string/parse_int.c7
-rw-r--r--src/lib/string/parse_int.h4
-rw-r--r--src/lib/string/printf.c10
-rw-r--r--src/lib/string/printf.h4
-rw-r--r--src/lib/string/scanf.c2
-rw-r--r--src/lib/string/scanf.h4
-rw-r--r--src/lib/string/strings.md102
-rw-r--r--src/lib/string/util_string.c56
-rw-r--r--src/lib/string/util_string.h9
16 files changed, 190 insertions, 51 deletions
diff --git a/src/lib/string/.may_include b/src/lib/string/.may_include
index ec5c769831..1fb9127f19 100644
--- a/src/lib/string/.may_include
+++ b/src/lib/string/.may_include
@@ -6,5 +6,5 @@ lib/malloc/*.h
lib/ctime/*.h
lib/string/*.h
-strlcat.c
-strlcpy.c
+ext/strlcat.c
+ext/strlcpy.c
diff --git a/src/lib/string/compat_ctype.c b/src/lib/string/compat_ctype.c
index f5d82be3ae..a7668bfbfb 100644
--- a/src/lib/string/compat_ctype.c
+++ b/src/lib/string/compat_ctype.c
@@ -1,6 +1,6 @@
/* Copyright (c) 2003-2004, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
@@ -29,6 +29,7 @@ const uint32_t TOR_ISPRINT_TABLE[8] =
{ 0, 0xffffffff, 0xffffffff, 0x7fffffff, 0, 0, 0, 0x0 };
const uint32_t TOR_ISUPPER_TABLE[8] = { 0, 0, 0x7fffffe, 0, 0, 0, 0, 0 };
const uint32_t TOR_ISLOWER_TABLE[8] = { 0, 0, 0, 0x7fffffe, 0, 0, 0, 0 };
+/**@}*/
/** Upper-casing and lowercasing tables to map characters to upper/lowercase
* equivalents. Used by tor_toupper() and tor_tolower(). */
diff --git a/src/lib/string/compat_ctype.h b/src/lib/string/compat_ctype.h
index dbddd356c1..53ee6066f8 100644
--- a/src/lib/string/compat_ctype.h
+++ b/src/lib/string/compat_ctype.h
@@ -1,6 +1,6 @@
/* Copyright (c) 2003-2004, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
diff --git a/src/lib/string/compat_string.c b/src/lib/string/compat_string.c
index e125c921a4..2bd3c2f2b4 100644
--- a/src/lib/string/compat_string.c
+++ b/src/lib/string/compat_string.c
@@ -1,6 +1,6 @@
/* Copyright (c) 2003-2004, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
@@ -14,10 +14,10 @@
/* Inline the strl functions if the platform doesn't have them. */
#ifndef HAVE_STRLCPY
-#include "strlcpy.c"
+#include "ext/strlcpy.c"
#endif
#ifndef HAVE_STRLCAT
-#include "strlcat.c"
+#include "ext/strlcat.c"
#endif
#include <stdlib.h>
diff --git a/src/lib/string/compat_string.h b/src/lib/string/compat_string.h
index a0e37bb6dc..f05265bdcc 100644
--- a/src/lib/string/compat_string.h
+++ b/src/lib/string/compat_string.h
@@ -1,6 +1,6 @@
/* Copyright (c) 2003-2004, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
@@ -25,20 +25,23 @@ static inline int strncasecmp(const char *a, const char *b, size_t n);
static inline int strncasecmp(const char *a, const char *b, size_t n) {
return _strnicmp(a,b,n);
}
-#endif
+#endif /* !defined(HAVE_STRNCASECMP) */
#ifndef HAVE_STRCASECMP
static inline int strcasecmp(const char *a, const char *b);
static inline int strcasecmp(const char *a, const char *b) {
return _stricmp(a,b);
}
-#endif
-#endif
+#endif /* !defined(HAVE_STRCASECMP) */
+#endif /* defined(_WIN32) */
#if defined __APPLE__
/* On OSX 10.9 and later, the overlap-checking code for strlcat would
* appear to have a severe bug that can sometimes cause aborts in Tor.
* Instead, use the non-checking variants. This is sad.
*
+ * (If --enable-fragile-hardening is passed to configure, we use the hardened
+ * variants, which do not suffer from this issue.)
+ *
* See https://trac.torproject.org/projects/tor/ticket/15205
*/
#undef strlcat
@@ -59,4 +62,4 @@ char *tor_strtok_r_impl(char *str, const char *sep, char **lasts);
#define tor_strtok_r(str, sep, lasts) tor_strtok_r_impl(str, sep, lasts)
#endif
-#endif
+#endif /* !defined(TOR_COMPAT_STRING_H) */
diff --git a/src/lib/string/include.am b/src/lib/string/include.am
index edd74b8a3e..82d35cc5af 100644
--- a/src/lib/string/include.am
+++ b/src/lib/string/include.am
@@ -5,6 +5,7 @@ if UNITTESTS_ENABLED
noinst_LIBRARIES += src/lib/libtor-string-testing.a
endif
+# ADD_C_FILE: INSERT SOURCES HERE.
src_lib_libtor_string_a_SOURCES = \
src/lib/string/compat_ctype.c \
src/lib/string/compat_string.c \
@@ -18,6 +19,7 @@ src_lib_libtor_string_testing_a_SOURCES = \
src_lib_libtor_string_testing_a_CPPFLAGS = $(AM_CPPFLAGS) $(TEST_CPPFLAGS)
src_lib_libtor_string_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS)
+# ADD_C_FILE: INSERT HEADERS HERE.
noinst_HEADERS += \
src/lib/string/compat_ctype.h \
src/lib/string/compat_string.h \
diff --git a/src/lib/string/lib_string.md b/src/lib/string/lib_string.md
new file mode 100644
index 0000000000..98e3e652ed
--- /dev/null
+++ b/src/lib/string/lib_string.md
@@ -0,0 +1,13 @@
+@dir /lib/string
+@brief lib/string: Low-level string manipulation.
+
+We have a number of compatibility functions here: some are for handling
+functionality that is not implemented (or not implemented the same) on every
+platform; some are for providing locale-independent versions of libc
+functions that would otherwise be defined differently for different users.
+
+Other functions here are for common string-manipulation operations that we do
+in the rest of the codebase.
+
+Any string function high-level enough to need logging belongs in a
+higher-level module.
diff --git a/src/lib/string/parse_int.c b/src/lib/string/parse_int.c
index fbdd554a47..11ce0fa415 100644
--- a/src/lib/string/parse_int.c
+++ b/src/lib/string/parse_int.c
@@ -1,6 +1,6 @@
/* Copyright (c) 2003, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
@@ -9,6 +9,7 @@
**/
#include "lib/string/parse_int.h"
+#include "lib/cc/compat_compiler.h"
#include <errno.h>
#include <stdlib.h>
@@ -17,6 +18,7 @@
/* Helper: common code to check whether the result of a strtol or strtoul or
* strtoll is correct. */
#define CHECK_STRTOX_RESULT() \
+ STMT_BEGIN \
/* Did an overflow occur? */ \
if (errno == ERANGE) \
goto err; \
@@ -38,7 +40,8 @@
err: \
if (ok) *ok = 0; \
if (next) *next = endptr; \
- return 0
+ return 0; \
+ STMT_END
/** Extract a long from the start of <b>s</b>, in the given numeric
* <b>base</b>. If <b>base</b> is 0, <b>s</b> is parsed as a decimal,
diff --git a/src/lib/string/parse_int.h b/src/lib/string/parse_int.h
index 925547942e..27939ade61 100644
--- a/src/lib/string/parse_int.h
+++ b/src/lib/string/parse_int.h
@@ -1,6 +1,6 @@
/* Copyright (c) 2003, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
@@ -22,4 +22,4 @@ double tor_parse_double(const char *s, double min, double max, int *ok,
uint64_t tor_parse_uint64(const char *s, int base, uint64_t min,
uint64_t max, int *ok, char **next);
-#endif
+#endif /* !defined(TOR_PARSE_INT_H) */
diff --git a/src/lib/string/printf.c b/src/lib/string/printf.c
index a5cb71ce09..86d860935e 100644
--- a/src/lib/string/printf.c
+++ b/src/lib/string/printf.c
@@ -1,6 +1,6 @@
/* Copyright (c) 2003-2004, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
@@ -117,8 +117,8 @@ tor_vasprintf(char **strp, const char *fmt, va_list args)
*strp = NULL;
return -1;
}
- strp_tmp = tor_malloc(len + 1);
- r = _vsnprintf(strp_tmp, len+1, fmt, args);
+ strp_tmp = tor_malloc((size_t)len + 1);
+ r = _vsnprintf(strp_tmp, (size_t)len+1, fmt, args);
if (r != len) {
tor_free(strp_tmp);
*strp = NULL;
@@ -153,9 +153,9 @@ tor_vasprintf(char **strp, const char *fmt, va_list args)
*strp = tor_strdup(buf);
return len;
}
- strp_tmp = tor_malloc(len+1);
+ strp_tmp = tor_malloc((size_t)len+1);
/* use of tor_vsnprintf() will ensure string is null terminated */
- r = tor_vsnprintf(strp_tmp, len+1, fmt, args);
+ r = tor_vsnprintf(strp_tmp, (size_t)len+1, fmt, args);
if (r != len) {
tor_free(strp_tmp);
*strp = NULL;
diff --git a/src/lib/string/printf.h b/src/lib/string/printf.h
index 2cc13d6bee..5ab751b338 100644
--- a/src/lib/string/printf.h
+++ b/src/lib/string/printf.h
@@ -1,6 +1,6 @@
/* Copyright (c) 2003-2004, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
@@ -27,4 +27,4 @@ int tor_asprintf(char **strp, const char *fmt, ...)
int tor_vasprintf(char **strp, const char *fmt, va_list args)
CHECK_PRINTF(2,0);
-#endif /* !defined(TOR_UTIL_STRING_H) */
+#endif /* !defined(TOR_UTIL_PRINTF_H) */
diff --git a/src/lib/string/scanf.c b/src/lib/string/scanf.c
index 1bc39b5182..89d1683204 100644
--- a/src/lib/string/scanf.c
+++ b/src/lib/string/scanf.c
@@ -1,6 +1,6 @@
/* Copyright (c) 2003-2004, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
diff --git a/src/lib/string/scanf.h b/src/lib/string/scanf.h
index 6673173de5..67e9c5eb78 100644
--- a/src/lib/string/scanf.h
+++ b/src/lib/string/scanf.h
@@ -1,6 +1,6 @@
/* Copyright (c) 2003-2004, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
@@ -21,4 +21,4 @@ int tor_vsscanf(const char *buf, const char *pattern, va_list ap) \
int tor_sscanf(const char *buf, const char *pattern, ...)
CHECK_SCANF(2, 3);
-#endif /* !defined(TOR_UTIL_STRING_H) */
+#endif /* !defined(TOR_UTIL_SCANF_H) */
diff --git a/src/lib/string/strings.md b/src/lib/string/strings.md
new file mode 100644
index 0000000000..b22574a05a
--- /dev/null
+++ b/src/lib/string/strings.md
@@ -0,0 +1,102 @@
+
+@page strings String processing in Tor
+
+Since you're reading about a C program, you probably expected this
+section: it's full of functions for manipulating the (notoriously
+dubious) C string abstraction. I'll describe some often-missed
+highlights here.
+
+### Comparing strings and memory chunks ###
+
+We provide strcmpstart() and strcmpend() to perform a strcmp with the start
+or end of a string.
+
+ tor_assert(!strcmpstart("Hello world","Hello"));
+ tor_assert(!strcmpend("Hello world","world"));
+
+ tor_assert(!strcasecmpstart("HELLO WORLD","Hello"));
+ tor_assert(!strcasecmpend("HELLO WORLD","world"));
+
+To compare two string pointers, either of which might be NULL, use
+strcmp_opt().
+
+To search for a string or a chunk of memory within a non-null
+terminated memory block, use tor_memstr or tor_memmem respectively.
+
+We avoid using memcmp() directly, since it tends to be used in cases
+when having a constant-time operation would be better. Instead, we
+recommend tor_memeq() and tor_memneq() for when you need a
+constant-time operation. In cases when you need a fast comparison,
+and timing leaks are not a danger, you can use fast_memeq() and
+fast_memneq().
+
+It's a common pattern to take a string representing one or more lines
+of text, and search within it for some other string, at the start of a
+line. You could search for "\\ntarget", but that would miss the first
+line. Instead, use find_str_at_start_of_line.
+
+### Parsing text ###
+
+Over the years, we have accumulated lots of ways to parse text --
+probably too many. Refactoring them to be safer and saner could be a
+good project! The one that seems most error-resistant is tokenizing
+text with smartlist_split_strings(). This function takes a smartlist,
+a string, and a separator, and splits the string along occurrences of
+the separator, adding new strings for the sub-elements to the given
+smartlist.
+
+To handle time, you can use one of the functions mentioned above in
+"Parsing and encoding time values".
+
+For numbers in general, use the tor_parse_{long,ulong,double,uint64}
+family of functions. Each of these can be called in a few ways. The
+most general is as follows:
+
+ const int BASE = 10;
+ const int MINVAL = 10, MAXVAL = 10000;
+ const char *next;
+ int ok;
+ long lng = tor_parse_long("100", BASE, MINVAL, MAXVAL, &ok, &next);
+
+The return value should be ignored if "ok" is set to false. The input
+string needs to contain an entire number, or it's considered
+invalid... unless the "next" pointer is available, in which case extra
+characters at the end are allowed, and "next" is set to point to the
+first such character.
+
+### Generating blocks of text ###
+
+For not-too-large blocks of text, we provide tor_asprintf(), which
+behaves like other members of the sprintf() family, except that it
+always allocates enough memory on the heap for its output.
+
+For larger blocks: Rather than using strlcat and strlcpy to build
+text, or keeping pointers to the interior of a memory block, we
+recommend that you use the smartlist_* functions to build a smartlist
+full of substrings in order. Then you can concatenate them into a
+single string with smartlist_join_strings(), which also takes optional
+separator and terminator arguments.
+
+Alternatively, you might find it more convenient (and more
+allocation-efficient) to use the buffer API in buffers.c: Construct a buf_t
+object, add your data to it with buf_add_string(), buf_add_printf(), and so
+on, then call buf_extract() to get the resulting output.
+
+As a convenience, we provide smartlist_add_asprintf(), which combines
+the two methods above together. Many of the cryptographic digest
+functions also accept a not-yet-concatenated smartlist of strings.
+
+### Logging helpers ###
+
+Often we'd like to log a value that comes from an untrusted source.
+To do this, use escaped() to escape the nonprintable characters and
+other confusing elements in a string, and surround it by quotes. (Use
+esc_for_log() if you need to allocate a new string.)
+
+It's also handy to put memory chunks into hexadecimal before logging;
+you can use hex_str(memory, length) for that.
+
+The escaped() and hex_str() functions both provide outputs that are
+only valid till they are next invoked; they are not threadsafe.
+
+*/
diff --git a/src/lib/string/util_string.c b/src/lib/string/util_string.c
index f934f66f02..c8f12d780e 100644
--- a/src/lib/string/util_string.c
+++ b/src/lib/string/util_string.c
@@ -1,6 +1,6 @@
/* Copyright (c) 2003-2004, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
@@ -71,7 +71,7 @@ tor_memstr(const void *haystack, size_t hlen, const char *needle)
/** Return true iff the 'len' bytes at 'mem' are all zero. */
int
-tor_mem_is_zero(const char *mem, size_t len)
+fast_mem_is_zero(const char *mem, size_t len)
{
static const char ZERO[] = {
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
@@ -95,17 +95,14 @@ tor_mem_is_zero(const char *mem, size_t len)
int
tor_digest_is_zero(const char *digest)
{
- static const uint8_t ZERO_DIGEST[] = {
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
- };
- return tor_memeq(digest, ZERO_DIGEST, DIGEST_LEN);
+ return safe_mem_is_zero(digest, DIGEST_LEN);
}
/** Return true iff the DIGEST256_LEN bytes in digest are all zero. */
int
tor_digest256_is_zero(const char *digest)
{
- return tor_mem_is_zero(digest, DIGEST256_LEN);
+ return safe_mem_is_zero(digest, DIGEST256_LEN);
}
/** Remove from the string <b>s</b> every character which appears in
@@ -212,21 +209,6 @@ strcmpstart(const char *s1, const char *s2)
return strncmp(s1, s2, n);
}
-/** Compare the s1_len-byte string <b>s1</b> with <b>s2</b>,
- * without depending on a terminating nul in s1. Sorting order is first by
- * length, then lexically; return values are as for strcmp.
- */
-int
-strcmp_len(const char *s1, const char *s2, size_t s1_len)
-{
- size_t s2_len = strlen(s2);
- if (s1_len < s2_len)
- return -1;
- if (s1_len > s2_len)
- return 1;
- return fast_memcmp(s1, s2, s2_len);
-}
-
/** Compares the first strlen(s2) characters of s1 with s2. Returns as for
* strcasecmp.
*/
@@ -524,6 +506,23 @@ validate_char(const uint8_t *c, uint8_t len)
int
string_is_utf8(const char *str, size_t len)
{
+ // If str is NULL, don't try to read it
+ if (!str) {
+ // We could test for this case, but the low-level logs would produce
+ // confusing test output.
+ // LCOV_EXCL_START
+ if (len) {
+ // Use the low-level logging function, so that the log module can
+ // validate UTF-8 (if needed in future code)
+ tor_log_err_sigsafe(
+ "BUG: string_is_utf8() called with NULL str but non-zero len.");
+ // Since it's a bug, we should probably reject this string
+ return false;
+ }
+ // LCOV_EXCL_STOP
+ return true;
+ }
+
for (size_t i = 0; i < len;) {
uint8_t num_bytes = bytes_in_char(str[i]);
if (num_bytes == 0) // Invalid leading byte found.
@@ -541,3 +540,16 @@ string_is_utf8(const char *str, size_t len)
}
return true;
}
+
+/** As string_is_utf8(), but returns false if the string begins with a UTF-8
+ * byte order mark (BOM).
+ */
+int
+string_is_utf8_no_bom(const char *str, size_t len)
+{
+ if (str && len >= 3 && (!strcmpstart(str, "\uFEFF") ||
+ !strcmpstart(str, "\uFFFE"))) {
+ return false;
+ }
+ return string_is_utf8(str, len);
+}
diff --git a/src/lib/string/util_string.h b/src/lib/string/util_string.h
index d9fbf8c61e..e89233df88 100644
--- a/src/lib/string/util_string.h
+++ b/src/lib/string/util_string.h
@@ -1,6 +1,6 @@
/* Copyright (c) 2003-2004, Roger Dingledine
* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2019, The Tor Project, Inc. */
+ * Copyright (c) 2007-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
@@ -20,7 +20,10 @@ const void *tor_memmem(const void *haystack, size_t hlen, const void *needle,
size_t nlen);
const void *tor_memstr(const void *haystack, size_t hlen,
const char *needle);
-int tor_mem_is_zero(const char *mem, size_t len);
+int fast_mem_is_zero(const char *mem, size_t len);
+#define fast_digest_is_zero(d) fast_mem_is_zero((d), DIGEST_LEN)
+#define fast_digetst256_is_zero(d) fast_mem_is_zero((d), DIGEST256_LEN)
+
int tor_digest_is_zero(const char *digest);
int tor_digest256_is_zero(const char *digest);
@@ -33,7 +36,6 @@ int tor_strisnonupper(const char *s);
int tor_strisspace(const char *s);
int strcmp_opt(const char *s1, const char *s2);
int strcmpstart(const char *s1, const char *s2);
-int strcmp_len(const char *s1, const char *s2, size_t len);
int strcasecmpstart(const char *s1, const char *s2);
int strcmpend(const char *s1, const char *s2);
int strcasecmpend(const char *s1, const char *s2);
@@ -53,5 +55,6 @@ const char *find_str_at_start_of_line(const char *haystack,
int string_is_C_identifier(const char *string);
int string_is_utf8(const char *str, size_t len);
+int string_is_utf8_no_bom(const char *str, size_t len);
#endif /* !defined(TOR_UTIL_STRING_H) */