diff options
Diffstat (limited to 'src/lib/string')
-rw-r--r-- | src/lib/string/.may_include | 4 | ||||
-rw-r--r-- | src/lib/string/compat_ctype.c | 3 | ||||
-rw-r--r-- | src/lib/string/compat_ctype.h | 2 | ||||
-rw-r--r-- | src/lib/string/compat_string.c | 6 | ||||
-rw-r--r-- | src/lib/string/compat_string.h | 13 | ||||
-rw-r--r-- | src/lib/string/include.am | 2 | ||||
-rw-r--r-- | src/lib/string/lib_string.md | 13 | ||||
-rw-r--r-- | src/lib/string/parse_int.c | 7 | ||||
-rw-r--r-- | src/lib/string/parse_int.h | 4 | ||||
-rw-r--r-- | src/lib/string/printf.c | 10 | ||||
-rw-r--r-- | src/lib/string/printf.h | 4 | ||||
-rw-r--r-- | src/lib/string/scanf.c | 2 | ||||
-rw-r--r-- | src/lib/string/scanf.h | 4 | ||||
-rw-r--r-- | src/lib/string/strings.md | 102 | ||||
-rw-r--r-- | src/lib/string/util_string.c | 56 | ||||
-rw-r--r-- | src/lib/string/util_string.h | 9 |
16 files changed, 190 insertions, 51 deletions
diff --git a/src/lib/string/.may_include b/src/lib/string/.may_include index ec5c769831..1fb9127f19 100644 --- a/src/lib/string/.may_include +++ b/src/lib/string/.may_include @@ -6,5 +6,5 @@ lib/malloc/*.h lib/ctime/*.h lib/string/*.h -strlcat.c -strlcpy.c +ext/strlcat.c +ext/strlcpy.c diff --git a/src/lib/string/compat_ctype.c b/src/lib/string/compat_ctype.c index f5d82be3ae..a7668bfbfb 100644 --- a/src/lib/string/compat_ctype.c +++ b/src/lib/string/compat_ctype.c @@ -1,6 +1,6 @@ /* Copyright (c) 2003-2004, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -29,6 +29,7 @@ const uint32_t TOR_ISPRINT_TABLE[8] = { 0, 0xffffffff, 0xffffffff, 0x7fffffff, 0, 0, 0, 0x0 }; const uint32_t TOR_ISUPPER_TABLE[8] = { 0, 0, 0x7fffffe, 0, 0, 0, 0, 0 }; const uint32_t TOR_ISLOWER_TABLE[8] = { 0, 0, 0, 0x7fffffe, 0, 0, 0, 0 }; +/**@}*/ /** Upper-casing and lowercasing tables to map characters to upper/lowercase * equivalents. Used by tor_toupper() and tor_tolower(). */ diff --git a/src/lib/string/compat_ctype.h b/src/lib/string/compat_ctype.h index dbddd356c1..53ee6066f8 100644 --- a/src/lib/string/compat_ctype.h +++ b/src/lib/string/compat_ctype.h @@ -1,6 +1,6 @@ /* Copyright (c) 2003-2004, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** diff --git a/src/lib/string/compat_string.c b/src/lib/string/compat_string.c index e125c921a4..2bd3c2f2b4 100644 --- a/src/lib/string/compat_string.c +++ b/src/lib/string/compat_string.c @@ -1,6 +1,6 @@ /* Copyright (c) 2003-2004, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -14,10 +14,10 @@ /* Inline the strl functions if the platform doesn't have them. */ #ifndef HAVE_STRLCPY -#include "strlcpy.c" +#include "ext/strlcpy.c" #endif #ifndef HAVE_STRLCAT -#include "strlcat.c" +#include "ext/strlcat.c" #endif #include <stdlib.h> diff --git a/src/lib/string/compat_string.h b/src/lib/string/compat_string.h index a0e37bb6dc..f05265bdcc 100644 --- a/src/lib/string/compat_string.h +++ b/src/lib/string/compat_string.h @@ -1,6 +1,6 @@ /* Copyright (c) 2003-2004, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -25,20 +25,23 @@ static inline int strncasecmp(const char *a, const char *b, size_t n); static inline int strncasecmp(const char *a, const char *b, size_t n) { return _strnicmp(a,b,n); } -#endif +#endif /* !defined(HAVE_STRNCASECMP) */ #ifndef HAVE_STRCASECMP static inline int strcasecmp(const char *a, const char *b); static inline int strcasecmp(const char *a, const char *b) { return _stricmp(a,b); } -#endif -#endif +#endif /* !defined(HAVE_STRCASECMP) */ +#endif /* defined(_WIN32) */ #if defined __APPLE__ /* On OSX 10.9 and later, the overlap-checking code for strlcat would * appear to have a severe bug that can sometimes cause aborts in Tor. * Instead, use the non-checking variants. This is sad. * + * (If --enable-fragile-hardening is passed to configure, we use the hardened + * variants, which do not suffer from this issue.) + * * See https://trac.torproject.org/projects/tor/ticket/15205 */ #undef strlcat @@ -59,4 +62,4 @@ char *tor_strtok_r_impl(char *str, const char *sep, char **lasts); #define tor_strtok_r(str, sep, lasts) tor_strtok_r_impl(str, sep, lasts) #endif -#endif +#endif /* !defined(TOR_COMPAT_STRING_H) */ diff --git a/src/lib/string/include.am b/src/lib/string/include.am index edd74b8a3e..82d35cc5af 100644 --- a/src/lib/string/include.am +++ b/src/lib/string/include.am @@ -5,6 +5,7 @@ if UNITTESTS_ENABLED noinst_LIBRARIES += src/lib/libtor-string-testing.a endif +# ADD_C_FILE: INSERT SOURCES HERE. src_lib_libtor_string_a_SOURCES = \ src/lib/string/compat_ctype.c \ src/lib/string/compat_string.c \ @@ -18,6 +19,7 @@ src_lib_libtor_string_testing_a_SOURCES = \ src_lib_libtor_string_testing_a_CPPFLAGS = $(AM_CPPFLAGS) $(TEST_CPPFLAGS) src_lib_libtor_string_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS) +# ADD_C_FILE: INSERT HEADERS HERE. noinst_HEADERS += \ src/lib/string/compat_ctype.h \ src/lib/string/compat_string.h \ diff --git a/src/lib/string/lib_string.md b/src/lib/string/lib_string.md new file mode 100644 index 0000000000..98e3e652ed --- /dev/null +++ b/src/lib/string/lib_string.md @@ -0,0 +1,13 @@ +@dir /lib/string +@brief lib/string: Low-level string manipulation. + +We have a number of compatibility functions here: some are for handling +functionality that is not implemented (or not implemented the same) on every +platform; some are for providing locale-independent versions of libc +functions that would otherwise be defined differently for different users. + +Other functions here are for common string-manipulation operations that we do +in the rest of the codebase. + +Any string function high-level enough to need logging belongs in a +higher-level module. diff --git a/src/lib/string/parse_int.c b/src/lib/string/parse_int.c index fbdd554a47..11ce0fa415 100644 --- a/src/lib/string/parse_int.c +++ b/src/lib/string/parse_int.c @@ -1,6 +1,6 @@ /* Copyright (c) 2003, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -9,6 +9,7 @@ **/ #include "lib/string/parse_int.h" +#include "lib/cc/compat_compiler.h" #include <errno.h> #include <stdlib.h> @@ -17,6 +18,7 @@ /* Helper: common code to check whether the result of a strtol or strtoul or * strtoll is correct. */ #define CHECK_STRTOX_RESULT() \ + STMT_BEGIN \ /* Did an overflow occur? */ \ if (errno == ERANGE) \ goto err; \ @@ -38,7 +40,8 @@ err: \ if (ok) *ok = 0; \ if (next) *next = endptr; \ - return 0 + return 0; \ + STMT_END /** Extract a long from the start of <b>s</b>, in the given numeric * <b>base</b>. If <b>base</b> is 0, <b>s</b> is parsed as a decimal, diff --git a/src/lib/string/parse_int.h b/src/lib/string/parse_int.h index 925547942e..27939ade61 100644 --- a/src/lib/string/parse_int.h +++ b/src/lib/string/parse_int.h @@ -1,6 +1,6 @@ /* Copyright (c) 2003, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -22,4 +22,4 @@ double tor_parse_double(const char *s, double min, double max, int *ok, uint64_t tor_parse_uint64(const char *s, int base, uint64_t min, uint64_t max, int *ok, char **next); -#endif +#endif /* !defined(TOR_PARSE_INT_H) */ diff --git a/src/lib/string/printf.c b/src/lib/string/printf.c index a5cb71ce09..86d860935e 100644 --- a/src/lib/string/printf.c +++ b/src/lib/string/printf.c @@ -1,6 +1,6 @@ /* Copyright (c) 2003-2004, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -117,8 +117,8 @@ tor_vasprintf(char **strp, const char *fmt, va_list args) *strp = NULL; return -1; } - strp_tmp = tor_malloc(len + 1); - r = _vsnprintf(strp_tmp, len+1, fmt, args); + strp_tmp = tor_malloc((size_t)len + 1); + r = _vsnprintf(strp_tmp, (size_t)len+1, fmt, args); if (r != len) { tor_free(strp_tmp); *strp = NULL; @@ -153,9 +153,9 @@ tor_vasprintf(char **strp, const char *fmt, va_list args) *strp = tor_strdup(buf); return len; } - strp_tmp = tor_malloc(len+1); + strp_tmp = tor_malloc((size_t)len+1); /* use of tor_vsnprintf() will ensure string is null terminated */ - r = tor_vsnprintf(strp_tmp, len+1, fmt, args); + r = tor_vsnprintf(strp_tmp, (size_t)len+1, fmt, args); if (r != len) { tor_free(strp_tmp); *strp = NULL; diff --git a/src/lib/string/printf.h b/src/lib/string/printf.h index 2cc13d6bee..5ab751b338 100644 --- a/src/lib/string/printf.h +++ b/src/lib/string/printf.h @@ -1,6 +1,6 @@ /* Copyright (c) 2003-2004, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -27,4 +27,4 @@ int tor_asprintf(char **strp, const char *fmt, ...) int tor_vasprintf(char **strp, const char *fmt, va_list args) CHECK_PRINTF(2,0); -#endif /* !defined(TOR_UTIL_STRING_H) */ +#endif /* !defined(TOR_UTIL_PRINTF_H) */ diff --git a/src/lib/string/scanf.c b/src/lib/string/scanf.c index 1bc39b5182..89d1683204 100644 --- a/src/lib/string/scanf.c +++ b/src/lib/string/scanf.c @@ -1,6 +1,6 @@ /* Copyright (c) 2003-2004, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** diff --git a/src/lib/string/scanf.h b/src/lib/string/scanf.h index 6673173de5..67e9c5eb78 100644 --- a/src/lib/string/scanf.h +++ b/src/lib/string/scanf.h @@ -1,6 +1,6 @@ /* Copyright (c) 2003-2004, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -21,4 +21,4 @@ int tor_vsscanf(const char *buf, const char *pattern, va_list ap) \ int tor_sscanf(const char *buf, const char *pattern, ...) CHECK_SCANF(2, 3); -#endif /* !defined(TOR_UTIL_STRING_H) */ +#endif /* !defined(TOR_UTIL_SCANF_H) */ diff --git a/src/lib/string/strings.md b/src/lib/string/strings.md new file mode 100644 index 0000000000..b22574a05a --- /dev/null +++ b/src/lib/string/strings.md @@ -0,0 +1,102 @@ + +@page strings String processing in Tor + +Since you're reading about a C program, you probably expected this +section: it's full of functions for manipulating the (notoriously +dubious) C string abstraction. I'll describe some often-missed +highlights here. + +### Comparing strings and memory chunks ### + +We provide strcmpstart() and strcmpend() to perform a strcmp with the start +or end of a string. + + tor_assert(!strcmpstart("Hello world","Hello")); + tor_assert(!strcmpend("Hello world","world")); + + tor_assert(!strcasecmpstart("HELLO WORLD","Hello")); + tor_assert(!strcasecmpend("HELLO WORLD","world")); + +To compare two string pointers, either of which might be NULL, use +strcmp_opt(). + +To search for a string or a chunk of memory within a non-null +terminated memory block, use tor_memstr or tor_memmem respectively. + +We avoid using memcmp() directly, since it tends to be used in cases +when having a constant-time operation would be better. Instead, we +recommend tor_memeq() and tor_memneq() for when you need a +constant-time operation. In cases when you need a fast comparison, +and timing leaks are not a danger, you can use fast_memeq() and +fast_memneq(). + +It's a common pattern to take a string representing one or more lines +of text, and search within it for some other string, at the start of a +line. You could search for "\\ntarget", but that would miss the first +line. Instead, use find_str_at_start_of_line. + +### Parsing text ### + +Over the years, we have accumulated lots of ways to parse text -- +probably too many. Refactoring them to be safer and saner could be a +good project! The one that seems most error-resistant is tokenizing +text with smartlist_split_strings(). This function takes a smartlist, +a string, and a separator, and splits the string along occurrences of +the separator, adding new strings for the sub-elements to the given +smartlist. + +To handle time, you can use one of the functions mentioned above in +"Parsing and encoding time values". + +For numbers in general, use the tor_parse_{long,ulong,double,uint64} +family of functions. Each of these can be called in a few ways. The +most general is as follows: + + const int BASE = 10; + const int MINVAL = 10, MAXVAL = 10000; + const char *next; + int ok; + long lng = tor_parse_long("100", BASE, MINVAL, MAXVAL, &ok, &next); + +The return value should be ignored if "ok" is set to false. The input +string needs to contain an entire number, or it's considered +invalid... unless the "next" pointer is available, in which case extra +characters at the end are allowed, and "next" is set to point to the +first such character. + +### Generating blocks of text ### + +For not-too-large blocks of text, we provide tor_asprintf(), which +behaves like other members of the sprintf() family, except that it +always allocates enough memory on the heap for its output. + +For larger blocks: Rather than using strlcat and strlcpy to build +text, or keeping pointers to the interior of a memory block, we +recommend that you use the smartlist_* functions to build a smartlist +full of substrings in order. Then you can concatenate them into a +single string with smartlist_join_strings(), which also takes optional +separator and terminator arguments. + +Alternatively, you might find it more convenient (and more +allocation-efficient) to use the buffer API in buffers.c: Construct a buf_t +object, add your data to it with buf_add_string(), buf_add_printf(), and so +on, then call buf_extract() to get the resulting output. + +As a convenience, we provide smartlist_add_asprintf(), which combines +the two methods above together. Many of the cryptographic digest +functions also accept a not-yet-concatenated smartlist of strings. + +### Logging helpers ### + +Often we'd like to log a value that comes from an untrusted source. +To do this, use escaped() to escape the nonprintable characters and +other confusing elements in a string, and surround it by quotes. (Use +esc_for_log() if you need to allocate a new string.) + +It's also handy to put memory chunks into hexadecimal before logging; +you can use hex_str(memory, length) for that. + +The escaped() and hex_str() functions both provide outputs that are +only valid till they are next invoked; they are not threadsafe. + +*/ diff --git a/src/lib/string/util_string.c b/src/lib/string/util_string.c index f934f66f02..c8f12d780e 100644 --- a/src/lib/string/util_string.c +++ b/src/lib/string/util_string.c @@ -1,6 +1,6 @@ /* Copyright (c) 2003-2004, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -71,7 +71,7 @@ tor_memstr(const void *haystack, size_t hlen, const char *needle) /** Return true iff the 'len' bytes at 'mem' are all zero. */ int -tor_mem_is_zero(const char *mem, size_t len) +fast_mem_is_zero(const char *mem, size_t len) { static const char ZERO[] = { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, @@ -95,17 +95,14 @@ tor_mem_is_zero(const char *mem, size_t len) int tor_digest_is_zero(const char *digest) { - static const uint8_t ZERO_DIGEST[] = { - 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 - }; - return tor_memeq(digest, ZERO_DIGEST, DIGEST_LEN); + return safe_mem_is_zero(digest, DIGEST_LEN); } /** Return true iff the DIGEST256_LEN bytes in digest are all zero. */ int tor_digest256_is_zero(const char *digest) { - return tor_mem_is_zero(digest, DIGEST256_LEN); + return safe_mem_is_zero(digest, DIGEST256_LEN); } /** Remove from the string <b>s</b> every character which appears in @@ -212,21 +209,6 @@ strcmpstart(const char *s1, const char *s2) return strncmp(s1, s2, n); } -/** Compare the s1_len-byte string <b>s1</b> with <b>s2</b>, - * without depending on a terminating nul in s1. Sorting order is first by - * length, then lexically; return values are as for strcmp. - */ -int -strcmp_len(const char *s1, const char *s2, size_t s1_len) -{ - size_t s2_len = strlen(s2); - if (s1_len < s2_len) - return -1; - if (s1_len > s2_len) - return 1; - return fast_memcmp(s1, s2, s2_len); -} - /** Compares the first strlen(s2) characters of s1 with s2. Returns as for * strcasecmp. */ @@ -524,6 +506,23 @@ validate_char(const uint8_t *c, uint8_t len) int string_is_utf8(const char *str, size_t len) { + // If str is NULL, don't try to read it + if (!str) { + // We could test for this case, but the low-level logs would produce + // confusing test output. + // LCOV_EXCL_START + if (len) { + // Use the low-level logging function, so that the log module can + // validate UTF-8 (if needed in future code) + tor_log_err_sigsafe( + "BUG: string_is_utf8() called with NULL str but non-zero len."); + // Since it's a bug, we should probably reject this string + return false; + } + // LCOV_EXCL_STOP + return true; + } + for (size_t i = 0; i < len;) { uint8_t num_bytes = bytes_in_char(str[i]); if (num_bytes == 0) // Invalid leading byte found. @@ -541,3 +540,16 @@ string_is_utf8(const char *str, size_t len) } return true; } + +/** As string_is_utf8(), but returns false if the string begins with a UTF-8 + * byte order mark (BOM). + */ +int +string_is_utf8_no_bom(const char *str, size_t len) +{ + if (str && len >= 3 && (!strcmpstart(str, "\uFEFF") || + !strcmpstart(str, "\uFFFE"))) { + return false; + } + return string_is_utf8(str, len); +} diff --git a/src/lib/string/util_string.h b/src/lib/string/util_string.h index d9fbf8c61e..e89233df88 100644 --- a/src/lib/string/util_string.h +++ b/src/lib/string/util_string.h @@ -1,6 +1,6 @@ /* Copyright (c) 2003-2004, Roger Dingledine * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2019, The Tor Project, Inc. */ + * Copyright (c) 2007-2020, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -20,7 +20,10 @@ const void *tor_memmem(const void *haystack, size_t hlen, const void *needle, size_t nlen); const void *tor_memstr(const void *haystack, size_t hlen, const char *needle); -int tor_mem_is_zero(const char *mem, size_t len); +int fast_mem_is_zero(const char *mem, size_t len); +#define fast_digest_is_zero(d) fast_mem_is_zero((d), DIGEST_LEN) +#define fast_digetst256_is_zero(d) fast_mem_is_zero((d), DIGEST256_LEN) + int tor_digest_is_zero(const char *digest); int tor_digest256_is_zero(const char *digest); @@ -33,7 +36,6 @@ int tor_strisnonupper(const char *s); int tor_strisspace(const char *s); int strcmp_opt(const char *s1, const char *s2); int strcmpstart(const char *s1, const char *s2); -int strcmp_len(const char *s1, const char *s2, size_t len); int strcasecmpstart(const char *s1, const char *s2); int strcmpend(const char *s1, const char *s2); int strcasecmpend(const char *s1, const char *s2); @@ -53,5 +55,6 @@ const char *find_str_at_start_of_line(const char *haystack, int string_is_C_identifier(const char *string); int string_is_utf8(const char *str, size_t len); +int string_is_utf8_no_bom(const char *str, size_t len); #endif /* !defined(TOR_UTIL_STRING_H) */ |