diff options
author | Nick Mathewson <nickm@torproject.org> | 2018-06-22 09:23:30 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2018-06-22 09:49:13 -0400 |
commit | 1abadee3fd1c15f3720003c411ec6043c29d7c09 (patch) | |
tree | 64cb47b2d5352219fc71f36505d9c0bdcc93bb0a /src/lib/string | |
parent | 1e07b4031e2613826cf7ff2838a2f0c8f03e81c2 (diff) | |
download | tor-1abadee3fd1c15f3720003c411ec6043c29d7c09.tar.gz tor-1abadee3fd1c15f3720003c411ec6043c29d7c09.zip |
Extract key string manipulation functions into a new library.
Diffstat (limited to 'src/lib/string')
-rw-r--r-- | src/lib/string/.may_include | 6 | ||||
-rw-r--r-- | src/lib/string/compat_ctype.c | 67 | ||||
-rw-r--r-- | src/lib/string/compat_ctype.h | 60 | ||||
-rw-r--r-- | src/lib/string/include.am | 23 | ||||
-rw-r--r-- | src/lib/string/printf.c | 152 | ||||
-rw-r--r-- | src/lib/string/printf.h | 25 | ||||
-rw-r--r-- | src/lib/string/scanf.c | 312 | ||||
-rw-r--r-- | src/lib/string/scanf.h | 19 | ||||
-rw-r--r-- | src/lib/string/util_string.c | 340 | ||||
-rw-r--r-- | src/lib/string/util_string.h | 42 |
10 files changed, 1046 insertions, 0 deletions
diff --git a/src/lib/string/.may_include b/src/lib/string/.may_include new file mode 100644 index 0000000000..8781566d9f --- /dev/null +++ b/src/lib/string/.may_include @@ -0,0 +1,6 @@ +orconfig.h +lib/cc/*.h +lib/err/*.h +lib/malloc/*.h +lib/ctime/*.h +lib/string/*.h diff --git a/src/lib/string/compat_ctype.c b/src/lib/string/compat_ctype.c new file mode 100644 index 0000000000..d1d4ce0ffc --- /dev/null +++ b/src/lib/string/compat_ctype.c @@ -0,0 +1,67 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "lib/string/compat_ctype.h" + +/** + * Tables to implement ctypes-replacement TOR_IS*() functions. Each table + * has 256 bits to look up whether a character is in some set or not. This + * fails on non-ASCII platforms, but it is hard to find a platform whose + * character set is not a superset of ASCII nowadays. */ + +/**@{*/ +const uint32_t TOR_ISALPHA_TABLE[8] = + { 0, 0, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 }; +const uint32_t TOR_ISALNUM_TABLE[8] = + { 0, 0x3ff0000, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 }; +const uint32_t TOR_ISSPACE_TABLE[8] = { 0x3e00, 0x1, 0, 0, 0, 0, 0, 0 }; +const uint32_t TOR_ISXDIGIT_TABLE[8] = + { 0, 0x3ff0000, 0x7e, 0x7e, 0, 0, 0, 0 }; +const uint32_t TOR_ISDIGIT_TABLE[8] = { 0, 0x3ff0000, 0, 0, 0, 0, 0, 0 }; +const uint32_t TOR_ISPRINT_TABLE[8] = + { 0, 0xffffffff, 0xffffffff, 0x7fffffff, 0, 0, 0, 0x0 }; +const uint32_t TOR_ISUPPER_TABLE[8] = { 0, 0, 0x7fffffe, 0, 0, 0, 0, 0 }; +const uint32_t TOR_ISLOWER_TABLE[8] = { 0, 0, 0, 0x7fffffe, 0, 0, 0, 0 }; + +/** Upper-casing and lowercasing tables to map characters to upper/lowercase + * equivalents. Used by tor_toupper() and tor_tolower(). */ +/**@{*/ +const uint8_t TOR_TOUPPER_TABLE[256] = { + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, + 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, + 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, + 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, + 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, + 96,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, + 80,81,82,83,84,85,86,87,88,89,90,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, +}; +const uint8_t TOR_TOLOWER_TABLE[256] = { + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, + 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, + 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, + 64,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,91,92,93,94,95, + 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, +}; +/**@}*/ diff --git a/src/lib/string/compat_ctype.h b/src/lib/string/compat_ctype.h new file mode 100644 index 0000000000..32f314f332 --- /dev/null +++ b/src/lib/string/compat_ctype.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_COMPAT_CTYPE_H +#define TOR_COMPAT_CTYPE_H + +#include "orconfig.h" +#include "lib/cc/torint.h" + +/* Much of the time when we're checking ctypes, we're doing spec compliance, + * which all assumes we're doing ASCII. */ +#define DECLARE_CTYPE_FN(name) \ + static int TOR_##name(char c); \ + extern const uint32_t TOR_##name##_TABLE[]; \ + static inline int TOR_##name(char c) { \ + uint8_t u = c; \ + return !!(TOR_##name##_TABLE[(u >> 5) & 7] & (1u << (u & 31))); \ + } +DECLARE_CTYPE_FN(ISALPHA) +DECLARE_CTYPE_FN(ISALNUM) +DECLARE_CTYPE_FN(ISSPACE) +DECLARE_CTYPE_FN(ISDIGIT) +DECLARE_CTYPE_FN(ISXDIGIT) +DECLARE_CTYPE_FN(ISPRINT) +DECLARE_CTYPE_FN(ISLOWER) +DECLARE_CTYPE_FN(ISUPPER) +extern const uint8_t TOR_TOUPPER_TABLE[]; +extern const uint8_t TOR_TOLOWER_TABLE[]; +#define TOR_TOLOWER(c) (TOR_TOLOWER_TABLE[(uint8_t)c]) +#define TOR_TOUPPER(c) (TOR_TOUPPER_TABLE[(uint8_t)c]) + +/** Helper: given a hex digit, return its value, or -1 if it isn't hex. */ +inline int +hex_decode_digit(char c) +{ + switch (c) { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'A': case 'a': return 10; + case 'B': case 'b': return 11; + case 'C': case 'c': return 12; + case 'D': case 'd': return 13; + case 'E': case 'e': return 14; + case 'F': case 'f': return 15; + default: + return -1; + } +} + +#endif /* !defined(TOR_COMPAT_CTYPE_H) */ diff --git a/src/lib/string/include.am b/src/lib/string/include.am new file mode 100644 index 0000000000..d458515d28 --- /dev/null +++ b/src/lib/string/include.am @@ -0,0 +1,23 @@ + +noinst_LIBRARIES += src/lib/libtor-string.a + +if UNITTESTS_ENABLED +noinst_LIBRARIES += src/lib/libtor-string-testing.a +endif + +src_lib_libtor_string_a_SOURCES = \ + src/lib/string/compat_ctype.c \ + src/lib/string/util_string.c \ + src/lib/string/printf.c \ + src/lib/string/scanf.c + +src_lib_libtor_string_testing_a_SOURCES = \ + $(src_lib_libtor_string_a_SOURCES) +src_lib_libtor_string_testing_a_CPPFLAGS = $(AM_CPPFLAGS) $(TEST_CPPFLAGS) +src_lib_libtor_string_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS) + +noinst_HEADERS += \ + src/lib/string/compat_ctype.h \ + src/lib/string/util_string.h \ + src/lib/string/printf.h \ + src/lib/string/scanf.h diff --git a/src/lib/string/printf.c b/src/lib/string/printf.c new file mode 100644 index 0000000000..4443e25fb4 --- /dev/null +++ b/src/lib/string/printf.c @@ -0,0 +1,152 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "lib/string/printf.h" +#include "lib/err/torerr.h" +#include "lib/cc/torint.h" +#include "lib/malloc/util_malloc.h" + +#include <stdlib.h> +#include <stdio.h> + +/** Replacement for snprintf. Differs from platform snprintf in two + * ways: First, always NUL-terminates its output. Second, always + * returns -1 if the result is truncated. (Note that this return + * behavior does <i>not</i> conform to C99; it just happens to be + * easier to emulate "return -1" with conformant implementations than + * it is to emulate "return number that would be written" with + * non-conformant implementations.) */ +int +tor_snprintf(char *str, size_t size, const char *format, ...) +{ + va_list ap; + int r; + va_start(ap,format); + r = tor_vsnprintf(str,size,format,ap); + va_end(ap); + return r; +} + +/** Replacement for vsnprintf; behavior differs as tor_snprintf differs from + * snprintf. + */ +int +tor_vsnprintf(char *str, size_t size, const char *format, va_list args) +{ + int r; + if (size == 0) + return -1; /* no place for the NUL */ + if (size > SIZE_T_CEILING) + return -1; +#ifdef _WIN32 + r = _vsnprintf(str, size, format, args); +#else + r = vsnprintf(str, size, format, args); +#endif + str[size-1] = '\0'; + if (r < 0 || r >= (ssize_t)size) + return -1; + return r; +} + +/** + * Portable asprintf implementation. Does a printf() into a newly malloc'd + * string. Sets *<b>strp</b> to this string, and returns its length (not + * including the terminating NUL character). + * + * You can treat this function as if its implementation were something like + <pre> + char buf[_INFINITY_]; + tor_snprintf(buf, sizeof(buf), fmt, args); + *strp = tor_strdup(buf); + return strlen(*strp): + </pre> + * Where _INFINITY_ is an imaginary constant so big that any string can fit + * into it. + */ +int +tor_asprintf(char **strp, const char *fmt, ...) +{ + int r; + va_list args; + va_start(args, fmt); + r = tor_vasprintf(strp, fmt, args); + va_end(args); + if (!*strp || r < 0) { + /* LCOV_EXCL_START */ + raw_assert_unreached_msg("Internal error in asprintf"); + /* LCOV_EXCL_STOP */ + } + return r; +} + +/** + * Portable vasprintf implementation. Does a printf() into a newly malloc'd + * string. Differs from regular vasprintf in the same ways that + * tor_asprintf() differs from regular asprintf. + */ +int +tor_vasprintf(char **strp, const char *fmt, va_list args) +{ + /* use a temporary variable in case *strp is in args. */ + char *strp_tmp=NULL; +#ifdef HAVE_VASPRINTF + /* If the platform gives us one, use it. */ + int r = vasprintf(&strp_tmp, fmt, args); + if (r < 0) + *strp = NULL; + else + *strp = strp_tmp; + return r; +#elif defined(HAVE__VSCPRINTF) + /* On Windows, _vsnprintf won't tell us the length of the string if it + * overflows, so we need to use _vcsprintf to tell how much to allocate */ + int len, r; + va_list tmp_args; + va_copy(tmp_args, args); + len = _vscprintf(fmt, tmp_args); + va_end(tmp_args); + if (len < 0) { + *strp = NULL; + return -1; + } + strp_tmp = tor_malloc(len + 1); + r = _vsnprintf(strp_tmp, len+1, fmt, args); + if (r != len) { + tor_free(strp_tmp); + *strp = NULL; + return -1; + } + *strp = strp_tmp; + return len; +#else + /* Everywhere else, we have a decent vsnprintf that tells us how many + * characters we need. We give it a try on a short buffer first, since + * it might be nice to avoid the second vsnprintf call. + */ + char buf[128]; + int len, r; + va_list tmp_args; + va_copy(tmp_args, args); + /* vsnprintf() was properly checked but tor_vsnprintf() available so + * why not use it? */ + len = tor_vsnprintf(buf, sizeof(buf), fmt, tmp_args); + va_end(tmp_args); + if (len < (int)sizeof(buf)) { + *strp = tor_strdup(buf); + return len; + } + strp_tmp = tor_malloc(len+1); + /* use of tor_vsnprintf() will ensure string is null terminated */ + r = tor_vsnprintf(strp_tmp, len+1, fmt, args); + if (r != len) { + tor_free(strp_tmp); + *strp = NULL; + return -1; + } + *strp = strp_tmp; + return len; +#endif /* defined(HAVE_VASPRINTF) || ... */ +} diff --git a/src/lib/string/printf.h b/src/lib/string/printf.h new file mode 100644 index 0000000000..2f46206545 --- /dev/null +++ b/src/lib/string/printf.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_UTIL_PRINTF_H +#define TOR_UTIL_PRINTF_H + +#include "orconfig.h" +#include "lib/cc/compat_compiler.h" + +#include <stdarg.h> +#include <stddef.h> + +int tor_snprintf(char *str, size_t size, const char *format, ...) + CHECK_PRINTF(3,4) ATTR_NONNULL((1,3)); +int tor_vsnprintf(char *str, size_t size, const char *format, va_list args) + CHECK_PRINTF(3,0) ATTR_NONNULL((1,3)); + +int tor_asprintf(char **strp, const char *fmt, ...) + CHECK_PRINTF(2,3); +int tor_vasprintf(char **strp, const char *fmt, va_list args) + CHECK_PRINTF(2,0); + +#endif /* !defined(TOR_UTIL_STRING_H) */ diff --git a/src/lib/string/scanf.c b/src/lib/string/scanf.c new file mode 100644 index 0000000000..0c5082799c --- /dev/null +++ b/src/lib/string/scanf.c @@ -0,0 +1,312 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "lib/string/scanf.h" +#include "lib/string/compat_ctype.h" +#include "lib/cc/torint.h" +#include "lib/err/torerr.h" + +#include <stdlib.h> + +#define MAX_SCANF_WIDTH 9999 + +/** Helper: given an ASCII-encoded decimal digit, return its numeric value. + * NOTE: requires that its input be in-bounds. */ +static int +digit_to_num(char d) +{ + int num = ((int)d) - (int)'0'; + raw_assert(num <= 9 && num >= 0); + return num; +} + +/** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b> + * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On + * success, store the result in <b>out</b>, advance bufp to the next + * character, and return 0. On failure, return -1. */ +static int +scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base) +{ + unsigned long result = 0; + int scanned_so_far = 0; + const int hex = base==16; + raw_assert(base == 10 || base == 16); + if (!bufp || !*bufp || !out) + return -1; + if (width<0) + width=MAX_SCANF_WIDTH; + + while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp)) + && scanned_so_far < width) { + unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++); + // Check for overflow beforehand, without actually causing any overflow + // This preserves functionality on compilers that don't wrap overflow + // (i.e. that trap or optimise away overflow) + // result * base + digit > ULONG_MAX + // result * base > ULONG_MAX - digit + if (result > (ULONG_MAX - digit)/base) + return -1; /* Processing this digit would overflow */ + result = result * base + digit; + ++scanned_so_far; + } + + if (!scanned_so_far) /* No actual digits scanned */ + return -1; + + *out = result; + return 0; +} + +/** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b> + * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On + * success, store the result in <b>out</b>, advance bufp to the next + * character, and return 0. On failure, return -1. */ +static int +scan_signed(const char **bufp, long *out, int width) +{ + int neg = 0; + unsigned long result = 0; + + if (!bufp || !*bufp || !out) + return -1; + if (width<0) + width=MAX_SCANF_WIDTH; + + if (**bufp == '-') { + neg = 1; + ++*bufp; + --width; + } + + if (scan_unsigned(bufp, &result, width, 10) < 0) + return -1; + + if (neg && result > 0) { + if (result > ((unsigned long)LONG_MAX) + 1) + return -1; /* Underflow */ + else if (result == ((unsigned long)LONG_MAX) + 1) + *out = LONG_MIN; + else { + /* We once had a far more clever no-overflow conversion here, but + * some versions of GCC apparently ran it into the ground. Now + * we just check for LONG_MIN explicitly. + */ + *out = -(long)result; + } + } else { + if (result > LONG_MAX) + return -1; /* Overflow */ + *out = (long)result; + } + + return 0; +} + +/** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to + * <b>width</b> characters. (Handle arbitrary width if <b>width</b> is less + * than 0.) On success, store the result in <b>out</b>, advance bufp to the + * next character, and return 0. On failure, return -1. */ +static int +scan_double(const char **bufp, double *out, int width) +{ + int neg = 0; + double result = 0; + int scanned_so_far = 0; + + if (!bufp || !*bufp || !out) + return -1; + if (width<0) + width=MAX_SCANF_WIDTH; + + if (**bufp == '-') { + neg = 1; + ++*bufp; + } + + while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { + const int digit = digit_to_num(*(*bufp)++); + result = result * 10 + digit; + ++scanned_so_far; + } + if (**bufp == '.') { + double fracval = 0, denominator = 1; + ++*bufp; + ++scanned_so_far; + while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { + const int digit = digit_to_num(*(*bufp)++); + fracval = fracval * 10 + digit; + denominator *= 10; + ++scanned_so_far; + } + result += fracval / denominator; + } + + if (!scanned_so_far) /* No actual digits scanned */ + return -1; + + *out = neg ? -result : result; + return 0; +} + +/** Helper: copy up to <b>width</b> non-space characters from <b>bufp</b> to + * <b>out</b>. Make sure <b>out</b> is nul-terminated. Advance <b>bufp</b> + * to the next non-space character or the EOS. */ +static int +scan_string(const char **bufp, char *out, int width) +{ + int scanned_so_far = 0; + if (!bufp || !out || width < 0) + return -1; + while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) { + *out++ = *(*bufp)++; + ++scanned_so_far; + } + *out = '\0'; + return 0; +} + +/** Locale-independent, minimal, no-surprises scanf variant, accepting only a + * restricted pattern format. For more info on what it supports, see + * tor_sscanf() documentation. */ +int +tor_vsscanf(const char *buf, const char *pattern, va_list ap) +{ + int n_matched = 0; + + while (*pattern) { + if (*pattern != '%') { + if (*buf == *pattern) { + ++buf; + ++pattern; + continue; + } else { + return n_matched; + } + } else { + int width = -1; + int longmod = 0; + ++pattern; + if (TOR_ISDIGIT(*pattern)) { + width = digit_to_num(*pattern++); + while (TOR_ISDIGIT(*pattern)) { + width *= 10; + width += digit_to_num(*pattern++); + if (width > MAX_SCANF_WIDTH) + return -1; + } + if (!width) /* No zero-width things. */ + return -1; + } + if (*pattern == 'l') { + longmod = 1; + ++pattern; + } + if (*pattern == 'u' || *pattern == 'x') { + unsigned long u; + const int base = (*pattern == 'u') ? 10 : 16; + if (!*buf) + return n_matched; + if (scan_unsigned(&buf, &u, width, base)<0) + return n_matched; + if (longmod) { + unsigned long *out = va_arg(ap, unsigned long *); + *out = u; + } else { + unsigned *out = va_arg(ap, unsigned *); + if (u > UINT_MAX) + return n_matched; + *out = (unsigned) u; + } + ++pattern; + ++n_matched; + } else if (*pattern == 'f') { + double *d = va_arg(ap, double *); + if (!longmod) + return -1; /* float not supported */ + if (!*buf) + return n_matched; + if (scan_double(&buf, d, width)<0) + return n_matched; + ++pattern; + ++n_matched; + } else if (*pattern == 'd') { + long lng=0; + if (scan_signed(&buf, &lng, width)<0) + return n_matched; + if (longmod) { + long *out = va_arg(ap, long *); + *out = lng; + } else { + int *out = va_arg(ap, int *); +#if LONG_MAX > INT_MAX + if (lng < INT_MIN || lng > INT_MAX) + return n_matched; +#endif + *out = (int)lng; + } + ++pattern; + ++n_matched; + } else if (*pattern == 's') { + char *s = va_arg(ap, char *); + if (longmod) + return -1; + if (width < 0) + return -1; + if (scan_string(&buf, s, width)<0) + return n_matched; + ++pattern; + ++n_matched; + } else if (*pattern == 'c') { + char *ch = va_arg(ap, char *); + if (longmod) + return -1; + if (width != -1) + return -1; + if (!*buf) + return n_matched; + *ch = *buf++; + ++pattern; + ++n_matched; + } else if (*pattern == '%') { + if (*buf != '%') + return n_matched; + if (longmod) + return -1; + ++buf; + ++pattern; + } else { + return -1; /* Unrecognized pattern component. */ + } + } + } + + return n_matched; +} + +/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b> + * and store the results in the corresponding argument fields. Differs from + * sscanf in that: + * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, and %c. + * <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1) + * <li>It does not handle arbitrarily long widths. + * <li>Numbers do not consume any space characters. + * <li>It is locale-independent. + * <li>%u and %x do not consume any space. + * <li>It returns -1 on malformed patterns.</ul> + * + * (As with other locale-independent functions, we need this to parse data that + * is in ASCII without worrying that the C library's locale-handling will make + * miscellaneous characters look like numbers, spaces, and so on.) + */ +int +tor_sscanf(const char *buf, const char *pattern, ...) +{ + int r; + va_list ap; + va_start(ap, pattern); + r = tor_vsscanf(buf, pattern, ap); + va_end(ap); + return r; +} diff --git a/src/lib/string/scanf.h b/src/lib/string/scanf.h new file mode 100644 index 0000000000..9cfa9cc6c1 --- /dev/null +++ b/src/lib/string/scanf.h @@ -0,0 +1,19 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_UTIL_SCANF_H +#define TOR_UTIL_SCANF_H + +#include "orconfig.h" +#include "lib/cc/compat_compiler.h" + +#include <stdarg.h> + +int tor_vsscanf(const char *buf, const char *pattern, va_list ap) \ + CHECK_SCANF(2, 0); +int tor_sscanf(const char *buf, const char *pattern, ...) + CHECK_SCANF(2, 3); + +#endif /* !defined(TOR_UTIL_STRING_H) */ diff --git a/src/lib/string/util_string.c b/src/lib/string/util_string.c new file mode 100644 index 0000000000..aba37fcc00 --- /dev/null +++ b/src/lib/string/util_string.c @@ -0,0 +1,340 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "lib/string/util_string.h" +#include "lib/string/compat_ctype.h" +#include "lib/err/torerr.h" +#include "lib/ctime/di_ops.h" + +#include <string.h> +#include <stdlib.h> + +/** Remove from the string <b>s</b> every character which appears in + * <b>strip</b>. */ +void +tor_strstrip(char *s, const char *strip) +{ + char *readp = s; + while (*readp) { + if (strchr(strip, *readp)) { + ++readp; + } else { + *s++ = *readp++; + } + } + *s = '\0'; +} + +/** Convert all alphabetic characters in the nul-terminated string <b>s</b> to + * lowercase. */ +void +tor_strlower(char *s) +{ + while (*s) { + *s = TOR_TOLOWER(*s); + ++s; + } +} + +/** Convert all alphabetic characters in the nul-terminated string <b>s</b> to + * lowercase. */ +void +tor_strupper(char *s) +{ + while (*s) { + *s = TOR_TOUPPER(*s); + ++s; + } +} + +/** Return 1 if every character in <b>s</b> is printable, else return 0. + */ +int +tor_strisprint(const char *s) +{ + while (*s) { + if (!TOR_ISPRINT(*s)) + return 0; + s++; + } + return 1; +} + +/** Return 1 if no character in <b>s</b> is uppercase, else return 0. + */ +int +tor_strisnonupper(const char *s) +{ + while (*s) { + if (TOR_ISUPPER(*s)) + return 0; + s++; + } + return 1; +} + +/** Return true iff every character in <b>s</b> is whitespace space; else + * return false. */ +int +tor_strisspace(const char *s) +{ + while (*s) { + if (!TOR_ISSPACE(*s)) + return 0; + s++; + } + return 1; +} + +/** As strcmp, except that either string may be NULL. The NULL string is + * considered to be before any non-NULL string. */ +int +strcmp_opt(const char *s1, const char *s2) +{ + if (!s1) { + if (!s2) + return 0; + else + return -1; + } else if (!s2) { + return 1; + } else { + return strcmp(s1, s2); + } +} + +/** Compares the first strlen(s2) characters of s1 with s2. Returns as for + * strcmp. + */ +int +strcmpstart(const char *s1, const char *s2) +{ + size_t n = strlen(s2); + return strncmp(s1, s2, n); +} + +/** Compare the s1_len-byte string <b>s1</b> with <b>s2</b>, + * without depending on a terminating nul in s1. Sorting order is first by + * length, then lexically; return values are as for strcmp. + */ +int +strcmp_len(const char *s1, const char *s2, size_t s1_len) +{ + size_t s2_len = strlen(s2); + if (s1_len < s2_len) + return -1; + if (s1_len > s2_len) + return 1; + return fast_memcmp(s1, s2, s2_len); +} + +/** Compares the first strlen(s2) characters of s1 with s2. Returns as for + * strcasecmp. + */ +int +strcasecmpstart(const char *s1, const char *s2) +{ + size_t n = strlen(s2); + return strncasecmp(s1, s2, n); +} + +/** Compares the last strlen(s2) characters of s1 with s2. Returns as for + * strcmp. + */ +int +strcmpend(const char *s1, const char *s2) +{ + size_t n1 = strlen(s1), n2 = strlen(s2); + if (n2>n1) + return strcmp(s1,s2); + else + return strncmp(s1+(n1-n2), s2, n2); +} + +/** Compares the last strlen(s2) characters of s1 with s2. Returns as for + * strcasecmp. + */ +int +strcasecmpend(const char *s1, const char *s2) +{ + size_t n1 = strlen(s1), n2 = strlen(s2); + if (n2>n1) /* then they can't be the same; figure out which is bigger */ + return strcasecmp(s1,s2); + else + return strncasecmp(s1+(n1-n2), s2, n2); +} + +/** Return a pointer to the first char of s that is not whitespace and + * not a comment, or to the terminating NUL if no such character exists. + */ +const char * +eat_whitespace(const char *s) +{ + raw_assert(s); + + while (1) { + switch (*s) { + case '\0': + default: + return s; + case ' ': + case '\t': + case '\n': + case '\r': + ++s; + break; + case '#': + ++s; + while (*s && *s != '\n') + ++s; + } + } +} + +/** Return a pointer to the first char of s that is not whitespace and + * not a comment, or to the terminating NUL if no such character exists. + */ +const char * +eat_whitespace_eos(const char *s, const char *eos) +{ + raw_assert(s); + raw_assert(eos && s <= eos); + + while (s < eos) { + switch (*s) { + case '\0': + default: + return s; + case ' ': + case '\t': + case '\n': + case '\r': + ++s; + break; + case '#': + ++s; + while (s < eos && *s && *s != '\n') + ++s; + } + } + return s; +} + +/** Return a pointer to the first char of s that is not a space or a tab + * or a \\r, or to the terminating NUL if no such character exists. */ +const char * +eat_whitespace_no_nl(const char *s) +{ + while (*s == ' ' || *s == '\t' || *s == '\r') + ++s; + return s; +} + +/** As eat_whitespace_no_nl, but stop at <b>eos</b> whether we have + * found a non-whitespace character or not. */ +const char * +eat_whitespace_eos_no_nl(const char *s, const char *eos) +{ + while (s < eos && (*s == ' ' || *s == '\t' || *s == '\r')) + ++s; + return s; +} + +/** Return a pointer to the first char of s that is whitespace or <b>#</b>, + * or to the terminating NUL if no such character exists. + */ +const char * +find_whitespace(const char *s) +{ + /* tor_assert(s); */ + while (1) { + switch (*s) + { + case '\0': + case '#': + case ' ': + case '\r': + case '\n': + case '\t': + return s; + default: + ++s; + } + } +} + +/** As find_whitespace, but stop at <b>eos</b> whether we have found a + * whitespace or not. */ +const char * +find_whitespace_eos(const char *s, const char *eos) +{ + /* tor_assert(s); */ + while (s < eos) { + switch (*s) + { + case '\0': + case '#': + case ' ': + case '\r': + case '\n': + case '\t': + return s; + default: + ++s; + } + } + return s; +} + +/** Return the first occurrence of <b>needle</b> in <b>haystack</b> that + * occurs at the start of a line (that is, at the beginning of <b>haystack</b> + * or immediately after a newline). Return NULL if no such string is found. + */ +const char * +find_str_at_start_of_line(const char *haystack, const char *needle) +{ + size_t needle_len = strlen(needle); + + do { + if (!strncmp(haystack, needle, needle_len)) + return haystack; + + haystack = strchr(haystack, '\n'); + if (!haystack) + return NULL; + else + ++haystack; + } while (*haystack); + + return NULL; +} + +/** Returns true if <b>string</b> could be a C identifier. + A C identifier must begin with a letter or an underscore and the + rest of its characters can be letters, numbers or underscores. No + length limit is imposed. */ +int +string_is_C_identifier(const char *string) +{ + size_t iter; + size_t length = strlen(string); + if (!length) + return 0; + + for (iter = 0; iter < length ; iter++) { + if (iter == 0) { + if (!(TOR_ISALPHA(string[iter]) || + string[iter] == '_')) + return 0; + } else { + if (!(TOR_ISALPHA(string[iter]) || + TOR_ISDIGIT(string[iter]) || + string[iter] == '_')) + return 0; + } + } + + return 1; +} diff --git a/src/lib/string/util_string.h b/src/lib/string/util_string.h new file mode 100644 index 0000000000..f194c36373 --- /dev/null +++ b/src/lib/string/util_string.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_UTIL_STRING_H +#define TOR_UTIL_STRING_H + +#include "orconfig.h" +#include "lib/cc/compat_compiler.h" + +#include <stddef.h> + +/** Allowable characters in a hexadecimal string. */ +#define HEX_CHARACTERS "0123456789ABCDEFabcdef" +void tor_strlower(char *s) ATTR_NONNULL((1)); +void tor_strupper(char *s) ATTR_NONNULL((1)); +int tor_strisprint(const char *s) ATTR_NONNULL((1)); +int tor_strisnonupper(const char *s) ATTR_NONNULL((1)); +int tor_strisspace(const char *s); +int strcmp_opt(const char *s1, const char *s2); +int strcmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2)); +int strcmp_len(const char *s1, const char *s2, size_t len) ATTR_NONNULL((1,2)); +int strcasecmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2)); +int strcmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2)); +int strcasecmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2)); +int fast_memcmpstart(const void *mem, size_t memlen, const char *prefix); + +void tor_strstrip(char *s, const char *strip) ATTR_NONNULL((1,2)); + +const char *eat_whitespace(const char *s); +const char *eat_whitespace_eos(const char *s, const char *eos); +const char *eat_whitespace_no_nl(const char *s); +const char *eat_whitespace_eos_no_nl(const char *s, const char *eos); +const char *find_whitespace(const char *s); +const char *find_whitespace_eos(const char *s, const char *eos); +const char *find_str_at_start_of_line(const char *haystack, + const char *needle); + +int string_is_C_identifier(const char *string); + +#endif /* !defined(TOR_UTIL_STRING_H) */ |