diff options
author | Nick Mathewson <nickm@torproject.org> | 2018-06-22 09:23:30 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2018-06-22 09:49:13 -0400 |
commit | 1abadee3fd1c15f3720003c411ec6043c29d7c09 (patch) | |
tree | 64cb47b2d5352219fc71f36505d9c0bdcc93bb0a /src/common | |
parent | 1e07b4031e2613826cf7ff2838a2f0c8f03e81c2 (diff) | |
download | tor-1abadee3fd1c15f3720003c411ec6043c29d7c09.tar.gz tor-1abadee3fd1c15f3720003c411ec6043c29d7c09.zip |
Extract key string manipulation functions into a new library.
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/compat.c | 202 | ||||
-rw-r--r-- | src/common/compat.h | 34 | ||||
-rw-r--r-- | src/common/util.c | 629 | ||||
-rw-r--r-- | src/common/util.h | 35 | ||||
-rw-r--r-- | src/common/util_format.c | 37 | ||||
-rw-r--r-- | src/common/util_format.h | 2 |
6 files changed, 9 insertions, 930 deletions
diff --git a/src/common/compat.c b/src/common/compat.c index 9462195ba2..8929f2f3d3 100644 --- a/src/common/compat.c +++ b/src/common/compat.c @@ -404,147 +404,6 @@ tor_munmap_file(tor_mmap_t *handle) #error "cannot implement tor_mmap_file" #endif /* defined(HAVE_MMAP) || ... || ... */ -/** Replacement for snprintf. Differs from platform snprintf in two - * ways: First, always NUL-terminates its output. Second, always - * returns -1 if the result is truncated. (Note that this return - * behavior does <i>not</i> conform to C99; it just happens to be - * easier to emulate "return -1" with conformant implementations than - * it is to emulate "return number that would be written" with - * non-conformant implementations.) */ -int -tor_snprintf(char *str, size_t size, const char *format, ...) -{ - va_list ap; - int r; - va_start(ap,format); - r = tor_vsnprintf(str,size,format,ap); - va_end(ap); - return r; -} - -/** Replacement for vsnprintf; behavior differs as tor_snprintf differs from - * snprintf. - */ -int -tor_vsnprintf(char *str, size_t size, const char *format, va_list args) -{ - int r; - if (size == 0) - return -1; /* no place for the NUL */ - if (size > SIZE_T_CEILING) - return -1; -#ifdef _WIN32 - r = _vsnprintf(str, size, format, args); -#else - r = vsnprintf(str, size, format, args); -#endif - str[size-1] = '\0'; - if (r < 0 || r >= (ssize_t)size) - return -1; - return r; -} - -/** - * Portable asprintf implementation. Does a printf() into a newly malloc'd - * string. Sets *<b>strp</b> to this string, and returns its length (not - * including the terminating NUL character). - * - * You can treat this function as if its implementation were something like - <pre> - char buf[_INFINITY_]; - tor_snprintf(buf, sizeof(buf), fmt, args); - *strp = tor_strdup(buf); - return strlen(*strp): - </pre> - * Where _INFINITY_ is an imaginary constant so big that any string can fit - * into it. - */ -int -tor_asprintf(char **strp, const char *fmt, ...) -{ - int r; - va_list args; - va_start(args, fmt); - r = tor_vasprintf(strp, fmt, args); - va_end(args); - if (!*strp || r < 0) { - /* LCOV_EXCL_START */ - log_err(LD_BUG, "Internal error in asprintf"); - tor_assert(0); - /* LCOV_EXCL_STOP */ - } - return r; -} - -/** - * Portable vasprintf implementation. Does a printf() into a newly malloc'd - * string. Differs from regular vasprintf in the same ways that - * tor_asprintf() differs from regular asprintf. - */ -int -tor_vasprintf(char **strp, const char *fmt, va_list args) -{ - /* use a temporary variable in case *strp is in args. */ - char *strp_tmp=NULL; -#ifdef HAVE_VASPRINTF - /* If the platform gives us one, use it. */ - int r = vasprintf(&strp_tmp, fmt, args); - if (r < 0) - *strp = NULL; - else - *strp = strp_tmp; - return r; -#elif defined(HAVE__VSCPRINTF) - /* On Windows, _vsnprintf won't tell us the length of the string if it - * overflows, so we need to use _vcsprintf to tell how much to allocate */ - int len, r; - va_list tmp_args; - va_copy(tmp_args, args); - len = _vscprintf(fmt, tmp_args); - va_end(tmp_args); - if (len < 0) { - *strp = NULL; - return -1; - } - strp_tmp = tor_malloc(len + 1); - r = _vsnprintf(strp_tmp, len+1, fmt, args); - if (r != len) { - tor_free(strp_tmp); - *strp = NULL; - return -1; - } - *strp = strp_tmp; - return len; -#else - /* Everywhere else, we have a decent vsnprintf that tells us how many - * characters we need. We give it a try on a short buffer first, since - * it might be nice to avoid the second vsnprintf call. - */ - char buf[128]; - int len, r; - va_list tmp_args; - va_copy(tmp_args, args); - /* vsnprintf() was properly checked but tor_vsnprintf() available so - * why not use it? */ - len = tor_vsnprintf(buf, sizeof(buf), fmt, tmp_args); - va_end(tmp_args); - if (len < (int)sizeof(buf)) { - *strp = tor_strdup(buf); - return len; - } - strp_tmp = tor_malloc(len+1); - /* use of tor_vsnprintf() will ensure string is null terminated */ - r = tor_vsnprintf(strp_tmp, len+1, fmt, args); - if (r != len) { - tor_free(strp_tmp); - *strp = NULL; - return -1; - } - *strp = strp_tmp; - return len; -#endif /* defined(HAVE_VASPRINTF) || ... */ -} - /** Given <b>hlen</b> bytes at <b>haystack</b> and <b>nlen</b> bytes at * <b>needle</b>, return a pointer to the first occurrence of the needle * within the haystack, or NULL if there is no such occurrence. @@ -591,67 +450,6 @@ tor_memmem(const void *_haystack, size_t hlen, #endif /* defined(HAVE_MEMMEM) && (!defined(__GNUC__) || __GNUC__ >= 2) */ } -/** - * Tables to implement ctypes-replacement TOR_IS*() functions. Each table - * has 256 bits to look up whether a character is in some set or not. This - * fails on non-ASCII platforms, but it is hard to find a platform whose - * character set is not a superset of ASCII nowadays. */ - -/**@{*/ -const uint32_t TOR_ISALPHA_TABLE[8] = - { 0, 0, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 }; -const uint32_t TOR_ISALNUM_TABLE[8] = - { 0, 0x3ff0000, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 }; -const uint32_t TOR_ISSPACE_TABLE[8] = { 0x3e00, 0x1, 0, 0, 0, 0, 0, 0 }; -const uint32_t TOR_ISXDIGIT_TABLE[8] = - { 0, 0x3ff0000, 0x7e, 0x7e, 0, 0, 0, 0 }; -const uint32_t TOR_ISDIGIT_TABLE[8] = { 0, 0x3ff0000, 0, 0, 0, 0, 0, 0 }; -const uint32_t TOR_ISPRINT_TABLE[8] = - { 0, 0xffffffff, 0xffffffff, 0x7fffffff, 0, 0, 0, 0x0 }; -const uint32_t TOR_ISUPPER_TABLE[8] = { 0, 0, 0x7fffffe, 0, 0, 0, 0, 0 }; -const uint32_t TOR_ISLOWER_TABLE[8] = { 0, 0, 0, 0x7fffffe, 0, 0, 0, 0 }; - -/** Upper-casing and lowercasing tables to map characters to upper/lowercase - * equivalents. Used by tor_toupper() and tor_tolower(). */ -/**@{*/ -const uint8_t TOR_TOUPPER_TABLE[256] = { - 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, - 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, - 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, - 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, - 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, - 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, - 96,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, - 80,81,82,83,84,85,86,87,88,89,90,123,124,125,126,127, - 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, -}; -const uint8_t TOR_TOLOWER_TABLE[256] = { - 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, - 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, - 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, - 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, - 64,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119,120,121,122,91,92,93,94,95, - 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, - 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, -}; -/**@}*/ - /** Helper for tor_strtok_r_impl: Advances cp past all characters in * <b>sep</b>, and returns its new value. */ static char * diff --git a/src/common/compat.h b/src/common/compat.h index dff29a42b0..21fd0211a3 100644 --- a/src/common/compat.h +++ b/src/common/compat.h @@ -47,6 +47,8 @@ #include "lib/cc/compat_compiler.h" #include "common/compat_time.h" +#include "lib/string/compat_ctype.h" +#include "lib/string/printf.h" #include <stdio.h> #include <errno.h> @@ -97,16 +99,6 @@ typedef struct tor_mmap_t { tor_mmap_t *tor_mmap_file(const char *filename) ATTR_NONNULL((1)); int tor_munmap_file(tor_mmap_t *handle) ATTR_NONNULL((1)); -int tor_snprintf(char *str, size_t size, const char *format, ...) - CHECK_PRINTF(3,4) ATTR_NONNULL((1,3)); -int tor_vsnprintf(char *str, size_t size, const char *format, va_list args) - CHECK_PRINTF(3,0) ATTR_NONNULL((1,3)); - -int tor_asprintf(char **strp, const char *fmt, ...) - CHECK_PRINTF(2,3); -int tor_vasprintf(char **strp, const char *fmt, va_list args) - CHECK_PRINTF(2,0); - const void *tor_memmem(const void *haystack, size_t hlen, const void *needle, size_t nlen) ATTR_NONNULL((1,3)); static const void *tor_memstr(const void *haystack, size_t hlen, @@ -117,28 +109,6 @@ tor_memstr(const void *haystack, size_t hlen, const char *needle) return tor_memmem(haystack, hlen, needle, strlen(needle)); } -/* Much of the time when we're checking ctypes, we're doing spec compliance, - * which all assumes we're doing ASCII. */ -#define DECLARE_CTYPE_FN(name) \ - static int TOR_##name(char c); \ - extern const uint32_t TOR_##name##_TABLE[]; \ - static inline int TOR_##name(char c) { \ - uint8_t u = c; \ - return !!(TOR_##name##_TABLE[(u >> 5) & 7] & (1u << (u & 31))); \ - } -DECLARE_CTYPE_FN(ISALPHA) -DECLARE_CTYPE_FN(ISALNUM) -DECLARE_CTYPE_FN(ISSPACE) -DECLARE_CTYPE_FN(ISDIGIT) -DECLARE_CTYPE_FN(ISXDIGIT) -DECLARE_CTYPE_FN(ISPRINT) -DECLARE_CTYPE_FN(ISLOWER) -DECLARE_CTYPE_FN(ISUPPER) -extern const uint8_t TOR_TOUPPER_TABLE[]; -extern const uint8_t TOR_TOLOWER_TABLE[]; -#define TOR_TOLOWER(c) (TOR_TOLOWER_TABLE[(uint8_t)c]) -#define TOR_TOUPPER(c) (TOR_TOUPPER_TABLE[(uint8_t)c]) - char *tor_strtok_r_impl(char *str, const char *sep, char **lasts); #ifdef HAVE_STRTOK_R #define tor_strtok_r(str, sep, lasts) strtok_r(str, sep, lasts) diff --git a/src/common/util.c b/src/common/util.c index 358a68fd75..597ff2c420 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -385,22 +385,6 @@ n_bits_set_u8(uint8_t v) * String manipulation * ===== */ -/** Remove from the string <b>s</b> every character which appears in - * <b>strip</b>. */ -void -tor_strstrip(char *s, const char *strip) -{ - char *readp = s; - while (*readp) { - if (strchr(strip, *readp)) { - ++readp; - } else { - *s++ = *readp++; - } - } - *s = '\0'; -} - /** Return a pointer to a NUL-terminated hexadecimal string encoding * the first <b>fromlen</b> bytes of <b>from</b>. (fromlen must be \<= 32.) The * result does not need to be deallocated, but repeated calls to @@ -416,145 +400,6 @@ hex_str(const char *from, size_t fromlen) return buf; } -/** Convert all alphabetic characters in the nul-terminated string <b>s</b> to - * lowercase. */ -void -tor_strlower(char *s) -{ - while (*s) { - *s = TOR_TOLOWER(*s); - ++s; - } -} - -/** Convert all alphabetic characters in the nul-terminated string <b>s</b> to - * lowercase. */ -void -tor_strupper(char *s) -{ - while (*s) { - *s = TOR_TOUPPER(*s); - ++s; - } -} - -/** Return 1 if every character in <b>s</b> is printable, else return 0. - */ -int -tor_strisprint(const char *s) -{ - while (*s) { - if (!TOR_ISPRINT(*s)) - return 0; - s++; - } - return 1; -} - -/** Return 1 if no character in <b>s</b> is uppercase, else return 0. - */ -int -tor_strisnonupper(const char *s) -{ - while (*s) { - if (TOR_ISUPPER(*s)) - return 0; - s++; - } - return 1; -} - -/** Return true iff every character in <b>s</b> is whitespace space; else - * return false. */ -int -tor_strisspace(const char *s) -{ - while (*s) { - if (!TOR_ISSPACE(*s)) - return 0; - s++; - } - return 1; -} - -/** As strcmp, except that either string may be NULL. The NULL string is - * considered to be before any non-NULL string. */ -int -strcmp_opt(const char *s1, const char *s2) -{ - if (!s1) { - if (!s2) - return 0; - else - return -1; - } else if (!s2) { - return 1; - } else { - return strcmp(s1, s2); - } -} - -/** Compares the first strlen(s2) characters of s1 with s2. Returns as for - * strcmp. - */ -int -strcmpstart(const char *s1, const char *s2) -{ - size_t n = strlen(s2); - return strncmp(s1, s2, n); -} - -/** Compare the s1_len-byte string <b>s1</b> with <b>s2</b>, - * without depending on a terminating nul in s1. Sorting order is first by - * length, then lexically; return values are as for strcmp. - */ -int -strcmp_len(const char *s1, const char *s2, size_t s1_len) -{ - size_t s2_len = strlen(s2); - if (s1_len < s2_len) - return -1; - if (s1_len > s2_len) - return 1; - return fast_memcmp(s1, s2, s2_len); -} - -/** Compares the first strlen(s2) characters of s1 with s2. Returns as for - * strcasecmp. - */ -int -strcasecmpstart(const char *s1, const char *s2) -{ - size_t n = strlen(s2); - return strncasecmp(s1, s2, n); -} - -/** Compares the last strlen(s2) characters of s1 with s2. Returns as for - * strcmp. - */ -int -strcmpend(const char *s1, const char *s2) -{ - size_t n1 = strlen(s1), n2 = strlen(s2); - if (n2>n1) - return strcmp(s1,s2); - else - return strncmp(s1+(n1-n2), s2, n2); -} - -/** Compares the last strlen(s2) characters of s1 with s2. Returns as for - * strcasecmp. - */ -int -strcasecmpend(const char *s1, const char *s2) -{ - size_t n1 = strlen(s1), n2 = strlen(s2); - if (n2>n1) /* then they can't be the same; figure out which is bigger */ - return strcasecmp(s1,s2); - else - return strncasecmp(s1+(n1-n2), s2, n2); -} - /** Compare the value of the string <b>prefix</b> with the start of the * <b>memlen</b>-byte memory chunk at <b>mem</b>. Return as for strcmp. * @@ -571,179 +416,6 @@ fast_memcmpstart(const void *mem, size_t memlen, return fast_memcmp(mem, prefix, plen); } -/** Return a pointer to the first char of s that is not whitespace and - * not a comment, or to the terminating NUL if no such character exists. - */ -const char * -eat_whitespace(const char *s) -{ - tor_assert(s); - - while (1) { - switch (*s) { - case '\0': - default: - return s; - case ' ': - case '\t': - case '\n': - case '\r': - ++s; - break; - case '#': - ++s; - while (*s && *s != '\n') - ++s; - } - } -} - -/** Return a pointer to the first char of s that is not whitespace and - * not a comment, or to the terminating NUL if no such character exists. - */ -const char * -eat_whitespace_eos(const char *s, const char *eos) -{ - tor_assert(s); - tor_assert(eos && s <= eos); - - while (s < eos) { - switch (*s) { - case '\0': - default: - return s; - case ' ': - case '\t': - case '\n': - case '\r': - ++s; - break; - case '#': - ++s; - while (s < eos && *s && *s != '\n') - ++s; - } - } - return s; -} - -/** Return a pointer to the first char of s that is not a space or a tab - * or a \\r, or to the terminating NUL if no such character exists. */ -const char * -eat_whitespace_no_nl(const char *s) -{ - while (*s == ' ' || *s == '\t' || *s == '\r') - ++s; - return s; -} - -/** As eat_whitespace_no_nl, but stop at <b>eos</b> whether we have - * found a non-whitespace character or not. */ -const char * -eat_whitespace_eos_no_nl(const char *s, const char *eos) -{ - while (s < eos && (*s == ' ' || *s == '\t' || *s == '\r')) - ++s; - return s; -} - -/** Return a pointer to the first char of s that is whitespace or <b>#</b>, - * or to the terminating NUL if no such character exists. - */ -const char * -find_whitespace(const char *s) -{ - /* tor_assert(s); */ - while (1) { - switch (*s) - { - case '\0': - case '#': - case ' ': - case '\r': - case '\n': - case '\t': - return s; - default: - ++s; - } - } -} - -/** As find_whitespace, but stop at <b>eos</b> whether we have found a - * whitespace or not. */ -const char * -find_whitespace_eos(const char *s, const char *eos) -{ - /* tor_assert(s); */ - while (s < eos) { - switch (*s) - { - case '\0': - case '#': - case ' ': - case '\r': - case '\n': - case '\t': - return s; - default: - ++s; - } - } - return s; -} - -/** Return the first occurrence of <b>needle</b> in <b>haystack</b> that - * occurs at the start of a line (that is, at the beginning of <b>haystack</b> - * or immediately after a newline). Return NULL if no such string is found. - */ -const char * -find_str_at_start_of_line(const char *haystack, const char *needle) -{ - size_t needle_len = strlen(needle); - - do { - if (!strncmp(haystack, needle, needle_len)) - return haystack; - - haystack = strchr(haystack, '\n'); - if (!haystack) - return NULL; - else - ++haystack; - } while (*haystack); - - return NULL; -} - -/** Returns true if <b>string</b> could be a C identifier. - A C identifier must begin with a letter or an underscore and the - rest of its characters can be letters, numbers or underscores. No - length limit is imposed. */ -int -string_is_C_identifier(const char *string) -{ - size_t iter; - size_t length = strlen(string); - if (!length) - return 0; - - for (iter = 0; iter < length ; iter++) { - if (iter == 0) { - if (!(TOR_ISALPHA(string[iter]) || - string[iter] == '_')) - return 0; - } else { - if (!(TOR_ISALPHA(string[iter]) || - TOR_ISDIGIT(string[iter]) || - string[iter] == '_')) - return 0; - } - } - - return 1; -} - /** Return true iff the 'len' bytes at 'mem' are all zero. */ int tor_mem_is_zero(const char *mem, size_t len) @@ -2923,307 +2595,6 @@ expand_filename(const char *filename) #endif /* defined(_WIN32) */ } -#define MAX_SCANF_WIDTH 9999 - -/** Helper: given an ASCII-encoded decimal digit, return its numeric value. - * NOTE: requires that its input be in-bounds. */ -static int -digit_to_num(char d) -{ - int num = ((int)d) - (int)'0'; - tor_assert(num <= 9 && num >= 0); - return num; -} - -/** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b> - * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On - * success, store the result in <b>out</b>, advance bufp to the next - * character, and return 0. On failure, return -1. */ -static int -scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base) -{ - unsigned long result = 0; - int scanned_so_far = 0; - const int hex = base==16; - tor_assert(base == 10 || base == 16); - if (!bufp || !*bufp || !out) - return -1; - if (width<0) - width=MAX_SCANF_WIDTH; - - while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp)) - && scanned_so_far < width) { - unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++); - // Check for overflow beforehand, without actually causing any overflow - // This preserves functionality on compilers that don't wrap overflow - // (i.e. that trap or optimise away overflow) - // result * base + digit > ULONG_MAX - // result * base > ULONG_MAX - digit - if (result > (ULONG_MAX - digit)/base) - return -1; /* Processing this digit would overflow */ - result = result * base + digit; - ++scanned_so_far; - } - - if (!scanned_so_far) /* No actual digits scanned */ - return -1; - - *out = result; - return 0; -} - -/** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b> - * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On - * success, store the result in <b>out</b>, advance bufp to the next - * character, and return 0. On failure, return -1. */ -static int -scan_signed(const char **bufp, long *out, int width) -{ - int neg = 0; - unsigned long result = 0; - - if (!bufp || !*bufp || !out) - return -1; - if (width<0) - width=MAX_SCANF_WIDTH; - - if (**bufp == '-') { - neg = 1; - ++*bufp; - --width; - } - - if (scan_unsigned(bufp, &result, width, 10) < 0) - return -1; - - if (neg && result > 0) { - if (result > ((unsigned long)LONG_MAX) + 1) - return -1; /* Underflow */ - else if (result == ((unsigned long)LONG_MAX) + 1) - *out = LONG_MIN; - else { - /* We once had a far more clever no-overflow conversion here, but - * some versions of GCC apparently ran it into the ground. Now - * we just check for LONG_MIN explicitly. - */ - *out = -(long)result; - } - } else { - if (result > LONG_MAX) - return -1; /* Overflow */ - *out = (long)result; - } - - return 0; -} - -/** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to - * <b>width</b> characters. (Handle arbitrary width if <b>width</b> is less - * than 0.) On success, store the result in <b>out</b>, advance bufp to the - * next character, and return 0. On failure, return -1. */ -static int -scan_double(const char **bufp, double *out, int width) -{ - int neg = 0; - double result = 0; - int scanned_so_far = 0; - - if (!bufp || !*bufp || !out) - return -1; - if (width<0) - width=MAX_SCANF_WIDTH; - - if (**bufp == '-') { - neg = 1; - ++*bufp; - } - - while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { - const int digit = digit_to_num(*(*bufp)++); - result = result * 10 + digit; - ++scanned_so_far; - } - if (**bufp == '.') { - double fracval = 0, denominator = 1; - ++*bufp; - ++scanned_so_far; - while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { - const int digit = digit_to_num(*(*bufp)++); - fracval = fracval * 10 + digit; - denominator *= 10; - ++scanned_so_far; - } - result += fracval / denominator; - } - - if (!scanned_so_far) /* No actual digits scanned */ - return -1; - - *out = neg ? -result : result; - return 0; -} - -/** Helper: copy up to <b>width</b> non-space characters from <b>bufp</b> to - * <b>out</b>. Make sure <b>out</b> is nul-terminated. Advance <b>bufp</b> - * to the next non-space character or the EOS. */ -static int -scan_string(const char **bufp, char *out, int width) -{ - int scanned_so_far = 0; - if (!bufp || !out || width < 0) - return -1; - while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) { - *out++ = *(*bufp)++; - ++scanned_so_far; - } - *out = '\0'; - return 0; -} - -/** Locale-independent, minimal, no-surprises scanf variant, accepting only a - * restricted pattern format. For more info on what it supports, see - * tor_sscanf() documentation. */ -int -tor_vsscanf(const char *buf, const char *pattern, va_list ap) -{ - int n_matched = 0; - - while (*pattern) { - if (*pattern != '%') { - if (*buf == *pattern) { - ++buf; - ++pattern; - continue; - } else { - return n_matched; - } - } else { - int width = -1; - int longmod = 0; - ++pattern; - if (TOR_ISDIGIT(*pattern)) { - width = digit_to_num(*pattern++); - while (TOR_ISDIGIT(*pattern)) { - width *= 10; - width += digit_to_num(*pattern++); - if (width > MAX_SCANF_WIDTH) - return -1; - } - if (!width) /* No zero-width things. */ - return -1; - } - if (*pattern == 'l') { - longmod = 1; - ++pattern; - } - if (*pattern == 'u' || *pattern == 'x') { - unsigned long u; - const int base = (*pattern == 'u') ? 10 : 16; - if (!*buf) - return n_matched; - if (scan_unsigned(&buf, &u, width, base)<0) - return n_matched; - if (longmod) { - unsigned long *out = va_arg(ap, unsigned long *); - *out = u; - } else { - unsigned *out = va_arg(ap, unsigned *); - if (u > UINT_MAX) - return n_matched; - *out = (unsigned) u; - } - ++pattern; - ++n_matched; - } else if (*pattern == 'f') { - double *d = va_arg(ap, double *); - if (!longmod) - return -1; /* float not supported */ - if (!*buf) - return n_matched; - if (scan_double(&buf, d, width)<0) - return n_matched; - ++pattern; - ++n_matched; - } else if (*pattern == 'd') { - long lng=0; - if (scan_signed(&buf, &lng, width)<0) - return n_matched; - if (longmod) { - long *out = va_arg(ap, long *); - *out = lng; - } else { - int *out = va_arg(ap, int *); -#if LONG_MAX > INT_MAX - if (lng < INT_MIN || lng > INT_MAX) - return n_matched; -#endif - *out = (int)lng; - } - ++pattern; - ++n_matched; - } else if (*pattern == 's') { - char *s = va_arg(ap, char *); - if (longmod) - return -1; - if (width < 0) - return -1; - if (scan_string(&buf, s, width)<0) - return n_matched; - ++pattern; - ++n_matched; - } else if (*pattern == 'c') { - char *ch = va_arg(ap, char *); - if (longmod) - return -1; - if (width != -1) - return -1; - if (!*buf) - return n_matched; - *ch = *buf++; - ++pattern; - ++n_matched; - } else if (*pattern == '%') { - if (*buf != '%') - return n_matched; - if (longmod) - return -1; - ++buf; - ++pattern; - } else { - return -1; /* Unrecognized pattern component. */ - } - } - } - - return n_matched; -} - -/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b> - * and store the results in the corresponding argument fields. Differs from - * sscanf in that: - * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, and %c. - * <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1) - * <li>It does not handle arbitrarily long widths. - * <li>Numbers do not consume any space characters. - * <li>It is locale-independent. - * <li>%u and %x do not consume any space. - * <li>It returns -1 on malformed patterns.</ul> - * - * (As with other locale-independent functions, we need this to parse data that - * is in ASCII without worrying that the C library's locale-handling will make - * miscellaneous characters look like numbers, spaces, and so on.) - */ -int -tor_sscanf(const char *buf, const char *pattern, ...) -{ - int r; - va_list ap; - va_start(ap, pattern); - r = tor_vsscanf(buf, pattern, ap); - va_end(ap); - return r; -} - /** Append the string produced by tor_asprintf(<b>pattern</b>, <b>...</b>) * to <b>sl</b>. */ void diff --git a/src/common/util.h b/src/common/util.h index 8977b460bf..5833fe567f 100644 --- a/src/common/util.h +++ b/src/common/util.h @@ -25,6 +25,8 @@ #include "lib/err/torerr.h" #include "lib/malloc/util_malloc.h" #include "lib/wallclock/approx_time.h" +#include "lib/string/util_string.h" +#include "lib/string/scanf.h" #include "common/util_bug.h" #ifndef O_BINARY @@ -96,22 +98,6 @@ uint32_t tor_add_u32_nowrap(uint32_t a, uint32_t b); /* String manipulation */ -/** Allowable characters in a hexadecimal string. */ -#define HEX_CHARACTERS "0123456789ABCDEFabcdef" -void tor_strlower(char *s) ATTR_NONNULL((1)); -void tor_strupper(char *s) ATTR_NONNULL((1)); -int tor_strisprint(const char *s) ATTR_NONNULL((1)); -int tor_strisnonupper(const char *s) ATTR_NONNULL((1)); -int tor_strisspace(const char *s); -int strcmp_opt(const char *s1, const char *s2); -int strcmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2)); -int strcmp_len(const char *s1, const char *s2, size_t len) ATTR_NONNULL((1,2)); -int strcasecmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2)); -int strcmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2)); -int strcasecmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2)); -int fast_memcmpstart(const void *mem, size_t memlen, const char *prefix); - -void tor_strstrip(char *s, const char *strip) ATTR_NONNULL((1,2)); long tor_parse_long(const char *s, int base, long min, long max, int *ok, char **next); unsigned long tor_parse_ulong(const char *s, int base, unsigned long min, @@ -120,16 +106,9 @@ double tor_parse_double(const char *s, double min, double max, int *ok, char **next); uint64_t tor_parse_uint64(const char *s, int base, uint64_t min, uint64_t max, int *ok, char **next); + const char *hex_str(const char *from, size_t fromlen) ATTR_NONNULL((1)); -const char *eat_whitespace(const char *s); -const char *eat_whitespace_eos(const char *s, const char *eos); -const char *eat_whitespace_no_nl(const char *s); -const char *eat_whitespace_eos_no_nl(const char *s, const char *eos); -const char *find_whitespace(const char *s); -const char *find_whitespace_eos(const char *s, const char *eos); -const char *find_str_at_start_of_line(const char *haystack, - const char *needle); -int string_is_C_identifier(const char *string); + int string_is_key_value(int severity, const char *string); int string_is_valid_dest(const char *string); int string_is_valid_nonrfc_hostname(const char *string); @@ -139,6 +118,7 @@ int string_is_valid_ipv6_address(const char *string); int tor_mem_is_zero(const char *mem, size_t len); int tor_digest_is_zero(const char *digest); int tor_digest256_is_zero(const char *digest); + char *esc_for_log(const char *string) ATTR_MALLOC; char *esc_for_log_len(const char *chars, size_t n) ATTR_MALLOC; const char *escaped(const char *string); @@ -147,11 +127,6 @@ char *tor_escape_str_for_pt_args(const char *string, const char *chars_to_escape); struct smartlist_t; -int tor_vsscanf(const char *buf, const char *pattern, va_list ap) \ - CHECK_SCANF(2, 0); -int tor_sscanf(const char *buf, const char *pattern, ...) - CHECK_SCANF(2, 3); - void smartlist_add_asprintf(struct smartlist_t *sl, const char *pattern, ...) CHECK_PRINTF(2, 3); void smartlist_add_vasprintf(struct smartlist_t *sl, const char *pattern, diff --git a/src/common/util_format.c b/src/common/util_format.c index 713e87129c..8b299725a4 100644 --- a/src/common/util_format.c +++ b/src/common/util_format.c @@ -465,39 +465,6 @@ base16_encode(char *dest, size_t destlen, const char *src, size_t srclen) *cp = '\0'; } -/** Helper: given a hex digit, return its value, or -1 if it isn't hex. */ -static inline int -hex_decode_digit_(char c) -{ - switch (c) { - case '0': return 0; - case '1': return 1; - case '2': return 2; - case '3': return 3; - case '4': return 4; - case '5': return 5; - case '6': return 6; - case '7': return 7; - case '8': return 8; - case '9': return 9; - case 'A': case 'a': return 10; - case 'B': case 'b': return 11; - case 'C': case 'c': return 12; - case 'D': case 'd': return 13; - case 'E': case 'e': return 14; - case 'F': case 'f': return 15; - default: - return -1; - } -} - -/** Helper: given a hex digit, return its value, or -1 if it isn't hex. */ -int -hex_decode_digit(char c) -{ - return hex_decode_digit_(c); -} - /** Given a hexadecimal string of <b>srclen</b> bytes in <b>src</b>, decode * it and store the result in the <b>destlen</b>-byte buffer at <b>dest</b>. * Return the number of bytes decoded on success, -1 on failure. If @@ -520,8 +487,8 @@ base16_decode(char *dest, size_t destlen, const char *src, size_t srclen) end = src+srclen; while (src<end) { - v1 = hex_decode_digit_(*src); - v2 = hex_decode_digit_(*(src+1)); + v1 = hex_decode_digit(*src); + v2 = hex_decode_digit(*(src+1)); if (v1<0||v2<0) return -1; *(uint8_t*)dest = (v1<<4)|v2; diff --git a/src/common/util_format.h b/src/common/util_format.h index 3580c28637..713fd0d003 100644 --- a/src/common/util_format.h +++ b/src/common/util_format.h @@ -44,9 +44,7 @@ void base32_encode(char *dest, size_t destlen, const char *src, size_t srclen); int base32_decode(char *dest, size_t destlen, const char *src, size_t srclen); size_t base32_encoded_size(size_t srclen); -int hex_decode_digit(char c); void base16_encode(char *dest, size_t destlen, const char *src, size_t srclen); int base16_decode(char *dest, size_t destlen, const char *src, size_t srclen); #endif /* !defined(TOR_UTIL_FORMAT_H) */ - |