diff options
Diffstat (limited to 'src/lib/string/scanf.c')
-rw-r--r-- | src/lib/string/scanf.c | 312 |
1 files changed, 312 insertions, 0 deletions
diff --git a/src/lib/string/scanf.c b/src/lib/string/scanf.c new file mode 100644 index 0000000000..0c5082799c --- /dev/null +++ b/src/lib/string/scanf.c @@ -0,0 +1,312 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "lib/string/scanf.h" +#include "lib/string/compat_ctype.h" +#include "lib/cc/torint.h" +#include "lib/err/torerr.h" + +#include <stdlib.h> + +#define MAX_SCANF_WIDTH 9999 + +/** Helper: given an ASCII-encoded decimal digit, return its numeric value. + * NOTE: requires that its input be in-bounds. */ +static int +digit_to_num(char d) +{ + int num = ((int)d) - (int)'0'; + raw_assert(num <= 9 && num >= 0); + return num; +} + +/** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b> + * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On + * success, store the result in <b>out</b>, advance bufp to the next + * character, and return 0. On failure, return -1. */ +static int +scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base) +{ + unsigned long result = 0; + int scanned_so_far = 0; + const int hex = base==16; + raw_assert(base == 10 || base == 16); + if (!bufp || !*bufp || !out) + return -1; + if (width<0) + width=MAX_SCANF_WIDTH; + + while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp)) + && scanned_so_far < width) { + unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++); + // Check for overflow beforehand, without actually causing any overflow + // This preserves functionality on compilers that don't wrap overflow + // (i.e. that trap or optimise away overflow) + // result * base + digit > ULONG_MAX + // result * base > ULONG_MAX - digit + if (result > (ULONG_MAX - digit)/base) + return -1; /* Processing this digit would overflow */ + result = result * base + digit; + ++scanned_so_far; + } + + if (!scanned_so_far) /* No actual digits scanned */ + return -1; + + *out = result; + return 0; +} + +/** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b> + * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On + * success, store the result in <b>out</b>, advance bufp to the next + * character, and return 0. On failure, return -1. */ +static int +scan_signed(const char **bufp, long *out, int width) +{ + int neg = 0; + unsigned long result = 0; + + if (!bufp || !*bufp || !out) + return -1; + if (width<0) + width=MAX_SCANF_WIDTH; + + if (**bufp == '-') { + neg = 1; + ++*bufp; + --width; + } + + if (scan_unsigned(bufp, &result, width, 10) < 0) + return -1; + + if (neg && result > 0) { + if (result > ((unsigned long)LONG_MAX) + 1) + return -1; /* Underflow */ + else if (result == ((unsigned long)LONG_MAX) + 1) + *out = LONG_MIN; + else { + /* We once had a far more clever no-overflow conversion here, but + * some versions of GCC apparently ran it into the ground. Now + * we just check for LONG_MIN explicitly. + */ + *out = -(long)result; + } + } else { + if (result > LONG_MAX) + return -1; /* Overflow */ + *out = (long)result; + } + + return 0; +} + +/** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to + * <b>width</b> characters. (Handle arbitrary width if <b>width</b> is less + * than 0.) On success, store the result in <b>out</b>, advance bufp to the + * next character, and return 0. On failure, return -1. */ +static int +scan_double(const char **bufp, double *out, int width) +{ + int neg = 0; + double result = 0; + int scanned_so_far = 0; + + if (!bufp || !*bufp || !out) + return -1; + if (width<0) + width=MAX_SCANF_WIDTH; + + if (**bufp == '-') { + neg = 1; + ++*bufp; + } + + while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { + const int digit = digit_to_num(*(*bufp)++); + result = result * 10 + digit; + ++scanned_so_far; + } + if (**bufp == '.') { + double fracval = 0, denominator = 1; + ++*bufp; + ++scanned_so_far; + while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { + const int digit = digit_to_num(*(*bufp)++); + fracval = fracval * 10 + digit; + denominator *= 10; + ++scanned_so_far; + } + result += fracval / denominator; + } + + if (!scanned_so_far) /* No actual digits scanned */ + return -1; + + *out = neg ? -result : result; + return 0; +} + +/** Helper: copy up to <b>width</b> non-space characters from <b>bufp</b> to + * <b>out</b>. Make sure <b>out</b> is nul-terminated. Advance <b>bufp</b> + * to the next non-space character or the EOS. */ +static int +scan_string(const char **bufp, char *out, int width) +{ + int scanned_so_far = 0; + if (!bufp || !out || width < 0) + return -1; + while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) { + *out++ = *(*bufp)++; + ++scanned_so_far; + } + *out = '\0'; + return 0; +} + +/** Locale-independent, minimal, no-surprises scanf variant, accepting only a + * restricted pattern format. For more info on what it supports, see + * tor_sscanf() documentation. */ +int +tor_vsscanf(const char *buf, const char *pattern, va_list ap) +{ + int n_matched = 0; + + while (*pattern) { + if (*pattern != '%') { + if (*buf == *pattern) { + ++buf; + ++pattern; + continue; + } else { + return n_matched; + } + } else { + int width = -1; + int longmod = 0; + ++pattern; + if (TOR_ISDIGIT(*pattern)) { + width = digit_to_num(*pattern++); + while (TOR_ISDIGIT(*pattern)) { + width *= 10; + width += digit_to_num(*pattern++); + if (width > MAX_SCANF_WIDTH) + return -1; + } + if (!width) /* No zero-width things. */ + return -1; + } + if (*pattern == 'l') { + longmod = 1; + ++pattern; + } + if (*pattern == 'u' || *pattern == 'x') { + unsigned long u; + const int base = (*pattern == 'u') ? 10 : 16; + if (!*buf) + return n_matched; + if (scan_unsigned(&buf, &u, width, base)<0) + return n_matched; + if (longmod) { + unsigned long *out = va_arg(ap, unsigned long *); + *out = u; + } else { + unsigned *out = va_arg(ap, unsigned *); + if (u > UINT_MAX) + return n_matched; + *out = (unsigned) u; + } + ++pattern; + ++n_matched; + } else if (*pattern == 'f') { + double *d = va_arg(ap, double *); + if (!longmod) + return -1; /* float not supported */ + if (!*buf) + return n_matched; + if (scan_double(&buf, d, width)<0) + return n_matched; + ++pattern; + ++n_matched; + } else if (*pattern == 'd') { + long lng=0; + if (scan_signed(&buf, &lng, width)<0) + return n_matched; + if (longmod) { + long *out = va_arg(ap, long *); + *out = lng; + } else { + int *out = va_arg(ap, int *); +#if LONG_MAX > INT_MAX + if (lng < INT_MIN || lng > INT_MAX) + return n_matched; +#endif + *out = (int)lng; + } + ++pattern; + ++n_matched; + } else if (*pattern == 's') { + char *s = va_arg(ap, char *); + if (longmod) + return -1; + if (width < 0) + return -1; + if (scan_string(&buf, s, width)<0) + return n_matched; + ++pattern; + ++n_matched; + } else if (*pattern == 'c') { + char *ch = va_arg(ap, char *); + if (longmod) + return -1; + if (width != -1) + return -1; + if (!*buf) + return n_matched; + *ch = *buf++; + ++pattern; + ++n_matched; + } else if (*pattern == '%') { + if (*buf != '%') + return n_matched; + if (longmod) + return -1; + ++buf; + ++pattern; + } else { + return -1; /* Unrecognized pattern component. */ + } + } + } + + return n_matched; +} + +/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b> + * and store the results in the corresponding argument fields. Differs from + * sscanf in that: + * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, and %c. + * <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1) + * <li>It does not handle arbitrarily long widths. + * <li>Numbers do not consume any space characters. + * <li>It is locale-independent. + * <li>%u and %x do not consume any space. + * <li>It returns -1 on malformed patterns.</ul> + * + * (As with other locale-independent functions, we need this to parse data that + * is in ASCII without worrying that the C library's locale-handling will make + * miscellaneous characters look like numbers, spaces, and so on.) + */ +int +tor_sscanf(const char *buf, const char *pattern, ...) +{ + int r; + va_list ap; + va_start(ap, pattern); + r = tor_vsscanf(buf, pattern, ap); + va_end(ap); + return r; +} |