Move formatting functions around.

The base64 and base32 functions used to be in crypto.c; crypto_format.h had no header; some general-purpose functions were in crypto_curve25519.c. This patch makes a {crypto,util}_format.[ch], and puts more functions there. Small modules are beautiful!
author: Nick Mathewson <nickm@torproject.org> 2015-07-31 11:21:34 -0400
committer: Nick Mathewson <nickm@torproject.org> 2015-07-31 11:21:34 -0400
commit: 347fe449fe818f97e0f3ba29dd0a08ff6d39081e (patch)
tree: 9397c155e38a7e7084b91c55f1c57f576aa42b25 /src/common/util_format.c
parent: 8c83e8cec0e8d4c29577ae7c7b27637e5b91c99e (diff)
download: tor-347fe449fe818f97e0f3ba29dd0a08ff6d39081e.tar.gz
tor-347fe449fe818f97e0f3ba29dd0a08ff6d39081e.zip
1 files changed, 528 insertions, 0 deletions
diff --git a/src/common/util_format.c b/src/common/util_format.c
new file mode 100644
index 0000000000..dc544a6c2e
--- /dev/null
+++ b/src/common/util_format.c
@@ -0,0 +1,528 @@
+/* Copyright (c) 2001, Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2015, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#include "orconfig.h"
+#include "torlog.h"
+#include "util.h"
+#include "util_format.h"
+#include "torint.h"
+
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+
+/** Implements base32 encoding as in RFC 4648.  Limitation: Requires
+ * that srclen*8 is a multiple of 5.
+ */
+void
+base32_encode(char *dest, size_t destlen, const char *src, size_t srclen)
+{
+  unsigned int i, v, u;
+  size_t nbits = srclen * 8, bit;
+
+  tor_assert(srclen < SIZE_T_CEILING/8);
+  tor_assert((nbits%5) == 0); /* We need an even multiple of 5 bits. */
+  tor_assert((nbits/5)+1 <= destlen); /* We need enough space. */
+  tor_assert(destlen < SIZE_T_CEILING);
+
+  for (i=0,bit=0; bit < nbits; ++i, bit+=5) {
+    /* set v to the 16-bit value starting at src[bits/8], 0-padded. */
+    v = ((uint8_t)src[bit/8]) << 8;
+    if (bit+5<nbits) v += (uint8_t)src[(bit/8)+1];
+    /* set u to the 5-bit value at the bit'th bit of src. */
+    u = (v >> (11-(bit%8))) & 0x1F;
+    dest[i] = BASE32_CHARS[u];
+  }
+  dest[i] = '\0';
+}
+
+/** Implements base32 decoding as in RFC 4648.  Limitation: Requires
+ * that srclen*5 is a multiple of 8. Returns 0 if successful, -1 otherwise.
+ */
+int
+base32_decode(char *dest, size_t destlen, const char *src, size_t srclen)
+{
+  /* XXXX we might want to rewrite this along the lines of base64_decode, if
+   * it ever shows up in the profile. */
+  unsigned int i;
+  size_t nbits, j, bit;
+  char *tmp;
+  nbits = srclen * 5;
+
+  tor_assert(srclen < SIZE_T_CEILING / 5);
+  tor_assert((nbits%8) == 0); /* We need an even multiple of 8 bits. */
+  tor_assert((nbits/8) <= destlen); /* We need enough space. */
+  tor_assert(destlen < SIZE_T_CEILING);
+
+  memset(dest, 0, destlen);
+
+  /* Convert base32 encoded chars to the 5-bit values that they represent. */
+  tmp = tor_malloc_zero(srclen);
+  for (j = 0; j < srclen; ++j) {
+    if (src[j] > 0x60 && src[j] < 0x7B) tmp[j] = src[j] - 0x61;
+    else if (src[j] > 0x31 && src[j] < 0x38) tmp[j] = src[j] - 0x18;
+    else if (src[j] > 0x40 && src[j] < 0x5B) tmp[j] = src[j] - 0x41;
+    else {
+      log_warn(LD_BUG, "illegal character in base32 encoded string");
+      tor_free(tmp);
+      return -1;
+    }
+  }
+
+  /* Assemble result byte-wise by applying five possible cases. */
+  for (i = 0, bit = 0; bit < nbits; ++i, bit += 8) {
+    switch (bit % 40) {
+    case 0:
+      dest[i] = (((uint8_t)tmp[(bit/5)]) << 3) +
+                (((uint8_t)tmp[(bit/5)+1]) >> 2);
+      break;
+    case 8:
+      dest[i] = (((uint8_t)tmp[(bit/5)]) << 6) +
+                (((uint8_t)tmp[(bit/5)+1]) << 1) +
+                (((uint8_t)tmp[(bit/5)+2]) >> 4);
+      break;
+    case 16:
+      dest[i] = (((uint8_t)tmp[(bit/5)]) << 4) +
+                (((uint8_t)tmp[(bit/5)+1]) >> 1);
+      break;
+    case 24:
+      dest[i] = (((uint8_t)tmp[(bit/5)]) << 7) +
+                (((uint8_t)tmp[(bit/5)+1]) << 2) +
+                (((uint8_t)tmp[(bit/5)+2]) >> 3);
+      break;
+    case 32:
+      dest[i] = (((uint8_t)tmp[(bit/5)]) << 5) +
+                ((uint8_t)tmp[(bit/5)+1]);
+      break;
+    }
+  }
+
+  memset(tmp, 0, srclen); /* on the heap, this should be safe */
+  tor_free(tmp);
+  tmp = NULL;
+  return 0;
+}
+
+#define BASE64_OPENSSL_LINELEN 64
+
+/** Return the Base64 encoded size of <b>srclen</b> bytes of data in
+ * bytes.
+ *
+ * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return the size
+ * of the encoded output as multiline output (64 character, `\n' terminated
+ * lines).
+ */
+size_t
+base64_encode_size(size_t srclen, int flags)
+{
+  size_t enclen;
+  tor_assert(srclen < INT_MAX);
+
+  if (srclen == 0)
+    return 0;
+
+  enclen = ((srclen - 1) / 3) * 4 + 4;
+  if (flags & BASE64_ENCODE_MULTILINE) {
+    size_t remainder = enclen % BASE64_OPENSSL_LINELEN;
+    enclen += enclen / BASE64_OPENSSL_LINELEN;
+    if (remainder)
+      enclen++;
+  }
+  tor_assert(enclen < INT_MAX && enclen > srclen);
+  return enclen;
+}
+
+/** Internal table mapping 6 bit values to the Base64 alphabet. */
+static const char base64_encode_table[64] = {
+  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
+  'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+  'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
+  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
+  'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
+  'w', 'x', 'y', 'z', '0', '1', '2', '3',
+  '4', '5', '6', '7', '8', '9', '+', '/'
+};
+
+/** Base64 encode <b>srclen</b> bytes of data from <b>src</b>.  Write
+ * the result into <b>dest</b>, if it will fit within <b>destlen</b>
+ * bytes. Return the number of bytes written on success; -1 if
+ * destlen is too short, or other failure.
+ *
+ * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return encoded
+ * output in multiline format (64 character, `\n' terminated lines).
+ */
+int
+base64_encode(char *dest, size_t destlen, const char *src, size_t srclen,
+              int flags)
+{
+  const unsigned char *usrc = (unsigned char *)src;
+  const unsigned char *eous = usrc + srclen;
+  char *d = dest;
+  uint32_t n = 0;
+  size_t linelen = 0;
+  size_t enclen;
+  int n_idx = 0;
+
+  if (!src || !dest)
+    return -1;
+
+  /* Ensure that there is sufficient space, including the NUL. */
+  enclen = base64_encode_size(srclen, flags);
+  if (destlen < enclen + 1)
+    return -1;
+  if (destlen > SIZE_T_CEILING)
+    return -1;
+  if (enclen > INT_MAX)
+    return -1;
+
+  memset(dest, 0, enclen);
+
+  /* XXX/Yawning: If this ends up being too slow, this can be sped up
+   * by separating the multiline format case and the normal case, and
+   * processing 48 bytes of input at a time when newlines are desired.
+   */
+#define ENCODE_CHAR(ch) \
+  STMT_BEGIN                                                    \
+    *d++ = ch;                                                  \
+    if (flags & BASE64_ENCODE_MULTILINE) {                      \
+      if (++linelen % BASE64_OPENSSL_LINELEN == 0) {            \
+        linelen = 0;                                            \
+        *d++ = '\n';                                            \
+      }                                                         \
+    }                                                           \
+  STMT_END
+
+#define ENCODE_N(idx) \
+  ENCODE_CHAR(base64_encode_table[(n >> ((3 - idx) * 6)) & 0x3f])
+
+#define ENCODE_PAD() ENCODE_CHAR('=')
+
+  /* Iterate over all the bytes in src.  Each one will add 8 bits to the
+   * value we're encoding.  Accumulate bits in <b>n</b>, and whenever we
+   * have 24 bits, batch them into 4 bytes and flush those bytes to dest.
+   */
+  for ( ; usrc < eous; ++usrc) {
+    n = (n << 8) | *usrc;
+    if ((++n_idx) == 3) {
+      ENCODE_N(0);
+      ENCODE_N(1);
+      ENCODE_N(2);
+      ENCODE_N(3);
+      n_idx = 0;
+      n = 0;
+    }
+  }
+  switch (n_idx) {
+  case 0:
+    /* 0 leftover bits, no pading to add. */
+    break;
+  case 1:
+    /* 8 leftover bits, pad to 12 bits, write the 2 6-bit values followed
+     * by 2 padding characters.
+     */
+    n <<= 4;
+    ENCODE_N(2);
+    ENCODE_N(3);
+    ENCODE_PAD();
+    ENCODE_PAD();
+    break;
+  case 2:
+    /* 16 leftover bits, pad to 18 bits, write the 3 6-bit values followed
+     * by 1 padding character.
+     */
+    n <<= 2;
+    ENCODE_N(1);
+    ENCODE_N(2);
+    ENCODE_N(3);
+    ENCODE_PAD();
+    break;
+  default:
+    /* Something went catastrophically wrong. */
+    tor_fragile_assert();
+    return -1;
+  }
+
+#undef ENCODE_N
+#undef ENCODE_PAD
+#undef ENCODE_CHAR
+
+  /* Multiline output always includes at least one newline. */
+  if (flags & BASE64_ENCODE_MULTILINE && linelen != 0)
+    *d++ = '\n';
+
+  tor_assert(d - dest == (ptrdiff_t)enclen);
+
+  *d++ = '\0'; /* NUL terminate the output. */
+
+  return (int) enclen;
+}
+
+/** As base64_encode, but do not add any internal spaces or external padding
+ * to the output stream. */
+int
+base64_encode_nopad(char *dest, size_t destlen,
+                    const uint8_t *src, size_t srclen)
+{
+  int n = base64_encode(dest, destlen, (const char*) src, srclen, 0);
+  if (n <= 0)
+    return n;
+  tor_assert((size_t)n < destlen && dest[n] == 0);
+  char *in, *out;
+  in = out = dest;
+  while (*in) {
+    if (*in == '=' || *in == '\n') {
+      ++in;
+    } else {
+      *out++ = *in++;
+    }
+  }
+  *out = 0;
+
+  tor_assert(out - dest <= INT_MAX);
+
+  return (int)(out - dest);
+}
+
+/** As base64_decode, but do not require any padding on the input */
+int
+base64_decode_nopad(uint8_t *dest, size_t destlen,
+                    const char *src, size_t srclen)
+{
+  if (srclen > SIZE_T_CEILING - 4)
+    return -1;
+  char *buf = tor_malloc(srclen + 4);
+  memcpy(buf, src, srclen+1);
+  size_t buflen;
+  switch (srclen % 4)
+    {
+    case 0:
+    default:
+      buflen = srclen;
+      break;
+    case 1:
+      tor_free(buf);
+      return -1;
+    case 2:
+      memcpy(buf+srclen, "==", 3);
+      buflen = srclen + 2;
+      break;
+    case 3:
+      memcpy(buf+srclen, "=", 2);
+      buflen = srclen + 1;
+      break;
+  }
+  int n = base64_decode((char*)dest, destlen, buf, buflen);
+  tor_free(buf);
+  return n;
+}
+
+#undef BASE64_OPENSSL_LINELEN
+
+/** @{ */
+/** Special values used for the base64_decode_table */
+#define X 255
+#define SP 64
+#define PAD 65
+/** @} */
+/** Internal table mapping byte values to what they represent in base64.
+ * Numbers 0..63 are 6-bit integers.  SPs are spaces, and should be
+ * skipped.  Xs are invalid and must not appear in base64. PAD indicates
+ * end-of-string. */
+static const uint8_t base64_decode_table[256] = {
+  X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X, /* */
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
+  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
+  X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
+  X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+};
+
+/** Base64 decode <b>srclen</b> bytes of data from <b>src</b>.  Write
+ * the result into <b>dest</b>, if it will fit within <b>destlen</b>
+ * bytes.  Return the number of bytes written on success; -1 if
+ * destlen is too short, or other failure.
+ *
+ * NOTE 1: destlen is checked conservatively, as though srclen contained no
+ * spaces or padding.
+ *
+ * NOTE 2: This implementation does not check for the correct number of
+ * padding "=" characters at the end of the string, and does not check
+ * for internal padding characters.
+ */
+int
+base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
+{
+  const char *eos = src+srclen;
+  uint32_t n=0;
+  int n_idx=0;
+  char *dest_orig = dest;
+
+  /* Max number of bits == srclen*6.
+   * Number of bytes required to hold all bits == (srclen*6)/8.
+   * Yes, we want to round down: anything that hangs over the end of a
+   * byte is padding. */
+  if (destlen < (srclen*3)/4)
+    return -1;
+  if (destlen > SIZE_T_CEILING)
+    return -1;
+
+  memset(dest, 0, destlen);
+
+  /* Iterate over all the bytes in src.  Each one will add 0 or 6 bits to the
+   * value we're decoding.  Accumulate bits in <b>n</b>, and whenever we have
+   * 24 bits, batch them into 3 bytes and flush those bytes to dest.
+   */
+  for ( ; src < eos; ++src) {
+    unsigned char c = (unsigned char) *src;
+    uint8_t v = base64_decode_table[c];
+    switch (v) {
+      case X:
+        /* This character isn't allowed in base64. */
+        return -1;
+      case SP:
+        /* This character is whitespace, and has no effect. */
+        continue;
+      case PAD:
+        /* We've hit an = character: the data is over. */
+        goto end_of_loop;
+      default:
+        /* We have an actual 6-bit value.  Append it to the bits in n. */
+        n = (n<<6) | v;
+        if ((++n_idx) == 4) {
+          /* We've accumulated 24 bits in n. Flush them. */
+          *dest++ = (n>>16);
+          *dest++ = (n>>8) & 0xff;
+          *dest++ = (n) & 0xff;
+          n_idx = 0;
+          n = 0;
+        }
+    }
+  }
+ end_of_loop:
+  /* If we have leftover bits, we need to cope. */
+  switch (n_idx) {
+    case 0:
+    default:
+      /* No leftover bits.  We win. */
+      break;
+    case 1:
+      /* 6 leftover bits. That's invalid; we can't form a byte out of that. */
+      return -1;
+    case 2:
+      /* 12 leftover bits: The last 4 are padding and the first 8 are data. */
+      *dest++ = n >> 4;
+      break;
+    case 3:
+      /* 18 leftover bits: The last 2 are padding and the first 16 are data. */
+      *dest++ = n >> 10;
+      *dest++ = n >> 2;
+  }
+
+  tor_assert((dest-dest_orig) <= (ssize_t)destlen);
+  tor_assert((dest-dest_orig) <= INT_MAX);
+
+  return (int)(dest-dest_orig);
+}
+#undef X
+#undef SP
+#undef PAD
+
+/** Encode the <b>srclen</b> bytes at <b>src</b> in a NUL-terminated,
+ * uppercase hexadecimal string; store it in the <b>destlen</b>-byte buffer
+ * <b>dest</b>.
+ */
+void
+base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
+{
+  const char *end;
+  char *cp;
+
+  tor_assert(destlen >= srclen*2+1);
+  tor_assert(destlen < SIZE_T_CEILING);
+
+  cp = dest;
+  end = src+srclen;
+  while (src<end) {
+    *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) >> 4 ];
+    *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) & 0xf ];
+    ++src;
+  }
+  *cp = '\0';
+}
+
+/** Helper: given a hex digit, return its value, or -1 if it isn't hex. */
+static INLINE int
+hex_decode_digit_(char c)
+{
+  switch (c) {
+    case '0': return 0;
+    case '1': return 1;
+    case '2': return 2;
+    case '3': return 3;
+    case '4': return 4;
+    case '5': return 5;
+    case '6': return 6;
+    case '7': return 7;
+    case '8': return 8;
+    case '9': return 9;
+    case 'A': case 'a': return 10;
+    case 'B': case 'b': return 11;
+    case 'C': case 'c': return 12;
+    case 'D': case 'd': return 13;
+    case 'E': case 'e': return 14;
+    case 'F': case 'f': return 15;
+    default:
+      return -1;
+  }
+}
+
+/** Helper: given a hex digit, return its value, or -1 if it isn't hex. */
+int
+hex_decode_digit(char c)
+{
+  return hex_decode_digit_(c);
+}
+
+/** Given a hexadecimal string of <b>srclen</b> bytes in <b>src</b>, decode it
+ * and store the result in the <b>destlen</b>-byte buffer at <b>dest</b>.
+ * Return 0 on success, -1 on failure. */
+int
+base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
+{
+  const char *end;
+
+  int v1,v2;
+  if ((srclen % 2) != 0)
+    return -1;
+  if (destlen < srclen/2 || destlen > SIZE_T_CEILING)
+    return -1;
+
+  memset(dest, 0, destlen);
+
+  end = src+srclen;
+  while (src<end) {
+    v1 = hex_decode_digit_(*src);
+    v2 = hex_decode_digit_(*(src+1));
+    if (v1<0||v2<0)
+      return -1;
+    *(uint8_t*)dest = (v1<<4)|v2;
+    ++dest;
+    src+=2;
+  }
+  return 0;
+}
+
author	Nick Mathewson <nickm@torproject.org>	2015-07-31 11:21:34 -0400
committer	Nick Mathewson <nickm@torproject.org>	2015-07-31 11:21:34 -0400
commit	347fe449fe818f97e0f3ba29dd0a08ff6d39081e (patch)
tree	9397c155e38a7e7084b91c55f1c57f576aa42b25 /src/common/util_format.c
parent	8c83e8cec0e8d4c29577ae7c7b27637e5b91c99e (diff)
download	tor-347fe449fe818f97e0f3ba29dd0a08ff6d39081e.tar.gz tor-347fe449fe818f97e0f3ba29dd0a08ff6d39081e.zip