diff options
author | Nick Mathewson <nickm@torproject.org> | 2018-06-21 11:51:49 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2018-06-21 13:08:25 -0400 |
commit | 2d20cbf837263898ce45c1e0a0f11b9e1126eeba (patch) | |
tree | 3af09a830bb62117ec3ddc9d2ea907f861d289ff /src/lib/compress | |
parent | ad7776f66d3e914a0842c20f31162c0e11befa34 (diff) | |
download | tor-2d20cbf837263898ce45c1e0a0f11b9e1126eeba.tar.gz tor-2d20cbf837263898ce45c1e0a0f11b9e1126eeba.zip |
Extract compression functions into a new library.
Diffstat (limited to 'src/lib/compress')
-rw-r--r-- | src/lib/compress/compress.c | 674 | ||||
-rw-r--r-- | src/lib/compress/compress.h | 93 | ||||
-rw-r--r-- | src/lib/compress/compress_lzma.c | 361 | ||||
-rw-r--r-- | src/lib/compress/compress_lzma.h | 46 | ||||
-rw-r--r-- | src/lib/compress/compress_none.c | 53 | ||||
-rw-r--r-- | src/lib/compress/compress_none.h | 20 | ||||
-rw-r--r-- | src/lib/compress/compress_zlib.c | 304 | ||||
-rw-r--r-- | src/lib/compress/compress_zlib.h | 46 | ||||
-rw-r--r-- | src/lib/compress/compress_zstd.c | 536 | ||||
-rw-r--r-- | src/lib/compress/compress_zstd.h | 53 | ||||
-rw-r--r-- | src/lib/compress/include.am | 25 |
11 files changed, 2211 insertions, 0 deletions
diff --git a/src/lib/compress/compress.c b/src/lib/compress/compress.c new file mode 100644 index 0000000000..44bb891af4 --- /dev/null +++ b/src/lib/compress/compress.c @@ -0,0 +1,674 @@ +/* Copyright (c) 2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file compress.c + * \brief Common compression API. + **/ + +#include "orconfig.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "common/torint.h" + +#ifdef HAVE_NETINET_IN_H +#include <netinet/in.h> +#endif + +#include "common/util.h" +#include "common/torlog.h" +#include "common/compress.h" +#include "common/compress_lzma.h" +#include "common/compress_none.h" +#include "common/compress_zlib.h" +#include "common/compress_zstd.h" + +/** Total number of bytes allocated for compression state overhead. */ +static atomic_counter_t total_compress_allocation; + +/** @{ */ +/* These macros define the maximum allowable compression factor. Anything of + * size greater than CHECK_FOR_COMPRESSION_BOMB_AFTER is not allowed to + * have an uncompression factor (uncompressed size:compressed size ratio) of + * any greater than MAX_UNCOMPRESSION_FACTOR. + * + * Picking a value for MAX_UNCOMPRESSION_FACTOR is a trade-off: we want it to + * be small to limit the attack multiplier, but we also want it to be large + * enough so that no legitimate document --even ones we might invent in the + * future -- ever compresses by a factor of greater than + * MAX_UNCOMPRESSION_FACTOR. Within those parameters, there's a reasonably + * large range of possible values. IMO, anything over 8 is probably safe; IMO + * anything under 50 is probably sufficient. + */ +#define MAX_UNCOMPRESSION_FACTOR 25 +#define CHECK_FOR_COMPRESSION_BOMB_AFTER (1024*64) +/** @} */ + +/** Return true if uncompressing an input of size <b>in_size</b> to an input of + * size at least <b>size_out</b> looks like a compression bomb. */ +MOCK_IMPL(int, +tor_compress_is_compression_bomb,(size_t size_in, size_t size_out)) +{ + if (size_in == 0 || size_out < CHECK_FOR_COMPRESSION_BOMB_AFTER) + return 0; + + return (size_out / size_in > MAX_UNCOMPRESSION_FACTOR); +} + +/** Guess the size that <b>in_len</b> will be after compression or + * decompression. */ +static size_t +guess_compress_size(int compress, compress_method_t method, + compression_level_t compression_level, + size_t in_len) +{ + // ignore these for now. + (void)compression_level; + if (method == NO_METHOD) { + /* Guess that we'll need an extra byte, to avoid a needless realloc + * for nul-termination */ + return (in_len < SIZE_MAX) ? in_len + 1 : in_len; + } + + /* Always guess a factor of 2. */ + if (compress) { + in_len /= 2; + } else { + if (in_len < SIZE_T_CEILING/2) + in_len *= 2; + } + return MAX(in_len, 1024); +} + +/** Internal function to implement tor_compress/tor_uncompress, depending on + * whether <b>compress</b> is set. All arguments are as for tor_compress or + * tor_uncompress. */ +static int +tor_compress_impl(int compress, + char **out, size_t *out_len, + const char *in, size_t in_len, + compress_method_t method, + compression_level_t compression_level, + int complete_only, + int protocol_warn_level) +{ + tor_compress_state_t *stream; + int rv; + + stream = tor_compress_new(compress, method, compression_level); + + if (stream == NULL) { + log_warn(LD_GENERAL, "NULL stream while %scompressing", + compress?"":"de"); + log_debug(LD_GENERAL, "method: %d level: %d at len: %lu", + method, compression_level, (unsigned long)in_len); + return -1; + } + + size_t in_len_orig = in_len; + size_t out_remaining, out_alloc; + char *outptr; + + out_remaining = out_alloc = + guess_compress_size(compress, method, compression_level, in_len); + *out = outptr = tor_malloc(out_remaining); + + const int finish = complete_only || compress; + + while (1) { + switch (tor_compress_process(stream, + &outptr, &out_remaining, + &in, &in_len, finish)) { + case TOR_COMPRESS_DONE: + if (in_len == 0 || compress) { + goto done; + } else { + // More data is present, and we're decompressing. So we may need to + // reinitialize the stream if we are handling multiple concatenated + // inputs. + tor_compress_free(stream); + stream = tor_compress_new(compress, method, compression_level); + if (stream == NULL) { + log_warn(LD_GENERAL, "NULL stream while %scompressing", + compress?"":"de"); + goto err; + } + } + break; + case TOR_COMPRESS_OK: + if (compress || complete_only) { + log_fn(protocol_warn_level, LD_PROTOCOL, + "Unexpected %s while %scompressing", + complete_only?"end of input":"result", + compress?"":"de"); + log_debug(LD_GENERAL, "method: %d level: %d at len: %lu", + method, compression_level, (unsigned long)in_len); + goto err; + } else { + if (in_len == 0) { + goto done; + } + } + break; + case TOR_COMPRESS_BUFFER_FULL: { + if (!compress && outptr < *out+out_alloc) { + // A buffer error in this case means that we have a problem + // with our input. + log_fn(protocol_warn_level, LD_PROTOCOL, + "Possible truncated or corrupt compressed data"); + goto err; + } + if (out_alloc >= SIZE_T_CEILING / 2) { + log_warn(LD_GENERAL, "While %scompressing data: ran out of space.", + compress?"":"un"); + goto err; + } + if (!compress && + tor_compress_is_compression_bomb(in_len_orig, out_alloc)) { + // This should already have been caught down in the backend logic. + // LCOV_EXCL_START + tor_assert_nonfatal_unreached(); + goto err; + // LCOV_EXCL_STOP + } + const size_t offset = outptr - *out; + out_alloc *= 2; + *out = tor_realloc(*out, out_alloc); + outptr = *out + offset; + out_remaining = out_alloc - offset; + break; + } + case TOR_COMPRESS_ERROR: + log_fn(protocol_warn_level, LD_GENERAL, + "Error while %scompressing data: bad input?", + compress?"":"un"); + goto err; // bad data. + + // LCOV_EXCL_START + default: + tor_assert_nonfatal_unreached(); + goto err; + // LCOV_EXCL_STOP + } + } + done: + *out_len = outptr - *out; + if (compress && tor_compress_is_compression_bomb(*out_len, in_len_orig)) { + log_warn(LD_BUG, "We compressed something and got an insanely high " + "compression factor; other Tors would think this was a " + "compression bomb."); + goto err; + } + if (!compress) { + // NUL-terminate our output. + if (out_alloc == *out_len) + *out = tor_realloc(*out, out_alloc + 1); + (*out)[*out_len] = '\0'; + } + rv = 0; + goto out; + + err: + tor_free(*out); + *out_len = 0; + rv = -1; + goto out; + + out: + tor_compress_free(stream); + return rv; +} + +/** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly + * allocated buffer, using the method described in <b>method</b>. Store the + * compressed string in *<b>out</b>, and its length in *<b>out_len</b>. + * Return 0 on success, -1 on failure. + */ +int +tor_compress(char **out, size_t *out_len, + const char *in, size_t in_len, + compress_method_t method) +{ + return tor_compress_impl(1, out, out_len, in, in_len, method, + BEST_COMPRESSION, + 1, LOG_WARN); +} + +/** Given zero or more compressed strings of total length <b>in_len</b> bytes + * at <b>in</b>, uncompress them into a newly allocated buffer, using the + * method described in <b>method</b>. Store the uncompressed string in + * *<b>out</b>, and its length in *<b>out_len</b>. Return 0 on success, -1 on + * failure. + * + * If any bytes are written to <b>out</b>, an extra byte NUL is always + * written at the end, but not counted in <b>out_len</b>. This is a + * safety feature to ensure that the output can be treated as a + * NUL-terminated string -- though of course, callers should check + * out_len anyway. + * + * If <b>complete_only</b> is true, we consider a truncated input as a + * failure; otherwise we decompress as much as we can. Warn about truncated + * or corrupt inputs at <b>protocol_warn_level</b>. + */ +int +tor_uncompress(char **out, size_t *out_len, + const char *in, size_t in_len, + compress_method_t method, + int complete_only, + int protocol_warn_level) +{ + return tor_compress_impl(0, out, out_len, in, in_len, method, + BEST_COMPRESSION, + complete_only, protocol_warn_level); +} + +/** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely + * to be compressed or not. If it is, return the likeliest compression method. + * Otherwise, return UNKNOWN_METHOD. + */ +compress_method_t +detect_compression_method(const char *in, size_t in_len) +{ + if (in_len > 2 && fast_memeq(in, "\x1f\x8b", 2)) { + return GZIP_METHOD; + } else if (in_len > 2 && (in[0] & 0x0f) == 8 && + (ntohs(get_uint16(in)) % 31) == 0) { + return ZLIB_METHOD; + } else if (in_len > 2 && + fast_memeq(in, "\x5d\x00\x00", 3)) { + return LZMA_METHOD; + } else if (in_len > 3 && + fast_memeq(in, "\x28\xb5\x2f\xfd", 4)) { + return ZSTD_METHOD; + } else { + return UNKNOWN_METHOD; + } +} + +/** Return 1 if a given <b>method</b> is supported; otherwise 0. */ +int +tor_compress_supports_method(compress_method_t method) +{ + switch (method) { + case GZIP_METHOD: + case ZLIB_METHOD: + return tor_zlib_method_supported(); + case LZMA_METHOD: + return tor_lzma_method_supported(); + case ZSTD_METHOD: + return tor_zstd_method_supported(); + case NO_METHOD: + return 1; + case UNKNOWN_METHOD: + default: + return 0; + } +} + +/** + * Return a bitmask of the supported compression types, where 1<<m is + * set in the bitmask if and only if compression with method <b>m</b> is + * supported. + */ +unsigned +tor_compress_get_supported_method_bitmask(void) +{ + static unsigned supported = 0; + if (supported == 0) { + compress_method_t m; + for (m = NO_METHOD; m <= UNKNOWN_METHOD; ++m) { + if (tor_compress_supports_method(m)) { + supported |= (1u << m); + } + } + } + return supported; +} + +/** Table of compression method names. These should have an "x-" prefix, + * if they are not listed in the IANA content coding registry. */ +static const struct { + const char *name; + compress_method_t method; +} compression_method_names[] = { + { "gzip", GZIP_METHOD }, + { "deflate", ZLIB_METHOD }, + // We call this "x-tor-lzma" rather than "x-lzma", because we impose a + // lower maximum memory usage on the decoding side. + { "x-tor-lzma", LZMA_METHOD }, + { "x-zstd" , ZSTD_METHOD }, + { "identity", NO_METHOD }, + + /* Later entries in this table are not canonical; these are recognized but + * not emitted. */ + { "x-gzip", GZIP_METHOD }, +}; + +/** Return the canonical string representation of the compression method + * <b>method</b>, or NULL if the method isn't recognized. */ +const char * +compression_method_get_name(compress_method_t method) +{ + unsigned i; + for (i = 0; i < ARRAY_LENGTH(compression_method_names); ++i) { + if (method == compression_method_names[i].method) + return compression_method_names[i].name; + } + return NULL; +} + +/** Table of compression human readable method names. */ +static const struct { + compress_method_t method; + const char *name; +} compression_method_human_names[] = { + { NO_METHOD, "uncompressed" }, + { GZIP_METHOD, "gzipped" }, + { ZLIB_METHOD, "deflated" }, + { LZMA_METHOD, "LZMA compressed" }, + { ZSTD_METHOD, "Zstandard compressed" }, + { UNKNOWN_METHOD, "unknown encoding" }, +}; + +/** Return a human readable string representation of the compression method + * <b>method</b>, or NULL if the method isn't recognized. */ +const char * +compression_method_get_human_name(compress_method_t method) +{ + unsigned i; + for (i = 0; i < ARRAY_LENGTH(compression_method_human_names); ++i) { + if (method == compression_method_human_names[i].method) + return compression_method_human_names[i].name; + } + return NULL; +} + +/** Return the compression method represented by the string <b>name</b>, or + * UNKNOWN_METHOD if the string isn't recognized. */ +compress_method_t +compression_method_get_by_name(const char *name) +{ + unsigned i; + for (i = 0; i < ARRAY_LENGTH(compression_method_names); ++i) { + if (!strcmp(compression_method_names[i].name, name)) + return compression_method_names[i].method; + } + return UNKNOWN_METHOD; +} + +/** Return a string representation of the version of the library providing the + * compression method given in <b>method</b>. Returns NULL if <b>method</b> is + * unknown or unsupported. */ +const char * +tor_compress_version_str(compress_method_t method) +{ + switch (method) { + case GZIP_METHOD: + case ZLIB_METHOD: + return tor_zlib_get_version_str(); + case LZMA_METHOD: + return tor_lzma_get_version_str(); + case ZSTD_METHOD: + return tor_zstd_get_version_str(); + case NO_METHOD: + case UNKNOWN_METHOD: + default: + return NULL; + } +} + +/** Return a string representation of the version of the library, found at + * compile time, providing the compression method given in <b>method</b>. + * Returns NULL if <b>method</b> is unknown or unsupported. */ +const char * +tor_compress_header_version_str(compress_method_t method) +{ + switch (method) { + case GZIP_METHOD: + case ZLIB_METHOD: + return tor_zlib_get_header_version_str(); + case LZMA_METHOD: + return tor_lzma_get_header_version_str(); + case ZSTD_METHOD: + return tor_zstd_get_header_version_str(); + case NO_METHOD: + case UNKNOWN_METHOD: + default: + return NULL; + } +} + +/** Return the approximate number of bytes allocated for all + * supported compression schemas. */ +size_t +tor_compress_get_total_allocation(void) +{ + return atomic_counter_get(&total_compress_allocation) + + tor_zlib_get_total_allocation() + + tor_lzma_get_total_allocation() + + tor_zstd_get_total_allocation(); +} + +/** Internal state for an incremental compression/decompression. The body of + * this struct is not exposed. */ +struct tor_compress_state_t { + compress_method_t method; /**< The compression method. */ + + union { + tor_zlib_compress_state_t *zlib_state; + tor_lzma_compress_state_t *lzma_state; + tor_zstd_compress_state_t *zstd_state; + } u; /**< Compression backend state. */ +}; + +/** Construct and return a tor_compress_state_t object using <b>method</b>. If + * <b>compress</b>, it's for compression; otherwise it's for decompression. */ +tor_compress_state_t * +tor_compress_new(int compress, compress_method_t method, + compression_level_t compression_level) +{ + tor_compress_state_t *state; + + state = tor_malloc_zero(sizeof(tor_compress_state_t)); + state->method = method; + + switch (method) { + case GZIP_METHOD: + case ZLIB_METHOD: { + tor_zlib_compress_state_t *zlib_state = + tor_zlib_compress_new(compress, method, compression_level); + + if (zlib_state == NULL) + goto err; + + state->u.zlib_state = zlib_state; + break; + } + case LZMA_METHOD: { + tor_lzma_compress_state_t *lzma_state = + tor_lzma_compress_new(compress, method, compression_level); + + if (lzma_state == NULL) + goto err; + + state->u.lzma_state = lzma_state; + break; + } + case ZSTD_METHOD: { + tor_zstd_compress_state_t *zstd_state = + tor_zstd_compress_new(compress, method, compression_level); + + if (zstd_state == NULL) + goto err; + + state->u.zstd_state = zstd_state; + break; + } + case NO_METHOD: { + break; + } + case UNKNOWN_METHOD: + goto err; + } + + atomic_counter_add(&total_compress_allocation, + sizeof(tor_compress_state_t)); + return state; + + err: + tor_free(state); + return NULL; +} + +/** Compress/decompress some bytes using <b>state</b>. Read up to + * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes + * to *<b>out</b>, adjusting the values as we go. If <b>finish</b> is true, + * we've reached the end of the input. + * + * Return TOR_COMPRESS_DONE if we've finished the entire + * compression/decompression. + * Return TOR_COMPRESS_OK if we're processed everything from the input. + * Return TOR_COMPRESS_BUFFER_FULL if we're out of space on <b>out</b>. + * Return TOR_COMPRESS_ERROR if the stream is corrupt. + */ +tor_compress_output_t +tor_compress_process(tor_compress_state_t *state, + char **out, size_t *out_len, + const char **in, size_t *in_len, + int finish) +{ + tor_assert(state != NULL); + const size_t in_len_orig = *in_len; + const size_t out_len_orig = *out_len; + tor_compress_output_t rv; + + if (*out_len == 0 && (*in_len > 0 || finish)) { + // If we still have input data, but no space for output data, we might as + // well return early and let the caller do the reallocation of the out + // variable. + return TOR_COMPRESS_BUFFER_FULL; + } + + switch (state->method) { + case GZIP_METHOD: + case ZLIB_METHOD: + rv = tor_zlib_compress_process(state->u.zlib_state, + out, out_len, in, in_len, + finish); + break; + case LZMA_METHOD: + rv = tor_lzma_compress_process(state->u.lzma_state, + out, out_len, in, in_len, + finish); + break; + case ZSTD_METHOD: + rv = tor_zstd_compress_process(state->u.zstd_state, + out, out_len, in, in_len, + finish); + break; + case NO_METHOD: + rv = tor_cnone_compress_process(out, out_len, in, in_len, + finish); + break; + default: + case UNKNOWN_METHOD: + goto err; + } + if (BUG((rv == TOR_COMPRESS_OK) && + *in_len == in_len_orig && + *out_len == out_len_orig)) { + log_warn(LD_GENERAL, + "More info on the bug: method == %s, finish == %d, " + " *in_len == in_len_orig == %lu, " + "*out_len == out_len_orig == %lu", + compression_method_get_human_name(state->method), finish, + (unsigned long)in_len_orig, (unsigned long)out_len_orig); + return TOR_COMPRESS_ERROR; + } + + return rv; + err: + return TOR_COMPRESS_ERROR; +} + +/** Deallocate <b>state</b>. */ +void +tor_compress_free_(tor_compress_state_t *state) +{ + if (state == NULL) + return; + + switch (state->method) { + case GZIP_METHOD: + case ZLIB_METHOD: + tor_zlib_compress_free(state->u.zlib_state); + break; + case LZMA_METHOD: + tor_lzma_compress_free(state->u.lzma_state); + break; + case ZSTD_METHOD: + tor_zstd_compress_free(state->u.zstd_state); + break; + case NO_METHOD: + break; + case UNKNOWN_METHOD: + break; + } + + atomic_counter_sub(&total_compress_allocation, + sizeof(tor_compress_state_t)); + tor_free(state); +} + +/** Return the approximate number of bytes allocated for <b>state</b>. */ +size_t +tor_compress_state_size(const tor_compress_state_t *state) +{ + tor_assert(state != NULL); + + size_t size = sizeof(tor_compress_state_t); + + switch (state->method) { + case GZIP_METHOD: + case ZLIB_METHOD: + size += tor_zlib_compress_state_size(state->u.zlib_state); + break; + case LZMA_METHOD: + size += tor_lzma_compress_state_size(state->u.lzma_state); + break; + case ZSTD_METHOD: + size += tor_zstd_compress_state_size(state->u.zstd_state); + break; + case NO_METHOD: + case UNKNOWN_METHOD: + break; + } + + return size; +} + +/** Initialize all compression modules. */ +void +tor_compress_init(void) +{ + atomic_counter_init(&total_compress_allocation); + + tor_zlib_init(); + tor_lzma_init(); + tor_zstd_init(); +} + +/** Warn if we had any problems while setting up our compression libraries. + * + * (This isn't part of tor_compress_init, since the logs aren't set up yet.) + */ +void +tor_compress_log_init_warnings(void) +{ + tor_zstd_warn_if_version_mismatched(); +} + diff --git a/src/lib/compress/compress.h b/src/lib/compress/compress.h new file mode 100644 index 0000000000..10ad6d86bc --- /dev/null +++ b/src/lib/compress/compress.h @@ -0,0 +1,93 @@ +/* Copyright (c) 2003, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file compress.h + * \brief Headers for compress.c + **/ + +#ifndef TOR_COMPRESS_H +#define TOR_COMPRESS_H + +/** Enumeration of what kind of compression to use. Only ZLIB_METHOD and + * GZIP_METHOD is guaranteed to be supported by the compress/uncompress + * functions here. Call tor_compress_supports_method() to check if a given + * compression schema is supported by Tor. */ +typedef enum { + NO_METHOD=0, // This method must be first. + GZIP_METHOD=1, + ZLIB_METHOD=2, + LZMA_METHOD=3, + ZSTD_METHOD=4, + UNKNOWN_METHOD=5, // This method must be last. Add new ones in the middle. +} compress_method_t; + +/** + * Enumeration to define tradeoffs between memory usage and compression level. + * BEST_COMPRESSION saves the most bandwidth; LOW_COMPRESSION saves the most + * memory. + **/ +typedef enum { + BEST_COMPRESSION, HIGH_COMPRESSION, MEDIUM_COMPRESSION, LOW_COMPRESSION +} compression_level_t; + +int tor_compress(char **out, size_t *out_len, + const char *in, size_t in_len, + compress_method_t method); + +int tor_uncompress(char **out, size_t *out_len, + const char *in, size_t in_len, + compress_method_t method, + int complete_only, + int protocol_warn_level); + +compress_method_t detect_compression_method(const char *in, size_t in_len); + +MOCK_DECL(int,tor_compress_is_compression_bomb,(size_t size_in, + size_t size_out)); + +int tor_compress_supports_method(compress_method_t method); +unsigned tor_compress_get_supported_method_bitmask(void); +const char *compression_method_get_name(compress_method_t method); +const char *compression_method_get_human_name(compress_method_t method); +compress_method_t compression_method_get_by_name(const char *name); + +const char *tor_compress_version_str(compress_method_t method); + +const char *tor_compress_header_version_str(compress_method_t method); + +size_t tor_compress_get_total_allocation(void); + +/** Return values from tor_compress_process; see that function's documentation + * for details. */ +typedef enum { + TOR_COMPRESS_OK, + TOR_COMPRESS_DONE, + TOR_COMPRESS_BUFFER_FULL, + TOR_COMPRESS_ERROR +} tor_compress_output_t; + +/** Internal state for an incremental compression/decompression. */ +typedef struct tor_compress_state_t tor_compress_state_t; + +tor_compress_state_t *tor_compress_new(int compress, + compress_method_t method, + compression_level_t level); + +tor_compress_output_t tor_compress_process(tor_compress_state_t *state, + char **out, size_t *out_len, + const char **in, size_t *in_len, + int finish); +void tor_compress_free_(tor_compress_state_t *state); +#define tor_compress_free(st) \ + FREE_AND_NULL(tor_compress_state_t, tor_compress_free_, (st)) + +size_t tor_compress_state_size(const tor_compress_state_t *state); + +void tor_compress_init(void); +void tor_compress_log_init_warnings(void); + +#endif /* !defined(TOR_COMPRESS_H) */ + diff --git a/src/lib/compress/compress_lzma.c b/src/lib/compress/compress_lzma.c new file mode 100644 index 0000000000..363dc1fb92 --- /dev/null +++ b/src/lib/compress/compress_lzma.c @@ -0,0 +1,361 @@ +/* Copyright (c) 2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file compress_lzma.c + * \brief Compression backend for LZMA. + * + * This module should never be invoked directly. Use the compress module + * instead. + **/ + +#include "orconfig.h" + +#include "common/util.h" +#include "common/torlog.h" +#include "common/compress.h" +#include "common/compress_lzma.h" + +#ifdef HAVE_LZMA +#include <lzma.h> +#endif + +/** The maximum amount of memory we allow the LZMA decoder to use, in bytes. */ +#define MEMORY_LIMIT (16 * 1024 * 1024) + +/** Total number of bytes allocated for LZMA state. */ +static atomic_counter_t total_lzma_allocation; + +#ifdef HAVE_LZMA +/** Given <b>level</b> return the memory level. */ +static int +memory_level(compression_level_t level) +{ + switch (level) { + default: + case BEST_COMPRESSION: + case HIGH_COMPRESSION: return 6; + case MEDIUM_COMPRESSION: return 4; + case LOW_COMPRESSION: return 2; + } +} + +/** Convert a given <b>error</b> to a human readable error string. */ +static const char * +lzma_error_str(lzma_ret error) +{ + switch (error) { + case LZMA_OK: + return "Operation completed successfully"; + case LZMA_STREAM_END: + return "End of stream"; + case LZMA_NO_CHECK: + return "Input stream lacks integrity check"; + case LZMA_UNSUPPORTED_CHECK: + return "Unable to calculate integrity check"; + case LZMA_GET_CHECK: + return "Integrity check available"; + case LZMA_MEM_ERROR: + return "Unable to allocate memory"; + case LZMA_MEMLIMIT_ERROR: + return "Memory limit reached"; + case LZMA_FORMAT_ERROR: + return "Unknown file format"; + case LZMA_OPTIONS_ERROR: + return "Unsupported options"; + case LZMA_DATA_ERROR: + return "Corrupt input data"; + case LZMA_BUF_ERROR: + return "Unable to progress"; + case LZMA_PROG_ERROR: + return "Programming error"; + default: + return "Unknown LZMA error"; + } +} +#endif /* defined(HAVE_LZMA) */ + +/** Return 1 if LZMA compression is supported; otherwise 0. */ +int +tor_lzma_method_supported(void) +{ +#ifdef HAVE_LZMA + return 1; +#else + return 0; +#endif +} + +/** Return a string representation of the version of the currently running + * version of liblzma. Returns NULL if LZMA is unsupported. */ +const char * +tor_lzma_get_version_str(void) +{ +#ifdef HAVE_LZMA + return lzma_version_string(); +#else + return NULL; +#endif +} + +/** Return a string representation of the version of liblzma used at + * compilation time. Returns NULL if LZMA is unsupported. */ +const char * +tor_lzma_get_header_version_str(void) +{ +#ifdef HAVE_LZMA + return LZMA_VERSION_STRING; +#else + return NULL; +#endif +} + +/** Internal LZMA state for incremental compression/decompression. + * The body of this struct is not exposed. */ +struct tor_lzma_compress_state_t { +#ifdef HAVE_LZMA + lzma_stream stream; /**< The LZMA stream. */ +#endif + + int compress; /**< True if we are compressing; false if we are inflating */ + + /** Number of bytes read so far. Used to detect compression bombs. */ + size_t input_so_far; + /** Number of bytes written so far. Used to detect compression bombs. */ + size_t output_so_far; + + /** Approximate number of bytes allocated for this object. */ + size_t allocation; +}; + +#ifdef HAVE_LZMA +/** Return an approximate number of bytes stored in memory to hold the LZMA + * encoder/decoder state. */ +static size_t +tor_lzma_state_size_precalc(int compress, compression_level_t level) +{ + uint64_t memory_usage; + + if (compress) + memory_usage = lzma_easy_encoder_memusage(memory_level(level)); + else + memory_usage = lzma_easy_decoder_memusage(memory_level(level)); + + if (memory_usage == UINT64_MAX) { + // LCOV_EXCL_START + log_warn(LD_GENERAL, "Unsupported compression level passed to LZMA %s", + compress ? "encoder" : "decoder"); + goto err; + // LCOV_EXCL_STOP + } + + if (memory_usage + sizeof(tor_lzma_compress_state_t) > SIZE_MAX) + memory_usage = SIZE_MAX; + else + memory_usage += sizeof(tor_lzma_compress_state_t); + + return (size_t)memory_usage; + + // LCOV_EXCL_START + err: + return 0; + // LCOV_EXCL_STOP +} +#endif /* defined(HAVE_LZMA) */ + +/** Construct and return a tor_lzma_compress_state_t object using + * <b>method</b>. If <b>compress</b>, it's for compression; otherwise it's for + * decompression. */ +tor_lzma_compress_state_t * +tor_lzma_compress_new(int compress, + compress_method_t method, + compression_level_t level) +{ + tor_assert(method == LZMA_METHOD); + +#ifdef HAVE_LZMA + tor_lzma_compress_state_t *result; + lzma_ret retval; + lzma_options_lzma stream_options; + + // Note that we do not explicitly initialize the lzma_stream object here, + // since the LZMA_STREAM_INIT "just" initializes all members to 0, which is + // also what `tor_malloc_zero()` does. + result = tor_malloc_zero(sizeof(tor_lzma_compress_state_t)); + result->compress = compress; + result->allocation = tor_lzma_state_size_precalc(compress, level); + + if (compress) { + lzma_lzma_preset(&stream_options, memory_level(level)); + + retval = lzma_alone_encoder(&result->stream, &stream_options); + + if (retval != LZMA_OK) { + // LCOV_EXCL_START + log_warn(LD_GENERAL, "Error from LZMA encoder: %s (%u).", + lzma_error_str(retval), retval); + goto err; + // LCOV_EXCL_STOP + } + } else { + retval = lzma_alone_decoder(&result->stream, MEMORY_LIMIT); + + if (retval != LZMA_OK) { + // LCOV_EXCL_START + log_warn(LD_GENERAL, "Error from LZMA decoder: %s (%u).", + lzma_error_str(retval), retval); + goto err; + // LCOV_EXCL_STOP + } + } + + atomic_counter_add(&total_lzma_allocation, result->allocation); + return result; + + /* LCOV_EXCL_START */ + err: + tor_free(result); + return NULL; + /* LCOV_EXCL_STOP */ +#else /* !(defined(HAVE_LZMA)) */ + (void)compress; + (void)method; + (void)level; + + return NULL; +#endif /* defined(HAVE_LZMA) */ +} + +/** Compress/decompress some bytes using <b>state</b>. Read up to + * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes + * to *<b>out</b>, adjusting the values as we go. If <b>finish</b> is true, + * we've reached the end of the input. + * + * Return TOR_COMPRESS_DONE if we've finished the entire + * compression/decompression. + * Return TOR_COMPRESS_OK if we're processed everything from the input. + * Return TOR_COMPRESS_BUFFER_FULL if we're out of space on <b>out</b>. + * Return TOR_COMPRESS_ERROR if the stream is corrupt. + */ +tor_compress_output_t +tor_lzma_compress_process(tor_lzma_compress_state_t *state, + char **out, size_t *out_len, + const char **in, size_t *in_len, + int finish) +{ +#ifdef HAVE_LZMA + lzma_ret retval; + lzma_action action; + + tor_assert(state != NULL); + tor_assert(*in_len <= UINT_MAX); + tor_assert(*out_len <= UINT_MAX); + + state->stream.next_in = (unsigned char *)*in; + state->stream.avail_in = *in_len; + state->stream.next_out = (unsigned char *)*out; + state->stream.avail_out = *out_len; + + action = finish ? LZMA_FINISH : LZMA_RUN; + + retval = lzma_code(&state->stream, action); + + state->input_so_far += state->stream.next_in - ((unsigned char *)*in); + state->output_so_far += state->stream.next_out - ((unsigned char *)*out); + + *out = (char *)state->stream.next_out; + *out_len = state->stream.avail_out; + *in = (const char *)state->stream.next_in; + *in_len = state->stream.avail_in; + + if (! state->compress && + tor_compress_is_compression_bomb(state->input_so_far, + state->output_so_far)) { + log_warn(LD_DIR, "Possible compression bomb; abandoning stream."); + return TOR_COMPRESS_ERROR; + } + + switch (retval) { + case LZMA_OK: + if (state->stream.avail_out == 0 || finish) + return TOR_COMPRESS_BUFFER_FULL; + + return TOR_COMPRESS_OK; + + case LZMA_BUF_ERROR: + if (state->stream.avail_in == 0 && !finish) + return TOR_COMPRESS_OK; + + return TOR_COMPRESS_BUFFER_FULL; + + case LZMA_STREAM_END: + return TOR_COMPRESS_DONE; + + // We list all the possible values of `lzma_ret` here to silence the + // `switch-enum` warning and to detect if a new member was added. + case LZMA_NO_CHECK: + case LZMA_UNSUPPORTED_CHECK: + case LZMA_GET_CHECK: + case LZMA_MEM_ERROR: + case LZMA_MEMLIMIT_ERROR: + case LZMA_FORMAT_ERROR: + case LZMA_OPTIONS_ERROR: + case LZMA_DATA_ERROR: + case LZMA_PROG_ERROR: + default: + log_warn(LD_GENERAL, "LZMA %s didn't finish: %s.", + state->compress ? "compression" : "decompression", + lzma_error_str(retval)); + return TOR_COMPRESS_ERROR; + } +#else /* !(defined(HAVE_LZMA)) */ + (void)state; + (void)out; + (void)out_len; + (void)in; + (void)in_len; + (void)finish; + return TOR_COMPRESS_ERROR; +#endif /* defined(HAVE_LZMA) */ +} + +/** Deallocate <b>state</b>. */ +void +tor_lzma_compress_free_(tor_lzma_compress_state_t *state) +{ + if (state == NULL) + return; + + atomic_counter_sub(&total_lzma_allocation, state->allocation); + +#ifdef HAVE_LZMA + lzma_end(&state->stream); +#endif + + tor_free(state); +} + +/** Return the approximate number of bytes allocated for <b>state</b>. */ +size_t +tor_lzma_compress_state_size(const tor_lzma_compress_state_t *state) +{ + tor_assert(state != NULL); + return state->allocation; +} + +/** Return the approximate number of bytes allocated for all LZMA states. */ +size_t +tor_lzma_get_total_allocation(void) +{ + return atomic_counter_get(&total_lzma_allocation); +} + +/** Initialize the lzma module */ +void +tor_lzma_init(void) +{ + atomic_counter_init(&total_lzma_allocation); +} + diff --git a/src/lib/compress/compress_lzma.h b/src/lib/compress/compress_lzma.h new file mode 100644 index 0000000000..9ef3382a25 --- /dev/null +++ b/src/lib/compress/compress_lzma.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2003, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file compress_lzma.h + * \brief Header for compress_lzma.c + **/ + +#ifndef TOR_COMPRESS_LZMA_H +#define TOR_COMPRESS_LZMA_H + +int tor_lzma_method_supported(void); + +const char *tor_lzma_get_version_str(void); + +const char *tor_lzma_get_header_version_str(void); + +/** Internal state for an incremental LZMA compression/decompression. */ +typedef struct tor_lzma_compress_state_t tor_lzma_compress_state_t; + +tor_lzma_compress_state_t * +tor_lzma_compress_new(int compress, + compress_method_t method, + compression_level_t compression_level); + +tor_compress_output_t +tor_lzma_compress_process(tor_lzma_compress_state_t *state, + char **out, size_t *out_len, + const char **in, size_t *in_len, + int finish); + +void tor_lzma_compress_free_(tor_lzma_compress_state_t *state); +#define tor_lzma_compress_free(st) \ + FREE_AND_NULL(tor_lzma_compress_state_t, \ + tor_lzma_compress_free_, (st)) + +size_t tor_lzma_compress_state_size(const tor_lzma_compress_state_t *state); + +size_t tor_lzma_get_total_allocation(void); + +void tor_lzma_init(void); + +#endif /* !defined(TOR_COMPRESS_LZMA_H) */ + diff --git a/src/lib/compress/compress_none.c b/src/lib/compress/compress_none.c new file mode 100644 index 0000000000..7e67046d34 --- /dev/null +++ b/src/lib/compress/compress_none.c @@ -0,0 +1,53 @@ +/* Copyright (c) 2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file compress_none.c + * \brief Compression backend for identity compression. + * + * We actually define this backend so that we can treat the identity transform + * as another case of compression. + * + * This module should never be invoked directly. Use the compress module + * instead. + **/ + +#include "orconfig.h" + +#include "common/util.h" +#include "common/torlog.h" +#include "common/compress.h" +#include "common/compress_none.h" + +/** Transfer some bytes using the identity transformation. Read up to + * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes + * to *<b>out</b>, adjusting the values as we go. If <b>finish</b> is true, + * we've reached the end of the input. + * + * Return TOR_COMPRESS_DONE if we've finished the entire + * compression/decompression. + * Return TOR_COMPRESS_OK if we're processed everything from the input. + * Return TOR_COMPRESS_BUFFER_FULL if we're out of space on <b>out</b>. + * Return TOR_COMPRESS_ERROR if the stream is corrupt. + */ +tor_compress_output_t +tor_cnone_compress_process(char **out, size_t *out_len, + const char **in, size_t *in_len, + int finish) +{ + size_t n_to_copy = MIN(*in_len, *out_len); + + memcpy(*out, *in, n_to_copy); + *out += n_to_copy; + *in += n_to_copy; + *out_len -= n_to_copy; + *in_len -= n_to_copy; + if (*in_len == 0) { + return finish ? TOR_COMPRESS_DONE : TOR_COMPRESS_OK; + } else { + return TOR_COMPRESS_BUFFER_FULL; + } +} + diff --git a/src/lib/compress/compress_none.h b/src/lib/compress/compress_none.h new file mode 100644 index 0000000000..5c395bbb30 --- /dev/null +++ b/src/lib/compress/compress_none.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2003, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file compress_none.h + * \brief Header for compress_none.c + **/ + +#ifndef TOR_COMPRESS_NONE_H +#define TOR_COMPRESS_NONE_H + +tor_compress_output_t +tor_cnone_compress_process(char **out, size_t *out_len, + const char **in, size_t *in_len, + int finish); + +#endif /* !defined(TOR_COMPRESS_NONE_H) */ + diff --git a/src/lib/compress/compress_zlib.c b/src/lib/compress/compress_zlib.c new file mode 100644 index 0000000000..663e27f794 --- /dev/null +++ b/src/lib/compress/compress_zlib.c @@ -0,0 +1,304 @@ +/* Copyright (c) 2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file compress_zlib.c + * \brief Compression backend for gzip and zlib. + * + * This module should never be invoked directly. Use the compress module + * instead. + **/ + +#include "orconfig.h" + +#include "common/util.h" +#include "common/torlog.h" +#include "common/compress.h" +#include "common/compress_zlib.h" + +/* zlib 1.2.4 and 1.2.5 do some "clever" things with macros. Instead of + saying "(defined(FOO) ? FOO : 0)" they like to say "FOO-0", on the theory + that nobody will care if the compile outputs a no-such-identifier warning. + + Sorry, but we like -Werror over here, so I guess we need to define these. + I hope that zlib 1.2.6 doesn't break these too. +*/ +#ifndef _LARGEFILE64_SOURCE +#define _LARGEFILE64_SOURCE 0 +#endif +#ifndef _LFS64_LARGEFILE +#define _LFS64_LARGEFILE 0 +#endif +#ifndef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 0 +#endif +#ifndef off64_t +#define off64_t int64_t +#endif + +#include <zlib.h> + +#if defined ZLIB_VERNUM && ZLIB_VERNUM < 0x1200 +#error "We require zlib version 1.2 or later." +#endif + +static size_t tor_zlib_state_size_precalc(int inflate, + int windowbits, int memlevel); + +/** Total number of bytes allocated for zlib state */ +static atomic_counter_t total_zlib_allocation; + +/** Given <b>level</b> return the memory level. */ +static int +memory_level(compression_level_t level) +{ + switch (level) { + default: + case BEST_COMPRESSION: return 9; + case HIGH_COMPRESSION: return 8; + case MEDIUM_COMPRESSION: return 7; + case LOW_COMPRESSION: return 6; + } +} + +/** Return the 'bits' value to tell zlib to use <b>method</b>.*/ +static inline int +method_bits(compress_method_t method, compression_level_t level) +{ + /* Bits+16 means "use gzip" in zlib >= 1.2 */ + const int flag = method == GZIP_METHOD ? 16 : 0; + switch (level) { + default: + case BEST_COMPRESSION: + case HIGH_COMPRESSION: return flag + 15; + case MEDIUM_COMPRESSION: return flag + 13; + case LOW_COMPRESSION: return flag + 11; + } +} + +/** Return 1 if zlib/gzip compression is supported; otherwise 0. */ +int +tor_zlib_method_supported(void) +{ + /* We currently always support zlib/gzip, but we keep this function around in + * case we some day decide to deprecate zlib/gzip support. + */ + return 1; +} + +/** Return a string representation of the version of the currently running + * version of zlib. */ +const char * +tor_zlib_get_version_str(void) +{ + return zlibVersion(); +} + +/** Return a string representation of the version of the version of zlib +* used at compilation. */ +const char * +tor_zlib_get_header_version_str(void) +{ + return ZLIB_VERSION; +} + +/** Internal zlib state for an incremental compression/decompression. + * The body of this struct is not exposed. */ +struct tor_zlib_compress_state_t { + struct z_stream_s stream; /**< The zlib stream */ + int compress; /**< True if we are compressing; false if we are inflating */ + + /** Number of bytes read so far. Used to detect zlib bombs. */ + size_t input_so_far; + /** Number of bytes written so far. Used to detect zlib bombs. */ + size_t output_so_far; + + /** Approximate number of bytes allocated for this object. */ + size_t allocation; +}; + +/** Return an approximate number of bytes used in RAM to hold a state with + * window bits <b>windowBits</b> and compression level 'memlevel' */ +static size_t +tor_zlib_state_size_precalc(int inflate_, int windowbits, int memlevel) +{ + windowbits &= 15; + +#define A_FEW_KILOBYTES 2048 + + if (inflate_) { + /* From zconf.h: + + "The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus a few kilobytes + for small objects." + */ + return sizeof(tor_zlib_compress_state_t) + sizeof(struct z_stream_s) + + (1 << 15) + A_FEW_KILOBYTES; + } else { + /* Also from zconf.h: + + "The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + ... plus a few kilobytes for small objects." + */ + return sizeof(tor_zlib_compress_state_t) + sizeof(struct z_stream_s) + + (1 << (windowbits + 2)) + (1 << (memlevel + 9)) + A_FEW_KILOBYTES; + } +#undef A_FEW_KILOBYTES +} + +/** Construct and return a tor_zlib_compress_state_t object using + * <b>method</b>. If <b>compress</b>, it's for compression; otherwise it's for + * decompression. */ +tor_zlib_compress_state_t * +tor_zlib_compress_new(int compress_, + compress_method_t method, + compression_level_t compression_level) +{ + tor_zlib_compress_state_t *out; + int bits, memlevel; + + if (! compress_) { + /* use this setting for decompression, since we might have the + * max number of window bits */ + compression_level = BEST_COMPRESSION; + } + + out = tor_malloc_zero(sizeof(tor_zlib_compress_state_t)); + out->stream.zalloc = Z_NULL; + out->stream.zfree = Z_NULL; + out->stream.opaque = NULL; + out->compress = compress_; + bits = method_bits(method, compression_level); + memlevel = memory_level(compression_level); + if (compress_) { + if (deflateInit2(&out->stream, Z_BEST_COMPRESSION, Z_DEFLATED, + bits, memlevel, + Z_DEFAULT_STRATEGY) != Z_OK) + goto err; // LCOV_EXCL_LINE + } else { + if (inflateInit2(&out->stream, bits) != Z_OK) + goto err; // LCOV_EXCL_LINE + } + out->allocation = tor_zlib_state_size_precalc(!compress_, bits, memlevel); + + atomic_counter_add(&total_zlib_allocation, out->allocation); + + return out; + + err: + tor_free(out); + return NULL; +} + +/** Compress/decompress some bytes using <b>state</b>. Read up to + * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes + * to *<b>out</b>, adjusting the values as we go. If <b>finish</b> is true, + * we've reached the end of the input. + * + * Return TOR_COMPRESS_DONE if we've finished the entire + * compression/decompression. + * Return TOR_COMPRESS_OK if we're processed everything from the input. + * Return TOR_COMPRESS_BUFFER_FULL if we're out of space on <b>out</b>. + * Return TOR_COMPRESS_ERROR if the stream is corrupt. + */ +tor_compress_output_t +tor_zlib_compress_process(tor_zlib_compress_state_t *state, + char **out, size_t *out_len, + const char **in, size_t *in_len, + int finish) +{ + int err; + tor_assert(state != NULL); + if (*in_len > UINT_MAX || + *out_len > UINT_MAX) { + return TOR_COMPRESS_ERROR; + } + + state->stream.next_in = (unsigned char*) *in; + state->stream.avail_in = (unsigned int)*in_len; + state->stream.next_out = (unsigned char*) *out; + state->stream.avail_out = (unsigned int)*out_len; + + if (state->compress) { + err = deflate(&state->stream, finish ? Z_FINISH : Z_NO_FLUSH); + } else { + err = inflate(&state->stream, finish ? Z_FINISH : Z_SYNC_FLUSH); + } + + state->input_so_far += state->stream.next_in - ((unsigned char*)*in); + state->output_so_far += state->stream.next_out - ((unsigned char*)*out); + + *out = (char*) state->stream.next_out; + *out_len = state->stream.avail_out; + *in = (const char *) state->stream.next_in; + *in_len = state->stream.avail_in; + + if (! state->compress && + tor_compress_is_compression_bomb(state->input_so_far, + state->output_so_far)) { + log_warn(LD_DIR, "Possible zlib bomb; abandoning stream."); + return TOR_COMPRESS_ERROR; + } + + switch (err) + { + case Z_STREAM_END: + return TOR_COMPRESS_DONE; + case Z_BUF_ERROR: + if (state->stream.avail_in == 0 && !finish) + return TOR_COMPRESS_OK; + return TOR_COMPRESS_BUFFER_FULL; + case Z_OK: + if (state->stream.avail_out == 0 || finish) + return TOR_COMPRESS_BUFFER_FULL; + return TOR_COMPRESS_OK; + default: + log_warn(LD_GENERAL, "Gzip returned an error: %s", + state->stream.msg ? state->stream.msg : "<no message>"); + return TOR_COMPRESS_ERROR; + } +} + +/** Deallocate <b>state</b>. */ +void +tor_zlib_compress_free_(tor_zlib_compress_state_t *state) +{ + if (state == NULL) + return; + + atomic_counter_sub(&total_zlib_allocation, state->allocation); + + if (state->compress) + deflateEnd(&state->stream); + else + inflateEnd(&state->stream); + + tor_free(state); +} + +/** Return the approximate number of bytes allocated for <b>state</b>. */ +size_t +tor_zlib_compress_state_size(const tor_zlib_compress_state_t *state) +{ + tor_assert(state != NULL); + return state->allocation; +} + +/** Return the approximate number of bytes allocated for all zlib states. */ +size_t +tor_zlib_get_total_allocation(void) +{ + return atomic_counter_get(&total_zlib_allocation); +} + +/** Set up global state for the zlib module */ +void +tor_zlib_init(void) +{ + atomic_counter_init(&total_zlib_allocation); +} + diff --git a/src/lib/compress/compress_zlib.h b/src/lib/compress/compress_zlib.h new file mode 100644 index 0000000000..7af68044de --- /dev/null +++ b/src/lib/compress/compress_zlib.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2003, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file compress_zlib.h + * \brief Header for compress_zlib.c + **/ + +#ifndef TOR_COMPRESS_ZLIB_H +#define TOR_COMPRESS_ZLIB_H + +int tor_zlib_method_supported(void); + +const char *tor_zlib_get_version_str(void); + +const char *tor_zlib_get_header_version_str(void); + +/** Internal state for an incremental zlib/gzip compression/decompression. */ +typedef struct tor_zlib_compress_state_t tor_zlib_compress_state_t; + +tor_zlib_compress_state_t * +tor_zlib_compress_new(int compress, + compress_method_t method, + compression_level_t compression_level); + +tor_compress_output_t +tor_zlib_compress_process(tor_zlib_compress_state_t *state, + char **out, size_t *out_len, + const char **in, size_t *in_len, + int finish); + +void tor_zlib_compress_free_(tor_zlib_compress_state_t *state); +#define tor_zlib_compress_free(st) \ + FREE_AND_NULL(tor_zlib_compress_state_t, \ + tor_zlib_compress_free_, (st)) + +size_t tor_zlib_compress_state_size(const tor_zlib_compress_state_t *state); + +size_t tor_zlib_get_total_allocation(void); + +void tor_zlib_init(void); + +#endif /* !defined(TOR_COMPRESS_ZLIB_H) */ + diff --git a/src/lib/compress/compress_zstd.c b/src/lib/compress/compress_zstd.c new file mode 100644 index 0000000000..dc0dcdec93 --- /dev/null +++ b/src/lib/compress/compress_zstd.c @@ -0,0 +1,536 @@ +/* Copyright (c) 2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file compress_zstd.c + * \brief Compression backend for Zstandard. + * + * This module should never be invoked directly. Use the compress module + * instead. + **/ + +#include "orconfig.h" + +#include "common/util.h" +#include "common/torlog.h" +#include "common/compress.h" +#include "common/compress_zstd.h" + +#ifdef ENABLE_ZSTD_ADVANCED_APIS +/* This is a lie, but we make sure it doesn't get us in trouble by wrapping + * all invocations of zstd's static-only functions in a check to make sure + * that the compile-time version matches the run-time version. */ +#define ZSTD_STATIC_LINKING_ONLY +#endif + +#ifdef HAVE_ZSTD +DISABLE_GCC_WARNING(unused-const-variable) +#include <zstd.h> +ENABLE_GCC_WARNING(unused-const-variable) +#endif + +/** Total number of bytes allocated for Zstandard state. */ +static atomic_counter_t total_zstd_allocation; + +#ifdef HAVE_ZSTD +/** Given <b>level</b> return the memory level. */ +static int +memory_level(compression_level_t level) +{ + switch (level) { + default: + case BEST_COMPRESSION: + case HIGH_COMPRESSION: return 9; + case MEDIUM_COMPRESSION: return 8; + case LOW_COMPRESSION: return 7; + } +} +#endif /* defined(HAVE_ZSTD) */ + +/** Return 1 if Zstandard compression is supported; otherwise 0. */ +int +tor_zstd_method_supported(void) +{ +#ifdef HAVE_ZSTD + return 1; +#else + return 0; +#endif +} + +#ifdef HAVE_ZSTD +/** Format a zstd version number as a string in <b>buf</b>. */ +static void +tor_zstd_format_version(char *buf, size_t buflen, unsigned version_number) +{ + tor_snprintf(buf, buflen, + "%u.%u.%u", + version_number / 10000 % 100, + version_number / 100 % 100, + version_number % 100); +} +#endif + +#define VERSION_STR_MAX_LEN 16 /* more than enough space for 99.99.99 */ + +/** Return a string representation of the version of the currently running + * version of libzstd. Returns NULL if Zstandard is unsupported. */ +const char * +tor_zstd_get_version_str(void) +{ +#ifdef HAVE_ZSTD + static char version_str[VERSION_STR_MAX_LEN]; + + tor_zstd_format_version(version_str, sizeof(version_str), + ZSTD_versionNumber()); + + return version_str; +#else /* !(defined(HAVE_ZSTD)) */ + return NULL; +#endif /* defined(HAVE_ZSTD) */ +} + +/** Return a string representation of the version of the version of libzstd + * used at compilation time. Returns NULL if Zstandard is unsupported. */ +const char * +tor_zstd_get_header_version_str(void) +{ +#ifdef HAVE_ZSTD + return ZSTD_VERSION_STRING; +#else + return NULL; +#endif +} + +#ifdef TOR_UNIT_TESTS +static int static_apis_disable_for_testing = 0; +#endif + +/** Return true iff we can use the "static-only" APIs. */ +int +tor_zstd_can_use_static_apis(void) +{ +#if defined(ZSTD_STATIC_LINKING_ONLY) && defined(HAVE_ZSTD) +#ifdef TOR_UNIT_TESTS + if (static_apis_disable_for_testing) { + return 0; + } +#endif + return (ZSTD_VERSION_NUMBER == ZSTD_versionNumber()); +#else + return 0; +#endif +} + +/** Internal Zstandard state for incremental compression/decompression. + * The body of this struct is not exposed. */ +struct tor_zstd_compress_state_t { +#ifdef HAVE_ZSTD + union { + /** Compression stream. Used when <b>compress</b> is true. */ + ZSTD_CStream *compress_stream; + /** Decompression stream. Used when <b>compress</b> is false. */ + ZSTD_DStream *decompress_stream; + } u; /**< Zstandard stream objects. */ +#endif /* defined(HAVE_ZSTD) */ + + int compress; /**< True if we are compressing; false if we are inflating */ + int have_called_end; /**< True if we are compressing and we've called + * ZSTD_endStream */ + + /** Number of bytes read so far. Used to detect compression bombs. */ + size_t input_so_far; + /** Number of bytes written so far. Used to detect compression bombs. */ + size_t output_so_far; + + /** Approximate number of bytes allocated for this object. */ + size_t allocation; +}; + +#ifdef HAVE_ZSTD +/** Return an approximate number of bytes stored in memory to hold the + * Zstandard compression/decompression state. This is a fake estimate + * based on inspecting the zstd source: tor_zstd_state_size_precalc() is + * more accurate when it's allowed to use "static-only" functions */ +static size_t +tor_zstd_state_size_precalc_fake(int compress, int preset) +{ + tor_assert(preset > 0); + + size_t memory_usage = sizeof(tor_zstd_compress_state_t); + + // The Zstandard library provides a number of functions that would be useful + // here, but they are, unfortunately, still considered experimental and are + // thus only available in libzstd if we link against the library statically. + // + // The code in this function tries to approximate the calculations without + // being able to use the following: + // + // - We do not have access to neither the internal members of ZSTD_CStream + // and ZSTD_DStream and their internal context objects. + // + // - We cannot use ZSTD_sizeof_CStream() and ZSTD_sizeof_DStream() since they + // are unexposed. + // + // In the future it might be useful to check if libzstd have started + // providing these functions in a stable manner and simplify this function. + if (compress) { + // We try to approximate the ZSTD_sizeof_CStream(ZSTD_CStream *stream) + // function here. This function uses the following fields to make its + // estimate: + + // - sizeof(ZSTD_CStream): Around 192 bytes on a 64-bit machine: + memory_usage += 192; + + // - ZSTD_sizeof_CCtx(stream->cctx): This function requires access to + // variables that are not exposed via the public API. We use a _very_ + // simplified function to calculate the estimated amount of bytes used in + // this struct. + // memory_usage += (preset - 0.5) * 1024 * 1024; + memory_usage += (preset * 1024 * 1024) - (512 * 1024); + // - ZSTD_sizeof_CDict(stream->cdictLocal): Unused in Tor: 0 bytes. + // - stream->outBuffSize: 128 KB: + memory_usage += 128 * 1024; + // - stream->inBuffSize: 2048 KB: + memory_usage += 2048 * 1024; + } else { + // We try to approximate the ZSTD_sizeof_DStream(ZSTD_DStream *stream) + // function here. This function uses the following fields to make its + // estimate: + + // - sizeof(ZSTD_DStream): Around 208 bytes on a 64-bit machine: + memory_usage += 208; + // - ZSTD_sizeof_DCtx(stream->dctx): Around 150 KB. + memory_usage += 150 * 1024; + + // - ZSTD_sizeof_DDict(stream->ddictLocal): Unused in Tor: 0 bytes. + // - stream->inBuffSize: 0 KB. + // - stream->outBuffSize: 0 KB. + } + + return memory_usage; +} + +/** Return an approximate number of bytes stored in memory to hold the + * Zstandard compression/decompression state. */ +static size_t +tor_zstd_state_size_precalc(int compress, int preset) +{ +#ifdef ZSTD_STATIC_LINKING_ONLY + if (tor_zstd_can_use_static_apis()) { + if (compress) { +#ifdef HAVE_ZSTD_ESTIMATECSTREAMSIZE + return ZSTD_estimateCStreamSize(preset); +#endif + } else { +#ifdef HAVE_ZSTD_ESTIMATEDCTXSIZE + /* Could use DStream, but that takes a windowSize. */ + return ZSTD_estimateDCtxSize(); +#endif + } + } +#endif + return tor_zstd_state_size_precalc_fake(compress, preset); +} +#endif /* defined(HAVE_ZSTD) */ + +/** Construct and return a tor_zstd_compress_state_t object using + * <b>method</b>. If <b>compress</b>, it's for compression; otherwise it's for + * decompression. */ +tor_zstd_compress_state_t * +tor_zstd_compress_new(int compress, + compress_method_t method, + compression_level_t level) +{ + tor_assert(method == ZSTD_METHOD); + +#ifdef HAVE_ZSTD + const int preset = memory_level(level); + tor_zstd_compress_state_t *result; + size_t retval; + + result = tor_malloc_zero(sizeof(tor_zstd_compress_state_t)); + result->compress = compress; + result->allocation = tor_zstd_state_size_precalc(compress, preset); + + if (compress) { + result->u.compress_stream = ZSTD_createCStream(); + + if (result->u.compress_stream == NULL) { + // LCOV_EXCL_START + log_warn(LD_GENERAL, "Error while creating Zstandard compression " + "stream"); + goto err; + // LCOV_EXCL_STOP + } + + retval = ZSTD_initCStream(result->u.compress_stream, preset); + + if (ZSTD_isError(retval)) { + // LCOV_EXCL_START + log_warn(LD_GENERAL, "Zstandard stream initialization error: %s", + ZSTD_getErrorName(retval)); + goto err; + // LCOV_EXCL_STOP + } + } else { + result->u.decompress_stream = ZSTD_createDStream(); + + if (result->u.decompress_stream == NULL) { + // LCOV_EXCL_START + log_warn(LD_GENERAL, "Error while creating Zstandard decompression " + "stream"); + goto err; + // LCOV_EXCL_STOP + } + + retval = ZSTD_initDStream(result->u.decompress_stream); + + if (ZSTD_isError(retval)) { + // LCOV_EXCL_START + log_warn(LD_GENERAL, "Zstandard stream initialization error: %s", + ZSTD_getErrorName(retval)); + goto err; + // LCOV_EXCL_STOP + } + } + + atomic_counter_add(&total_zstd_allocation, result->allocation); + return result; + + err: + // LCOV_EXCL_START + if (compress) { + ZSTD_freeCStream(result->u.compress_stream); + } else { + ZSTD_freeDStream(result->u.decompress_stream); + } + + tor_free(result); + return NULL; + // LCOV_EXCL_STOP +#else /* !(defined(HAVE_ZSTD)) */ + (void)compress; + (void)method; + (void)level; + + return NULL; +#endif /* defined(HAVE_ZSTD) */ +} + +/** Compress/decompress some bytes using <b>state</b>. Read up to + * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes + * to *<b>out</b>, adjusting the values as we go. If <b>finish</b> is true, + * we've reached the end of the input. + * + * Return TOR_COMPRESS_DONE if we've finished the entire + * compression/decompression. + * Return TOR_COMPRESS_OK if we're processed everything from the input. + * Return TOR_COMPRESS_BUFFER_FULL if we're out of space on <b>out</b>. + * Return TOR_COMPRESS_ERROR if the stream is corrupt. + */ +tor_compress_output_t +tor_zstd_compress_process(tor_zstd_compress_state_t *state, + char **out, size_t *out_len, + const char **in, size_t *in_len, + int finish) +{ +#ifdef HAVE_ZSTD + size_t retval; + + tor_assert(state != NULL); + tor_assert(*in_len <= UINT_MAX); + tor_assert(*out_len <= UINT_MAX); + + ZSTD_inBuffer input = { *in, *in_len, 0 }; + ZSTD_outBuffer output = { *out, *out_len, 0 }; + + if (BUG(finish == 0 && state->have_called_end)) { + finish = 1; + } + + if (state->compress) { + if (! state->have_called_end) + retval = ZSTD_compressStream(state->u.compress_stream, + &output, &input); + else + retval = 0; + } else { + retval = ZSTD_decompressStream(state->u.decompress_stream, + &output, &input); + } + + state->input_so_far += input.pos; + state->output_so_far += output.pos; + + *out = (char *)output.dst + output.pos; + *out_len = output.size - output.pos; + *in = (char *)input.src + input.pos; + *in_len = input.size - input.pos; + + if (! state->compress && + tor_compress_is_compression_bomb(state->input_so_far, + state->output_so_far)) { + log_warn(LD_DIR, "Possible compression bomb; abandoning stream."); + return TOR_COMPRESS_ERROR; + } + + if (ZSTD_isError(retval)) { + log_warn(LD_GENERAL, "Zstandard %s didn't finish: %s.", + state->compress ? "compression" : "decompression", + ZSTD_getErrorName(retval)); + return TOR_COMPRESS_ERROR; + } + + if (state->compress && !state->have_called_end) { + retval = ZSTD_flushStream(state->u.compress_stream, &output); + + *out = (char *)output.dst + output.pos; + *out_len = output.size - output.pos; + + if (ZSTD_isError(retval)) { + log_warn(LD_GENERAL, "Zstandard compression unable to flush: %s.", + ZSTD_getErrorName(retval)); + return TOR_COMPRESS_ERROR; + } + + // ZSTD_flushStream returns 0 if the frame is done, or >0 if it + // is incomplete. + if (retval > 0) { + return TOR_COMPRESS_BUFFER_FULL; + } + } + + if (!finish) { + // The caller says we're not done with the input, so no need to write an + // epilogue. + return TOR_COMPRESS_OK; + } else if (state->compress) { + if (*in_len) { + // We say that we're not done with the input, so we can't write an + // epilogue. + return TOR_COMPRESS_OK; + } + + retval = ZSTD_endStream(state->u.compress_stream, &output); + state->have_called_end = 1; + *out = (char *)output.dst + output.pos; + *out_len = output.size - output.pos; + + if (ZSTD_isError(retval)) { + log_warn(LD_GENERAL, "Zstandard compression unable to write " + "epilogue: %s.", + ZSTD_getErrorName(retval)); + return TOR_COMPRESS_ERROR; + } + + // endStream returns the number of bytes that is needed to write the + // epilogue. + if (retval > 0) + return TOR_COMPRESS_BUFFER_FULL; + + return TOR_COMPRESS_DONE; + } else /* if (!state->compress) */ { + // ZSTD_decompressStream returns 0 if the frame is done, or >0 if it + // is incomplete. + // We check this above. + tor_assert_nonfatal(!ZSTD_isError(retval)); + // Start a new frame if this frame is done + if (retval == 0) + return TOR_COMPRESS_DONE; + // Don't check out_len, it might have some space left if the next output + // chunk is larger than the remaining space + else if (*in_len > 0) + return TOR_COMPRESS_BUFFER_FULL; + else + return TOR_COMPRESS_OK; + } + +#else /* !(defined(HAVE_ZSTD)) */ + (void)state; + (void)out; + (void)out_len; + (void)in; + (void)in_len; + (void)finish; + + return TOR_COMPRESS_ERROR; +#endif /* defined(HAVE_ZSTD) */ +} + +/** Deallocate <b>state</b>. */ +void +tor_zstd_compress_free_(tor_zstd_compress_state_t *state) +{ + if (state == NULL) + return; + + atomic_counter_sub(&total_zstd_allocation, state->allocation); + +#ifdef HAVE_ZSTD + if (state->compress) { + ZSTD_freeCStream(state->u.compress_stream); + } else { + ZSTD_freeDStream(state->u.decompress_stream); + } +#endif /* defined(HAVE_ZSTD) */ + + tor_free(state); +} + +/** Return the approximate number of bytes allocated for <b>state</b>. */ +size_t +tor_zstd_compress_state_size(const tor_zstd_compress_state_t *state) +{ + tor_assert(state != NULL); + return state->allocation; +} + +/** Return the approximate number of bytes allocated for all Zstandard + * states. */ +size_t +tor_zstd_get_total_allocation(void) +{ + return atomic_counter_get(&total_zstd_allocation); +} + +/** Initialize the zstd module */ +void +tor_zstd_init(void) +{ + atomic_counter_init(&total_zstd_allocation); +} + +/** Warn if the header and library versions don't match. */ +void +tor_zstd_warn_if_version_mismatched(void) +{ +#if defined(HAVE_ZSTD) && defined(ENABLE_ZSTD_ADVANCED_APIS) + if (! tor_zstd_can_use_static_apis()) { + char header_version[VERSION_STR_MAX_LEN]; + char runtime_version[VERSION_STR_MAX_LEN]; + tor_zstd_format_version(header_version, sizeof(header_version), + ZSTD_VERSION_NUMBER); + tor_zstd_format_version(runtime_version, sizeof(runtime_version), + ZSTD_versionNumber()); + + log_warn(LD_GENERAL, + "Tor was compiled with zstd %s, but is running with zstd %s. " + "For safety, we'll avoid using advanced zstd functionality.", + header_version, runtime_version); + } +#endif +} + +#ifdef TOR_UNIT_TESTS +/** Testing only: disable usage of static-only APIs, so we can make sure that + * we still work without them. */ +void +tor_zstd_set_static_apis_disabled_for_testing(int disabled) +{ + static_apis_disable_for_testing = disabled; +} +#endif + diff --git a/src/lib/compress/compress_zstd.h b/src/lib/compress/compress_zstd.h new file mode 100644 index 0000000000..1177537a9e --- /dev/null +++ b/src/lib/compress/compress_zstd.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2003, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file compress_zstd.h + * \brief Header for compress_zstd.c + **/ + +#ifndef TOR_COMPRESS_ZSTD_H +#define TOR_COMPRESS_ZSTD_H + +int tor_zstd_method_supported(void); + +const char *tor_zstd_get_version_str(void); + +const char *tor_zstd_get_header_version_str(void); + +int tor_zstd_can_use_static_apis(void); + +/** Internal state for an incremental Zstandard compression/decompression. */ +typedef struct tor_zstd_compress_state_t tor_zstd_compress_state_t; + +tor_zstd_compress_state_t * +tor_zstd_compress_new(int compress, + compress_method_t method, + compression_level_t compression_level); + +tor_compress_output_t +tor_zstd_compress_process(tor_zstd_compress_state_t *state, + char **out, size_t *out_len, + const char **in, size_t *in_len, + int finish); + +void tor_zstd_compress_free_(tor_zstd_compress_state_t *state); +#define tor_zstd_compress_free(st) \ + FREE_AND_NULL(tor_zstd_compress_state_t, \ + tor_zstd_compress_free_, (st)) + +size_t tor_zstd_compress_state_size(const tor_zstd_compress_state_t *state); + +size_t tor_zstd_get_total_allocation(void); + +void tor_zstd_init(void); +void tor_zstd_warn_if_version_mismatched(void); + +#ifdef TOR_UNIT_TESTS +void tor_zstd_set_static_apis_disabled_for_testing(int disabled); +#endif + +#endif /* !defined(TOR_COMPRESS_ZSTD_H) */ + diff --git a/src/lib/compress/include.am b/src/lib/compress/include.am new file mode 100644 index 0000000000..eb3a89c355 --- /dev/null +++ b/src/lib/compress/include.am @@ -0,0 +1,25 @@ + +noinst_LIBRARIES += src/lib/libtor-compress.a + +if UNITTESTS_ENABLED +noinst_LIBRARIES += src/lib/libtor-compress-testing.a +endif + +src_lib_libtor_compress_a_SOURCES = \ + src/lib/compress/compress.c \ + src/lib/compress/compress_lzma.c \ + src/lib/compress/compress_none.c \ + src/lib/compress/compress_zlib.c \ + src/lib/compress/compress_zstd.c + +src_lib_libtor_compress_testing_a_SOURCES = \ + $(src_lib_libtor_compress_a_SOURCES) +src_lib_libtor_compress_testing_a_CPPFLAGS = $(AM_CPPFLAGS) $(TEST_CPPFLAGS) +src_lib_libtor_compress_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS) + +noinst_HEADERS += \ + src/lib/compress/compress.h \ + src/lib/compress/compress_lzma.h \ + src/lib/compress/compress_none.h \ + src/lib/compress/compress_zlib.h \ + src/lib/compress/compress_zstd.h |