diff options
author | Nick Mathewson <nickm@torproject.org> | 2005-01-19 22:40:33 +0000 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2005-01-19 22:40:33 +0000 |
commit | 69fa5be7b624fcda98c7d2e7e8f678f8ee592404 (patch) | |
tree | 7d19255140ded00fd9599a858a366654b2a1bb88 | |
parent | e0bf5976658ea76cd8b61d67f957b3c4f68e9108 (diff) | |
download | tor-69fa5be7b624fcda98c7d2e7e8f678f8ee592404.tar.gz tor-69fa5be7b624fcda98c7d2e7e8f678f8ee592404.zip |
Workaround for webservers that lie about Content-Encoding: Tor now tries to autodetect compressed directories and compression itself. (resolves bug 65)
svn:r3374
-rw-r--r-- | src/common/torgzip.c | 16 | ||||
-rw-r--r-- | src/common/torgzip.h | 6 | ||||
-rw-r--r-- | src/or/directory.c | 84 |
3 files changed, 95 insertions, 11 deletions
diff --git a/src/common/torgzip.c b/src/common/torgzip.c
index 12af332b70..1705e6bc9b 100644
--- a/src/common/torgzip.c
+++ b/src/common/torgzip.c
@@ -134,6 +134,7 @@ tor_gzip_compress(char **out, size_t *out_len,
   return -1;
 }
 
+/* DOCDOC -- sets *out to NULL on failure. */
 int
 tor_gzip_uncompress(char **out, size_t *out_len,
                     const char *in, size_t in_len,
@@ -224,3 +225,18 @@ tor_gzip_uncompress(char **out, size_t *out_len,
   return -1;
 }
 
+/** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely
+ * to be compressed or not.  If it is, return the likeliest compression method
+ * (gzip magic 1f 8b, or a zlib CMF/FLG pair, read MSB-first).  Else return 0.
+ */
+int detect_compression_method(const char *in, size_t in_len)
+{
+  if (in_len > 2 && in[0] == 0x1f && (unsigned char)in[1] == 0x8b) {
+    return GZIP_METHOD;
+  } else if (in_len > 2 && (in[0] & 0x0f) == 8 &&
+             (((unsigned char)in[0] << 8 | (unsigned char)in[1]) % 31) == 0) {
+    return ZLIB_METHOD;
+  } else {
+    return 0;
+  }
+}
diff --git a/src/common/torgzip.h b/src/common/torgzip.h
index a023cd0c2d..d6e070f8ec 100644
--- a/src/common/torgzip.h
+++ b/src/common/torgzip.h
@@ -11,7 +11,9 @@
 #define __TORGZIP_H
 #define TORGZIP_H_ID "$Id$"
 
-typedef enum { GZIP_METHOD=1, ZLIB_METHOD=2 } compress_method_t;
+typedef enum {
+  GZIP_METHOD=1, ZLIB_METHOD=2, UNKNOWN_METHOD=3
+} compress_method_t;
 
 int
 tor_gzip_compress(char **out, size_t *out_len,
@@ -24,4 +26,6 @@ tor_gzip_uncompress(char **out, size_t *out_len,
 
 int is_gzip_supported(void);
 
+int detect_compression_method(const char *in, size_t in_len);
+
 #endif
diff --git a/src/or/directory.c b/src/or/directory.c
index b4c5838c8e..881c85e20a 100644
--- a/src/or/directory.c
+++ b/src/or/directory.c
@@ -48,6 +48,7 @@ directory_send_command(connection_t *conn, const char *platform,
                        int purpose, const char *resource,
                        const char *payload, size_t payload_len);
 static int directory_handle_command(connection_t *conn);
+static int body_is_plausible(const char *body, size_t body_len);
 
 /********* START VARIABLES **********/
 
@@ -483,7 +484,10 @@ parse_http_url(char *headers, char **url)
  * "HTTP/1.\%d \%d\%s\r\n...".
  * If it's well-formed, assign *<b>code</b>, point and return 0.
  * If <b>date</b> is provided, set *date to the Date header in the
- * http headers, or 0 if no such header is found.
+ * http headers, or 0 if no such header is found.  If <b>compression</b>
+ * is provided, set *<b>compression</b> to the compression method given
+ * in the Content-Encoding header, or 0 if no such header is found, or -1
+ * if the value of the header is not recognized.
  * Otherwise, return -1.
  */
 static int
@@ -534,8 +538,8 @@ parse_http_response(const char *headers, int *code, time_t *date,
     } else if (!strcmp(enc, "gzip") || !strcmp(enc, "x-gzip")) {
       *compression = GZIP_METHOD;
     } else {
-      log_fn(LOG_WARN, "Unrecognized content encoding: '%s'", enc);
-      *compression = 0;
+      log_fn(LOG_INFO, "Unrecognized content encoding: '%s'", enc);
+      *compression = -1;
     }
   }
   SMARTLIST_FOREACH(parsed_headers, char *, s, tor_free(s));
@@ -544,6 +548,27 @@ parse_http_response(const char *headers, int *code, time_t *date,
   return 0;
 }
 
+/** Return true iff <b>body</b> is at least 32 bytes long and opens like a
+ * plain-text document: a known keyword, or 32 printable/space characters.
+ **/
+static int
+body_is_plausible(const char *body, size_t len)
+{
+  int i;
+  if (len < 32)
+    return 0;
+  if (!strcmpstart(body,"router") ||
+      !strcmpstart(body,"signed-directory") ||
+      !strcmpstart(body,"network-status") ||
+      !strcmpstart(body,"running-routers"))
+    return 1;
+  for (i=0;i<32;++i) {
+    if (!isprint((unsigned char)body[i]) && !isspace((unsigned char)body[i]))
+      return 0;
+  }
+  return 1;
+}
+
 /** We are a client, and we've finished reading the server's
  * response. Parse and it and act appropriately.
  *
@@ -560,6 +585,7 @@ connection_dir_client_reached_eof(connection_t *conn)
   time_t now, date_header=0;
   int delta;
   int compression;
+  int plausible;
 
   switch (fetch_from_buf_http(conn->inbuf,
                               &headers, MAX_HEADERS_SIZE,
@@ -592,17 +618,55 @@ connection_dir_client_reached_eof(connection_t *conn)
     }
   }
 
-  if (compression != 0) {
-    char *new_body;
-    size_t new_len;
-    if (tor_gzip_uncompress(&new_body, &new_len, body, body_len, compression)) {
+  plausible = body_is_plausible(body, body_len);
+  if (compression || !plausible) {
+    char *new_body = NULL;
+    size_t new_len = 0;
+    int guessed = detect_compression_method(body, body_len);
+    if (compression <= 0 || guessed != compression) {
+      /* Tell the user if we don't believe what we're told about compression.*/
+      const char *description1, *description2;
+      if (compression == ZLIB_METHOD)
+        description1 = "as deflated";
+      else if (compression == GZIP_METHOD)
+        description1 = "as gzipped";
+      else if (compression == 0)
+        description1 = "as uncompressed";
+      else
+        description1 = "with an unknown Content-Encoding";
+      if (guessed == ZLIB_METHOD)
+        description2 = "deflated";
+      else if (guessed == GZIP_METHOD)
+        description2 = "gzipped";
+      else if (!plausible)
+        description2 = "confusing binary junk";
+      else
+        description2 = "uncompressed";
+
+      log_fn(LOG_INFO, "HTTP body from server '%s' was labeled %s, "
+             "but it seems to be %s.%s",
+             conn->address, description1, description2,
+             (compression>0 && guessed>0)?" Trying both.":"");
+    }
+    /* Try declared compression first if we can. */
+    if (compression > 0)
+      tor_gzip_uncompress(&new_body, &new_len, body, body_len, compression);
+    /* Okay, if that didn't work, and we think that it was compressed
+     * differently, try that. */
+    if (!new_body && guessed > 0 && compression != guessed)
+      tor_gzip_uncompress(&new_body, &new_len, body, body_len, guessed);
+    /* If we're pretty sure that we have a compressed directory, and
+     * we didn't manage to uncompress it, then warn and bail. */
+    if (!plausible && !new_body) {
       log_fn(LOG_WARN, "Unable to decompress HTTP body (server '%s').", conn->address);
       tor_free(body); tor_free(headers);
       return -1;
     }
-    tor_free(body);
-    body = new_body;
-    body_len = new_len;
+    if (new_body) {
+      tor_free(body);
+      body = new_body;
+      body_len = new_len;
+    }
   }
 
   if (conn->purpose == DIR_PURPOSE_FETCH_DIR) {