summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2005-01-19 22:40:33 +0000
committerNick Mathewson <nickm@torproject.org>2005-01-19 22:40:33 +0000
commit69fa5be7b624fcda98c7d2e7e8f678f8ee592404 (patch)
tree7d19255140ded00fd9599a858a366654b2a1bb88
parente0bf5976658ea76cd8b61d67f957b3c4f68e9108 (diff)
downloadtor-69fa5be7b624fcda98c7d2e7e8f678f8ee592404.tar.gz
tor-69fa5be7b624fcda98c7d2e7e8f678f8ee592404.zip
Workaround for webservers that lie about Content-Encoding: Tor now tries to autodetect compressed directories and compression itself. (resolves bug 65)
svn:r3374
-rw-r--r--src/common/torgzip.c16
-rw-r--r--src/common/torgzip.h6
-rw-r--r--src/or/directory.c84
3 files changed, 95 insertions, 11 deletions
diff --git a/src/common/torgzip.c b/src/common/torgzip.c
index 12af332b70..1705e6bc9b 100644
--- a/src/common/torgzip.c
+++ b/src/common/torgzip.c
@@ -134,6 +134,7 @@ tor_gzip_compress(char **out, size_t *out_len,
return -1;
}
+/* DOCDOC -- sets *out to NULL on failure. */
int
tor_gzip_uncompress(char **out, size_t *out_len,
const char *in, size_t in_len,
@@ -224,3 +225,18 @@ tor_gzip_uncompress(char **out, size_t *out_len,
return -1;
}
+/** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely
+ * to be compressed or not.  If it is, return the likeliest compression
+ * method (GZIP_METHOD or ZLIB_METHOD).  Otherwise, return 0.
+ *
+ * Only the first two bytes of <b>in</b> are examined, and inputs of two
+ * bytes or fewer are always reported as uncompressed.
+ */
+int
+detect_compression_method(const char *in, size_t in_len)
+{
+  /* Look at the header as unsigned bytes: where plain char is signed,
+   * in[1] can never compare equal to 0x8b. */
+  const unsigned char *hdr = (const unsigned char *)in;
+
+  if (in_len <= 2)
+    return 0;
+
+  /* gzip magic number: ID1 = 0x1f, ID2 = 0x8b (RFC 1952). */
+  if (hdr[0] == 0x1f && hdr[1] == 0x8b)
+    return GZIP_METHOD;
+
+  /* zlib header (RFC 1950): the low nibble of CMF is 8 ("deflate"), and
+   * CMF*256 + FLG is a multiple of 31.  The 16-bit value is composed by
+   * hand, most significant byte first, so the result does not depend on
+   * the byte order of the host. */
+  if ((hdr[0] & 0x0f) == 8 &&
+      ((((unsigned)hdr[0]) << 8) | hdr[1]) % 31 == 0)
+    return ZLIB_METHOD;
+
+  return 0;
+}
diff --git a/src/common/torgzip.h b/src/common/torgzip.h
index a023cd0c2d..d6e070f8ec 100644
--- a/src/common/torgzip.h
+++ b/src/common/torgzip.h
@@ -11,7 +11,9 @@
#define __TORGZIP_H
#define TORGZIP_H_ID "$Id$"
-typedef enum { GZIP_METHOD=1, ZLIB_METHOD=2 } compress_method_t;
+/** Identifiers for the compression formats handled by this module. */
+typedef enum {
+  GZIP_METHOD    = 1,
+  ZLIB_METHOD    = 2,
+  UNKNOWN_METHOD = 3  /* NOTE(review): not referenced in the visible hunks;
+                       * confirm intended use. */
+} compress_method_t;
int
tor_gzip_compress(char **out, size_t *out_len,
@@ -24,4 +26,6 @@ tor_gzip_uncompress(char **out, size_t *out_len,
int is_gzip_supported(void);
+int detect_compression_method(const char *in, size_t in_len);
+
#endif
diff --git a/src/or/directory.c b/src/or/directory.c
index b4c5838c8e..881c85e20a 100644
--- a/src/or/directory.c
+++ b/src/or/directory.c
@@ -48,6 +48,7 @@ directory_send_command(connection_t *conn, const char *platform,
int purpose, const char *resource,
const char *payload, size_t payload_len);
static int directory_handle_command(connection_t *conn);
+static int body_is_plausible(const char *body, size_t body_len);
/********* START VARIABLES **********/
@@ -483,7 +484,10 @@ parse_http_url(char *headers, char **url)
* "HTTP/1.\%d \%d\%s\r\n...".
* If it's well-formed, assign *<b>code</b>, point and return 0.
* If <b>date</b> is provided, set *date to the Date header in the
- * http headers, or 0 if no such header is found.
+ * http headers, or 0 if no such header is found. If <b>compression</b>
+ * is provided, set *<b>compression</b> to the compression method given
+ * in the Content-Encoding header, or 0 if no such header is found, or -1
+ * if the value of the header is not recognized.
* Otherwise, return -1.
*/
static int
@@ -534,8 +538,8 @@ parse_http_response(const char *headers, int *code, time_t *date,
} else if (!strcmp(enc, "gzip") || !strcmp(enc, "x-gzip")) {
*compression = GZIP_METHOD;
} else {
- log_fn(LOG_WARN, "Unrecognized content encoding: '%s'", enc);
- *compression = 0;
+ log_fn(LOG_INFO, "Unrecognized content encoding: '%s'", enc);
+ *compression = -1;
}
}
SMARTLIST_FOREACH(parsed_headers, char *, s, tor_free(s));
@@ -544,6 +548,27 @@ parse_http_response(const char *headers, int *code, time_t *date,
return 0;
}
+/** Return true iff the <b>len</b>-byte <b>body</b> looks like an
+ * uncompressed response: it is at least 32 bytes long and either starts
+ * with one of the known opening keywords ("router", "signed-directory",
+ * "network-status", "running-routers") or its first 32 bytes are all
+ * printable or whitespace characters.
+ *
+ * A false return is a sign of possible compression.
+ **/
+static int
+body_is_plausible(const char *body, size_t len)
+{
+  int i;
+  /* Too short to judge: treat as implausible. */
+  if (len < 32)
+    return 0;
+  if (!strcmpstart(body,"router") ||
+      !strcmpstart(body,"signed-directory") ||
+      !strcmpstart(body,"network-status") ||
+      !strcmpstart(body,"running-routers"))
+    return 1;
+  for (i=0;i<32;++i) {
+    /* The <ctype.h> classifiers require a value representable as unsigned
+     * char (or EOF); passing a negative plain char is undefined, and
+     * high-bit bytes are exactly what compressed data contains. */
+    unsigned char c = (unsigned char)body[i];
+    if (!isprint(c) && !isspace(c))
+      return 0;
+  }
+  return 1;
+}
+
/** We are a client, and we've finished reading the server's
* response. Parse it and act appropriately.
*
@@ -560,6 +585,7 @@ connection_dir_client_reached_eof(connection_t *conn)
time_t now, date_header=0;
int delta;
int compression;
+ int plausible;
switch (fetch_from_buf_http(conn->inbuf,
&headers, MAX_HEADERS_SIZE,
@@ -592,17 +618,55 @@ connection_dir_client_reached_eof(connection_t *conn)
}
}
- if (compression != 0) {
- char *new_body;
- size_t new_len;
- if (tor_gzip_uncompress(&new_body, &new_len, body, body_len, compression)) {
+ plausible = body_is_plausible(body, body_len);
+ if (compression || !plausible) {
+ char *new_body = NULL;
+ size_t new_len = 0;
+ int guessed = detect_compression_method(body, body_len);
+    if (compression <= 0 || guessed != compression) {
+      /* Tell the user if we don't believe what we're told about compression.
+       * description1 is what the Content-Encoding header claimed;
+       * description2 is what the body itself looks like. */
+      const char *description1, *description2;
+      if (compression == ZLIB_METHOD)
+        description1 = "as deflated";
+      else if (compression == GZIP_METHOD)
+        /* Must be a comparison: an assignment here would overwrite the
+         * declared method and make the branches below unreachable. */
+        description1 = "as gzipped";
+      else if (compression == 0)
+        description1 = "as uncompressed";
+      else
+        description1 = "with an unknown Content-Encoding";
+      if (guessed == ZLIB_METHOD)
+        description2 = "deflated";
+      else if (guessed == GZIP_METHOD)
+        description2 = "gzipped";
+      else if (!plausible)
+        description2 = "confusing binary junk";
+      else
+        description2 = "uncompressed";
+
+      /* "Trying both" is only mentioned when there is both a declared and
+       * a guessed method to attempt. */
+      log_fn(LOG_INFO, "HTTP body from server '%s' was labeled %s, "
+             "but it seems to be %s.%s",
+             conn->address, description1, description2,
+             (compression>0 && guessed>0)?" Trying both.":"");
+    }
+ /* Try declared compression first if we can. */
+ if (compression > 0)
+ tor_gzip_uncompress(&new_body, &new_len, body, body_len, compression);
+ /* Okay, if that didn't work, and we think that it was compressed
+ * differently, try that. */
+ if (!new_body && guessed > 0 && compression != guessed)
+ tor_gzip_uncompress(&new_body, &new_len, body, body_len, guessed);
+ /* If we're pretty sure that we have a compressed directory, and
+ * we didn't manage to uncompress it, then warn and bail. */
+ if (!plausible && !new_body) {
log_fn(LOG_WARN, "Unable to decompress HTTP body (server '%s').", conn->address);
tor_free(body); tor_free(headers);
return -1;
}
- tor_free(body);
- body = new_body;
- body_len = new_len;
+ if (new_body) {
+ tor_free(body);
+ body = new_body;
+ body_len = new_len;
+ }
}
if (conn->purpose == DIR_PURPOSE_FETCH_DIR) {