summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYawning Angel <yawning@schwanenlied.me>2015-06-24 13:52:29 +0000
committerYawning Angel <yawning@schwanenlied.me>2015-06-24 13:52:29 +0000
commit3f336966a264d7cd7c6dab08fb85d85273f06d68 (patch)
tree5c25761adefefba6037dedab2d4c881cca715f1d
parent8b35d8508835f23b8e804982db368c0304a08d40 (diff)
downloadtor-3f336966a264d7cd7c6dab08fb85d85273f06d68.tar.gz
tor-3f336966a264d7cd7c6dab08fb85d85273f06d68.zip
Work around nytimes.com's broken hostnames in our SOCKS checks.
RFC 952 is approximately 30 years old, and people are failing to comply, by serving A records with '_' as part of the hostname. Since relaxing the check is a QOL improvement for our userbase, relax the check to allow such abominations as destinations, especially since there are likely to be other similarly misconfigured domains out there.
-rw-r--r--changes/bug164304
-rw-r--r--src/common/util.c7
-rw-r--r--src/test/test_util.c9
3 files changed, 16 insertions, 4 deletions
diff --git a/changes/bug16430 b/changes/bug16430
new file mode 100644
index 0000000000..ca7b874f98
--- /dev/null
+++ b/changes/bug16430
@@ -0,0 +1,4 @@
+ o Minor features (client):
+ - Relax the validation done to hostnames in SOCKS5 requests, and allow
+ '_' to cope with domains observed in the wild that are serving non-RFC
+ compliant records. Resolves ticket 16430.
diff --git a/src/common/util.c b/src/common/util.c
index 942d0c290e..449015054f 100644
--- a/src/common/util.c
+++ b/src/common/util.c
@@ -1036,6 +1036,9 @@ string_is_valid_ipv6_address(const char *string)
/** Return true iff <b>string</b> matches a pattern of DNS names
* that we allow Tor clients to connect to.
+ *
+ * Note: This allows certain technically invalid characters ('_') to cope
+ * with misconfigured zones that have been encountered in the wild.
*/
int
string_is_valid_hostname(const char *string)
@@ -1048,7 +1051,7 @@ string_is_valid_hostname(const char *string)
smartlist_split_string(components,string,".",0,0);
SMARTLIST_FOREACH_BEGIN(components, char *, c) {
- if (c[0] == '-') {
+ if ((c[0] == '-') || (*c == '_')) {
result = 0;
break;
}
@@ -1057,7 +1060,7 @@ string_is_valid_hostname(const char *string)
if ((*c >= 'a' && *c <= 'z') ||
(*c >= 'A' && *c <= 'Z') ||
(*c >= '0' && *c <= '9') ||
- (*c == '-'))
+ (*c == '-') || (*c == '_'))
c++;
else
result = 0;
diff --git a/src/test/test_util.c b/src/test/test_util.c
index b0366db37f..0f64c26e01 100644
--- a/src/test/test_util.c
+++ b/src/test/test_util.c
@@ -4268,18 +4268,23 @@ test_util_hostname_validation(void *arg)
tt_assert(string_is_valid_hostname("stanford.edu"));
tt_assert(string_is_valid_hostname("multiple-words-with-hypens.jp"));
- // Subdomain name cannot start with '-'.
+ // Subdomain name cannot start with '-' or '_'.
tt_assert(!string_is_valid_hostname("-torproject.org"));
tt_assert(!string_is_valid_hostname("subdomain.-domain.org"));
tt_assert(!string_is_valid_hostname("-subdomain.domain.org"));
+ tt_assert(!string_is_valid_hostname("___abc.org"));
// Hostnames cannot contain non-alphanumeric characters.
tt_assert(!string_is_valid_hostname("%%domain.\\org."));
tt_assert(!string_is_valid_hostname("***x.net"));
- tt_assert(!string_is_valid_hostname("___abc.org"));
tt_assert(!string_is_valid_hostname("\xff\xffxyz.org"));
tt_assert(!string_is_valid_hostname("word1 word2.net"));
+ // Test workaround for nytimes.com stupidity, technically invalid,
+ // but we allow it since they are big, even though they are failing to
+ // comply with a ~30 year old standard.
+ tt_assert(string_is_valid_hostname("core3_euw1.fabrik.nytimes.com"));
+
// XXX: do we allow single-label DNS names?
done: