summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Goulet <dgoulet@torproject.org>2021-10-20 09:17:06 -0400
committerDavid Goulet <dgoulet@torproject.org>2021-10-20 09:17:06 -0400
commit07989171e3fead146911e63f24c2dfcabe39c660 (patch)
tree470088b0317d7cd3453a2ce4209a088c750f4108
parent5112b6cd4f10a72952ce8a23dcdda93654c10ef2 (diff)
parentcaa305a5adec8a0a50006d1bed3e6b019e016dd4 (diff)
downloadtor-07989171e3fead146911e63f24c2dfcabe39c660.tar.gz
tor-07989171e3fead146911e63f24c2dfcabe39c660.zip
Merge branch 'maint-0.4.6' into release-0.4.6
-rw-r--r--changes/ticket4047611
-rw-r--r--changes/ticket404917
-rw-r--r--src/core/or/connection_edge.c28
-rw-r--r--src/feature/dircache/dircache.c2
-rw-r--r--src/feature/nodelist/networkstatus.c2
-rw-r--r--src/feature/relay/dns.c14
-rw-r--r--src/feature/stats/rephist.c133
-rw-r--r--src/feature/stats/rephist.h3
-rw-r--r--src/test/test_hs_common.c10
-rw-r--r--src/test/test_stats.c78
10 files changed, 263 insertions, 25 deletions
diff --git a/changes/ticket40476 b/changes/ticket40476
index 062e36f9bc..86e4377a1c 100644
--- a/changes/ticket40476
+++ b/changes/ticket40476
@@ -1,8 +1,3 @@
- o Major feature (onion service v2):
- - Tor does NOT allow anymore to create v2 services, to connect as a client
- to a v2 service and for a relay to be a v2 HSDir or introduction point.
- This effectively disable onion service version 2 tor wide. Closes 40476.
- - The control port command HSFETCH and HSPOST don't allow version 2 as well.
- It is also not possible to create a v2 service with ADD_ONION.
- - See https://blog.torproject.org/v2-deprecation-timeline for details on
- how to transition from v2 to v3.
+ o Minor bugfix (onion service):
+ - Improve logging when a bad HS version is given. Fixes bug 40476; bugfix on
+ 0.4.6.1-alpha.
diff --git a/changes/ticket40491 b/changes/ticket40491
new file mode 100644
index 0000000000..01c6c7d748
--- /dev/null
+++ b/changes/ticket40491
@@ -0,0 +1,7 @@
+ o Major bugfixes (relay, overload state):
+ - Report the general overload state for DNS timeout errors only if X% of all
+ DNS queries over Y seconds are errors. Before that, it only took 1 timeout
+ to report the overload state which was just too low of a threshold. The X
+ and Y values are 1% and 10 minutes respectively but they are also
+ controlled by consensus parameters. Fixes bug 40491; bugfix on
+ 0.4.6.1-alpha.
diff --git a/src/core/or/connection_edge.c b/src/core/or/connection_edge.c
index 60a2f88ccb..d3979b3a7e 100644
--- a/src/core/or/connection_edge.c
+++ b/src/core/or/connection_edge.c
@@ -1612,6 +1612,23 @@ consider_plaintext_ports(entry_connection_t *conn, uint16_t port)
return 0;
}
+/** Return true iff <b>query</b> is a syntactically valid service ID (as
+ * generated by rend_get_service_id). */
+static int
+rend_valid_v2_service_id(const char *query)
+{
+ /** Length of 'y' portion of 'y.onion' URL. */
+#define REND_SERVICE_ID_LEN_BASE32 16
+
+ if (strlen(query) != REND_SERVICE_ID_LEN_BASE32)
+ return 0;
+
+ if (strspn(query, BASE32_CHARS) != REND_SERVICE_ID_LEN_BASE32)
+ return 0;
+
+ return 1;
+}
+
/** Parse the given hostname in address. Returns true if the parsing was
* successful and type_out contains the type of the hostname. Else, false is
* returned which means it was not recognized and type_out is set to
@@ -1675,6 +1692,14 @@ parse_extended_hostname(char *address, hostname_type_t *type_out)
if (q != address) {
memmove(address, q, strlen(q) + 1 /* also get \0 */);
}
+ /* v2 onion address check. */
+ if (strlen(query) == REND_SERVICE_ID_LEN_BASE32) {
+ *type_out = ONION_V2_HOSTNAME;
+ if (rend_valid_v2_service_id(query)) {
+ goto success;
+ }
+ goto failed;
+ }
/* v3 onion address check. */
if (strlen(query) == HS_SERVICE_ADDR_LEN_BASE32) {
@@ -1694,7 +1719,8 @@ parse_extended_hostname(char *address, hostname_type_t *type_out)
failed:
/* otherwise, return to previous state and return 0 */
*s = '.';
- const bool is_onion = (*type_out == ONION_V3_HOSTNAME);
+ const bool is_onion = (*type_out == ONION_V2_HOSTNAME) ||
+ (*type_out == ONION_V3_HOSTNAME);
log_warn(LD_APP, "Invalid %shostname %s; rejecting",
is_onion ? "onion " : "",
safe_str_client(address));
diff --git a/src/feature/dircache/dircache.c b/src/feature/dircache/dircache.c
index 7319b96caf..7fdb1bc70f 100644
--- a/src/feature/dircache/dircache.c
+++ b/src/feature/dircache/dircache.c
@@ -1569,8 +1569,6 @@ directory_handle_command_post,(dir_connection_t *conn, const char *headers,
char *url = NULL;
const or_options_t *options = get_options();
- (void) body_len;
-
log_debug(LD_DIRSERV,"Received POST command.");
conn->base_.state = DIR_CONN_STATE_SERVER_WRITING;
diff --git a/src/feature/nodelist/networkstatus.c b/src/feature/nodelist/networkstatus.c
index 2ffa6da1a3..af808a6ba7 100644
--- a/src/feature/nodelist/networkstatus.c
+++ b/src/feature/nodelist/networkstatus.c
@@ -103,6 +103,7 @@
#include "feature/dirauth/vote_microdesc_hash_st.h"
#include "feature/nodelist/vote_routerstatus_st.h"
#include "feature/nodelist/routerstatus_st.h"
+#include "feature/stats/rephist.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@@ -1663,6 +1664,7 @@ notify_before_networkstatus_changes(const networkstatus_t *old_c,
dos_consensus_has_changed(new_c);
relay_consensus_has_changed(new_c);
hs_dos_consensus_has_changed(new_c);
+ rep_hist_consensus_has_changed(new_c);
}
/* Called after a new consensus has been put in the global state. It is safe
diff --git a/src/feature/relay/dns.c b/src/feature/relay/dns.c
index 22f929808e..ed8d235e92 100644
--- a/src/feature/relay/dns.c
+++ b/src/feature/relay/dns.c
@@ -1548,16 +1548,6 @@ evdns_callback(int result, char type, int count, int ttl, void *addresses,
tor_addr_make_unspec(&addr);
- /* Note down any DNS errors to the statistics module */
- if (result == DNS_ERR_TIMEOUT) {
- /* libevent timed out while resolving a name. However, because libevent
- * handles retries and timeouts internally, this means that all attempts of
- * libevent timed out. If we wanted to get more granular information about
- * individual libevent attempts, we would have to implement our own DNS
- * timeout/retry logic */
- rep_hist_note_overload(OVERLOAD_GENERAL);
- }
-
/* Keep track of whether IPv6 is working */
if (type == DNS_IPv6_AAAA) {
if (result == DNS_ERR_TIMEOUT) {
@@ -1659,6 +1649,10 @@ evdns_callback(int result, char type, int count, int ttl, void *addresses,
dns_found_answer(string_address, orig_query_type,
result, &addr, hostname, ttl);
+ /* The result can be changed within this function thus why we note the result
+ * at the end. */
+ rep_hist_note_dns_query(type, result);
+
tor_free(arg_);
}
diff --git a/src/feature/stats/rephist.c b/src/feature/stats/rephist.c
index e25c01331d..c3a281a8c2 100644
--- a/src/feature/stats/rephist.c
+++ b/src/feature/stats/rephist.c
@@ -84,6 +84,8 @@
#include "feature/nodelist/networkstatus_st.h"
#include "core/or/or_circuit_st.h"
+#include <event2/dns.h>
+
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
@@ -204,6 +206,54 @@ typedef struct {
uint64_t overload_fd_exhausted;
} overload_stats_t;
+/***** DNS statistics *****/
+
+/** Represents the statistics of DNS queries seen if it is an Exit. */
+typedef struct {
+ /** Total number of DNS request seen at an Exit. They might not all end
+ * successfully or might even be lost by tor. This counter is incremented
+ * right before the DNS request is initiated. */
+ uint64_t stats_n_request;
+
+ /** Total number of DNS timeout errors. */
+ uint64_t stats_n_error_timeout;
+
+ /** When is the next assessment time of the general overload for DNS errors.
+ * Once this time is reached, all stats are reset and this time is set to the
+ * next assessment time. */
+ time_t next_assessment_time;
+} overload_dns_stats_t;
+
+/** Keep track of the DNS requests for the general overload state. */
+static overload_dns_stats_t overload_dns_stats;
+
+/* We use a scale here so we can represent percentages with decimal points by
+ * scaling the value by this factor and so 0.5% becomes a value of 500.
+ * Default is 1% and thus min and max range is 0 to 100%. */
+#define OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE 1000.0
+#define OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT 1000
+#define OVERLOAD_DNS_TIMEOUT_PERCENT_MIN 0
+#define OVERLOAD_DNS_TIMEOUT_PERCENT_MAX 100000
+
+/** Consensus parameter: indicate what fraction of DNS timeout errors over the
+ * total number of DNS requests must be reached before we trigger a general
+ * overload signal .*/
+static double overload_dns_timeout_fraction =
+ OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT /
+ OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0;
+
+/* Number of seconds for the assessment period. Default is 10 minutes (600) and
+ * the min max range is within a 32bit value. */
+#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT (10 * 60)
+#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN 0
+#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX INT32_MAX
+
+/** Consensus parameter: Period, in seconds, over which we count the number of
+ * DNS requests and timeout errors. After that period, we assess if we trigger
+ * an overload or not. */
+static int32_t overload_dns_timeout_period_secs =
+ OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT;
+
/** Current state of overload stats */
static overload_stats_t overload_stats;
@@ -218,6 +268,89 @@ overload_happened_recently(time_t overload_time, int n_hours)
return false;
}
+/** Assess the DNS timeout errors and if we have enough to trigger a general
+ * overload. */
+static void
+overload_general_dns_assessment(void)
+{
+ /* Initialize the time. Should be done once. */
+ if (overload_dns_stats.next_assessment_time == 0) {
+ goto reset;
+ }
+
+ /* Not the time yet. */
+ if (overload_dns_stats.next_assessment_time > approx_time()) {
+ return;
+ }
+
+ /* Lets see if we can signal a general overload. */
+ double fraction = (double) overload_dns_stats.stats_n_error_timeout /
+ (double) overload_dns_stats.stats_n_request;
+ if (fraction >= overload_dns_timeout_fraction) {
+ log_notice(LD_HIST, "General overload -> DNS timeouts (%" PRIu64 ") "
+ "fraction %.4f%% is above threshold of %.4f%%",
+ overload_dns_stats.stats_n_error_timeout,
+ fraction * 100.0,
+ overload_dns_timeout_fraction * 100.0);
+ rep_hist_note_overload(OVERLOAD_GENERAL);
+ }
+
+ reset:
+ /* Reset counters for the next period. */
+ overload_dns_stats.stats_n_error_timeout = 0;
+ overload_dns_stats.stats_n_request = 0;
+ overload_dns_stats.next_assessment_time =
+ approx_time() + overload_dns_timeout_period_secs;
+}
+
+/** Called just before the consensus will be replaced. Update the consensus
+ * parameters in case they changed. */
+void
+rep_hist_consensus_has_changed(const networkstatus_t *ns)
+{
+ overload_dns_timeout_fraction =
+ networkstatus_get_param(ns, "overload_dns_timeout_scale_percent",
+ OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT,
+ OVERLOAD_DNS_TIMEOUT_PERCENT_MIN,
+ OVERLOAD_DNS_TIMEOUT_PERCENT_MAX) /
+ OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0;
+
+ overload_dns_timeout_period_secs =
+ networkstatus_get_param(ns, "overload_dns_timeout_period_secs",
+ OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT,
+ OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN,
+ OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX);
+}
+
+/** Note a DNS error for the given given libevent DNS record type and error
+ * code. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA.
+ *
+ * IMPORTANT: Libevent is _not_ returning the type in case of an error and so
+ * if error is anything but DNS_ERR_NONE, the type is not usable and set to 0.
+ *
+ * See: https://gitlab.torproject.org/tpo/core/tor/-/issues/40490 */
+void
+rep_hist_note_dns_query(int type, uint8_t error)
+{
+ (void) type;
+
+ /* Assess if we need to trigger a general overload with regards to the DNS
+ * errors or not. */
+ overload_general_dns_assessment();
+
+ /* We only care about timeouts for the moment. */
+ switch (error) {
+ case DNS_ERR_TIMEOUT:
+ overload_dns_stats.stats_n_error_timeout++;
+ break;
+ default:
+ break;
+ }
+
+ /* Increment total number of requests. */
+ overload_dns_stats.stats_n_request++;
+}
+
/* The current version of the overload stats version */
#define OVERLOAD_STATS_VERSION 1
diff --git a/src/feature/stats/rephist.h b/src/feature/stats/rephist.h
index d4a2f301cf..749b4996a8 100644
--- a/src/feature/stats/rephist.h
+++ b/src/feature/stats/rephist.h
@@ -72,11 +72,14 @@ void rep_hist_seen_new_rp_cell(bool is_v2);
char *rep_hist_get_hs_v3_stats_string(void);
void rep_hist_hsdir_stored_maybe_new_v3_onion(const uint8_t *blinded_key);
+void rep_hist_note_dns_query(int type, uint8_t error);
+
void rep_hist_free_all(void);
void rep_hist_note_negotiated_link_proto(unsigned link_proto,
int started_here);
void rep_hist_log_link_protocol_counts(void);
+void rep_hist_consensus_has_changed(const networkstatus_t *ns);
extern uint64_t rephist_total_alloc;
extern uint32_t rephist_total_num;
diff --git a/src/test/test_hs_common.c b/src/test/test_hs_common.c
index 347a5b7174..7cb6a36f8e 100644
--- a/src/test/test_hs_common.c
+++ b/src/test/test_hs_common.c
@@ -808,11 +808,13 @@ test_parse_extended_hostname(void *arg)
tt_assert(parse_extended_hostname(address4, &type));
tt_int_op(type, OP_EQ, NORMAL_HOSTNAME);
- tt_assert(!parse_extended_hostname(address5, &type));
- tt_int_op(type, OP_EQ, BAD_HOSTNAME);
+ tt_assert(parse_extended_hostname(address5, &type));
+ tt_int_op(type, OP_EQ, ONION_V2_HOSTNAME);
+ tt_str_op(address5, OP_EQ, "abcdefghijklmnop");
- tt_assert(!parse_extended_hostname(address6, &type));
- tt_int_op(type, OP_EQ, BAD_HOSTNAME);
+ tt_assert(parse_extended_hostname(address6, &type));
+ tt_int_op(type, OP_EQ, ONION_V2_HOSTNAME);
+ tt_str_op(address6, OP_EQ, "abcdefghijklmnop");
tt_assert(!parse_extended_hostname(address7, &type));
tt_int_op(type, OP_EQ, BAD_HOSTNAME);
diff --git a/src/test/test_stats.c b/src/test/test_stats.c
index 081ae22cd5..3b9a192c75 100644
--- a/src/test/test_stats.c
+++ b/src/test/test_stats.c
@@ -51,6 +51,8 @@
#include "feature/stats/bw_array_st.h"
#include "feature/relay/router.h"
+#include "event2/dns.h"
+
/** Run unit tests for some stats code. */
static void
test_stats(void *arg)
@@ -865,6 +867,81 @@ test_overload_stats(void *arg)
tor_free(stats_str);
}
+/** Test the overload stats logic. */
+static void
+test_overload_dns_timeout(void *arg)
+{
+ char *stats_str = NULL;
+ (void) arg;
+
+ /* Lets simulate a series of timeouts but below our default 1% threshold. */
+
+ for (int i = 0; i < 1000; i++) {
+ /* This should trigger 9 timeouts which is just below 1% (10) */
+ if (i > 0 && !(i % 100)) {
+ rep_hist_note_dns_query(0, DNS_ERR_TIMEOUT);
+ } else {
+ rep_hist_note_dns_query(0, DNS_ERR_NONE);
+ }
+ }
+
+ /* No overload yet. */
+ stats_str = rep_hist_get_overload_general_line();
+ tt_assert(!stats_str);
+
+ /* Move it 10 minutes in the future and see if we get a general overload. */
+ update_approx_time(approx_time() + (10 * 60));
+
+ /* This query should NOT trigger the general overload because we are below
+ * our default of 1%. */
+ rep_hist_note_dns_query(0, DNS_ERR_NONE);
+ stats_str = rep_hist_get_overload_general_line();
+ tt_assert(!stats_str);
+
+ /* We'll now go above our 1% threshold. */
+ for (int i = 0; i < 1000; i++) {
+ /* This should trigger 10 timeouts which is our threshold of 1% (10) */
+ if (!(i % 10)) {
+ rep_hist_note_dns_query(0, DNS_ERR_TIMEOUT);
+ } else {
+ rep_hist_note_dns_query(0, DNS_ERR_NONE);
+ }
+ }
+
+ /* Move it 10 minutes in the future and see if we get a general overload. */
+ update_approx_time(approx_time() + (10 * 60));
+
+ /* This query should trigger the general overload because we are above 1%. */
+ rep_hist_note_dns_query(0, DNS_ERR_NONE);
+ stats_str = rep_hist_get_overload_general_line();
+ tt_assert(stats_str);
+ tor_free(stats_str);
+
+ /* Move 72h in the future, we should NOT get an overload anymore. */
+ update_approx_time(approx_time() + (72 * 3600));
+
+ stats_str = rep_hist_get_overload_general_line();
+ tt_assert(!stats_str);
+
+ /* This query should NOT trigger the general overload. */
+ rep_hist_note_dns_query(0, DNS_ERR_TIMEOUT);
+ stats_str = rep_hist_get_overload_general_line();
+ tt_assert(!stats_str);
+
+ /* Move it 10 minutes in the future and see if we get a general overload. We
+ * have now 100% of requests timing out. */
+ update_approx_time(approx_time() + (10 * 60));
+
+ /* This query should trigger the general overload with 50% of timeouts. */
+ rep_hist_note_dns_query(0, DNS_ERR_NONE);
+ stats_str = rep_hist_get_overload_general_line();
+ tt_assert(stats_str);
+ tor_free(stats_str);
+
+ done:
+ tor_free(stats_str);
+}
+
#define ENT(name) \
{ #name, test_ ## name , 0, NULL, NULL }
#define FORK(name) \
@@ -881,6 +958,7 @@ struct testcase_t stats_tests[] = {
FORK(rephist_v3_onions),
FORK(load_stats_file),
FORK(overload_stats),
+ FORK(overload_dns_timeout),
END_OF_TESTCASES
};