From af48f5736aec6ae67b52bc26e3cbb74d47dd8db1 Mon Sep 17 00:00:00 2001 From: David Goulet Date: Tue, 19 Oct 2021 10:55:29 -0400 Subject: hs: Fix merge conflicts after merging forward 40476 Signed-off-by: David Goulet --- changes/ticket40476 | 11 +++-------- src/core/or/connection_edge.c | 28 +++++++++++++++++++++++++++- src/feature/dircache/dircache.c | 2 -- src/test/test_hs_common.c | 10 ++++++---- 4 files changed, 36 insertions(+), 15 deletions(-) diff --git a/changes/ticket40476 b/changes/ticket40476 index 062e36f9bc..86e4377a1c 100644 --- a/changes/ticket40476 +++ b/changes/ticket40476 @@ -1,8 +1,3 @@ - o Major feature (onion service v2): - - Tor does NOT allow anymore to create v2 services, to connect as a client - to a v2 service and for a relay to be a v2 HSDir or introduction point. - This effectively disable onion service version 2 tor wide. Closes 40476. - - The control port command HSFETCH and HSPOST don't allow version 2 as well. - It is also not possible to create a v2 service with ADD_ONION. - - See https://blog.torproject.org/v2-deprecation-timeline for details on - how to transition from v2 to v3. + o Minor bugfix (onion service): + - Improve logging when a bad HS version is given. Fixes bug 40476; bugfix on + 0.4.6.1-alpha. diff --git a/src/core/or/connection_edge.c b/src/core/or/connection_edge.c index 60a2f88ccb..d3979b3a7e 100644 --- a/src/core/or/connection_edge.c +++ b/src/core/or/connection_edge.c @@ -1612,6 +1612,23 @@ consider_plaintext_ports(entry_connection_t *conn, uint16_t port) return 0; } +/** Return true iff query is a syntactically valid service ID (as + * generated by rend_get_service_id). */ +static int +rend_valid_v2_service_id(const char *query) +{ + /** Length of 'y' portion of 'y.onion' URL. */ +#define REND_SERVICE_ID_LEN_BASE32 16 + + if (strlen(query) != REND_SERVICE_ID_LEN_BASE32) + return 0; + + if (strspn(query, BASE32_CHARS) != REND_SERVICE_ID_LEN_BASE32) + return 0; + + return 1; +} + /** Parse the given hostname in address. Returns true if the parsing was * successful and type_out contains the type of the hostname. Else, false is * returned which means it was not recognized and type_out is set to @@ -1675,6 +1692,14 @@ parse_extended_hostname(char *address, hostname_type_t *type_out) if (q != address) { memmove(address, q, strlen(q) + 1 /* also get \0 */); } + /* v2 onion address check. */ + if (strlen(query) == REND_SERVICE_ID_LEN_BASE32) { + *type_out = ONION_V2_HOSTNAME; + if (rend_valid_v2_service_id(query)) { + goto success; + } + goto failed; + } /* v3 onion address check. */ if (strlen(query) == HS_SERVICE_ADDR_LEN_BASE32) { @@ -1694,7 +1719,8 @@ parse_extended_hostname(char *address, hostname_type_t *type_out) failed: /* otherwise, return to previous state and return 0 */ *s = '.'; - const bool is_onion = (*type_out == ONION_V3_HOSTNAME); + const bool is_onion = (*type_out == ONION_V2_HOSTNAME) || + (*type_out == ONION_V3_HOSTNAME); log_warn(LD_APP, "Invalid %shostname %s; rejecting", is_onion ? "onion " : "", safe_str_client(address)); diff --git a/src/feature/dircache/dircache.c b/src/feature/dircache/dircache.c index 7319b96caf..7fdb1bc70f 100644 --- a/src/feature/dircache/dircache.c +++ b/src/feature/dircache/dircache.c @@ -1569,8 +1569,6 @@ directory_handle_command_post,(dir_connection_t *conn, const char *headers, char *url = NULL; const or_options_t *options = get_options(); - (void) body_len; - log_debug(LD_DIRSERV,"Received POST command."); conn->base_.state = DIR_CONN_STATE_SERVER_WRITING; diff --git a/src/test/test_hs_common.c b/src/test/test_hs_common.c index 347a5b7174..7cb6a36f8e 100644 --- a/src/test/test_hs_common.c +++ b/src/test/test_hs_common.c @@ -808,11 +808,13 @@ test_parse_extended_hostname(void *arg) tt_assert(parse_extended_hostname(address4, &type)); tt_int_op(type, OP_EQ, NORMAL_HOSTNAME); - tt_assert(!parse_extended_hostname(address5, &type)); - tt_int_op(type, OP_EQ, BAD_HOSTNAME); + tt_assert(parse_extended_hostname(address5, &type)); + tt_int_op(type, OP_EQ, ONION_V2_HOSTNAME); + tt_str_op(address5, OP_EQ, "abcdefghijklmnop"); - tt_assert(!parse_extended_hostname(address6, &type)); - tt_int_op(type, OP_EQ, BAD_HOSTNAME); + tt_assert(parse_extended_hostname(address6, &type)); + tt_int_op(type, OP_EQ, ONION_V2_HOSTNAME); + tt_str_op(address6, OP_EQ, "abcdefghijklmnop"); tt_assert(!parse_extended_hostname(address7, &type)); tt_int_op(type, OP_EQ, BAD_HOSTNAME); -- cgit v1.2.3-54-g00ecf From de907893befdb2bf6bb0b95b183cc1f01b8a464c Mon Sep 17 00:00:00 2001 From: David Goulet Date: Tue, 19 Oct 2021 14:13:33 -0400 Subject: relay: Overload state on DNS timeout is now X% over Y secs With this commit, we will only report a general overload state if we've seen more than X% of DNS timeout errors over Y seconds. Previous behavior was to report when a single timeout occured which is really too small of a threshold. The value X is a consensus parameters called "overload_dns_timeout_scale_percent" which is a scaled percentage (factor of 1000) so we can represent decimal points for X like 0.5% for instance. Its default is 1000 which ends up being 1%. The value Y is a consensus parameters called "overload_dns_timeout_period_secs" which is the time period for which will gather DNS errors and once over, we assess if that X% has been reached ultimately triggering a general overload signal. Closes #40491 Signed-off-by: David Goulet --- src/feature/nodelist/networkstatus.c | 2 + src/feature/relay/dns.c | 14 ++-- src/feature/stats/rephist.c | 133 +++++++++++++++++++++++++++++++++++ src/feature/stats/rephist.h | 3 + 4 files changed, 142 insertions(+), 10 deletions(-) diff --git a/src/feature/nodelist/networkstatus.c b/src/feature/nodelist/networkstatus.c index 2ffa6da1a3..af808a6ba7 100644 --- a/src/feature/nodelist/networkstatus.c +++ b/src/feature/nodelist/networkstatus.c @@ -103,6 +103,7 @@ #include "feature/dirauth/vote_microdesc_hash_st.h" #include "feature/nodelist/vote_routerstatus_st.h" #include "feature/nodelist/routerstatus_st.h" +#include "feature/stats/rephist.h" #ifdef HAVE_UNISTD_H #include @@ -1663,6 +1664,7 @@ notify_before_networkstatus_changes(const networkstatus_t *old_c, dos_consensus_has_changed(new_c); relay_consensus_has_changed(new_c); hs_dos_consensus_has_changed(new_c); + rep_hist_consensus_has_changed(new_c); } /* Called after a new consensus has been put in the global state. It is safe diff --git a/src/feature/relay/dns.c b/src/feature/relay/dns.c index 22f929808e..ed8d235e92 100644 --- a/src/feature/relay/dns.c +++ b/src/feature/relay/dns.c @@ -1548,16 +1548,6 @@ evdns_callback(int result, char type, int count, int ttl, void *addresses, tor_addr_make_unspec(&addr); - /* Note down any DNS errors to the statistics module */ - if (result == DNS_ERR_TIMEOUT) { - /* libevent timed out while resolving a name. However, because libevent - * handles retries and timeouts internally, this means that all attempts of - * libevent timed out. If we wanted to get more granular information about - * individual libevent attempts, we would have to implement our own DNS - * timeout/retry logic */ - rep_hist_note_overload(OVERLOAD_GENERAL); - } - /* Keep track of whether IPv6 is working */ if (type == DNS_IPv6_AAAA) { if (result == DNS_ERR_TIMEOUT) { @@ -1659,6 +1649,10 @@ evdns_callback(int result, char type, int count, int ttl, void *addresses, dns_found_answer(string_address, orig_query_type, result, &addr, hostname, ttl); + /* The result can be changed within this function thus why we note the result + * at the end. */ + rep_hist_note_dns_query(type, result); + tor_free(arg_); } diff --git a/src/feature/stats/rephist.c b/src/feature/stats/rephist.c index e25c01331d..c3a281a8c2 100644 --- a/src/feature/stats/rephist.c +++ b/src/feature/stats/rephist.c @@ -84,6 +84,8 @@ #include "feature/nodelist/networkstatus_st.h" #include "core/or/or_circuit_st.h" +#include + #ifdef HAVE_FCNTL_H #include #endif @@ -204,6 +206,54 @@ typedef struct { uint64_t overload_fd_exhausted; } overload_stats_t; +/***** DNS statistics *****/ + +/** Represents the statistics of DNS queries seen if it is an Exit. */ +typedef struct { + /** Total number of DNS request seen at an Exit. They might not all end + * successfully or might even be lost by tor. This counter is incremented + * right before the DNS request is initiated. */ + uint64_t stats_n_request; + + /** Total number of DNS timeout errors. */ + uint64_t stats_n_error_timeout; + + /** When is the next assessment time of the general overload for DNS errors. + * Once this time is reached, all stats are reset and this time is set to the + * next assessment time. */ + time_t next_assessment_time; +} overload_dns_stats_t; + +/** Keep track of the DNS requests for the general overload state. */ +static overload_dns_stats_t overload_dns_stats; + +/* We use a scale here so we can represent percentages with decimal points by + * scaling the value by this factor and so 0.5% becomes a value of 500. + * Default is 1% and thus min and max range is 0 to 100%. */ +#define OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE 1000.0 +#define OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT 1000 +#define OVERLOAD_DNS_TIMEOUT_PERCENT_MIN 0 +#define OVERLOAD_DNS_TIMEOUT_PERCENT_MAX 100000 + +/** Consensus parameter: indicate what fraction of DNS timeout errors over the + * total number of DNS requests must be reached before we trigger a general + * overload signal .*/ +static double overload_dns_timeout_fraction = + OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT / + OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0; + +/* Number of seconds for the assessment period. Default is 10 minutes (600) and + * the min max range is within a 32bit value. */ +#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT (10 * 60) +#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN 0 +#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX INT32_MAX + +/** Consensus parameter: Period, in seconds, over which we count the number of + * DNS requests and timeout errors. After that period, we assess if we trigger + * an overload or not. */ +static int32_t overload_dns_timeout_period_secs = + OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT; + /** Current state of overload stats */ static overload_stats_t overload_stats; @@ -218,6 +268,89 @@ overload_happened_recently(time_t overload_time, int n_hours) return false; } +/** Assess the DNS timeout errors and if we have enough to trigger a general + * overload. */ +static void +overload_general_dns_assessment(void) +{ + /* Initialize the time. Should be done once. */ + if (overload_dns_stats.next_assessment_time == 0) { + goto reset; + } + + /* Not the time yet. */ + if (overload_dns_stats.next_assessment_time > approx_time()) { + return; + } + + /* Lets see if we can signal a general overload. */ + double fraction = (double) overload_dns_stats.stats_n_error_timeout / + (double) overload_dns_stats.stats_n_request; + if (fraction >= overload_dns_timeout_fraction) { + log_notice(LD_HIST, "General overload -> DNS timeouts (%" PRIu64 ") " + "fraction %.4f%% is above threshold of %.4f%%", + overload_dns_stats.stats_n_error_timeout, + fraction * 100.0, + overload_dns_timeout_fraction * 100.0); + rep_hist_note_overload(OVERLOAD_GENERAL); + } + + reset: + /* Reset counters for the next period. */ + overload_dns_stats.stats_n_error_timeout = 0; + overload_dns_stats.stats_n_request = 0; + overload_dns_stats.next_assessment_time = + approx_time() + overload_dns_timeout_period_secs; +} + +/** Called just before the consensus will be replaced. Update the consensus + * parameters in case they changed. */ +void +rep_hist_consensus_has_changed(const networkstatus_t *ns) +{ + overload_dns_timeout_fraction = + networkstatus_get_param(ns, "overload_dns_timeout_scale_percent", + OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT, + OVERLOAD_DNS_TIMEOUT_PERCENT_MIN, + OVERLOAD_DNS_TIMEOUT_PERCENT_MAX) / + OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0; + + overload_dns_timeout_period_secs = + networkstatus_get_param(ns, "overload_dns_timeout_period_secs", + OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT, + OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN, + OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX); +} + +/** Note a DNS error for the given given libevent DNS record type and error + * code. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA. + * + * IMPORTANT: Libevent is _not_ returning the type in case of an error and so + * if error is anything but DNS_ERR_NONE, the type is not usable and set to 0. + * + * See: https://gitlab.torproject.org/tpo/core/tor/-/issues/40490 */ +void +rep_hist_note_dns_query(int type, uint8_t error) +{ + (void) type; + + /* Assess if we need to trigger a general overload with regards to the DNS + * errors or not. */ + overload_general_dns_assessment(); + + /* We only care about timeouts for the moment. */ + switch (error) { + case DNS_ERR_TIMEOUT: + overload_dns_stats.stats_n_error_timeout++; + break; + default: + break; + } + + /* Increment total number of requests. */ + overload_dns_stats.stats_n_request++; +} + /* The current version of the overload stats version */ #define OVERLOAD_STATS_VERSION 1 diff --git a/src/feature/stats/rephist.h b/src/feature/stats/rephist.h index d4a2f301cf..749b4996a8 100644 --- a/src/feature/stats/rephist.h +++ b/src/feature/stats/rephist.h @@ -72,11 +72,14 @@ void rep_hist_seen_new_rp_cell(bool is_v2); char *rep_hist_get_hs_v3_stats_string(void); void rep_hist_hsdir_stored_maybe_new_v3_onion(const uint8_t *blinded_key); +void rep_hist_note_dns_query(int type, uint8_t error); + void rep_hist_free_all(void); void rep_hist_note_negotiated_link_proto(unsigned link_proto, int started_here); void rep_hist_log_link_protocol_counts(void); +void rep_hist_consensus_has_changed(const networkstatus_t *ns); extern uint64_t rephist_total_alloc; extern uint32_t rephist_total_num; -- cgit v1.2.3-54-g00ecf From 996409c9c4691fbb6cccaba11ceb84c1a5ef7906 Mon Sep 17 00:00:00 2001 From: David Goulet Date: Tue, 19 Oct 2021 15:07:11 -0400 Subject: test: Add unit tests for DNS timeout overload state Signed-off-by: David Goulet --- src/test/test_stats.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/src/test/test_stats.c b/src/test/test_stats.c index 081ae22cd5..3b9a192c75 100644 --- a/src/test/test_stats.c +++ b/src/test/test_stats.c @@ -51,6 +51,8 @@ #include "feature/stats/bw_array_st.h" #include "feature/relay/router.h" +#include "event2/dns.h" + /** Run unit tests for some stats code. */ static void test_stats(void *arg) @@ -865,6 +867,81 @@ test_overload_stats(void *arg) tor_free(stats_str); } +/** Test the overload stats logic. */ +static void +test_overload_dns_timeout(void *arg) +{ + char *stats_str = NULL; + (void) arg; + + /* Lets simulate a series of timeouts but below our default 1% threshold. */ + + for (int i = 0; i < 1000; i++) { + /* This should trigger 9 timeouts which is just below 1% (10) */ + if (i > 0 && !(i % 100)) { + rep_hist_note_dns_query(0, DNS_ERR_TIMEOUT); + } else { + rep_hist_note_dns_query(0, DNS_ERR_NONE); + } + } + + /* No overload yet. */ + stats_str = rep_hist_get_overload_general_line(); + tt_assert(!stats_str); + + /* Move it 10 minutes in the future and see if we get a general overload. */ + update_approx_time(approx_time() + (10 * 60)); + + /* This query should NOT trigger the general overload because we are below + * our default of 1%. */ + rep_hist_note_dns_query(0, DNS_ERR_NONE); + stats_str = rep_hist_get_overload_general_line(); + tt_assert(!stats_str); + + /* We'll now go above our 1% threshold. */ + for (int i = 0; i < 1000; i++) { + /* This should trigger 10 timeouts which is our threshold of 1% (10) */ + if (!(i % 10)) { + rep_hist_note_dns_query(0, DNS_ERR_TIMEOUT); + } else { + rep_hist_note_dns_query(0, DNS_ERR_NONE); + } + } + + /* Move it 10 minutes in the future and see if we get a general overload. */ + update_approx_time(approx_time() + (10 * 60)); + + /* This query should trigger the general overload because we are above 1%. */ + rep_hist_note_dns_query(0, DNS_ERR_NONE); + stats_str = rep_hist_get_overload_general_line(); + tt_assert(stats_str); + tor_free(stats_str); + + /* Move 72h in the future, we should NOT get an overload anymore. */ + update_approx_time(approx_time() + (72 * 3600)); + + stats_str = rep_hist_get_overload_general_line(); + tt_assert(!stats_str); + + /* This query should NOT trigger the general overload. */ + rep_hist_note_dns_query(0, DNS_ERR_TIMEOUT); + stats_str = rep_hist_get_overload_general_line(); + tt_assert(!stats_str); + + /* Move it 10 minutes in the future and see if we get a general overload. We + * have now 100% of requests timing out. */ + update_approx_time(approx_time() + (10 * 60)); + + /* This query should trigger the general overload with 50% of timeouts. */ + rep_hist_note_dns_query(0, DNS_ERR_NONE); + stats_str = rep_hist_get_overload_general_line(); + tt_assert(stats_str); + tor_free(stats_str); + + done: + tor_free(stats_str); +} + #define ENT(name) \ { #name, test_ ## name , 0, NULL, NULL } #define FORK(name) \ @@ -881,6 +958,7 @@ struct testcase_t stats_tests[] = { FORK(rephist_v3_onions), FORK(load_stats_file), FORK(overload_stats), + FORK(overload_dns_timeout), END_OF_TESTCASES }; -- cgit v1.2.3-54-g00ecf From caa305a5adec8a0a50006d1bed3e6b019e016dd4 Mon Sep 17 00:00:00 2001 From: David Goulet Date: Wed, 20 Oct 2021 09:15:51 -0400 Subject: changes: Add file for ticket 40491 Signed-off-by: David Goulet --- changes/ticket40491 | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 changes/ticket40491 diff --git a/changes/ticket40491 b/changes/ticket40491 new file mode 100644 index 0000000000..01c6c7d748 --- /dev/null +++ b/changes/ticket40491 @@ -0,0 +1,7 @@ + o Major bugfixes (relay, overload state): + - Report the general overload state for DNS timeout errors only if X% of all + DNS queries over Y seconds are errors. Before that, it only took 1 timeout + to report the overload state which was just too low of a threshold. The X + and Y values are 1% and 10 minutes respectively but they are also + controlled by consensus parameters. Fixes bug 40491; bugfix on + 0.4.6.1-alpha. -- cgit v1.2.3-54-g00ecf