From cac612af42798bc76d8933837a9da97ddc039c9b Mon Sep 17 00:00:00 2001 From: David Goulet Date: Tue, 17 Aug 2021 12:43:58 -0400 Subject: dir: Do not flag non-running failing HSDir When a directory request fails, we flag the relay as non Running so we don't use it anymore. This can be problematic with onion services because there are cases where a tor instance could have a lot of services, ephemeral ones, and keeps failing to upload descriptors, let say due to a bad network, and thus flag a lot of nodes as non Running which then in turn can not be used for circuit building. This commit makes it that we never flag nodes as non Running on a onion service directory request (upload or fetch) failure as to keep the hashring intact and not affect other parts of tor. Fortunately, the onion service hashring is _not_ selected by looking at the Running flag but since we do a 3-hop circuit to the HSDir, other services on the same instance can influence each other by removing nodes from the consensus for path selection. This was made apparent with a small network that ran out of nodes to used due to rapid succession of onion services uploading and failing. See #40434 for details. Fixes #40434 Signed-off-by: David Goulet --- changes/ticket40434 | 6 ++++++ src/feature/dirclient/dirclient.c | 17 ++++++++++++++++- src/feature/dircommon/directory.h | 6 ++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 changes/ticket40434 diff --git a/changes/ticket40434 b/changes/ticket40434 new file mode 100644 index 0000000000..988bb416be --- /dev/null +++ b/changes/ticket40434 @@ -0,0 +1,6 @@ + o Minor bugfix (onion service): + - Do not flag an HSDir as non-running in case the descriptor upload or + fetch fails. An onion service closes pending directory connections + before uploading a new descriptor which can thus lead to wrongly + flagging many relays and thus affecting circuit building path selection. + Fixes bug 40434; bugfix on 0.2.0.13-alpha. diff --git a/src/feature/dirclient/dirclient.c b/src/feature/dirclient/dirclient.c index a5dd856729..f2e1e5b5ff 100644 --- a/src/feature/dirclient/dirclient.c +++ b/src/feature/dirclient/dirclient.c @@ -738,7 +738,22 @@ connection_dir_client_request_failed(dir_connection_t *conn) return; /* this was a test fetch. don't retry. */ } if (!entry_list_is_constrained(get_options())) - router_set_status(conn->identity_digest, 0); /* don't try this one again */ + /* We must not set a directory to non-running for HS purposes else we end + * up flagging nodes from the hashring has unusable. It doesn't have direct + * effect on the HS subsystem because the nodes are selected regardless of + * their status but still, we shouldn't flag them as non running. + * + * One example where this can go bad is if a tor instance gets added a lot + * of ephemeral services and with a network with problem then many nodes in + * the consenus ends up unusable. + * + * Furthermore, a service does close any pending directory connections + * before uploading a descriptor and thus we can end up here in a natural + * way since closing a pending directory connection leads to this code + * path. */ + if (!DIR_PURPOSE_IS_HS(TO_CONN(conn)->purpose)) { + router_set_status(conn->identity_digest, 0); + } if (conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC || conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO) { log_info(LD_DIR, "Giving up on serverdesc/extrainfo fetch from " diff --git a/src/feature/dircommon/directory.h b/src/feature/dircommon/directory.h index 0aa2ff53ef..2cd9c176c8 100644 --- a/src/feature/dircommon/directory.h +++ b/src/feature/dircommon/directory.h @@ -87,6 +87,12 @@ const dir_connection_t *CONST_TO_DIR_CONN(const connection_t *c); (p)==DIR_PURPOSE_UPLOAD_RENDDESC_V2 || \ (p)==DIR_PURPOSE_UPLOAD_HSDESC) +/** True iff p is a purpose corresponding to onion service that is either + * uploading or fetching actions. */ +#define DIR_PURPOSE_IS_HS(p) \ + ((p) == DIR_PURPOSE_FETCH_HSDESC || \ + (p) == DIR_PURPOSE_UPLOAD_HSDESC) + enum compress_method_t; int parse_http_response(const char *headers, int *code, time_t *date, enum compress_method_t *compression, char **response); -- cgit v1.2.3-54-g00ecf