aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDavid Goulet <dgoulet@torproject.org>2021-08-17 12:43:58 -0400
committerDavid Goulet <dgoulet@torproject.org>2021-08-17 13:27:14 -0400
commitcac612af42798bc76d8933837a9da97ddc039c9b (patch)
tree6fa03b1f282d3c5c2556714db4a2006c96b944ae /src
parentda9ff3936d71a607a7af5b07f87d18a4aa303060 (diff)
downloadtor-cac612af42798bc76d8933837a9da97ddc039c9b.tar.gz
tor-cac612af42798bc76d8933837a9da97ddc039c9b.zip
dir: Do not flag non-running failing HSDir
When a directory request fails, we flag the relay as non Running so we don't use it anymore. This can be problematic with onion services because there are cases where a tor instance could have a lot of services, ephemeral ones, and keeps failing to upload descriptors, let say due to a bad network, and thus flag a lot of nodes as non Running which then in turn can not be used for circuit building. This commit makes it that we never flag nodes as non Running on a onion service directory request (upload or fetch) failure as to keep the hashring intact and not affect other parts of tor. Fortunately, the onion service hashring is _not_ selected by looking at the Running flag but since we do a 3-hop circuit to the HSDir, other services on the same instance can influence each other by removing nodes from the consensus for path selection. This was made apparent with a small network that ran out of nodes to used due to rapid succession of onion services uploading and failing. See #40434 for details. Fixes #40434 Signed-off-by: David Goulet <dgoulet@torproject.org>
Diffstat (limited to 'src')
-rw-r--r--src/feature/dirclient/dirclient.c17
-rw-r--r--src/feature/dircommon/directory.h6
2 files changed, 22 insertions, 1 deletions
diff --git a/src/feature/dirclient/dirclient.c b/src/feature/dirclient/dirclient.c
index a5dd856729..f2e1e5b5ff 100644
--- a/src/feature/dirclient/dirclient.c
+++ b/src/feature/dirclient/dirclient.c
@@ -738,7 +738,22 @@ connection_dir_client_request_failed(dir_connection_t *conn)
return; /* this was a test fetch. don't retry. */
}
if (!entry_list_is_constrained(get_options()))
- router_set_status(conn->identity_digest, 0); /* don't try this one again */
+ /* We must not set a directory to non-running for HS purposes else we end
+ * up flagging nodes from the hashring has unusable. It doesn't have direct
+ * effect on the HS subsystem because the nodes are selected regardless of
+ * their status but still, we shouldn't flag them as non running.
+ *
+ * One example where this can go bad is if a tor instance gets added a lot
+ * of ephemeral services and with a network with problem then many nodes in
+ * the consenus ends up unusable.
+ *
+ * Furthermore, a service does close any pending directory connections
+ * before uploading a descriptor and thus we can end up here in a natural
+ * way since closing a pending directory connection leads to this code
+ * path. */
+ if (!DIR_PURPOSE_IS_HS(TO_CONN(conn)->purpose)) {
+ router_set_status(conn->identity_digest, 0);
+ }
if (conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC ||
conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO) {
log_info(LD_DIR, "Giving up on serverdesc/extrainfo fetch from "
diff --git a/src/feature/dircommon/directory.h b/src/feature/dircommon/directory.h
index 0aa2ff53ef..2cd9c176c8 100644
--- a/src/feature/dircommon/directory.h
+++ b/src/feature/dircommon/directory.h
@@ -87,6 +87,12 @@ const dir_connection_t *CONST_TO_DIR_CONN(const connection_t *c);
(p)==DIR_PURPOSE_UPLOAD_RENDDESC_V2 || \
(p)==DIR_PURPOSE_UPLOAD_HSDESC)
+/** True iff p is a purpose corresponding to onion service that is either
+ * uploading or fetching actions. */
+#define DIR_PURPOSE_IS_HS(p) \
+ ((p) == DIR_PURPOSE_FETCH_HSDESC || \
+ (p) == DIR_PURPOSE_UPLOAD_HSDESC)
+
enum compress_method_t;
int parse_http_response(const char *headers, int *code, time_t *date,
enum compress_method_t *compression, char **response);