diff options
author | Nick Mathewson <nickm@torproject.org> | 2018-07-05 16:31:38 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2018-07-05 17:15:50 -0400 |
commit | 63b4ea22af8e8314dd718f02046de5f4b91edf9d (patch) | |
tree | af52b6fba37f22c86447fd5267dd5eb557807c8b /src/feature/dircache | |
parent | ce84200542f48a92e8b56a8d032401ecd153e90c (diff) | |
download | tor-63b4ea22af8e8314dd718f02046de5f4b91edf9d.tar.gz tor-63b4ea22af8e8314dd718f02046de5f4b91edf9d.zip |
Move literally everything out of src/or
This commit won't build yet -- it just puts everything in a slightly
more logical place.
The reasoning here is that "src/core" will hold the stuff that every (or
nearly every) tor instance will need in order to do onion routing.
Other features (including some necessary ones) will live in
"src/feature". The "src/app" directory will hold the stuff needed
to have Tor be an application you can actually run.
This commit DOES NOT refactor the former contents of src/or into a
logical set of acyclic libraries, or change any code at all. That
will have to come in the future.
We will continue to move things around and split them in the future,
but I hope this lays a reasonable groundwork for doing so.
Diffstat (limited to 'src/feature/dircache')
-rw-r--r-- | src/feature/dircache/cached_dir_st.h | 25 | ||||
-rw-r--r-- | src/feature/dircache/conscache.c | 627 | ||||
-rw-r--r-- | src/feature/dircache/conscache.h | 66 | ||||
-rw-r--r-- | src/feature/dircache/consdiffmgr.c | 1945 | ||||
-rw-r--r-- | src/feature/dircache/consdiffmgr.h | 75 | ||||
-rw-r--r-- | src/feature/dircache/directory.c | 5966 | ||||
-rw-r--r-- | src/feature/dircache/directory.h | 347 | ||||
-rw-r--r-- | src/feature/dircache/dirserv.c | 3598 | ||||
-rw-r--r-- | src/feature/dircache/dirserv.h | 239 |
9 files changed, 12888 insertions, 0 deletions
diff --git a/src/feature/dircache/cached_dir_st.h b/src/feature/dircache/cached_dir_st.h new file mode 100644 index 0000000000..38ae86d975 --- /dev/null +++ b/src/feature/dircache/cached_dir_st.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef CACHED_DIR_ST_H +#define CACHED_DIR_ST_H + +/** A cached_dir_t represents a cacheable directory object, along with its + * compressed form. */ +struct cached_dir_t { + char *dir; /**< Contents of this object, NUL-terminated. */ + char *dir_compressed; /**< Compressed contents of this object. */ + size_t dir_len; /**< Length of <b>dir</b> (not counting its NUL). */ + size_t dir_compressed_len; /**< Length of <b>dir_compressed</b>. */ + time_t published; /**< When was this object published. */ + common_digests_t digests; /**< Digests of this object (networkstatus only) */ + /** Sha3 digest (also ns only) */ + uint8_t digest_sha3_as_signed[DIGEST256_LEN]; + int refcnt; /**< Reference count for this cached_dir_t. */ +}; + +#endif + diff --git a/src/feature/dircache/conscache.c b/src/feature/dircache/conscache.c new file mode 100644 index 0000000000..bc5928ff23 --- /dev/null +++ b/src/feature/dircache/conscache.c @@ -0,0 +1,627 @@ +/* Copyright (c) 2017-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "or/or.h" + +#include "or/config.h" +#include "or/conscache.h" +#include "lib/crypt_ops/crypto_util.h" +#include "lib/fs/storagedir.h" +#include "lib/encoding/confline.h" + +#define CCE_MAGIC 0x17162253 + +#ifdef _WIN32 +/* On Windows, unlink won't work on a file if the file is actively mmap()ed. + * That forces us to be less aggressive about unlinking files, and causes other + * changes throughout our logic. + */ +#define MUST_UNMAP_TO_UNLINK +#endif /* defined(_WIN32) */ + +/** + * A consensus_cache_entry_t is a reference-counted handle to an + * item in a consensus_cache_t. It can be mmapped into RAM, or not, + * depending whether it's currently in use. + */ +struct consensus_cache_entry_t { + uint32_t magic; /**< Must be set to CCE_MAGIC */ + HANDLE_ENTRY(consensus_cache_entry, consensus_cache_entry_t); + int32_t refcnt; /**< Reference count. */ + unsigned can_remove : 1; /**< If true, we want to delete this file. */ + /** If true, we intend to unmap this file as soon as we're done with it. */ + unsigned release_aggressively : 1; + + /** Filename for this object within the storage_dir_t */ + char *fname; + /** Labels associated with this object. Immutable once the object + * is created. */ + config_line_t *labels; + /** Pointer to the cache that includes this entry (if any). */ + consensus_cache_t *in_cache; + + /** Since what time has this object been mapped into RAM, but with the cache + * being the only having a reference to it? */ + time_t unused_since; + /** mmaped contents of the underlying file. May be NULL */ + tor_mmap_t *map; + /** Length of the body within <b>map</b>. */ + size_t bodylen; + /** Pointer to the body within <b>map</b>. */ + const uint8_t *body; +}; + +/** + * A consensus_cache_t holds a directory full of labeled items. + */ +struct consensus_cache_t { + /** Underling storage_dir_t to handle persistence */ + storage_dir_t *dir; + /** List of all the entries in the directory. */ + smartlist_t *entries; + + /** The maximum number of entries that we'd like to allow in this cache. + * This is the same as the storagedir limit when MUST_UNMAP_TO_UNLINK is + * not defined. */ + unsigned max_entries; +}; + +static void consensus_cache_clear(consensus_cache_t *cache); +static void consensus_cache_rescan(consensus_cache_t *); +static void consensus_cache_entry_map(consensus_cache_t *, + consensus_cache_entry_t *); +static void consensus_cache_entry_unmap(consensus_cache_entry_t *ent); + +/** + * Helper: Open a consensus cache in subdirectory <b>subdir</b> of the + * data directory, to hold up to <b>max_entries</b> of data. + */ +consensus_cache_t * +consensus_cache_open(const char *subdir, int max_entries) +{ + int storagedir_max_entries; + consensus_cache_t *cache = tor_malloc_zero(sizeof(consensus_cache_t)); + char *directory = get_cachedir_fname(subdir); + cache->max_entries = max_entries; + +#ifdef MUST_UNMAP_TO_UNLINK + /* If we can't unlink the files that we're still using, then we need to + * tell the storagedir backend to allow far more files than this consensus + * cache actually wants, so that it can hold files which, from this cache's + * perspective, have become useless. + */ +#define VERY_LARGE_STORAGEDIR_LIMIT (1000*1000) + storagedir_max_entries = VERY_LARGE_STORAGEDIR_LIMIT; +#else /* !(defined(MUST_UNMAP_TO_UNLINK)) */ + /* Otherwise, we can just tell the storagedir to use the same limits + * as this cache. */ + storagedir_max_entries = max_entries; +#endif /* defined(MUST_UNMAP_TO_UNLINK) */ + + cache->dir = storage_dir_new(directory, storagedir_max_entries); + tor_free(directory); + if (!cache->dir) { + tor_free(cache); + return NULL; + } + + consensus_cache_rescan(cache); + return cache; +} + +/** Return true if it's okay to put more entries in this cache than + * its official file limit. + * + * (We need this method on Windows, where we can't unlink files that are still + * in use, and therefore might need to temporarily exceed the file limit until + * the no-longer-wanted files are deletable.) + */ +int +consensus_cache_may_overallocate(consensus_cache_t *cache) +{ + (void) cache; +#ifdef MUST_UNMAP_TO_UNLINK + return 1; +#else + return 0; +#endif +} + +/** + * Tell the sandbox (if any) configured by <b>cfg</b> to allow the + * operations that <b>cache</b> will need. + */ +int +consensus_cache_register_with_sandbox(consensus_cache_t *cache, + struct sandbox_cfg_elem **cfg) +{ +#ifdef MUST_UNMAP_TO_UNLINK + /* Our Linux sandbox doesn't support huge file lists like the one that would + * be generated by using VERY_LARGE_STORAGEDIR_LIMIT above in + * consensus_cache_open(). Since the Linux sandbox is the only one we have + * right now, we just assert that we never reach this point when we've had + * to use VERY_LARGE_STORAGEDIR_LIMIT. + * + * If at some point in the future we have a different sandbox mechanism that + * can handle huge file lists, we can remove this assertion or make it + * conditional. + */ + tor_assert_nonfatal_unreached(); +#endif /* defined(MUST_UNMAP_TO_UNLINK) */ + return storage_dir_register_with_sandbox(cache->dir, cfg); +} + +/** + * Helper: clear all entries from <b>cache</b> (but do not delete + * any that aren't marked for removal + */ +static void +consensus_cache_clear(consensus_cache_t *cache) +{ + consensus_cache_delete_pending(cache, 0); + + SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { + ent->in_cache = NULL; + consensus_cache_entry_decref(ent); + } SMARTLIST_FOREACH_END(ent); + smartlist_free(cache->entries); + cache->entries = NULL; +} + +/** + * Drop all storage held by <b>cache</b>. + */ +void +consensus_cache_free_(consensus_cache_t *cache) +{ + if (! cache) + return; + + if (cache->entries) { + consensus_cache_clear(cache); + } + storage_dir_free(cache->dir); + tor_free(cache); +} + +/** + * Write <b>datalen</b> bytes of data at <b>data</b> into the <b>cache</b>, + * labeling that data with <b>labels</b>. On failure, return NULL. On + * success, return a newly created consensus_cache_entry_t. + * + * The returned value will be owned by the cache, and you will have a + * reference to it. Call consensus_cache_entry_decref() when you are + * done with it. + * + * The provided <b>labels</b> MUST have distinct keys: if they don't, + * this API does not specify which values (if any) for the duplicate keys + * will be considered. + */ +consensus_cache_entry_t * +consensus_cache_add(consensus_cache_t *cache, + const config_line_t *labels, + const uint8_t *data, + size_t datalen) +{ + char *fname = NULL; + int r = storage_dir_save_labeled_to_file(cache->dir, + labels, data, datalen, &fname); + if (r < 0 || fname == NULL) { + return NULL; + } + consensus_cache_entry_t *ent = + tor_malloc_zero(sizeof(consensus_cache_entry_t)); + ent->magic = CCE_MAGIC; + ent->fname = fname; + ent->labels = config_lines_dup(labels); + ent->in_cache = cache; + ent->unused_since = TIME_MAX; + smartlist_add(cache->entries, ent); + /* Start the reference count at 2: the caller owns one copy, and the + * cache owns another. + */ + ent->refcnt = 2; + + return ent; +} + +/** + * Given a <b>cache</b>, return some entry for which <b>key</b>=<b>value</b>. + * Return NULL if no such entry exists. + * + * Does not adjust reference counts. + */ +consensus_cache_entry_t * +consensus_cache_find_first(consensus_cache_t *cache, + const char *key, + const char *value) +{ + smartlist_t *tmp = smartlist_new(); + consensus_cache_find_all(tmp, cache, key, value); + consensus_cache_entry_t *ent = NULL; + if (smartlist_len(tmp)) + ent = smartlist_get(tmp, 0); + smartlist_free(tmp); + return ent; +} + +/** + * Given a <b>cache</b>, add every entry to <b>out<b> for which + * <b>key</b>=<b>value</b>. If <b>key</b> is NULL, add every entry. + * + * Do not add any entry that has been marked for removal. + * + * Does not adjust reference counts. + */ +void +consensus_cache_find_all(smartlist_t *out, + consensus_cache_t *cache, + const char *key, + const char *value) +{ + SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { + if (ent->can_remove == 1) { + /* We want to delete this; pretend it isn't there. */ + continue; + } + if (! key) { + smartlist_add(out, ent); + continue; + } + const char *found_val = consensus_cache_entry_get_value(ent, key); + if (found_val && !strcmp(value, found_val)) { + smartlist_add(out, ent); + } + } SMARTLIST_FOREACH_END(ent); +} + +/** + * Given a list of consensus_cache_entry_t, remove all those entries + * that do not have <b>key</b>=<b>value</b> in their labels. + * + * Does not adjust reference counts. + */ +void +consensus_cache_filter_list(smartlist_t *lst, + const char *key, + const char *value) +{ + if (BUG(lst == NULL)) + return; // LCOV_EXCL_LINE + if (key == NULL) + return; + SMARTLIST_FOREACH_BEGIN(lst, consensus_cache_entry_t *, ent) { + const char *found_val = consensus_cache_entry_get_value(ent, key); + if (! found_val || strcmp(value, found_val)) { + SMARTLIST_DEL_CURRENT(lst, ent); + } + } SMARTLIST_FOREACH_END(ent); +} + +/** + * If <b>ent</b> has a label with the given <b>key</b>, return its + * value. Otherwise return NULL. + * + * The return value is only guaranteed to be valid for as long as you + * hold a reference to <b>ent</b>. + */ +const char * +consensus_cache_entry_get_value(const consensus_cache_entry_t *ent, + const char *key) +{ + const config_line_t *match = config_line_find(ent->labels, key); + if (match) + return match->value; + else + return NULL; +} + +/** + * Return a pointer to the labels in <b>ent</b>. + * + * This pointer is only guaranteed to be valid for as long as you + * hold a reference to <b>ent</b>. + */ +const config_line_t * +consensus_cache_entry_get_labels(const consensus_cache_entry_t *ent) +{ + return ent->labels; +} + +/** + * Increase the reference count of <b>ent</b>. + */ +void +consensus_cache_entry_incref(consensus_cache_entry_t *ent) +{ + if (BUG(ent->magic != CCE_MAGIC)) + return; // LCOV_EXCL_LINE + ++ent->refcnt; + ent->unused_since = TIME_MAX; +} + +/** + * Release a reference held to <b>ent</b>. + * + * If it was the last reference, ent will be freed. Therefore, you must not + * use <b>ent</b> after calling this function. + */ +void +consensus_cache_entry_decref(consensus_cache_entry_t *ent) +{ + if (! ent) + return; + if (BUG(ent->refcnt <= 0)) + return; // LCOV_EXCL_LINE + if (BUG(ent->magic != CCE_MAGIC)) + return; // LCOV_EXCL_LINE + + --ent->refcnt; + + if (ent->refcnt == 1 && ent->in_cache) { + /* Only the cache has a reference: we don't need to keep the file + * mapped */ + if (ent->map) { + if (ent->release_aggressively) { + consensus_cache_entry_unmap(ent); + } else { + ent->unused_since = approx_time(); + } + } + return; + } + + if (ent->refcnt > 0) + return; + + /* Refcount is zero; we can free it. */ + if (ent->map) { + consensus_cache_entry_unmap(ent); + } + tor_free(ent->fname); + config_free_lines(ent->labels); + consensus_cache_entry_handles_clear(ent); + memwipe(ent, 0, sizeof(consensus_cache_entry_t)); + tor_free(ent); +} + +/** + * Mark <b>ent</b> for deletion from the cache. Deletion will not occur + * until the cache is the only place that holds a reference to <b>ent</b>. + */ +void +consensus_cache_entry_mark_for_removal(consensus_cache_entry_t *ent) +{ + ent->can_remove = 1; +} + +/** + * Mark <b>ent</b> as the kind of entry that we don't need to keep mmap'd for + * any longer than we're actually using it. + */ +void +consensus_cache_entry_mark_for_aggressive_release(consensus_cache_entry_t *ent) +{ + ent->release_aggressively = 1; +} + +/** + * Try to read the body of <b>ent</b> into memory if it isn't already + * loaded. On success, set *<b>body_out</b> to the body, *<b>sz_out</b> + * to its size, and return 0. On failure return -1. + * + * The resulting body pointer will only be valid for as long as you + * hold a reference to <b>ent</b>. + */ +int +consensus_cache_entry_get_body(const consensus_cache_entry_t *ent, + const uint8_t **body_out, + size_t *sz_out) +{ + if (BUG(ent->magic != CCE_MAGIC)) + return -1; // LCOV_EXCL_LINE + + if (! ent->map) { + if (! ent->in_cache) + return -1; + + consensus_cache_entry_map((consensus_cache_t *)ent->in_cache, + (consensus_cache_entry_t *)ent); + if (! ent->map) { + return -1; + } + } + + *body_out = ent->body; + *sz_out = ent->bodylen; + return 0; +} + +/** + * Unmap every mmap'd element of <b>cache</b> that has been unused + * since <b>cutoff</b>. + */ +void +consensus_cache_unmap_lazy(consensus_cache_t *cache, time_t cutoff) +{ + SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { + tor_assert_nonfatal(ent->in_cache == cache); + if (ent->refcnt > 1 || BUG(ent->in_cache == NULL)) { + /* Somebody is using this entry right now */ + continue; + } + if (ent->unused_since > cutoff) { + /* Has been unused only for a little while */ + continue; + } + if (ent->map == NULL) { + /* Not actually mapped. */ + continue; + } + consensus_cache_entry_unmap(ent); + } SMARTLIST_FOREACH_END(ent); +} + +/** + * Return the number of currently unused filenames available in this cache. + */ +int +consensus_cache_get_n_filenames_available(consensus_cache_t *cache) +{ + tor_assert(cache); + int max = cache->max_entries; + int used = smartlist_len(storage_dir_list(cache->dir)); +#ifdef MUST_UNMAP_TO_UNLINK + if (used > max) + return 0; +#else + tor_assert_nonfatal(max >= used); +#endif /* defined(MUST_UNMAP_TO_UNLINK) */ + return max - used; +} + +/** + * Delete every element of <b>cache</b> has been marked with + * consensus_cache_entry_mark_for_removal. If <b>force</b> is false, + * retain those entries which are in use by something other than the cache. + */ +void +consensus_cache_delete_pending(consensus_cache_t *cache, int force) +{ + SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { + tor_assert_nonfatal(ent->in_cache == cache); + int force_ent = force; +#ifdef MUST_UNMAP_TO_UNLINK + /* We cannot delete anything with an active mmap on win32, so no + * force-deletion. */ + if (ent->map) { + force_ent = 0; + } +#endif /* defined(MUST_UNMAP_TO_UNLINK) */ + if (! force_ent) { + if (ent->refcnt > 1 || BUG(ent->in_cache == NULL)) { + /* Somebody is using this entry right now */ + continue; + } + } + if (ent->can_remove == 0) { + /* Don't want to delete this. */ + continue; + } + if (BUG(ent->refcnt <= 0)) { + continue; // LCOV_EXCL_LINE + } + + SMARTLIST_DEL_CURRENT(cache->entries, ent); + ent->in_cache = NULL; + char *fname = tor_strdup(ent->fname); /* save a copy */ + consensus_cache_entry_decref(ent); + storage_dir_remove_file(cache->dir, fname); + tor_free(fname); + } SMARTLIST_FOREACH_END(ent); +} + +/** + * Internal helper: rescan <b>cache</b> and rebuild its list of entries. + */ +static void +consensus_cache_rescan(consensus_cache_t *cache) +{ + if (cache->entries) { + consensus_cache_clear(cache); + } + + cache->entries = smartlist_new(); + const smartlist_t *fnames = storage_dir_list(cache->dir); + SMARTLIST_FOREACH_BEGIN(fnames, const char *, fname) { + tor_mmap_t *map = NULL; + config_line_t *labels = NULL; + const uint8_t *body; + size_t bodylen; + map = storage_dir_map_labeled(cache->dir, fname, + &labels, &body, &bodylen); + if (! map) { + /* The ERANGE error might come from tor_mmap_file() -- it means the file + * was empty. EINVAL might come from ..map_labeled() -- it means the + * file was misformatted. In both cases, we should just delete it. + */ + if (errno == ERANGE || errno == EINVAL) { + log_warn(LD_FS, "Found %s file %s in consensus cache; removing it.", + errno == ERANGE ? "empty" : "misformatted", + escaped(fname)); + storage_dir_remove_file(cache->dir, fname); + } else { + /* Can't load this; continue */ + log_warn(LD_FS, "Unable to map file %s from consensus cache: %s", + escaped(fname), strerror(errno)); + } + continue; + } + consensus_cache_entry_t *ent = + tor_malloc_zero(sizeof(consensus_cache_entry_t)); + ent->magic = CCE_MAGIC; + ent->fname = tor_strdup(fname); + ent->labels = labels; + ent->refcnt = 1; + ent->in_cache = cache; + ent->unused_since = TIME_MAX; + smartlist_add(cache->entries, ent); + tor_munmap_file(map); /* don't actually need to keep this around */ + } SMARTLIST_FOREACH_END(fname); +} + +/** + * Make sure that <b>ent</b> is mapped into RAM. + */ +static void +consensus_cache_entry_map(consensus_cache_t *cache, + consensus_cache_entry_t *ent) +{ + if (ent->map) + return; + + ent->map = storage_dir_map_labeled(cache->dir, ent->fname, + NULL, &ent->body, &ent->bodylen); + ent->unused_since = TIME_MAX; +} + +/** + * Unmap <b>ent</b> from RAM. + * + * Do not call this if something other than the cache is holding a reference + * to <b>ent</b> + */ +static void +consensus_cache_entry_unmap(consensus_cache_entry_t *ent) +{ + ent->unused_since = TIME_MAX; + if (!ent->map) + return; + + tor_munmap_file(ent->map); + ent->map = NULL; + ent->body = NULL; + ent->bodylen = 0; + ent->unused_since = TIME_MAX; +} + +HANDLE_IMPL(consensus_cache_entry, consensus_cache_entry_t, ) + +#ifdef TOR_UNIT_TESTS +/** + * Testing only: Return true iff <b>ent</b> is mapped into memory. + * + * (In normal operation, this information is not exposed.) + */ +int +consensus_cache_entry_is_mapped(consensus_cache_entry_t *ent) +{ + if (ent->map) { + tor_assert(ent->body); + return 1; + } else { + tor_assert(!ent->body); + return 0; + } +} +#endif /* defined(TOR_UNIT_TESTS) */ diff --git a/src/feature/dircache/conscache.h b/src/feature/dircache/conscache.h new file mode 100644 index 0000000000..c274a60393 --- /dev/null +++ b/src/feature/dircache/conscache.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2017-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_CONSCACHE_H +#define TOR_CONSCACHE_H + +#include "lib/container/handles.h" + +typedef struct consensus_cache_entry_t consensus_cache_entry_t; +typedef struct consensus_cache_t consensus_cache_t; + +HANDLE_DECL(consensus_cache_entry, consensus_cache_entry_t, ) +#define consensus_cache_entry_handle_free(h) \ + FREE_AND_NULL(consensus_cache_entry_handle_t, \ + consensus_cache_entry_handle_free_, (h)) + +consensus_cache_t *consensus_cache_open(const char *subdir, int max_entries); +void consensus_cache_free_(consensus_cache_t *cache); +#define consensus_cache_free(cache) \ + FREE_AND_NULL(consensus_cache_t, consensus_cache_free_, (cache)) +struct sandbox_cfg_elem; +int consensus_cache_may_overallocate(consensus_cache_t *cache); +int consensus_cache_register_with_sandbox(consensus_cache_t *cache, + struct sandbox_cfg_elem **cfg); +void consensus_cache_unmap_lazy(consensus_cache_t *cache, time_t cutoff); +void consensus_cache_delete_pending(consensus_cache_t *cache, + int force); +int consensus_cache_get_n_filenames_available(consensus_cache_t *cache); +consensus_cache_entry_t *consensus_cache_add(consensus_cache_t *cache, + const struct config_line_t *labels, + const uint8_t *data, + size_t datalen); + +consensus_cache_entry_t *consensus_cache_find_first( + consensus_cache_t *cache, + const char *key, + const char *value); + +void consensus_cache_find_all(smartlist_t *out, + consensus_cache_t *cache, + const char *key, + const char *value); +void consensus_cache_filter_list(smartlist_t *lst, + const char *key, + const char *value); + +const char *consensus_cache_entry_get_value(const consensus_cache_entry_t *ent, + const char *key); +const struct config_line_t *consensus_cache_entry_get_labels( + const consensus_cache_entry_t *ent); + +void consensus_cache_entry_incref(consensus_cache_entry_t *ent); +void consensus_cache_entry_decref(consensus_cache_entry_t *ent); + +void consensus_cache_entry_mark_for_removal(consensus_cache_entry_t *ent); +void consensus_cache_entry_mark_for_aggressive_release( + consensus_cache_entry_t *ent); +int consensus_cache_entry_get_body(const consensus_cache_entry_t *ent, + const uint8_t **body_out, + size_t *sz_out); + +#ifdef TOR_UNIT_TESTS +int consensus_cache_entry_is_mapped(consensus_cache_entry_t *ent); +#endif + +#endif /* !defined(TOR_CONSCACHE_H) */ diff --git a/src/feature/dircache/consdiffmgr.c b/src/feature/dircache/consdiffmgr.c new file mode 100644 index 0000000000..6d5183f934 --- /dev/null +++ b/src/feature/dircache/consdiffmgr.c @@ -0,0 +1,1945 @@ +/* Copyright (c) 2017-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file consdiffmsr.c + * + * \brief consensus diff manager functions + * + * This module is run by directory authorities and caches in order + * to remember a number of past consensus documents, and to generate + * and serve the diffs from those documents to the latest consensus. + */ + +#define CONSDIFFMGR_PRIVATE + +#include "or/or.h" +#include "or/config.h" +#include "or/conscache.h" +#include "or/consdiff.h" +#include "or/consdiffmgr.h" +#include "or/cpuworker.h" +#include "or/networkstatus.h" +#include "or/routerparse.h" +#include "lib/evloop/compat_libevent.h" +#include "lib/evloop/workqueue.h" +#include "lib/compress/compress.h" +#include "lib/encoding/confline.h" + +#include "or/networkstatus_st.h" +#include "or/networkstatus_voter_info_st.h" + +/** + * Labels to apply to items in the conscache object. + * + * @{ + */ +/* One of DOCTYPE_CONSENSUS or DOCTYPE_CONSENSUS_DIFF */ +#define LABEL_DOCTYPE "document-type" +/* The valid-after time for a consensus (or for the target consensus of a + * diff), encoded as ISO UTC. */ +#define LABEL_VALID_AFTER "consensus-valid-after" +/* The fresh-until time for a consensus (or for the target consensus of a + * diff), encoded as ISO UTC. */ +#define LABEL_FRESH_UNTIL "consensus-fresh-until" +/* The valid-until time for a consensus (or for the target consensus of a + * diff), encoded as ISO UTC. */ +#define LABEL_VALID_UNTIL "consensus-valid-until" +/* Comma-separated list of hex-encoded identity digests for the voting + * authorities. */ +#define LABEL_SIGNATORIES "consensus-signatories" +/* A hex encoded SHA3 digest of the object, as compressed (if any) */ +#define LABEL_SHA3_DIGEST "sha3-digest" +/* A hex encoded SHA3 digest of the object before compression. */ +#define LABEL_SHA3_DIGEST_UNCOMPRESSED "sha3-digest-uncompressed" +/* A hex encoded SHA3 digest-as-signed of a consensus */ +#define LABEL_SHA3_DIGEST_AS_SIGNED "sha3-digest-as-signed" +/* The flavor of the consensus or consensuses diff */ +#define LABEL_FLAVOR "consensus-flavor" +/* Diff only: the SHA3 digest-as-signed of the source consensus. */ +#define LABEL_FROM_SHA3_DIGEST "from-sha3-digest" +/* Diff only: the SHA3 digest-in-full of the target consensus. */ +#define LABEL_TARGET_SHA3_DIGEST "target-sha3-digest" +/* Diff only: the valid-after date of the source consensus. */ +#define LABEL_FROM_VALID_AFTER "from-valid-after" +/* What kind of compression was used? */ +#define LABEL_COMPRESSION_TYPE "compression" +/** @} */ + +#define DOCTYPE_CONSENSUS "consensus" +#define DOCTYPE_CONSENSUS_DIFF "consensus-diff" + +/** + * Underlying directory that stores consensuses and consensus diffs. Don't + * use this directly: use cdm_cache_get() instead. + */ +static consensus_cache_t *cons_diff_cache = NULL; +/** + * If true, we have learned at least one new consensus since the + * consensus cache was last up-to-date. + */ +static int cdm_cache_dirty = 0; +/** + * If true, we have scanned the cache to update our hashtable of diffs. + */ +static int cdm_cache_loaded = 0; + +/** + * Possible status values for cdm_diff_t.cdm_diff_status + **/ +typedef enum cdm_diff_status_t { + CDM_DIFF_PRESENT=1, + CDM_DIFF_IN_PROGRESS=2, + CDM_DIFF_ERROR=3, +} cdm_diff_status_t; + +/** Which methods do we use for precompressing diffs? */ +static const compress_method_t compress_diffs_with[] = { + NO_METHOD, + GZIP_METHOD, +#ifdef HAVE_LZMA + LZMA_METHOD, +#endif +#ifdef HAVE_ZSTD + ZSTD_METHOD, +#endif +}; + +/** + * Event for rescanning the cache. + */ +static mainloop_event_t *consdiffmgr_rescan_ev = NULL; + +static void consdiffmgr_rescan_cb(mainloop_event_t *ev, void *arg); +static void mark_cdm_cache_dirty(void); + +/** How many different methods will we try to use for diff compression? */ +STATIC unsigned +n_diff_compression_methods(void) +{ + return ARRAY_LENGTH(compress_diffs_with); +} + +/** Which methods do we use for precompressing consensuses? */ +static const compress_method_t compress_consensus_with[] = { + ZLIB_METHOD, +#ifdef HAVE_LZMA + LZMA_METHOD, +#endif +#ifdef HAVE_ZSTD + ZSTD_METHOD, +#endif +}; + +/** How many different methods will we try to use for diff compression? */ +STATIC unsigned +n_consensus_compression_methods(void) +{ + return ARRAY_LENGTH(compress_consensus_with); +} + +/** For which compression method do we retain old consensuses? There's no + * need to keep all of them, since we won't be serving them. We'll + * go with ZLIB_METHOD because it's pretty fast and everyone has it. + */ +#define RETAIN_CONSENSUS_COMPRESSED_WITH_METHOD ZLIB_METHOD + +/** Handles pointing to the latest consensus entries as compressed and + * stored. */ +static consensus_cache_entry_handle_t * + latest_consensus[N_CONSENSUS_FLAVORS] + [ARRAY_LENGTH(compress_consensus_with)]; + +/** Hashtable node used to remember the current status of the diff + * from a given sha3 digest to the current consensus. */ +typedef struct cdm_diff_t { + HT_ENTRY(cdm_diff_t) node; + + /** Consensus flavor for this diff (part of ht key) */ + consensus_flavor_t flavor; + /** SHA3-256 digest of the consensus that this diff is _from_. (part of the + * ht key) */ + uint8_t from_sha3[DIGEST256_LEN]; + /** Method by which the diff is compressed. (part of the ht key */ + compress_method_t compress_method; + + /** One of the CDM_DIFF_* values, depending on whether this diff + * is available, in progress, or impossible to compute. */ + cdm_diff_status_t cdm_diff_status; + /** SHA3-256 digest of the consensus that this diff is _to. */ + uint8_t target_sha3[DIGEST256_LEN]; + + /** Handle to the cache entry for this diff, if any. We use a handle here + * to avoid thinking too hard about cache entry lifetime issues. */ + consensus_cache_entry_handle_t *entry; +} cdm_diff_t; + +/** Hashtable mapping flavor and source consensus digest to status. */ +static HT_HEAD(cdm_diff_ht, cdm_diff_t) cdm_diff_ht = HT_INITIALIZER(); + +/** + * Configuration for this module + */ +static consdiff_cfg_t consdiff_cfg = { + // XXXX I'd like to make this number bigger, but it interferes with the + // XXXX seccomp2 syscall filter, which tops out at BPF_MAXINS (4096) + // XXXX rules. + /* .cache_max_num = */ 128 +}; + +static int consdiffmgr_ensure_space_for_files(int n); +static int consensus_queue_compression_work(const char *consensus, + const networkstatus_t *as_parsed); +static int consensus_diff_queue_diff_work(consensus_cache_entry_t *diff_from, + consensus_cache_entry_t *diff_to); +static void consdiffmgr_set_cache_flags(void); + +/* ===== + * Hashtable setup + * ===== */ + +/** Helper: hash the key of a cdm_diff_t. */ +static unsigned +cdm_diff_hash(const cdm_diff_t *diff) +{ + uint8_t tmp[DIGEST256_LEN + 2]; + memcpy(tmp, diff->from_sha3, DIGEST256_LEN); + tmp[DIGEST256_LEN] = (uint8_t) diff->flavor; + tmp[DIGEST256_LEN+1] = (uint8_t) diff->compress_method; + return (unsigned) siphash24g(tmp, sizeof(tmp)); +} +/** Helper: compare two cdm_diff_t objects for key equality */ +static int +cdm_diff_eq(const cdm_diff_t *diff1, const cdm_diff_t *diff2) +{ + return fast_memeq(diff1->from_sha3, diff2->from_sha3, DIGEST256_LEN) && + diff1->flavor == diff2->flavor && + diff1->compress_method == diff2->compress_method; +} + +HT_PROTOTYPE(cdm_diff_ht, cdm_diff_t, node, cdm_diff_hash, cdm_diff_eq) +HT_GENERATE2(cdm_diff_ht, cdm_diff_t, node, cdm_diff_hash, cdm_diff_eq, + 0.6, tor_reallocarray, tor_free_) + +#define cdm_diff_free(diff) \ + FREE_AND_NULL(cdm_diff_t, cdm_diff_free_, (diff)) + +/** Release all storage held in <b>diff</b>. */ +static void +cdm_diff_free_(cdm_diff_t *diff) +{ + if (!diff) + return; + consensus_cache_entry_handle_free(diff->entry); + tor_free(diff); +} + +/** Create and return a new cdm_diff_t with the given values. Does not + * add it to the hashtable. */ +static cdm_diff_t * +cdm_diff_new(consensus_flavor_t flav, + const uint8_t *from_sha3, + const uint8_t *target_sha3, + compress_method_t method) +{ + cdm_diff_t *ent; + ent = tor_malloc_zero(sizeof(cdm_diff_t)); + ent->flavor = flav; + memcpy(ent->from_sha3, from_sha3, DIGEST256_LEN); + memcpy(ent->target_sha3, target_sha3, DIGEST256_LEN); + ent->compress_method = method; + return ent; +} + +/** + * Examine the diff hashtable to see whether we know anything about computing + * a diff of type <b>flav</b> between consensuses with the two provided + * SHA3-256 digests. If a computation is in progress, or if the computation + * has already been tried and failed, return 1. Otherwise, note the + * computation as "in progress" so that we don't reattempt it later, and + * return 0. + */ +static int +cdm_diff_ht_check_and_note_pending(consensus_flavor_t flav, + const uint8_t *from_sha3, + const uint8_t *target_sha3) +{ + struct cdm_diff_t search, *ent; + unsigned u; + int result = 0; + for (u = 0; u < n_diff_compression_methods(); ++u) { + compress_method_t method = compress_diffs_with[u]; + memset(&search, 0, sizeof(cdm_diff_t)); + search.flavor = flav; + search.compress_method = method; + memcpy(search.from_sha3, from_sha3, DIGEST256_LEN); + ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); + if (ent) { + tor_assert_nonfatal(ent->cdm_diff_status != CDM_DIFF_PRESENT); + result = 1; + continue; + } + ent = cdm_diff_new(flav, from_sha3, target_sha3, method); + ent->cdm_diff_status = CDM_DIFF_IN_PROGRESS; + HT_INSERT(cdm_diff_ht, &cdm_diff_ht, ent); + } + return result; +} + +/** + * Update the status of the diff of type <b>flav</b> between consensuses with + * the two provided SHA3-256 digests, so that its status becomes + * <b>status</b>, and its value becomes the <b>handle</b>. If <b>handle</b> + * is NULL, then the old handle (if any) is freed, and replaced with NULL. + */ +static void +cdm_diff_ht_set_status(consensus_flavor_t flav, + const uint8_t *from_sha3, + const uint8_t *to_sha3, + compress_method_t method, + int status, + consensus_cache_entry_handle_t *handle) +{ + if (handle == NULL) { + tor_assert_nonfatal(status != CDM_DIFF_PRESENT); + } + + struct cdm_diff_t search, *ent; + memset(&search, 0, sizeof(cdm_diff_t)); + search.flavor = flav; + search.compress_method = method, + memcpy(search.from_sha3, from_sha3, DIGEST256_LEN); + ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); + if (!ent) { + ent = cdm_diff_new(flav, from_sha3, to_sha3, method); + ent->cdm_diff_status = CDM_DIFF_IN_PROGRESS; + HT_INSERT(cdm_diff_ht, &cdm_diff_ht, ent); + } else if (fast_memneq(ent->target_sha3, to_sha3, DIGEST256_LEN)) { + // This can happen under certain really pathological conditions + // if we decide we don't care about a diff before it is actually + // done computing. + return; + } + + tor_assert_nonfatal(ent->cdm_diff_status == CDM_DIFF_IN_PROGRESS); + + ent->cdm_diff_status = status; + consensus_cache_entry_handle_free(ent->entry); + ent->entry = handle; +} + +/** + * Helper: Remove from the hash table every present (actually computed) diff + * of type <b>flav</b> whose target digest does not match + * <b>unless_target_sha3_matches</b>. + * + * This function is used for the hash table to throw away references to diffs + * that do not lead to the most given consensus of a given flavor. + */ +static void +cdm_diff_ht_purge(consensus_flavor_t flav, + const uint8_t *unless_target_sha3_matches) +{ + cdm_diff_t **diff, **next; + for (diff = HT_START(cdm_diff_ht, &cdm_diff_ht); diff; diff = next) { + cdm_diff_t *this = *diff; + + if ((*diff)->cdm_diff_status == CDM_DIFF_PRESENT && + flav == (*diff)->flavor) { + + if (BUG((*diff)->entry == NULL) || + consensus_cache_entry_handle_get((*diff)->entry) == NULL) { + /* the underlying entry has gone away; drop this. */ + next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff); + cdm_diff_free(this); + continue; + } + + if (unless_target_sha3_matches && + fast_memneq(unless_target_sha3_matches, (*diff)->target_sha3, + DIGEST256_LEN)) { + /* target hash doesn't match; drop this. */ + next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff); + cdm_diff_free(this); + continue; + } + } + next = HT_NEXT(cdm_diff_ht, &cdm_diff_ht, diff); + } +} + +/** + * Helper: initialize <b>cons_diff_cache</b>. + */ +static void +cdm_cache_init(void) +{ + unsigned n_entries = consdiff_cfg.cache_max_num * 2; + + tor_assert(cons_diff_cache == NULL); + cons_diff_cache = consensus_cache_open("diff-cache", n_entries); + if (cons_diff_cache == NULL) { + // LCOV_EXCL_START + log_err(LD_FS, "Error: Couldn't open storage for consensus diffs."); + tor_assert_unreached(); + // LCOV_EXCL_STOP + } else { + consdiffmgr_set_cache_flags(); + } + consdiffmgr_rescan_ev = + mainloop_event_postloop_new(consdiffmgr_rescan_cb, NULL); + mark_cdm_cache_dirty(); + cdm_cache_loaded = 0; +} + +/** + * Helper: return the consensus_cache_t * that backs this manager, + * initializing it if needed. + */ +STATIC consensus_cache_t * +cdm_cache_get(void) +{ + if (PREDICT_UNLIKELY(cons_diff_cache == NULL)) { + cdm_cache_init(); + } + return cons_diff_cache; +} + +/** + * Helper: given a list of labels, prepend the hex-encoded SHA3 digest + * of the <b>bodylen</b>-byte object at <b>body</b> to those labels, + * with <b>label</b> as its label. + */ +static void +cdm_labels_prepend_sha3(config_line_t **labels, + const char *label, + const uint8_t *body, + size_t bodylen) +{ + uint8_t sha3_digest[DIGEST256_LEN]; + char hexdigest[HEX_DIGEST256_LEN+1]; + crypto_digest256((char *)sha3_digest, + (const char *)body, bodylen, DIGEST_SHA3_256); + base16_encode(hexdigest, sizeof(hexdigest), + (const char *)sha3_digest, sizeof(sha3_digest)); + + config_line_prepend(labels, label, hexdigest); +} + +/** Helper: if there is a sha3-256 hex-encoded digest in <b>ent</b> with the + * given label, set <b>digest_out</b> to that value (decoded), and return 0. + * + * Return -1 if there is no such label, and -2 if it is badly formatted. */ +STATIC int +cdm_entry_get_sha3_value(uint8_t *digest_out, + consensus_cache_entry_t *ent, + const char *label) +{ + if (ent == NULL) + return -1; + + const char *hex = consensus_cache_entry_get_value(ent, label); + if (hex == NULL) + return -1; + + int n = base16_decode((char*)digest_out, DIGEST256_LEN, hex, strlen(hex)); + if (n != DIGEST256_LEN) + return -2; + else + return 0; +} + +/** + * Helper: look for a consensus with the given <b>flavor</b> and + * <b>valid_after</b> time in the cache. Return that consensus if it's + * present, or NULL if it's missing. + */ +STATIC consensus_cache_entry_t * +cdm_cache_lookup_consensus(consensus_flavor_t flavor, time_t valid_after) +{ + char formatted_time[ISO_TIME_LEN+1]; + format_iso_time_nospace(formatted_time, valid_after); + const char *flavname = networkstatus_get_flavor_name(flavor); + + /* We'll filter by valid-after time first, since that should + * match the fewest documents. */ + /* We could add an extra hashtable here, but since we only do this scan + * when adding a new consensus, it probably doesn't matter much. */ + smartlist_t *matches = smartlist_new(); + consensus_cache_find_all(matches, cdm_cache_get(), + LABEL_VALID_AFTER, formatted_time); + consensus_cache_filter_list(matches, LABEL_FLAVOR, flavname); + consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + + consensus_cache_entry_t *result = NULL; + if (smartlist_len(matches)) { + result = smartlist_get(matches, 0); + } + smartlist_free(matches); + + return result; +} + +/** Return the maximum age (in seconds) of consensuses that we should consider + * storing. The available space in the directory may impose additional limits + * on how much we store. */ +static int32_t +get_max_age_to_cache(void) +{ + const int32_t DEFAULT_MAX_AGE_TO_CACHE = 8192; + const int32_t MIN_MAX_AGE_TO_CACHE = 0; + const int32_t MAX_MAX_AGE_TO_CACHE = 8192; + const char MAX_AGE_TO_CACHE_NAME[] = "max-consensus-age-to-cache-for-diff"; + + const or_options_t *options = get_options(); + + if (options->MaxConsensusAgeForDiffs) { + const int v = options->MaxConsensusAgeForDiffs; + if (v >= MAX_MAX_AGE_TO_CACHE * 3600) + return MAX_MAX_AGE_TO_CACHE; + else + return v; + } + + /* The parameter is in hours, so we multiply */ + return 3600 * networkstatus_get_param(NULL, + MAX_AGE_TO_CACHE_NAME, + DEFAULT_MAX_AGE_TO_CACHE, + MIN_MAX_AGE_TO_CACHE, + MAX_MAX_AGE_TO_CACHE); +} + +/** + * Given a string containing a networkstatus consensus, and the results of + * having parsed that consensus, add that consensus to the cache if it is not + * already present and not too old. Create new consensus diffs from or to + * that consensus as appropriate. + * + * Return 0 on success and -1 on failure. + */ +int +consdiffmgr_add_consensus(const char *consensus, + const networkstatus_t *as_parsed) +{ + if (BUG(consensus == NULL) || BUG(as_parsed == NULL)) + return -1; // LCOV_EXCL_LINE + if (BUG(as_parsed->type != NS_TYPE_CONSENSUS)) + return -1; // LCOV_EXCL_LINE + + const consensus_flavor_t flavor = as_parsed->flavor; + const time_t valid_after = as_parsed->valid_after; + + if (valid_after < approx_time() - get_max_age_to_cache()) { + log_info(LD_DIRSERV, "We don't care about this consensus document; it's " + "too old."); + return -1; + } + + /* Do we already have this one? */ + consensus_cache_entry_t *entry = + cdm_cache_lookup_consensus(flavor, valid_after); + if (entry) { + log_info(LD_DIRSERV, "We already have a copy of that consensus"); + return -1; + } + + /* We don't have it. Add it to the cache. */ + return consensus_queue_compression_work(consensus, as_parsed); +} + +/** + * Helper: used to sort two smartlists of consensus_cache_entry_t by their + * LABEL_VALID_AFTER labels. + */ +static int +compare_by_valid_after_(const void **a, const void **b) +{ + const consensus_cache_entry_t *e1 = *a; + const consensus_cache_entry_t *e2 = *b; + /* We're in luck here: sorting UTC iso-encoded values lexically will work + * fine (until 9999). */ + return strcmp_opt(consensus_cache_entry_get_value(e1, LABEL_VALID_AFTER), + consensus_cache_entry_get_value(e2, LABEL_VALID_AFTER)); +} + +/** + * Helper: Sort <b>lst</b> by LABEL_VALID_AFTER and return the most recent + * entry. + */ +static consensus_cache_entry_t * +sort_and_find_most_recent(smartlist_t *lst) +{ + smartlist_sort(lst, compare_by_valid_after_); + if (smartlist_len(lst)) { + return smartlist_get(lst, smartlist_len(lst) - 1); + } else { + return NULL; + } +} + +/** Return i such that compress_consensus_with[i] == method. Return + * -1 if no such i exists. */ +static int +consensus_compression_method_pos(compress_method_t method) +{ + unsigned i; + for (i = 0; i < n_consensus_compression_methods(); ++i) { + if (compress_consensus_with[i] == method) { + return i; + } + } + return -1; +} + +/** + * If we know a consensus with the flavor <b>flavor</b> compressed with + * <b>method</b>, set *<b>entry_out</b> to that value. Return values are as + * for consdiffmgr_find_diff_from(). + */ +consdiff_status_t +consdiffmgr_find_consensus(struct consensus_cache_entry_t **entry_out, + consensus_flavor_t flavor, + compress_method_t method) +{ + tor_assert(entry_out); + tor_assert((int)flavor < N_CONSENSUS_FLAVORS); + + int pos = consensus_compression_method_pos(method); + if (pos < 0) { + // We don't compress consensuses with this method. + return CONSDIFF_NOT_FOUND; + } + consensus_cache_entry_handle_t *handle = latest_consensus[flavor][pos]; + if (!handle) + return CONSDIFF_NOT_FOUND; + *entry_out = consensus_cache_entry_handle_get(handle); + if (*entry_out) + return CONSDIFF_AVAILABLE; + else + return CONSDIFF_NOT_FOUND; +} + +/** + * Look up consensus_cache_entry_t for the consensus of type <b>flavor</b>, + * from the source consensus with the specified digest (which must be SHA3). + * + * If the diff is present, store it into *<b>entry_out</b> and return + * CONSDIFF_AVAILABLE. Otherwise return CONSDIFF_NOT_FOUND or + * CONSDIFF_IN_PROGRESS. + */ +consdiff_status_t +consdiffmgr_find_diff_from(consensus_cache_entry_t **entry_out, + consensus_flavor_t flavor, + int digest_type, + const uint8_t *digest, + size_t digestlen, + compress_method_t method) +{ + if (BUG(digest_type != DIGEST_SHA3_256) || + BUG(digestlen != DIGEST256_LEN)) { + return CONSDIFF_NOT_FOUND; // LCOV_EXCL_LINE + } + + // Try to look up the entry in the hashtable. + cdm_diff_t search, *ent; + memset(&search, 0, sizeof(search)); + search.flavor = flavor; + search.compress_method = method; + memcpy(search.from_sha3, digest, DIGEST256_LEN); + ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); + + if (ent == NULL || + ent->cdm_diff_status == CDM_DIFF_ERROR) { + return CONSDIFF_NOT_FOUND; + } else if (ent->cdm_diff_status == CDM_DIFF_IN_PROGRESS) { + return CONSDIFF_IN_PROGRESS; + } else if (BUG(ent->cdm_diff_status != CDM_DIFF_PRESENT)) { + return CONSDIFF_IN_PROGRESS; + } + + if (BUG(ent->entry == NULL)) { + return CONSDIFF_NOT_FOUND; + } + *entry_out = consensus_cache_entry_handle_get(ent->entry); + return (*entry_out) ? CONSDIFF_AVAILABLE : CONSDIFF_NOT_FOUND; + +#if 0 + // XXXX Remove this. I'm keeping it around for now in case we need to + // XXXX debug issues in the hashtable. + char hex[HEX_DIGEST256_LEN+1]; + base16_encode(hex, sizeof(hex), (const char *)digest, digestlen); + const char *flavname = networkstatus_get_flavor_name(flavor); + + smartlist_t *matches = smartlist_new(); + consensus_cache_find_all(matches, cdm_cache_get(), + LABEL_FROM_SHA3_DIGEST, hex); + consensus_cache_filter_list(matches, LABEL_FLAVOR, flavname); + consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF); + + *entry_out = sort_and_find_most_recent(matches); + consdiff_status_t result = + (*entry_out) ? CONSDIFF_AVAILABLE : CONSDIFF_NOT_FOUND; + smartlist_free(matches); + + return result; +#endif /* 0 */ +} + +/** + * Perform periodic cleanup tasks on the consensus diff cache. Return + * the number of objects marked for deletion. + */ +int +consdiffmgr_cleanup(void) +{ + smartlist_t *objects = smartlist_new(); + smartlist_t *consensuses = smartlist_new(); + smartlist_t *diffs = smartlist_new(); + int n_to_delete = 0; + + log_debug(LD_DIRSERV, "Looking for consdiffmgr entries to remove"); + + // 1. Delete any consensus or diff or anything whose valid_after is too old. + const time_t valid_after_cutoff = approx_time() - get_max_age_to_cache(); + + consensus_cache_find_all(objects, cdm_cache_get(), + NULL, NULL); + SMARTLIST_FOREACH_BEGIN(objects, consensus_cache_entry_t *, ent) { + const char *lv_valid_after = + consensus_cache_entry_get_value(ent, LABEL_VALID_AFTER); + if (! lv_valid_after) { + log_debug(LD_DIRSERV, "Ignoring entry because it had no %s label", + LABEL_VALID_AFTER); + continue; + } + time_t valid_after = 0; + if (parse_iso_time_nospace(lv_valid_after, &valid_after) < 0) { + log_debug(LD_DIRSERV, "Ignoring entry because its %s value (%s) was " + "unparseable", LABEL_VALID_AFTER, escaped(lv_valid_after)); + continue; + } + if (valid_after < valid_after_cutoff) { + log_debug(LD_DIRSERV, "Deleting entry because its %s value (%s) was " + "too old", LABEL_VALID_AFTER, lv_valid_after); + consensus_cache_entry_mark_for_removal(ent); + ++n_to_delete; + } + } SMARTLIST_FOREACH_END(ent); + + // 2. Delete all diffs that lead to a consensus whose valid-after is not the + // latest. + for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) { + const char *flavname = networkstatus_get_flavor_name(flav); + /* Determine the most recent consensus of this flavor */ + consensus_cache_find_all(consensuses, cdm_cache_get(), + LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + consensus_cache_filter_list(consensuses, LABEL_FLAVOR, flavname); + consensus_cache_entry_t *most_recent = + sort_and_find_most_recent(consensuses); + if (most_recent == NULL) + continue; + const char *most_recent_sha3 = + consensus_cache_entry_get_value(most_recent, + LABEL_SHA3_DIGEST_UNCOMPRESSED); + if (BUG(most_recent_sha3 == NULL)) + continue; // LCOV_EXCL_LINE + + /* consider all such-flavored diffs, and look to see if they match. */ + consensus_cache_find_all(diffs, cdm_cache_get(), + LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF); + consensus_cache_filter_list(diffs, LABEL_FLAVOR, flavname); + SMARTLIST_FOREACH_BEGIN(diffs, consensus_cache_entry_t *, diff) { + const char *this_diff_target_sha3 = + consensus_cache_entry_get_value(diff, LABEL_TARGET_SHA3_DIGEST); + if (!this_diff_target_sha3) + continue; + if (strcmp(this_diff_target_sha3, most_recent_sha3)) { + consensus_cache_entry_mark_for_removal(diff); + ++n_to_delete; + } + } SMARTLIST_FOREACH_END(diff); + smartlist_clear(consensuses); + smartlist_clear(diffs); + } + + // 3. Delete all consensuses except the most recent that are compressed with + // an un-preferred method. + for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) { + const char *flavname = networkstatus_get_flavor_name(flav); + /* Determine the most recent consensus of this flavor */ + consensus_cache_find_all(consensuses, cdm_cache_get(), + LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + consensus_cache_filter_list(consensuses, LABEL_FLAVOR, flavname); + consensus_cache_entry_t *most_recent = + sort_and_find_most_recent(consensuses); + if (most_recent == NULL) + continue; + const char *most_recent_sha3_uncompressed = + consensus_cache_entry_get_value(most_recent, + LABEL_SHA3_DIGEST_UNCOMPRESSED); + const char *retain_methodname = compression_method_get_name( + RETAIN_CONSENSUS_COMPRESSED_WITH_METHOD); + + if (BUG(most_recent_sha3_uncompressed == NULL)) + continue; + SMARTLIST_FOREACH_BEGIN(consensuses, consensus_cache_entry_t *, ent) { + const char *lv_sha3_uncompressed = + consensus_cache_entry_get_value(ent, LABEL_SHA3_DIGEST_UNCOMPRESSED); + if (BUG(! lv_sha3_uncompressed)) + continue; + if (!strcmp(lv_sha3_uncompressed, most_recent_sha3_uncompressed)) + continue; // This _is_ the most recent. + const char *lv_methodname = + consensus_cache_entry_get_value(ent, LABEL_COMPRESSION_TYPE); + if (! lv_methodname || strcmp(lv_methodname, retain_methodname)) { + consensus_cache_entry_mark_for_removal(ent); + ++n_to_delete; + } + } SMARTLIST_FOREACH_END(ent); + } + + smartlist_free(objects); + smartlist_free(consensuses); + smartlist_free(diffs); + + // Actually remove files, if they're not used. + consensus_cache_delete_pending(cdm_cache_get(), 0); + return n_to_delete; +} + +/** + * Initialize the consensus diff manager and its cache, and configure + * its parameters based on the latest torrc and networkstatus parameters. + */ +void +consdiffmgr_configure(const consdiff_cfg_t *cfg) +{ + if (cfg) + memcpy(&consdiff_cfg, cfg, sizeof(consdiff_cfg)); + + (void) cdm_cache_get(); +} + +/** + * Tell the sandbox (if any) configured by <b>cfg</b> to allow the + * operations that the consensus diff manager will need. + */ +int +consdiffmgr_register_with_sandbox(struct sandbox_cfg_elem **cfg) +{ + return consensus_cache_register_with_sandbox(cdm_cache_get(), cfg); +} + +/** + * Scan the consensus diff manager's cache for any grossly malformed entries, + * and mark them as deletable. Return 0 if no problems were found; 1 + * if problems were found and fixed. + */ +int +consdiffmgr_validate(void) +{ + /* Right now, we only check for entries that have bad sha3 values */ + int problems = 0; + + smartlist_t *objects = smartlist_new(); + consensus_cache_find_all(objects, cdm_cache_get(), + NULL, NULL); + SMARTLIST_FOREACH_BEGIN(objects, consensus_cache_entry_t *, obj) { + uint8_t sha3_expected[DIGEST256_LEN]; + uint8_t sha3_received[DIGEST256_LEN]; + int r = cdm_entry_get_sha3_value(sha3_expected, obj, LABEL_SHA3_DIGEST); + if (r == -1) { + /* digest isn't there; that's allowed */ + continue; + } else if (r == -2) { + /* digest is malformed; that's not allowed */ + problems = 1; + consensus_cache_entry_mark_for_removal(obj); + continue; + } + const uint8_t *body; + size_t bodylen; + consensus_cache_entry_incref(obj); + r = consensus_cache_entry_get_body(obj, &body, &bodylen); + if (r == 0) { + crypto_digest256((char *)sha3_received, (const char *)body, bodylen, + DIGEST_SHA3_256); + } + consensus_cache_entry_decref(obj); + if (r < 0) + continue; + + // Deconfuse coverity about the possibility of sha3_received being + // uninitialized + tor_assert(r <= 0); + + if (fast_memneq(sha3_received, sha3_expected, DIGEST256_LEN)) { + problems = 1; + consensus_cache_entry_mark_for_removal(obj); + continue; + } + + } SMARTLIST_FOREACH_END(obj); + smartlist_free(objects); + return problems; +} + +/** + * Helper: build new diffs of <b>flavor</b> as needed + */ +static void +consdiffmgr_rescan_flavor_(consensus_flavor_t flavor) +{ + smartlist_t *matches = NULL; + smartlist_t *diffs = NULL; + smartlist_t *compute_diffs_from = NULL; + strmap_t *have_diff_from = NULL; + + // look for the most recent consensus, and for all previous in-range + // consensuses. Do they all have diffs to it? + const char *flavname = networkstatus_get_flavor_name(flavor); + + // 1. find the most recent consensus, and the ones that we might want + // to diff to it. + const char *methodname = compression_method_get_name( + RETAIN_CONSENSUS_COMPRESSED_WITH_METHOD); + + matches = smartlist_new(); + consensus_cache_find_all(matches, cdm_cache_get(), + LABEL_FLAVOR, flavname); + consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + consensus_cache_filter_list(matches, LABEL_COMPRESSION_TYPE, methodname); + consensus_cache_entry_t *most_recent = sort_and_find_most_recent(matches); + if (!most_recent) { + log_info(LD_DIRSERV, "No 'most recent' %s consensus found; " + "not making diffs", flavname); + goto done; + } + tor_assert(smartlist_len(matches)); + smartlist_del(matches, smartlist_len(matches) - 1); + + const char *most_recent_valid_after = + consensus_cache_entry_get_value(most_recent, LABEL_VALID_AFTER); + if (BUG(most_recent_valid_after == NULL)) + goto done; //LCOV_EXCL_LINE + uint8_t most_recent_sha3[DIGEST256_LEN]; + if (BUG(cdm_entry_get_sha3_value(most_recent_sha3, most_recent, + LABEL_SHA3_DIGEST_UNCOMPRESSED) < 0)) + goto done; //LCOV_EXCL_LINE + + // 2. Find all the relevant diffs _to_ this consensus. These are ones + // that we don't need to compute. + diffs = smartlist_new(); + consensus_cache_find_all(diffs, cdm_cache_get(), + LABEL_VALID_AFTER, most_recent_valid_after); + consensus_cache_filter_list(diffs, LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF); + consensus_cache_filter_list(diffs, LABEL_FLAVOR, flavname); + have_diff_from = strmap_new(); + SMARTLIST_FOREACH_BEGIN(diffs, consensus_cache_entry_t *, diff) { + const char *va = consensus_cache_entry_get_value(diff, + LABEL_FROM_VALID_AFTER); + if (BUG(va == NULL)) + continue; // LCOV_EXCL_LINE + strmap_set(have_diff_from, va, diff); + } SMARTLIST_FOREACH_END(diff); + + // 3. See which consensuses in 'matches' don't have diffs yet. + smartlist_reverse(matches); // from newest to oldest. + compute_diffs_from = smartlist_new(); + SMARTLIST_FOREACH_BEGIN(matches, consensus_cache_entry_t *, ent) { + const char *va = consensus_cache_entry_get_value(ent, LABEL_VALID_AFTER); + if (BUG(va == NULL)) + continue; // LCOV_EXCL_LINE + if (strmap_get(have_diff_from, va) != NULL) + continue; /* we already have this one. */ + smartlist_add(compute_diffs_from, ent); + /* Since we are not going to serve this as the most recent consensus + * any more, we should stop keeping it mmap'd when it's not in use. + */ + consensus_cache_entry_mark_for_aggressive_release(ent); + } SMARTLIST_FOREACH_END(ent); + + log_info(LD_DIRSERV, + "The most recent %s consensus is valid-after %s. We have diffs to " + "this consensus for %d/%d older %s consensuses. Generating diffs " + "for the other %d.", + flavname, + most_recent_valid_after, + smartlist_len(matches) - smartlist_len(compute_diffs_from), + smartlist_len(matches), + flavname, + smartlist_len(compute_diffs_from)); + + // 4. Update the hashtable; remove entries in this flavor to other + // target consensuses. + cdm_diff_ht_purge(flavor, most_recent_sha3); + + // 5. Actually launch the requests. + SMARTLIST_FOREACH_BEGIN(compute_diffs_from, consensus_cache_entry_t *, c) { + if (BUG(c == most_recent)) + continue; // LCOV_EXCL_LINE + + uint8_t this_sha3[DIGEST256_LEN]; + if (cdm_entry_get_sha3_value(this_sha3, c, + LABEL_SHA3_DIGEST_AS_SIGNED)<0) { + // Not actually a bug, since we might be running with a directory + // with stale files from before the #22143 fixes. + continue; + } + if (cdm_diff_ht_check_and_note_pending(flavor, + this_sha3, most_recent_sha3)) { + // This is already pending, or we encountered an error. + continue; + } + consensus_diff_queue_diff_work(c, most_recent); + } SMARTLIST_FOREACH_END(c); + + done: + smartlist_free(matches); + smartlist_free(diffs); + smartlist_free(compute_diffs_from); + strmap_free(have_diff_from, NULL); +} + +/** + * Scan the cache for the latest consensuses and add their handles to + * latest_consensus + */ +static void +consdiffmgr_consensus_load(void) +{ + smartlist_t *matches = smartlist_new(); + for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) { + const char *flavname = networkstatus_get_flavor_name(flav); + smartlist_clear(matches); + consensus_cache_find_all(matches, cdm_cache_get(), + LABEL_FLAVOR, flavname); + consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + consensus_cache_entry_t *most_recent = sort_and_find_most_recent(matches); + if (! most_recent) + continue; // no consensuses. + const char *most_recent_sha3 = + consensus_cache_entry_get_value(most_recent, + LABEL_SHA3_DIGEST_UNCOMPRESSED); + if (BUG(most_recent_sha3 == NULL)) + continue; // LCOV_EXCL_LINE + consensus_cache_filter_list(matches, LABEL_SHA3_DIGEST_UNCOMPRESSED, + most_recent_sha3); + + // Everything that remains matches the most recent consensus of this + // flavor. + SMARTLIST_FOREACH_BEGIN(matches, consensus_cache_entry_t *, ent) { + const char *lv_compression = + consensus_cache_entry_get_value(ent, LABEL_COMPRESSION_TYPE); + compress_method_t method = + compression_method_get_by_name(lv_compression); + int pos = consensus_compression_method_pos(method); + if (pos < 0) + continue; + consensus_cache_entry_handle_free(latest_consensus[flav][pos]); + latest_consensus[flav][pos] = consensus_cache_entry_handle_new(ent); + } SMARTLIST_FOREACH_END(ent); + } + smartlist_free(matches); +} + +/** + * Scan the cache for diffs, and add them to the hashtable. + */ +static void +consdiffmgr_diffs_load(void) +{ + smartlist_t *diffs = smartlist_new(); + consensus_cache_find_all(diffs, cdm_cache_get(), + LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF); + SMARTLIST_FOREACH_BEGIN(diffs, consensus_cache_entry_t *, diff) { + const char *lv_flavor = + consensus_cache_entry_get_value(diff, LABEL_FLAVOR); + if (!lv_flavor) + continue; + int flavor = networkstatus_parse_flavor_name(lv_flavor); + if (flavor < 0) + continue; + const char *lv_compression = + consensus_cache_entry_get_value(diff, LABEL_COMPRESSION_TYPE); + compress_method_t method = NO_METHOD; + if (lv_compression) { + method = compression_method_get_by_name(lv_compression); + if (method == UNKNOWN_METHOD) { + continue; + } + } + + uint8_t from_sha3[DIGEST256_LEN]; + uint8_t to_sha3[DIGEST256_LEN]; + if (cdm_entry_get_sha3_value(from_sha3, diff, LABEL_FROM_SHA3_DIGEST)<0) + continue; + if (cdm_entry_get_sha3_value(to_sha3, diff, LABEL_TARGET_SHA3_DIGEST)<0) + continue; + + cdm_diff_ht_set_status(flavor, from_sha3, to_sha3, + method, + CDM_DIFF_PRESENT, + consensus_cache_entry_handle_new(diff)); + } SMARTLIST_FOREACH_END(diff); + smartlist_free(diffs); +} + +/** + * Build new diffs as needed. + */ +void +consdiffmgr_rescan(void) +{ + if (cdm_cache_dirty == 0) + return; + + // Clean up here to make room for new diffs, and to ensure that older + // consensuses do not have any entries. + consdiffmgr_cleanup(); + + if (cdm_cache_loaded == 0) { + consdiffmgr_diffs_load(); + consdiffmgr_consensus_load(); + cdm_cache_loaded = 1; + } + + for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) { + consdiffmgr_rescan_flavor_((consensus_flavor_t) flav); + } + + cdm_cache_dirty = 0; +} + +/** Callback wrapper for consdiffmgr_rescan */ +static void +consdiffmgr_rescan_cb(mainloop_event_t *ev, void *arg) +{ + (void)ev; + (void)arg; + consdiffmgr_rescan(); +} + +/** Mark the cache as dirty, and schedule a rescan event. */ +static void +mark_cdm_cache_dirty(void) +{ + cdm_cache_dirty = 1; + tor_assert(consdiffmgr_rescan_ev); + mainloop_event_activate(consdiffmgr_rescan_ev); +} + +/** + * Helper: compare two files by their from-valid-after and valid-after labels, + * trying to sort in ascending order by from-valid-after (when present) and + * valid-after (when not). Place everything that has neither label first in + * the list. + */ +static int +compare_by_staleness_(const void **a, const void **b) +{ + const consensus_cache_entry_t *e1 = *a; + const consensus_cache_entry_t *e2 = *b; + const char *va1, *fva1, *va2, *fva2; + va1 = consensus_cache_entry_get_value(e1, LABEL_VALID_AFTER); + va2 = consensus_cache_entry_get_value(e2, LABEL_VALID_AFTER); + fva1 = consensus_cache_entry_get_value(e1, LABEL_FROM_VALID_AFTER); + fva2 = consensus_cache_entry_get_value(e2, LABEL_FROM_VALID_AFTER); + + if (fva1) + va1 = fva1; + if (fva2) + va2 = fva2; + + /* See note about iso-encoded values in compare_by_valid_after_. Also note + * that missing dates will get placed first. */ + return strcmp_opt(va1, va2); +} + +/** If there are not enough unused filenames to store <b>n</b> files, then + * delete old consensuses until there are. (We have to keep track of the + * number of filenames because of the way that the seccomp2 cache works.) + * + * Return 0 on success, -1 on failure. + **/ +static int +consdiffmgr_ensure_space_for_files(int n) +{ + consensus_cache_t *cache = cdm_cache_get(); + if (consensus_cache_get_n_filenames_available(cache) >= n) { + // there are already enough unused filenames. + return 0; + } + // Try a cheap deletion of stuff that's waiting to get deleted. + consensus_cache_delete_pending(cache, 0); + if (consensus_cache_get_n_filenames_available(cache) >= n) { + // okay, _that_ made enough filenames available. + return 0; + } + // Let's get more assertive: clean out unused stuff, and force-remove + // the files that we can. + consdiffmgr_cleanup(); + consensus_cache_delete_pending(cache, 1); + const int n_to_remove = n - consensus_cache_get_n_filenames_available(cache); + if (n_to_remove <= 0) { + // okay, finally! + return 0; + } + + // At this point, we're going to have to throw out objects that will be + // missed. Too bad! + smartlist_t *objects = smartlist_new(); + consensus_cache_find_all(objects, cache, NULL, NULL); + smartlist_sort(objects, compare_by_staleness_); + int n_marked = 0; + SMARTLIST_FOREACH_BEGIN(objects, consensus_cache_entry_t *, ent) { + consensus_cache_entry_mark_for_removal(ent); + if (++n_marked >= n_to_remove) + break; + } SMARTLIST_FOREACH_END(ent); + smartlist_free(objects); + + consensus_cache_delete_pending(cache, 1); + + if (consensus_cache_may_overallocate(cache)) { + /* If we're allowed to throw extra files into the cache, let's do so + * rather getting upset. + */ + return 0; + } + + if (BUG(n_marked < n_to_remove)) + return -1; + else + return 0; +} + +/** + * Set consensus cache flags on the objects in this consdiffmgr. + */ +static void +consdiffmgr_set_cache_flags(void) +{ + /* Right now, we just mark the consensus objects for aggressive release, + * so that they get mmapped for as little time as possible. */ + smartlist_t *objects = smartlist_new(); + consensus_cache_find_all(objects, cdm_cache_get(), LABEL_DOCTYPE, + DOCTYPE_CONSENSUS); + SMARTLIST_FOREACH_BEGIN(objects, consensus_cache_entry_t *, ent) { + consensus_cache_entry_mark_for_aggressive_release(ent); + } SMARTLIST_FOREACH_END(ent); + smartlist_free(objects); +} + +/** + * Called before shutdown: drop all storage held by the consdiffmgr.c module. + */ +void +consdiffmgr_free_all(void) +{ + cdm_diff_t **diff, **next; + for (diff = HT_START(cdm_diff_ht, &cdm_diff_ht); diff; diff = next) { + cdm_diff_t *this = *diff; + next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff); + cdm_diff_free(this); + } + int i; + unsigned j; + for (i = 0; i < N_CONSENSUS_FLAVORS; ++i) { + for (j = 0; j < n_consensus_compression_methods(); ++j) { + consensus_cache_entry_handle_free(latest_consensus[i][j]); + } + } + memset(latest_consensus, 0, sizeof(latest_consensus)); + consensus_cache_free(cons_diff_cache); + cons_diff_cache = NULL; + mainloop_event_free(consdiffmgr_rescan_ev); +} + +/* ===== + Thread workers + =====*/ + +typedef struct compressed_result_t { + config_line_t *labels; + /** + * Output: Body of the diff, as compressed. + */ + uint8_t *body; + /** + * Output: length of body_out + */ + size_t bodylen; +} compressed_result_t; + +/** + * Compress the bytestring <b>input</b> of length <b>len</b> using the + * <n>n_methods</b> compression methods listed in the array <b>methods</b>. + * + * For each successful compression, set the fields in the <b>results_out</b> + * array in the position corresponding to the compression method. Use + * <b>labels_in</b> as a basis for the labels of the result. + * + * Return 0 if all compression succeeded; -1 if any failed. + */ +static int +compress_multiple(compressed_result_t *results_out, int n_methods, + const compress_method_t *methods, + const uint8_t *input, size_t len, + const config_line_t *labels_in) +{ + int rv = 0; + int i; + for (i = 0; i < n_methods; ++i) { + compress_method_t method = methods[i]; + const char *methodname = compression_method_get_name(method); + char *result; + size_t sz; + if (0 == tor_compress(&result, &sz, (const char*)input, len, method)) { + results_out[i].body = (uint8_t*)result; + results_out[i].bodylen = sz; + results_out[i].labels = config_lines_dup(labels_in); + cdm_labels_prepend_sha3(&results_out[i].labels, LABEL_SHA3_DIGEST, + results_out[i].body, + results_out[i].bodylen); + config_line_prepend(&results_out[i].labels, + LABEL_COMPRESSION_TYPE, + methodname); + } else { + rv = -1; + } + } + return rv; +} + +/** + * Given an array of <b>n</b> compressed_result_t in <b>results</b>, + * as produced by compress_multiple, store them all into the + * consdiffmgr, and store handles to them in the <b>handles_out</b> + * array. + * + * Return CDM_DIFF_PRESENT if any was stored, and CDM_DIFF_ERROR if none + * was stored. + */ +static cdm_diff_status_t +store_multiple(consensus_cache_entry_handle_t **handles_out, + int n, + const compress_method_t *methods, + const compressed_result_t *results, + const char *description) +{ + cdm_diff_status_t status = CDM_DIFF_ERROR; + consdiffmgr_ensure_space_for_files(n); + + int i; + for (i = 0; i < n; ++i) { + compress_method_t method = methods[i]; + uint8_t *body_out = results[i].body; + size_t bodylen_out = results[i].bodylen; + config_line_t *labels = results[i].labels; + const char *methodname = compression_method_get_name(method); + if (body_out && bodylen_out && labels) { + /* Success! Store the results */ + log_info(LD_DIRSERV, "Adding %s, compressed with %s", + description, methodname); + + consensus_cache_entry_t *ent = + consensus_cache_add(cdm_cache_get(), + labels, + body_out, + bodylen_out); + if (ent == NULL) { + static ratelim_t cant_store_ratelim = RATELIM_INIT(5*60); + log_fn_ratelim(&cant_store_ratelim, LOG_WARN, LD_FS, + "Unable to store object %s compressed with %s.", + description, methodname); + continue; + } + + status = CDM_DIFF_PRESENT; + handles_out[i] = consensus_cache_entry_handle_new(ent); + consensus_cache_entry_decref(ent); + } + } + return status; +} + +/** + * An object passed to a worker thread that will try to produce a consensus + * diff. + */ +typedef struct consensus_diff_worker_job_t { + /** + * Input: The consensus to compute the diff from. Holds a reference to the + * cache entry, which must not be released until the job is passed back to + * the main thread. The body must be mapped into memory in the main thread. + */ + consensus_cache_entry_t *diff_from; + /** + * Input: The consensus to compute the diff to. Holds a reference to the + * cache entry, which must not be released until the job is passed back to + * the main thread. The body must be mapped into memory in the main thread. + */ + consensus_cache_entry_t *diff_to; + + /** Output: labels and bodies */ + compressed_result_t out[ARRAY_LENGTH(compress_diffs_with)]; +} consensus_diff_worker_job_t; + +/** Given a consensus_cache_entry_t, check whether it has a label claiming + * that it was compressed. If so, uncompress its contents into <b>out</b> and + * set <b>outlen</b> to hold their size. If not, just copy the body into + * <b>out</b> and set <b>outlen</b> to its length. Return 0 on success, + * -1 on failure. + * + * In all cases, the output is nul-terminated. */ +STATIC int +uncompress_or_copy(char **out, size_t *outlen, + consensus_cache_entry_t *ent) +{ + const uint8_t *body; + size_t bodylen; + + if (consensus_cache_entry_get_body(ent, &body, &bodylen) < 0) + return -1; + + const char *lv_compression = + consensus_cache_entry_get_value(ent, LABEL_COMPRESSION_TYPE); + compress_method_t method = NO_METHOD; + + if (lv_compression) + method = compression_method_get_by_name(lv_compression); + + return tor_uncompress(out, outlen, (const char *)body, bodylen, + method, 1, LOG_WARN); +} + +/** + * Worker function. This function runs inside a worker thread and receives + * a consensus_diff_worker_job_t as its input. + */ +static workqueue_reply_t +consensus_diff_worker_threadfn(void *state_, void *work_) +{ + (void)state_; + consensus_diff_worker_job_t *job = work_; + const uint8_t *diff_from, *diff_to; + size_t len_from, len_to; + int r; + /* We need to have the body already mapped into RAM here. + */ + r = consensus_cache_entry_get_body(job->diff_from, &diff_from, &len_from); + if (BUG(r < 0)) + return WQ_RPL_REPLY; // LCOV_EXCL_LINE + r = consensus_cache_entry_get_body(job->diff_to, &diff_to, &len_to); + if (BUG(r < 0)) + return WQ_RPL_REPLY; // LCOV_EXCL_LINE + + const char *lv_to_valid_after = + consensus_cache_entry_get_value(job->diff_to, LABEL_VALID_AFTER); + const char *lv_to_fresh_until = + consensus_cache_entry_get_value(job->diff_to, LABEL_FRESH_UNTIL); + const char *lv_to_valid_until = + consensus_cache_entry_get_value(job->diff_to, LABEL_VALID_UNTIL); + const char *lv_to_signatories = + consensus_cache_entry_get_value(job->diff_to, LABEL_SIGNATORIES); + const char *lv_from_valid_after = + consensus_cache_entry_get_value(job->diff_from, LABEL_VALID_AFTER); + const char *lv_from_digest = + consensus_cache_entry_get_value(job->diff_from, + LABEL_SHA3_DIGEST_AS_SIGNED); + const char *lv_from_flavor = + consensus_cache_entry_get_value(job->diff_from, LABEL_FLAVOR); + const char *lv_to_flavor = + consensus_cache_entry_get_value(job->diff_to, LABEL_FLAVOR); + const char *lv_to_digest = + consensus_cache_entry_get_value(job->diff_to, + LABEL_SHA3_DIGEST_UNCOMPRESSED); + + if (! lv_from_digest) { + /* This isn't a bug right now, since it can happen if you're migrating + * from an older version of master to a newer one. The older ones didn't + * annotate their stored consensus objects with sha3-digest-as-signed. + */ + return WQ_RPL_REPLY; // LCOV_EXCL_LINE + } + + /* All these values are mandatory on the input */ + if (BUG(!lv_to_valid_after) || + BUG(!lv_from_valid_after) || + BUG(!lv_from_flavor) || + BUG(!lv_to_flavor)) { + return WQ_RPL_REPLY; // LCOV_EXCL_LINE + } + /* The flavors need to match */ + if (BUG(strcmp(lv_from_flavor, lv_to_flavor))) { + return WQ_RPL_REPLY; // LCOV_EXCL_LINE + } + + char *consensus_diff; + { + char *diff_from_nt = NULL, *diff_to_nt = NULL; + size_t diff_from_nt_len, diff_to_nt_len; + + if (uncompress_or_copy(&diff_from_nt, &diff_from_nt_len, + job->diff_from) < 0) { + return WQ_RPL_REPLY; + } + if (uncompress_or_copy(&diff_to_nt, &diff_to_nt_len, + job->diff_to) < 0) { + tor_free(diff_from_nt); + return WQ_RPL_REPLY; + } + tor_assert(diff_from_nt); + tor_assert(diff_to_nt); + + // XXXX ugh; this is going to calculate the SHA3 of both its + // XXXX inputs again, even though we already have that. Maybe it's time + // XXXX to change the API here? + consensus_diff = consensus_diff_generate(diff_from_nt, diff_to_nt); + tor_free(diff_from_nt); + tor_free(diff_to_nt); + } + if (!consensus_diff) { + /* Couldn't generate consensus; we'll leave the reply blank. */ + return WQ_RPL_REPLY; + } + + /* Compress the results and send the reply */ + tor_assert(compress_diffs_with[0] == NO_METHOD); + size_t difflen = strlen(consensus_diff); + job->out[0].body = (uint8_t *) consensus_diff; + job->out[0].bodylen = difflen; + + config_line_t *common_labels = NULL; + if (lv_to_valid_until) + config_line_prepend(&common_labels, LABEL_VALID_UNTIL, lv_to_valid_until); + if (lv_to_fresh_until) + config_line_prepend(&common_labels, LABEL_FRESH_UNTIL, lv_to_fresh_until); + if (lv_to_signatories) + config_line_prepend(&common_labels, LABEL_SIGNATORIES, lv_to_signatories); + cdm_labels_prepend_sha3(&common_labels, + LABEL_SHA3_DIGEST_UNCOMPRESSED, + job->out[0].body, + job->out[0].bodylen); + config_line_prepend(&common_labels, LABEL_FROM_VALID_AFTER, + lv_from_valid_after); + config_line_prepend(&common_labels, LABEL_VALID_AFTER, + lv_to_valid_after); + config_line_prepend(&common_labels, LABEL_FLAVOR, lv_from_flavor); + config_line_prepend(&common_labels, LABEL_FROM_SHA3_DIGEST, + lv_from_digest); + config_line_prepend(&common_labels, LABEL_TARGET_SHA3_DIGEST, + lv_to_digest); + config_line_prepend(&common_labels, LABEL_DOCTYPE, + DOCTYPE_CONSENSUS_DIFF); + + job->out[0].labels = config_lines_dup(common_labels); + cdm_labels_prepend_sha3(&job->out[0].labels, + LABEL_SHA3_DIGEST, + job->out[0].body, + job->out[0].bodylen); + + compress_multiple(job->out+1, + n_diff_compression_methods()-1, + compress_diffs_with+1, + (const uint8_t*)consensus_diff, difflen, common_labels); + + config_free_lines(common_labels); + return WQ_RPL_REPLY; +} + +#define consensus_diff_worker_job_free(job) \ + FREE_AND_NULL(consensus_diff_worker_job_t, \ + consensus_diff_worker_job_free_, (job)) + +/** + * Helper: release all storage held in <b>job</b>. + */ +static void +consensus_diff_worker_job_free_(consensus_diff_worker_job_t *job) +{ + if (!job) + return; + unsigned u; + for (u = 0; u < n_diff_compression_methods(); ++u) { + config_free_lines(job->out[u].labels); + tor_free(job->out[u].body); + } + consensus_cache_entry_decref(job->diff_from); + consensus_cache_entry_decref(job->diff_to); + tor_free(job); +} + +/** + * Worker function: This function runs in the main thread, and receives + * a consensus_diff_worker_job_t that the worker thread has already + * processed. + */ +static void +consensus_diff_worker_replyfn(void *work_) +{ + tor_assert(in_main_thread()); + tor_assert(work_); + + consensus_diff_worker_job_t *job = work_; + + const char *lv_from_digest = + consensus_cache_entry_get_value(job->diff_from, + LABEL_SHA3_DIGEST_AS_SIGNED); + const char *lv_to_digest = + consensus_cache_entry_get_value(job->diff_to, + LABEL_SHA3_DIGEST_UNCOMPRESSED); + const char *lv_flavor = + consensus_cache_entry_get_value(job->diff_to, LABEL_FLAVOR); + if (BUG(lv_from_digest == NULL)) + lv_from_digest = "???"; // LCOV_EXCL_LINE + if (BUG(lv_to_digest == NULL)) + lv_to_digest = "???"; // LCOV_EXCL_LINE + + uint8_t from_sha3[DIGEST256_LEN]; + uint8_t to_sha3[DIGEST256_LEN]; + int flav = -1; + int cache = 1; + if (BUG(cdm_entry_get_sha3_value(from_sha3, job->diff_from, + LABEL_SHA3_DIGEST_AS_SIGNED) < 0)) + cache = 0; + if (BUG(cdm_entry_get_sha3_value(to_sha3, job->diff_to, + LABEL_SHA3_DIGEST_UNCOMPRESSED) < 0)) + cache = 0; + if (BUG(lv_flavor == NULL)) { + cache = 0; + } else if ((flav = networkstatus_parse_flavor_name(lv_flavor)) < 0) { + cache = 0; + } + + consensus_cache_entry_handle_t *handles[ARRAY_LENGTH(compress_diffs_with)]; + memset(handles, 0, sizeof(handles)); + + char description[128]; + tor_snprintf(description, sizeof(description), + "consensus diff from %s to %s", + lv_from_digest, lv_to_digest); + + int status = store_multiple(handles, + n_diff_compression_methods(), + compress_diffs_with, + job->out, + description); + + if (status != CDM_DIFF_PRESENT) { + /* Failure! Nothing to do but complain */ + log_warn(LD_DIRSERV, + "Worker was unable to compute consensus diff " + "from %s to %s", lv_from_digest, lv_to_digest); + /* Cache this error so we don't try to compute this one again. */ + status = CDM_DIFF_ERROR; + } + + unsigned u; + for (u = 0; u < ARRAY_LENGTH(handles); ++u) { + compress_method_t method = compress_diffs_with[u]; + if (cache) { + consensus_cache_entry_handle_t *h = handles[u]; + int this_status = status; + if (h == NULL) { + this_status = CDM_DIFF_ERROR; + } + tor_assert_nonfatal(h != NULL || this_status == CDM_DIFF_ERROR); + cdm_diff_ht_set_status(flav, from_sha3, to_sha3, method, this_status, h); + } else { + consensus_cache_entry_handle_free(handles[u]); + } + } + + consensus_diff_worker_job_free(job); +} + +/** + * Queue the job of computing the diff from <b>diff_from</b> to <b>diff_to</b> + * in a worker thread. + */ +static int +consensus_diff_queue_diff_work(consensus_cache_entry_t *diff_from, + consensus_cache_entry_t *diff_to) +{ + tor_assert(in_main_thread()); + + consensus_cache_entry_incref(diff_from); + consensus_cache_entry_incref(diff_to); + + consensus_diff_worker_job_t *job = tor_malloc_zero(sizeof(*job)); + job->diff_from = diff_from; + job->diff_to = diff_to; + + /* Make sure body is mapped. */ + const uint8_t *body; + size_t bodylen; + int r1 = consensus_cache_entry_get_body(diff_from, &body, &bodylen); + int r2 = consensus_cache_entry_get_body(diff_to, &body, &bodylen); + if (r1 < 0 || r2 < 0) + goto err; + + workqueue_entry_t *work; + work = cpuworker_queue_work(WQ_PRI_LOW, + consensus_diff_worker_threadfn, + consensus_diff_worker_replyfn, + job); + if (!work) + goto err; + + return 0; + err: + consensus_diff_worker_job_free(job); // includes decrefs. + return -1; +} + +/** + * Holds requests and replies for consensus_compress_workers. + */ +typedef struct consensus_compress_worker_job_t { + char *consensus; + size_t consensus_len; + consensus_flavor_t flavor; + config_line_t *labels_in; + compressed_result_t out[ARRAY_LENGTH(compress_consensus_with)]; +} consensus_compress_worker_job_t; + +#define consensus_compress_worker_job_free(job) \ + FREE_AND_NULL(consensus_compress_worker_job_t, \ + consensus_compress_worker_job_free_, (job)) + +/** + * Free all resources held in <b>job</b> + */ +static void +consensus_compress_worker_job_free_(consensus_compress_worker_job_t *job) +{ + if (!job) + return; + tor_free(job->consensus); + config_free_lines(job->labels_in); + unsigned u; + for (u = 0; u < n_consensus_compression_methods(); ++u) { + config_free_lines(job->out[u].labels); + tor_free(job->out[u].body); + } + tor_free(job); +} +/** + * Worker function. This function runs inside a worker thread and receives + * a consensus_compress_worker_job_t as its input. + */ +static workqueue_reply_t +consensus_compress_worker_threadfn(void *state_, void *work_) +{ + (void)state_; + consensus_compress_worker_job_t *job = work_; + consensus_flavor_t flavor = job->flavor; + const char *consensus = job->consensus; + size_t bodylen = job->consensus_len; + + config_line_t *labels = config_lines_dup(job->labels_in); + const char *flavname = networkstatus_get_flavor_name(flavor); + + cdm_labels_prepend_sha3(&labels, LABEL_SHA3_DIGEST_UNCOMPRESSED, + (const uint8_t *)consensus, bodylen); + { + const char *start, *end; + if (router_get_networkstatus_v3_signed_boundaries(consensus, + &start, &end) < 0) { + start = consensus; + end = consensus+bodylen; + } + cdm_labels_prepend_sha3(&labels, LABEL_SHA3_DIGEST_AS_SIGNED, + (const uint8_t *)start, + end - start); + } + config_line_prepend(&labels, LABEL_FLAVOR, flavname); + config_line_prepend(&labels, LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + + compress_multiple(job->out, + n_consensus_compression_methods(), + compress_consensus_with, + (const uint8_t*)consensus, bodylen, labels); + config_free_lines(labels); + return WQ_RPL_REPLY; +} + +/** + * Worker function: This function runs in the main thread, and receives + * a consensus_diff_compress_job_t that the worker thread has already + * processed. + */ +static void +consensus_compress_worker_replyfn(void *work_) +{ + consensus_compress_worker_job_t *job = work_; + + consensus_cache_entry_handle_t *handles[ + ARRAY_LENGTH(compress_consensus_with)]; + memset(handles, 0, sizeof(handles)); + + store_multiple(handles, + n_consensus_compression_methods(), + compress_consensus_with, + job->out, + "consensus"); + mark_cdm_cache_dirty(); + + unsigned u; + consensus_flavor_t f = job->flavor; + tor_assert((int)f < N_CONSENSUS_FLAVORS); + for (u = 0; u < ARRAY_LENGTH(handles); ++u) { + if (handles[u] == NULL) + continue; + consensus_cache_entry_handle_free(latest_consensus[f][u]); + latest_consensus[f][u] = handles[u]; + } + + consensus_compress_worker_job_free(job); +} + +/** + * If true, we compress in worker threads. + */ +static int background_compression = 0; + +/** + * Queue a job to compress <b>consensus</b> and store its compressed + * text in the cache. + */ +static int +consensus_queue_compression_work(const char *consensus, + const networkstatus_t *as_parsed) +{ + tor_assert(consensus); + tor_assert(as_parsed); + + consensus_compress_worker_job_t *job = tor_malloc_zero(sizeof(*job)); + job->consensus = tor_strdup(consensus); + job->consensus_len = strlen(consensus); + job->flavor = as_parsed->flavor; + + char va_str[ISO_TIME_LEN+1]; + char vu_str[ISO_TIME_LEN+1]; + char fu_str[ISO_TIME_LEN+1]; + format_iso_time_nospace(va_str, as_parsed->valid_after); + format_iso_time_nospace(fu_str, as_parsed->fresh_until); + format_iso_time_nospace(vu_str, as_parsed->valid_until); + config_line_append(&job->labels_in, LABEL_VALID_AFTER, va_str); + config_line_append(&job->labels_in, LABEL_FRESH_UNTIL, fu_str); + config_line_append(&job->labels_in, LABEL_VALID_UNTIL, vu_str); + if (as_parsed->voters) { + smartlist_t *hexvoters = smartlist_new(); + SMARTLIST_FOREACH_BEGIN(as_parsed->voters, + networkstatus_voter_info_t *, vi) { + if (smartlist_len(vi->sigs) == 0) + continue; // didn't sign. + char d[HEX_DIGEST_LEN+1]; + base16_encode(d, sizeof(d), vi->identity_digest, DIGEST_LEN); + smartlist_add_strdup(hexvoters, d); + } SMARTLIST_FOREACH_END(vi); + char *signers = smartlist_join_strings(hexvoters, ",", 0, NULL); + config_line_prepend(&job->labels_in, LABEL_SIGNATORIES, signers); + tor_free(signers); + SMARTLIST_FOREACH(hexvoters, char *, cp, tor_free(cp)); + smartlist_free(hexvoters); + } + + if (background_compression) { + workqueue_entry_t *work; + work = cpuworker_queue_work(WQ_PRI_LOW, + consensus_compress_worker_threadfn, + consensus_compress_worker_replyfn, + job); + if (!work) { + consensus_compress_worker_job_free(job); + return -1; + } + + return 0; + } else { + consensus_compress_worker_threadfn(NULL, job); + consensus_compress_worker_replyfn(job); + return 0; + } +} + +/** + * Tell the consdiffmgr backend to compress consensuses in worker threads. + */ +void +consdiffmgr_enable_background_compression(void) +{ + // This isn't the default behavior because it would break unit tests. + background_compression = 1; +} + +/** Read the set of voters from the cached object <b>ent</b> into + * <b>out</b>, as a list of hex-encoded digests. Return 0 on success, + * -1 if no signatories were recorded. */ +int +consensus_cache_entry_get_voter_id_digests(const consensus_cache_entry_t *ent, + smartlist_t *out) +{ + tor_assert(ent); + tor_assert(out); + const char *s; + s = consensus_cache_entry_get_value(ent, LABEL_SIGNATORIES); + if (s == NULL) + return -1; + smartlist_split_string(out, s, ",", SPLIT_SKIP_SPACE|SPLIT_STRIP_SPACE, 0); + return 0; +} + +/** Read the fresh-until time of cached object <b>ent</b> into *<b>out</b> + * and return 0, or return -1 if no such time was recorded. */ +int +consensus_cache_entry_get_fresh_until(const consensus_cache_entry_t *ent, + time_t *out) +{ + tor_assert(ent); + tor_assert(out); + const char *s; + s = consensus_cache_entry_get_value(ent, LABEL_FRESH_UNTIL); + if (s == NULL || parse_iso_time_nospace(s, out) < 0) + return -1; + else + return 0; +} + +/** Read the valid until timestamp from the cached object <b>ent</b> into + * *<b>out</b> and return 0, or return -1 if no such time was recorded. */ +int +consensus_cache_entry_get_valid_until(const consensus_cache_entry_t *ent, + time_t *out) +{ + tor_assert(ent); + tor_assert(out); + + const char *s; + s = consensus_cache_entry_get_value(ent, LABEL_VALID_UNTIL); + if (s == NULL || parse_iso_time_nospace(s, out) < 0) + return -1; + else + return 0; +} + +/** Read the valid after timestamp from the cached object <b>ent</b> into + * *<b>out</b> and return 0, or return -1 if no such time was recorded. */ +int +consensus_cache_entry_get_valid_after(const consensus_cache_entry_t *ent, + time_t *out) +{ + tor_assert(ent); + tor_assert(out); + + const char *s; + s = consensus_cache_entry_get_value(ent, LABEL_VALID_AFTER); + + if (s == NULL || parse_iso_time_nospace(s, out) < 0) + return -1; + else + return 0; +} diff --git a/src/feature/dircache/consdiffmgr.h b/src/feature/dircache/consdiffmgr.h new file mode 100644 index 0000000000..66c3d65002 --- /dev/null +++ b/src/feature/dircache/consdiffmgr.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2017-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_CONSDIFFMGR_H +#define TOR_CONSDIFFMGR_H + +enum compress_method_t; + +/** + * Possible outcomes from trying to look up a given consensus diff. + */ +typedef enum consdiff_status_t { + CONSDIFF_AVAILABLE, + CONSDIFF_NOT_FOUND, + CONSDIFF_IN_PROGRESS, +} consdiff_status_t; + +typedef struct consdiff_cfg_t { + int32_t cache_max_num; +} consdiff_cfg_t; + +struct consensus_cache_entry_t; // from conscache.h + +int consdiffmgr_add_consensus(const char *consensus, + const networkstatus_t *as_parsed); + +consdiff_status_t consdiffmgr_find_consensus( + struct consensus_cache_entry_t **entry_out, + consensus_flavor_t flavor, + enum compress_method_t method); + +consdiff_status_t consdiffmgr_find_diff_from( + struct consensus_cache_entry_t **entry_out, + consensus_flavor_t flavor, + int digest_type, + const uint8_t *digest, + size_t digestlen, + enum compress_method_t method); + +int consensus_cache_entry_get_voter_id_digests( + const struct consensus_cache_entry_t *ent, + smartlist_t *out); +int consensus_cache_entry_get_fresh_until( + const struct consensus_cache_entry_t *ent, + time_t *out); +int consensus_cache_entry_get_valid_until( + const struct consensus_cache_entry_t *ent, + time_t *out); +int consensus_cache_entry_get_valid_after( + const struct consensus_cache_entry_t *ent, + time_t *out); + +void consdiffmgr_rescan(void); +int consdiffmgr_cleanup(void); +void consdiffmgr_enable_background_compression(void); +void consdiffmgr_configure(const consdiff_cfg_t *cfg); +struct sandbox_cfg_elem; +int consdiffmgr_register_with_sandbox(struct sandbox_cfg_elem **cfg); +void consdiffmgr_free_all(void); +int consdiffmgr_validate(void); + +#ifdef CONSDIFFMGR_PRIVATE +STATIC unsigned n_diff_compression_methods(void); +STATIC unsigned n_consensus_compression_methods(void); +STATIC consensus_cache_t *cdm_cache_get(void); +STATIC consensus_cache_entry_t *cdm_cache_lookup_consensus( + consensus_flavor_t flavor, time_t valid_after); +STATIC int cdm_entry_get_sha3_value(uint8_t *digest_out, + consensus_cache_entry_t *ent, + const char *label); +STATIC int uncompress_or_copy(char **out, size_t *outlen, + consensus_cache_entry_t *ent); +#endif /* defined(CONSDIFFMGR_PRIVATE) */ + +#endif /* !defined(TOR_CONSDIFFMGR_H) */ diff --git a/src/feature/dircache/directory.c b/src/feature/dircache/directory.c new file mode 100644 index 0000000000..8f2bd8d3a7 --- /dev/null +++ b/src/feature/dircache/directory.c @@ -0,0 +1,5966 @@ +/* Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#define DIRECTORY_PRIVATE + +#include "or/or.h" +#include "lib/err/backtrace.h" +#include "or/bridges.h" +#include "lib/container/buffers.h" +#include "or/circuitbuild.h" +#include "or/config.h" +#include "or/connection.h" +#include "or/connection_edge.h" +#include "or/conscache.h" +#include "or/consdiff.h" +#include "or/consdiffmgr.h" +#include "or/control.h" +#include "lib/compress/compress.h" +#include "lib/crypt_ops/crypto_rand.h" +#include "lib/crypt_ops/crypto_util.h" +#include "or/directory.h" +#include "or/dirserv.h" +#include "or/entrynodes.h" +#include "or/fp_pair.h" +#include "or/geoip.h" +#include "or/hs_cache.h" +#include "or/hs_common.h" +#include "or/hs_control.h" +#include "or/hs_client.h" +#include "or/main.h" +#include "or/microdesc.h" +#include "or/networkstatus.h" +#include "or/nodelist.h" +#include "or/policies.h" +#include "or/relay.h" +#include "or/rendclient.h" +#include "or/rendcommon.h" +#include "or/rendservice.h" +#include "or/rephist.h" +#include "or/router.h" +#include "or/routerlist.h" +#include "or/routerparse.h" +#include "or/routerset.h" +#include "lib/encoding/confline.h" +#include "lib/crypt_ops/crypto_format.h" + +#if defined(EXPORTMALLINFO) && defined(HAVE_MALLOC_H) && defined(HAVE_MALLINFO) +#if !defined(OpenBSD) +#include <malloc.h> +#endif +#endif + +#include "or/dirauth/dirvote.h" +#include "or/dirauth/mode.h" +#include "or/dirauth/shared_random.h" + +#include "or/authority_cert_st.h" +#include "or/cached_dir_st.h" +#include "or/dir_connection_st.h" +#include "or/dir_server_st.h" +#include "or/entry_connection_st.h" +#include "or/networkstatus_st.h" +#include "or/node_st.h" +#include "or/rend_service_descriptor_st.h" +#include "or/routerinfo_st.h" + +/** + * \file directory.c + * \brief Code to send and fetch information from directory authorities and + * caches via HTTP. + * + * Directory caches and authorities use dirserv.c to generate the results of a + * query and stream them to the connection; clients use routerparse.c to parse + * them. + * + * Every directory request has a dir_connection_t on the client side and on + * the server side. In most cases, the dir_connection_t object is a linked + * connection, tunneled through an edge_connection_t so that it can be a + * stream on the Tor network. The only non-tunneled connections are those + * that are used to upload material (descriptors and votes) to authorities. + * Among tunneled connections, some use one-hop circuits, and others use + * multi-hop circuits for anonymity. + * + * Directory requests are launched by calling + * directory_initiate_request(). This + * launch the connection, will construct an HTTP request with + * directory_send_command(), send the and wait for a response. The client + * later handles the response with connection_dir_client_reached_eof(), + * which passes the information received to another part of Tor. + * + * On the server side, requests are read in directory_handle_command(), + * which dispatches first on the request type (GET or POST), and then on + * the URL requested. GET requests are processed with a table-based + * dispatcher in url_table[]. The process of handling larger GET requests + * is complicated because we need to avoid allocating a copy of all the + * data to be sent to the client in one huge buffer. Instead, we spool the + * data into the buffer using logic in connection_dirserv_flushed_some() in + * dirserv.c. (TODO: If we extended buf.c to have a zero-copy + * reference-based buffer type, we could remove most of that code, at the + * cost of a bit more reference counting.) + **/ + +/* In-points to directory.c: + * + * - directory_post_to_dirservers(), called from + * router_upload_dir_desc_to_dirservers() in router.c + * upload_service_descriptor() in rendservice.c + * - directory_get_from_dirserver(), called from + * rend_client_refetch_renddesc() in rendclient.c + * run_scheduled_events() in main.c + * do_hup() in main.c + * - connection_dir_process_inbuf(), called from + * connection_process_inbuf() in connection.c + * - connection_dir_finished_flushing(), called from + * connection_finished_flushing() in connection.c + * - connection_dir_finished_connecting(), called from + * connection_finished_connecting() in connection.c + */ +static void directory_send_command(dir_connection_t *conn, + int direct, + const directory_request_t *request); +static int body_is_plausible(const char *body, size_t body_len, int purpose); +static void http_set_address_origin(const char *headers, connection_t *conn); +static void connection_dir_download_routerdesc_failed(dir_connection_t *conn); +static void connection_dir_bridge_routerdesc_failed(dir_connection_t *conn); +static void connection_dir_download_cert_failed( + dir_connection_t *conn, int status_code); +static void connection_dir_retry_bridges(smartlist_t *descs); +static void dir_routerdesc_download_failed(smartlist_t *failed, + int status_code, + int router_purpose, + int was_extrainfo, + int was_descriptor_digests); +static void dir_microdesc_download_failed(smartlist_t *failed, + int status_code, + const char *dir_id); +static int client_likes_consensus(const struct consensus_cache_entry_t *ent, + const char *want_url); + +static void connection_dir_close_consensus_fetches( + dir_connection_t *except_this_one, const char *resource); + +/********* START VARIABLES **********/ + +/** Maximum size, in bytes, for resized buffers. */ +#define MAX_BUF_SIZE ((1<<24)-1) /* 16MB-1 */ +/** Maximum size, in bytes, for any directory object that we've downloaded. */ +#define MAX_DIR_DL_SIZE MAX_BUF_SIZE + +/** Maximum size, in bytes, for any directory object that we're accepting + * as an upload. */ +#define MAX_DIR_UL_SIZE MAX_BUF_SIZE + +/** How far in the future do we allow a directory server to tell us it is + * before deciding that one of us has the wrong time? */ +#define ALLOW_DIRECTORY_TIME_SKEW (30*60) + +#define X_ADDRESS_HEADER "X-Your-Address-Is: " +#define X_OR_DIFF_FROM_CONSENSUS_HEADER "X-Or-Diff-From-Consensus: " + +/** HTTP cache control: how long do we tell proxies they can cache each + * kind of document we serve? */ +#define FULL_DIR_CACHE_LIFETIME (60*60) +#define RUNNINGROUTERS_CACHE_LIFETIME (20*60) +#define DIRPORTFRONTPAGE_CACHE_LIFETIME (20*60) +#define NETWORKSTATUS_CACHE_LIFETIME (5*60) +#define ROUTERDESC_CACHE_LIFETIME (30*60) +#define ROUTERDESC_BY_DIGEST_CACHE_LIFETIME (48*60*60) +#define ROBOTS_CACHE_LIFETIME (24*60*60) +#define MICRODESC_CACHE_LIFETIME (48*60*60) + +/********* END VARIABLES ************/ + +/** Convert a connection_t* to a dir_connection_t*; assert if the cast is + * invalid. */ +dir_connection_t * +TO_DIR_CONN(connection_t *c) +{ + tor_assert(c->magic == DIR_CONNECTION_MAGIC); + return DOWNCAST(dir_connection_t, c); +} + +/** Return false if the directory purpose <b>dir_purpose</b> + * does not require an anonymous (three-hop) connection. + * + * Return true 1) by default, 2) if all directory actions have + * specifically been configured to be over an anonymous connection, + * or 3) if the router is a bridge */ +int +purpose_needs_anonymity(uint8_t dir_purpose, uint8_t router_purpose, + const char *resource) +{ + if (get_options()->AllDirActionsPrivate) + return 1; + + if (router_purpose == ROUTER_PURPOSE_BRIDGE) { + if (dir_purpose == DIR_PURPOSE_FETCH_SERVERDESC + && resource && !strcmp(resource, "authority.z")) { + /* We are asking a bridge for its own descriptor. That doesn't need + anonymity. */ + return 0; + } + /* Assume all other bridge stuff needs anonymity. */ + return 1; /* if no circuits yet, this might break bootstrapping, but it's + * needed to be safe. */ + } + + switch (dir_purpose) + { + case DIR_PURPOSE_UPLOAD_DIR: + case DIR_PURPOSE_UPLOAD_VOTE: + case DIR_PURPOSE_UPLOAD_SIGNATURES: + case DIR_PURPOSE_FETCH_STATUS_VOTE: + case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: + case DIR_PURPOSE_FETCH_CONSENSUS: + case DIR_PURPOSE_FETCH_CERTIFICATE: + case DIR_PURPOSE_FETCH_SERVERDESC: + case DIR_PURPOSE_FETCH_EXTRAINFO: + case DIR_PURPOSE_FETCH_MICRODESC: + return 0; + case DIR_PURPOSE_HAS_FETCHED_HSDESC: + case DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2: + case DIR_PURPOSE_UPLOAD_RENDDESC_V2: + case DIR_PURPOSE_FETCH_RENDDESC_V2: + case DIR_PURPOSE_FETCH_HSDESC: + case DIR_PURPOSE_UPLOAD_HSDESC: + return 1; + case DIR_PURPOSE_SERVER: + default: + log_warn(LD_BUG, "Called with dir_purpose=%d, router_purpose=%d", + dir_purpose, router_purpose); + tor_assert_nonfatal_unreached(); + return 1; /* Assume it needs anonymity; better safe than sorry. */ + } +} + +/** Return a newly allocated string describing <b>auth</b>. Only describes + * authority features. */ +STATIC char * +authdir_type_to_string(dirinfo_type_t auth) +{ + char *result; + smartlist_t *lst = smartlist_new(); + if (auth & V3_DIRINFO) + smartlist_add(lst, (void*)"V3"); + if (auth & BRIDGE_DIRINFO) + smartlist_add(lst, (void*)"Bridge"); + if (smartlist_len(lst)) { + result = smartlist_join_strings(lst, ", ", 0, NULL); + } else { + result = tor_strdup("[Not an authority]"); + } + smartlist_free(lst); + return result; +} + +/** Return a string describing a given directory connection purpose. */ +STATIC const char * +dir_conn_purpose_to_string(int purpose) +{ + switch (purpose) + { + case DIR_PURPOSE_UPLOAD_DIR: + return "server descriptor upload"; + case DIR_PURPOSE_UPLOAD_VOTE: + return "server vote upload"; + case DIR_PURPOSE_UPLOAD_SIGNATURES: + return "consensus signature upload"; + case DIR_PURPOSE_FETCH_SERVERDESC: + return "server descriptor fetch"; + case DIR_PURPOSE_FETCH_EXTRAINFO: + return "extra-info fetch"; + case DIR_PURPOSE_FETCH_CONSENSUS: + return "consensus network-status fetch"; + case DIR_PURPOSE_FETCH_CERTIFICATE: + return "authority cert fetch"; + case DIR_PURPOSE_FETCH_STATUS_VOTE: + return "status vote fetch"; + case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: + return "consensus signature fetch"; + case DIR_PURPOSE_FETCH_RENDDESC_V2: + return "hidden-service v2 descriptor fetch"; + case DIR_PURPOSE_UPLOAD_RENDDESC_V2: + return "hidden-service v2 descriptor upload"; + case DIR_PURPOSE_FETCH_HSDESC: + return "hidden-service descriptor fetch"; + case DIR_PURPOSE_UPLOAD_HSDESC: + return "hidden-service descriptor upload"; + case DIR_PURPOSE_FETCH_MICRODESC: + return "microdescriptor fetch"; + } + + log_warn(LD_BUG, "Called with unknown purpose %d", purpose); + return "(unknown)"; +} + +/** Return the requisite directory information types. */ +STATIC dirinfo_type_t +dir_fetch_type(int dir_purpose, int router_purpose, const char *resource) +{ + dirinfo_type_t type; + switch (dir_purpose) { + case DIR_PURPOSE_FETCH_EXTRAINFO: + type = EXTRAINFO_DIRINFO; + if (router_purpose == ROUTER_PURPOSE_BRIDGE) + type |= BRIDGE_DIRINFO; + else + type |= V3_DIRINFO; + break; + case DIR_PURPOSE_FETCH_SERVERDESC: + if (router_purpose == ROUTER_PURPOSE_BRIDGE) + type = BRIDGE_DIRINFO; + else + type = V3_DIRINFO; + break; + case DIR_PURPOSE_FETCH_STATUS_VOTE: + case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: + case DIR_PURPOSE_FETCH_CERTIFICATE: + type = V3_DIRINFO; + break; + case DIR_PURPOSE_FETCH_CONSENSUS: + type = V3_DIRINFO; + if (resource && !strcmp(resource, "microdesc")) + type |= MICRODESC_DIRINFO; + break; + case DIR_PURPOSE_FETCH_MICRODESC: + type = MICRODESC_DIRINFO; + break; + default: + log_warn(LD_BUG, "Unexpected purpose %d", (int)dir_purpose); + type = NO_DIRINFO; + break; + } + return type; +} + +/** Return true iff <b>identity_digest</b> is the digest of a router which + * says that it caches extrainfos. (If <b>is_authority</b> we always + * believe that to be true.) */ +int +router_supports_extrainfo(const char *identity_digest, int is_authority) +{ + const node_t *node = node_get_by_id(identity_digest); + + if (node && node->ri) { + if (node->ri->caches_extra_info) + return 1; + } + if (is_authority) { + return 1; + } + return 0; +} + +/** Return true iff any trusted directory authority has accepted our + * server descriptor. + * + * We consider any authority sufficient because waiting for all of + * them means it never happens while any authority is down; we don't + * go for something more complex in the middle (like \>1/3 or \>1/2 or + * \>=1/2) because that doesn't seem necessary yet. + */ +int +directories_have_accepted_server_descriptor(void) +{ + const smartlist_t *servers = router_get_trusted_dir_servers(); + const or_options_t *options = get_options(); + SMARTLIST_FOREACH(servers, dir_server_t *, d, { + if ((d->type & options->PublishServerDescriptor_) && + d->has_accepted_serverdesc) { + return 1; + } + }); + return 0; +} + +/** Start a connection to every suitable directory authority, using + * connection purpose <b>dir_purpose</b> and uploading <b>payload</b> + * (of length <b>payload_len</b>). The dir_purpose should be one of + * 'DIR_PURPOSE_UPLOAD_{DIR|VOTE|SIGNATURES}'. + * + * <b>router_purpose</b> describes the type of descriptor we're + * publishing, if we're publishing a descriptor -- e.g. general or bridge. + * + * <b>type</b> specifies what sort of dir authorities (V3, + * BRIDGE, etc) we should upload to. + * + * If <b>extrainfo_len</b> is nonzero, the first <b>payload_len</b> bytes of + * <b>payload</b> hold a router descriptor, and the next <b>extrainfo_len</b> + * bytes of <b>payload</b> hold an extra-info document. Upload the descriptor + * to all authorities, and the extra-info document to all authorities that + * support it. + */ +void +directory_post_to_dirservers(uint8_t dir_purpose, uint8_t router_purpose, + dirinfo_type_t type, + const char *payload, + size_t payload_len, size_t extrainfo_len) +{ + const or_options_t *options = get_options(); + dir_indirection_t indirection; + const smartlist_t *dirservers = router_get_trusted_dir_servers(); + int found = 0; + const int exclude_self = (dir_purpose == DIR_PURPOSE_UPLOAD_VOTE || + dir_purpose == DIR_PURPOSE_UPLOAD_SIGNATURES); + tor_assert(dirservers); + /* This tries dirservers which we believe to be down, but ultimately, that's + * harmless, and we may as well err on the side of getting things uploaded. + */ + SMARTLIST_FOREACH_BEGIN(dirservers, dir_server_t *, ds) { + routerstatus_t *rs = &(ds->fake_status); + size_t upload_len = payload_len; + + if ((type & ds->type) == 0) + continue; + + if (exclude_self && router_digest_is_me(ds->digest)) { + /* we don't upload to ourselves, but at least there's now at least + * one authority of this type that has what we wanted to upload. */ + found = 1; + continue; + } + + if (options->StrictNodes && + routerset_contains_routerstatus(options->ExcludeNodes, rs, -1)) { + log_warn(LD_DIR, "Wanted to contact authority '%s' for %s, but " + "it's in our ExcludedNodes list and StrictNodes is set. " + "Skipping.", + ds->nickname, + dir_conn_purpose_to_string(dir_purpose)); + continue; + } + + found = 1; /* at least one authority of this type was listed */ + if (dir_purpose == DIR_PURPOSE_UPLOAD_DIR) + ds->has_accepted_serverdesc = 0; + + if (extrainfo_len && router_supports_extrainfo(ds->digest, 1)) { + upload_len += extrainfo_len; + log_info(LD_DIR, "Uploading an extrainfo too (length %d)", + (int) extrainfo_len); + } + if (purpose_needs_anonymity(dir_purpose, router_purpose, NULL)) { + indirection = DIRIND_ANONYMOUS; + } else if (!fascist_firewall_allows_dir_server(ds, + FIREWALL_DIR_CONNECTION, + 0)) { + if (fascist_firewall_allows_dir_server(ds, FIREWALL_OR_CONNECTION, 0)) + indirection = DIRIND_ONEHOP; + else + indirection = DIRIND_ANONYMOUS; + } else { + indirection = DIRIND_DIRECT_CONN; + } + + directory_request_t *req = directory_request_new(dir_purpose); + directory_request_set_routerstatus(req, rs); + directory_request_set_router_purpose(req, router_purpose); + directory_request_set_indirection(req, indirection); + directory_request_set_payload(req, payload, upload_len); + directory_initiate_request(req); + directory_request_free(req); + } SMARTLIST_FOREACH_END(ds); + if (!found) { + char *s = authdir_type_to_string(type); + log_warn(LD_DIR, "Publishing server descriptor to directory authorities " + "of type '%s', but no authorities of that type listed!", s); + tor_free(s); + } +} + +/** Return true iff, according to the values in <b>options</b>, we should be + * using directory guards for direct downloads of directory information. */ +STATIC int +should_use_directory_guards(const or_options_t *options) +{ + /* Public (non-bridge) servers never use directory guards. */ + if (public_server_mode(options)) + return 0; + /* If guards are disabled, we can't use directory guards. + */ + if (!options->UseEntryGuards) + return 0; + /* If we're configured to fetch directory info aggressively or of a + * nonstandard type, don't use directory guards. */ + if (options->DownloadExtraInfo || options->FetchDirInfoEarly || + options->FetchDirInfoExtraEarly || options->FetchUselessDescriptors) + return 0; + return 1; +} + +/** Pick an unconstrained directory server from among our guards, the latest + * networkstatus, or the fallback dirservers, for use in downloading + * information of type <b>type</b>, and return its routerstatus. */ +static const routerstatus_t * +directory_pick_generic_dirserver(dirinfo_type_t type, int pds_flags, + uint8_t dir_purpose, + circuit_guard_state_t **guard_state_out) +{ + const routerstatus_t *rs = NULL; + const or_options_t *options = get_options(); + + if (options->UseBridges) + log_warn(LD_BUG, "Called when we have UseBridges set."); + + if (should_use_directory_guards(options)) { + const node_t *node = guards_choose_dirguard(dir_purpose, guard_state_out); + if (node) + rs = node->rs; + } else { + /* anybody with a non-zero dirport will do */ + rs = router_pick_directory_server(type, pds_flags); + } + if (!rs) { + log_info(LD_DIR, "No router found for %s; falling back to " + "dirserver list.", dir_conn_purpose_to_string(dir_purpose)); + rs = router_pick_fallback_dirserver(type, pds_flags); + } + + return rs; +} + +/** + * Set the extra fields in <b>req</b> that are used when requesting a + * consensus of type <b>resource</b>. + * + * Right now, these fields are if-modified-since and x-or-diff-from-consensus. + */ +static void +dir_consensus_request_set_additional_headers(directory_request_t *req, + const char *resource) +{ + time_t if_modified_since = 0; + uint8_t or_diff_from[DIGEST256_LEN]; + int or_diff_from_is_set = 0; + + /* DEFAULT_IF_MODIFIED_SINCE_DELAY is 1/20 of the default consensus + * period of 1 hour. + */ + const int DEFAULT_IF_MODIFIED_SINCE_DELAY = 180; + const int32_t DEFAULT_TRY_DIFF_FOR_CONSENSUS_NEWER = 72; + const int32_t MIN_TRY_DIFF_FOR_CONSENSUS_NEWER = 0; + const int32_t MAX_TRY_DIFF_FOR_CONSENSUS_NEWER = 8192; + const char TRY_DIFF_FOR_CONSENSUS_NEWER_NAME[] = + "try-diff-for-consensus-newer-than"; + + int flav = FLAV_NS; + if (resource) + flav = networkstatus_parse_flavor_name(resource); + + int32_t max_age_for_diff = 3600 * + networkstatus_get_param(NULL, + TRY_DIFF_FOR_CONSENSUS_NEWER_NAME, + DEFAULT_TRY_DIFF_FOR_CONSENSUS_NEWER, + MIN_TRY_DIFF_FOR_CONSENSUS_NEWER, + MAX_TRY_DIFF_FOR_CONSENSUS_NEWER); + + if (flav != -1) { + /* IF we have a parsed consensus of this type, we can do an + * if-modified-time based on it. */ + networkstatus_t *v; + v = networkstatus_get_latest_consensus_by_flavor(flav); + if (v) { + /* In networks with particularly short V3AuthVotingIntervals, + * ask for the consensus if it's been modified since half the + * V3AuthVotingInterval of the most recent consensus. */ + time_t ims_delay = DEFAULT_IF_MODIFIED_SINCE_DELAY; + if (v->fresh_until > v->valid_after + && ims_delay > (v->fresh_until - v->valid_after)/2) { + ims_delay = (v->fresh_until - v->valid_after)/2; + } + if_modified_since = v->valid_after + ims_delay; + if (v->valid_after >= approx_time() - max_age_for_diff) { + memcpy(or_diff_from, v->digest_sha3_as_signed, DIGEST256_LEN); + or_diff_from_is_set = 1; + } + } + } else { + /* Otherwise it might be a consensus we don't parse, but which we + * do cache. Look at the cached copy, perhaps. */ + cached_dir_t *cd = dirserv_get_consensus(resource); + /* We have no method of determining the voting interval from an + * unparsed consensus, so we use the default. */ + if (cd) { + if_modified_since = cd->published + DEFAULT_IF_MODIFIED_SINCE_DELAY; + if (cd->published >= approx_time() - max_age_for_diff) { + memcpy(or_diff_from, cd->digest_sha3_as_signed, DIGEST256_LEN); + or_diff_from_is_set = 1; + } + } + } + + if (if_modified_since > 0) + directory_request_set_if_modified_since(req, if_modified_since); + if (or_diff_from_is_set) { + char hex[HEX_DIGEST256_LEN + 1]; + base16_encode(hex, sizeof(hex), + (const char*)or_diff_from, sizeof(or_diff_from)); + directory_request_add_header(req, X_OR_DIFF_FROM_CONSENSUS_HEADER, hex); + } +} + +/** Start a connection to a random running directory server, using + * connection purpose <b>dir_purpose</b>, intending to fetch descriptors + * of purpose <b>router_purpose</b>, and requesting <b>resource</b>. + * Use <b>pds_flags</b> as arguments to router_pick_directory_server() + * or router_pick_trusteddirserver(). + */ +MOCK_IMPL(void, +directory_get_from_dirserver,( + uint8_t dir_purpose, + uint8_t router_purpose, + const char *resource, + int pds_flags, + download_want_authority_t want_authority)) +{ + const routerstatus_t *rs = NULL; + const or_options_t *options = get_options(); + int prefer_authority = (directory_fetches_from_authorities(options) + || want_authority == DL_WANT_AUTHORITY); + int require_authority = 0; + int get_via_tor = purpose_needs_anonymity(dir_purpose, router_purpose, + resource); + dirinfo_type_t type = dir_fetch_type(dir_purpose, router_purpose, resource); + + if (type == NO_DIRINFO) + return; + + if (!options->FetchServerDescriptors) + return; + + circuit_guard_state_t *guard_state = NULL; + if (!get_via_tor) { + if (options->UseBridges && !(type & BRIDGE_DIRINFO)) { + /* We want to ask a running bridge for which we have a descriptor. + * + * When we ask choose_random_entry() for a bridge, we specify what + * sort of dir fetch we'll be doing, so it won't return a bridge + * that can't answer our question. + */ + const node_t *node = guards_choose_dirguard(dir_purpose, &guard_state); + if (node && node->ri) { + /* every bridge has a routerinfo. */ + routerinfo_t *ri = node->ri; + /* clients always make OR connections to bridges */ + tor_addr_port_t or_ap; + directory_request_t *req = directory_request_new(dir_purpose); + /* we are willing to use a non-preferred address if we need to */ + fascist_firewall_choose_address_node(node, FIREWALL_OR_CONNECTION, 0, + &or_ap); + directory_request_set_or_addr_port(req, &or_ap); + directory_request_set_directory_id_digest(req, + ri->cache_info.identity_digest); + directory_request_set_router_purpose(req, router_purpose); + directory_request_set_resource(req, resource); + if (dir_purpose == DIR_PURPOSE_FETCH_CONSENSUS) + dir_consensus_request_set_additional_headers(req, resource); + directory_request_set_guard_state(req, guard_state); + directory_initiate_request(req); + directory_request_free(req); + } else { + if (guard_state) { + entry_guard_cancel(&guard_state); + } + log_notice(LD_DIR, "Ignoring directory request, since no bridge " + "nodes are available yet."); + } + + return; + } else { + if (prefer_authority || (type & BRIDGE_DIRINFO)) { + /* only ask authdirservers, and don't ask myself */ + rs = router_pick_trusteddirserver(type, pds_flags); + if (rs == NULL && (pds_flags & (PDS_NO_EXISTING_SERVERDESC_FETCH| + PDS_NO_EXISTING_MICRODESC_FETCH))) { + /* We don't want to fetch from any authorities that we're currently + * fetching server descriptors from, and we got no match. Did we + * get no match because all the authorities have connections + * fetching server descriptors (in which case we should just + * return,) or because all the authorities are down or on fire or + * unreachable or something (in which case we should go on with + * our fallback code)? */ + pds_flags &= ~(PDS_NO_EXISTING_SERVERDESC_FETCH| + PDS_NO_EXISTING_MICRODESC_FETCH); + rs = router_pick_trusteddirserver(type, pds_flags); + if (rs) { + log_debug(LD_DIR, "Deferring serverdesc fetch: all authorities " + "are in use."); + return; + } + } + if (rs == NULL && require_authority) { + log_info(LD_DIR, "No authorities were available for %s: will try " + "later.", dir_conn_purpose_to_string(dir_purpose)); + return; + } + } + if (!rs && !(type & BRIDGE_DIRINFO)) { + rs = directory_pick_generic_dirserver(type, pds_flags, + dir_purpose, + &guard_state); + if (!rs) + get_via_tor = 1; /* last resort: try routing it via Tor */ + } + } + } + + if (get_via_tor) { + /* Never use fascistfirewall; we're going via Tor. */ + pds_flags |= PDS_IGNORE_FASCISTFIREWALL; + rs = router_pick_directory_server(type, pds_flags); + } + + /* If we have any hope of building an indirect conn, we know some router + * descriptors. If (rs==NULL), we can't build circuits anyway, so + * there's no point in falling back to the authorities in this case. */ + if (rs) { + const dir_indirection_t indirection = + get_via_tor ? DIRIND_ANONYMOUS : DIRIND_ONEHOP; + directory_request_t *req = directory_request_new(dir_purpose); + directory_request_set_routerstatus(req, rs); + directory_request_set_router_purpose(req, router_purpose); + directory_request_set_indirection(req, indirection); + directory_request_set_resource(req, resource); + if (dir_purpose == DIR_PURPOSE_FETCH_CONSENSUS) + dir_consensus_request_set_additional_headers(req, resource); + if (guard_state) + directory_request_set_guard_state(req, guard_state); + directory_initiate_request(req); + directory_request_free(req); + } else { + log_notice(LD_DIR, + "While fetching directory info, " + "no running dirservers known. Will try again later. " + "(purpose %d)", dir_purpose); + if (!purpose_needs_anonymity(dir_purpose, router_purpose, resource)) { + /* remember we tried them all and failed. */ + directory_all_unreachable(time(NULL)); + } + } +} + +/** As directory_get_from_dirserver, but initiates a request to <i>every</i> + * directory authority other than ourself. Only for use by authorities when + * searching for missing information while voting. */ +void +directory_get_from_all_authorities(uint8_t dir_purpose, + uint8_t router_purpose, + const char *resource) +{ + tor_assert(dir_purpose == DIR_PURPOSE_FETCH_STATUS_VOTE || + dir_purpose == DIR_PURPOSE_FETCH_DETACHED_SIGNATURES); + + SMARTLIST_FOREACH_BEGIN(router_get_trusted_dir_servers(), + dir_server_t *, ds) { + if (router_digest_is_me(ds->digest)) + continue; + if (!(ds->type & V3_DIRINFO)) + continue; + const routerstatus_t *rs = &ds->fake_status; + directory_request_t *req = directory_request_new(dir_purpose); + directory_request_set_routerstatus(req, rs); + directory_request_set_router_purpose(req, router_purpose); + directory_request_set_resource(req, resource); + directory_initiate_request(req); + directory_request_free(req); + } SMARTLIST_FOREACH_END(ds); +} + +/** Return true iff <b>ind</b> requires a multihop circuit. */ +static int +dirind_is_anon(dir_indirection_t ind) +{ + return ind == DIRIND_ANON_DIRPORT || ind == DIRIND_ANONYMOUS; +} + +/* Choose reachable OR and Dir addresses and ports from status, copying them + * into use_or_ap and use_dir_ap. If indirection is anonymous, then we're + * connecting via another relay, so choose the primary IPv4 address and ports. + * + * status should have at least one reachable address, if we can't choose a + * reachable address, warn and return -1. Otherwise, return 0. + */ +static int +directory_choose_address_routerstatus(const routerstatus_t *status, + dir_indirection_t indirection, + tor_addr_port_t *use_or_ap, + tor_addr_port_t *use_dir_ap) +{ + tor_assert(status != NULL); + tor_assert(use_or_ap != NULL); + tor_assert(use_dir_ap != NULL); + + const or_options_t *options = get_options(); + int have_or = 0, have_dir = 0; + + /* We expect status to have at least one reachable address if we're + * connecting to it directly. + * + * Therefore, we can simply use the other address if the one we want isn't + * allowed by the firewall. + * + * (When Tor uploads and downloads a hidden service descriptor, it uses + * DIRIND_ANONYMOUS, except for Tor2Web, which uses DIRIND_ONEHOP. + * So this code will only modify the address for Tor2Web's HS descriptor + * fetches. Even Single Onion Servers (NYI) use DIRIND_ANONYMOUS, to avoid + * HSDirs denying service by rejecting descriptors.) + */ + + /* Initialise the OR / Dir addresses */ + tor_addr_make_null(&use_or_ap->addr, AF_UNSPEC); + use_or_ap->port = 0; + tor_addr_make_null(&use_dir_ap->addr, AF_UNSPEC); + use_dir_ap->port = 0; + + /* ORPort connections */ + if (indirection == DIRIND_ANONYMOUS) { + if (status->addr) { + /* Since we're going to build a 3-hop circuit and ask the 2nd relay + * to extend to this address, always use the primary (IPv4) OR address */ + tor_addr_from_ipv4h(&use_or_ap->addr, status->addr); + use_or_ap->port = status->or_port; + have_or = 1; + } + } else if (indirection == DIRIND_ONEHOP) { + /* We use an IPv6 address if we have one and we prefer it. + * Use the preferred address and port if they are reachable, otherwise, + * use the alternate address and port (if any). + */ + fascist_firewall_choose_address_rs(status, FIREWALL_OR_CONNECTION, 0, + use_or_ap); + have_or = tor_addr_port_is_valid_ap(use_or_ap, 0); + } + + /* DirPort connections + * DIRIND_ONEHOP uses ORPort, but may fall back to the DirPort on relays */ + if (indirection == DIRIND_DIRECT_CONN || + indirection == DIRIND_ANON_DIRPORT || + (indirection == DIRIND_ONEHOP + && !directory_must_use_begindir(options))) { + fascist_firewall_choose_address_rs(status, FIREWALL_DIR_CONNECTION, 0, + use_dir_ap); + have_dir = tor_addr_port_is_valid_ap(use_dir_ap, 0); + } + + /* We rejected all addresses in the relay's status. This means we can't + * connect to it. */ + if (!have_or && !have_dir) { + static int logged_backtrace = 0; + log_info(LD_BUG, "Rejected all OR and Dir addresses from %s when " + "launching an outgoing directory connection to: IPv4 %s OR %d " + "Dir %d IPv6 %s OR %d Dir %d", routerstatus_describe(status), + fmt_addr32(status->addr), status->or_port, + status->dir_port, fmt_addr(&status->ipv6_addr), + status->ipv6_orport, status->dir_port); + if (!logged_backtrace) { + log_backtrace(LOG_INFO, LD_BUG, "Addresses came from"); + logged_backtrace = 1; + } + return -1; + } + + return 0; +} + +/** Return true iff <b>conn</b> is the client side of a directory connection + * we launched to ourself in order to determine the reachability of our + * dir_port. */ +static int +directory_conn_is_self_reachability_test(dir_connection_t *conn) +{ + if (conn->requested_resource && + !strcmpstart(conn->requested_resource,"authority")) { + const routerinfo_t *me = router_get_my_routerinfo(); + if (me && + router_digest_is_me(conn->identity_digest) && + tor_addr_eq_ipv4h(&conn->base_.addr, me->addr) && /*XXXX prop 118*/ + me->dir_port == conn->base_.port) + return 1; + } + return 0; +} + +/** Called when we are unable to complete the client's request to a directory + * server due to a network error: Mark the router as down and try again if + * possible. + */ +static void +connection_dir_request_failed(dir_connection_t *conn) +{ + if (conn->guard_state) { + /* We haven't seen a success on this guard state, so consider it to have + * failed. */ + entry_guard_failed(&conn->guard_state); + } + if (directory_conn_is_self_reachability_test(conn)) { + return; /* this was a test fetch. don't retry. */ + } + if (!entry_list_is_constrained(get_options())) + router_set_status(conn->identity_digest, 0); /* don't try this one again */ + if (conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC || + conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO) { + log_info(LD_DIR, "Giving up on serverdesc/extrainfo fetch from " + "directory server at '%s'; retrying", + conn->base_.address); + if (conn->router_purpose == ROUTER_PURPOSE_BRIDGE) + connection_dir_bridge_routerdesc_failed(conn); + connection_dir_download_routerdesc_failed(conn); + } else if (conn->base_.purpose == DIR_PURPOSE_FETCH_CONSENSUS) { + if (conn->requested_resource) + networkstatus_consensus_download_failed(0, conn->requested_resource); + } else if (conn->base_.purpose == DIR_PURPOSE_FETCH_CERTIFICATE) { + log_info(LD_DIR, "Giving up on certificate fetch from directory server " + "at '%s'; retrying", + conn->base_.address); + connection_dir_download_cert_failed(conn, 0); + } else if (conn->base_.purpose == DIR_PURPOSE_FETCH_DETACHED_SIGNATURES) { + log_info(LD_DIR, "Giving up downloading detached signatures from '%s'", + conn->base_.address); + } else if (conn->base_.purpose == DIR_PURPOSE_FETCH_STATUS_VOTE) { + log_info(LD_DIR, "Giving up downloading votes from '%s'", + conn->base_.address); + } else if (conn->base_.purpose == DIR_PURPOSE_FETCH_MICRODESC) { + log_info(LD_DIR, "Giving up on downloading microdescriptors from " + "directory server at '%s'; will retry", conn->base_.address); + connection_dir_download_routerdesc_failed(conn); + } +} + +/** Helper: Attempt to fetch directly the descriptors of each bridge + * listed in <b>failed</b>. + */ +static void +connection_dir_retry_bridges(smartlist_t *descs) +{ + char digest[DIGEST_LEN]; + SMARTLIST_FOREACH(descs, const char *, cp, + { + if (base16_decode(digest, DIGEST_LEN, cp, strlen(cp)) != DIGEST_LEN) { + log_warn(LD_BUG, "Malformed fingerprint in list: %s", + escaped(cp)); + continue; + } + retry_bridge_descriptor_fetch_directly(digest); + }); +} + +/** Called when an attempt to download one or more router descriptors + * or extra-info documents on connection <b>conn</b> failed. + */ +static void +connection_dir_download_routerdesc_failed(dir_connection_t *conn) +{ + /* No need to increment the failure count for routerdescs, since + * it's not their fault. */ + + /* No need to relaunch descriptor downloads here: we already do it + * every 10 or 60 seconds (FOO_DESCRIPTOR_RETRY_INTERVAL) in main.c. */ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC || + conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO || + conn->base_.purpose == DIR_PURPOSE_FETCH_MICRODESC); + + (void) conn; +} + +/** Called when an attempt to download a bridge's routerdesc from + * one of the authorities failed due to a network error. If + * possible attempt to download descriptors from the bridge directly. + */ +static void +connection_dir_bridge_routerdesc_failed(dir_connection_t *conn) +{ + smartlist_t *which = NULL; + + /* Requests for bridge descriptors are in the form 'fp/', so ignore + anything else. */ + if (!conn->requested_resource || strcmpstart(conn->requested_resource,"fp/")) + return; + + which = smartlist_new(); + dir_split_resource_into_fingerprints(conn->requested_resource + + strlen("fp/"), + which, NULL, 0); + + tor_assert(conn->base_.purpose != DIR_PURPOSE_FETCH_EXTRAINFO); + if (smartlist_len(which)) { + connection_dir_retry_bridges(which); + SMARTLIST_FOREACH(which, char *, cp, tor_free(cp)); + } + smartlist_free(which); +} + +/** Called when an attempt to fetch a certificate fails. */ +static void +connection_dir_download_cert_failed(dir_connection_t *conn, int status) +{ + const char *fp_pfx = "fp/"; + const char *fpsk_pfx = "fp-sk/"; + smartlist_t *failed; + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_CERTIFICATE); + + if (!conn->requested_resource) + return; + failed = smartlist_new(); + /* + * We have two cases download by fingerprint (resource starts + * with "fp/") or download by fingerprint/signing key pair + * (resource starts with "fp-sk/"). + */ + if (!strcmpstart(conn->requested_resource, fp_pfx)) { + /* Download by fingerprint case */ + dir_split_resource_into_fingerprints(conn->requested_resource + + strlen(fp_pfx), + failed, NULL, DSR_HEX); + SMARTLIST_FOREACH_BEGIN(failed, char *, cp) { + /* Null signing key digest indicates download by fp only */ + authority_cert_dl_failed(cp, NULL, status); + tor_free(cp); + } SMARTLIST_FOREACH_END(cp); + } else if (!strcmpstart(conn->requested_resource, fpsk_pfx)) { + /* Download by (fp,sk) pairs */ + dir_split_resource_into_fingerprint_pairs(conn->requested_resource + + strlen(fpsk_pfx), failed); + SMARTLIST_FOREACH_BEGIN(failed, fp_pair_t *, cp) { + authority_cert_dl_failed(cp->first, cp->second, status); + tor_free(cp); + } SMARTLIST_FOREACH_END(cp); + } else { + log_warn(LD_DIR, + "Don't know what to do with failure for cert fetch %s", + conn->requested_resource); + } + + smartlist_free(failed); + + update_certificate_downloads(time(NULL)); +} + +/* Should this tor instance only use begindir for all its directory requests? + */ +int +directory_must_use_begindir(const or_options_t *options) +{ + /* Clients, onion services, and bridges must use begindir, + * relays and authorities do not have to */ + return !public_server_mode(options); +} + +/** Evaluate the situation and decide if we should use an encrypted + * "begindir-style" connection for this directory request. + * 0) If there is no DirPort, yes. + * 1) If or_port is 0, or it's a direct conn and or_port is firewalled + * or we're a dir mirror, no. + * 2) If we prefer to avoid begindir conns, and we're not fetching or + * publishing a bridge relay descriptor, no. + * 3) Else yes. + * If returning 0, return in *reason why we can't use begindir. + * reason must not be NULL. + */ +static int +directory_command_should_use_begindir(const or_options_t *options, + const directory_request_t *req, + const char **reason) +{ + const tor_addr_t *or_addr = &req->or_addr_port.addr; + //const tor_addr_t *dir_addr = &req->dir_addr_port.addr; + const int or_port = req->or_addr_port.port; + const int dir_port = req->dir_addr_port.port; + + const dir_indirection_t indirection = req->indirection; + + tor_assert(reason); + *reason = NULL; + + /* Reasons why we must use begindir */ + if (!dir_port) { + *reason = "(using begindir - directory with no DirPort)"; + return 1; /* We don't know a DirPort -- must begindir. */ + } + /* Reasons why we can't possibly use begindir */ + if (!or_port) { + *reason = "directory with unknown ORPort"; + return 0; /* We don't know an ORPort -- no chance. */ + } + if (indirection == DIRIND_DIRECT_CONN || + indirection == DIRIND_ANON_DIRPORT) { + *reason = "DirPort connection"; + return 0; + } + if (indirection == DIRIND_ONEHOP) { + /* We're firewalled and want a direct OR connection */ + if (!fascist_firewall_allows_address_addr(or_addr, or_port, + FIREWALL_OR_CONNECTION, 0, 0)) { + *reason = "ORPort not reachable"; + return 0; + } + } + /* Reasons why we want to avoid using begindir */ + if (indirection == DIRIND_ONEHOP) { + if (!directory_must_use_begindir(options)) { + *reason = "in relay mode"; + return 0; + } + } + /* DIRIND_ONEHOP on a client, or DIRIND_ANONYMOUS + */ + *reason = "(using begindir)"; + return 1; +} + +/** + * Create and return a new directory_request_t with purpose + * <b>dir_purpose</b>. + */ +directory_request_t * +directory_request_new(uint8_t dir_purpose) +{ + tor_assert(dir_purpose >= DIR_PURPOSE_MIN_); + tor_assert(dir_purpose <= DIR_PURPOSE_MAX_); + tor_assert(dir_purpose != DIR_PURPOSE_SERVER); + tor_assert(dir_purpose != DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2); + tor_assert(dir_purpose != DIR_PURPOSE_HAS_FETCHED_HSDESC); + + directory_request_t *result = tor_malloc_zero(sizeof(*result)); + tor_addr_make_null(&result->or_addr_port.addr, AF_INET); + result->or_addr_port.port = 0; + tor_addr_make_null(&result->dir_addr_port.addr, AF_INET); + result->dir_addr_port.port = 0; + result->dir_purpose = dir_purpose; + result->router_purpose = ROUTER_PURPOSE_GENERAL; + result->indirection = DIRIND_ONEHOP; + return result; +} +/** + * Release all resources held by <b>req</b>. + */ +void +directory_request_free_(directory_request_t *req) +{ + if (req == NULL) + return; + config_free_lines(req->additional_headers); + tor_free(req); +} +/** + * Set the address and OR port to use for this directory request. If there is + * no OR port, we'll have to connect over the dirport. (If there are both, + * the indirection setting determines which to use.) + */ +void +directory_request_set_or_addr_port(directory_request_t *req, + const tor_addr_port_t *p) +{ + memcpy(&req->or_addr_port, p, sizeof(*p)); +} +/** + * Set the address and dirport to use for this directory request. If there + * is no dirport, we'll have to connect over the OR port. (If there are both, + * the indirection setting determines which to use.) + */ +void +directory_request_set_dir_addr_port(directory_request_t *req, + const tor_addr_port_t *p) +{ + memcpy(&req->dir_addr_port, p, sizeof(*p)); +} +/** + * Set the RSA identity digest of the directory to use for this directory + * request. + */ +void +directory_request_set_directory_id_digest(directory_request_t *req, + const char *digest) +{ + memcpy(req->digest, digest, DIGEST_LEN); +} +/** + * Set the router purpose associated with uploaded and downloaded router + * descriptors and extrainfo documents in this directory request. The purpose + * must be one of ROUTER_PURPOSE_GENERAL (the default) or + * ROUTER_PURPOSE_BRIDGE. + */ +void +directory_request_set_router_purpose(directory_request_t *req, + uint8_t router_purpose) +{ + tor_assert(router_purpose == ROUTER_PURPOSE_GENERAL || + router_purpose == ROUTER_PURPOSE_BRIDGE); + // assert that it actually makes sense to set this purpose, given + // the dir_purpose. + req->router_purpose = router_purpose; +} +/** + * Set the indirection to be used for the directory request. The indirection + * parameter configures whether to connect to a DirPort or ORPort, and whether + * to anonymize the connection. DIRIND_ONEHOP (use ORPort, don't anonymize) + * is the default. See dir_indirection_t for more information. + */ +void +directory_request_set_indirection(directory_request_t *req, + dir_indirection_t indirection) +{ + req->indirection = indirection; +} + +/** + * Set a pointer to the resource to request from a directory. Different + * request types use resources to indicate different components of their URL. + * Note that only an alias to <b>resource</b> is stored, so the + * <b>resource</b> must outlive the request. + */ +void +directory_request_set_resource(directory_request_t *req, + const char *resource) +{ + req->resource = resource; +} +/** + * Set a pointer to the payload to include with this directory request, along + * with its length. Note that only an alias to <b>payload</b> is stored, so + * the <b>payload</b> must outlive the request. + */ +void +directory_request_set_payload(directory_request_t *req, + const char *payload, + size_t payload_len) +{ + tor_assert(DIR_PURPOSE_IS_UPLOAD(req->dir_purpose)); + + req->payload = payload; + req->payload_len = payload_len; +} +/** + * Set an if-modified-since date to send along with the request. The + * default is 0 (meaning, send no if-modified-since header). + */ +void +directory_request_set_if_modified_since(directory_request_t *req, + time_t if_modified_since) +{ + req->if_modified_since = if_modified_since; +} + +/** Include a header of name <b>key</b> with content <b>val</b> in the + * request. Neither may include newlines or other odd characters. Their + * ordering is not currently guaranteed. + * + * Note that, as elsewhere in this module, header keys include a trailing + * colon and space. + */ +void +directory_request_add_header(directory_request_t *req, + const char *key, + const char *val) +{ + config_line_prepend(&req->additional_headers, key, val); +} +/** + * Set an object containing HS data to be associated with this request. Note + * that only an alias to <b>query</b> is stored, so the <b>query</b> object + * must outlive the request. + */ +void +directory_request_set_rend_query(directory_request_t *req, + const rend_data_t *query) +{ + if (query) { + tor_assert(req->dir_purpose == DIR_PURPOSE_FETCH_RENDDESC_V2 || + req->dir_purpose == DIR_PURPOSE_UPLOAD_RENDDESC_V2); + } + req->rend_query = query; +} +/** + * Set an object containing HS connection identifier to be associated with + * this request. Note that only an alias to <b>ident</b> is stored, so the + * <b>ident</b> object must outlive the request. + */ +void +directory_request_upload_set_hs_ident(directory_request_t *req, + const hs_ident_dir_conn_t *ident) +{ + if (ident) { + tor_assert(req->dir_purpose == DIR_PURPOSE_UPLOAD_HSDESC); + } + req->hs_ident = ident; +} +/** + * Set an object containing HS connection identifier to be associated with + * this fetch request. Note that only an alias to <b>ident</b> is stored, so + * the <b>ident</b> object must outlive the request. + */ +void +directory_request_fetch_set_hs_ident(directory_request_t *req, + const hs_ident_dir_conn_t *ident) +{ + if (ident) { + tor_assert(req->dir_purpose == DIR_PURPOSE_FETCH_HSDESC); + } + req->hs_ident = ident; +} +/** Set a static circuit_guard_state_t object to affliate with the request in + * <b>req</b>. This object will receive notification when the attempt to + * connect to the guard either succeeds or fails. */ +void +directory_request_set_guard_state(directory_request_t *req, + circuit_guard_state_t *state) +{ + req->guard_state = state; +} + +/** + * Internal: Return true if any information for contacting the directory in + * <b>req</b> has been set, other than by the routerstatus. */ +static int +directory_request_dir_contact_info_specified(const directory_request_t *req) +{ + /* We only check for ports here, since we don't use an addr unless the port + * is set */ + return (req->or_addr_port.port || + req->dir_addr_port.port || + ! tor_digest_is_zero(req->digest)); +} + +/** + * Set the routerstatus to use for the directory associated with this + * request. If this option is set, then no other function to set the + * directory's address or identity should be called. + */ +void +directory_request_set_routerstatus(directory_request_t *req, + const routerstatus_t *status) +{ + req->routerstatus = status; +} +/** + * Helper: update the addresses, ports, and identities in <b>req</b> + * from the routerstatus object in <b>req</b>. Return 0 on success. + * On failure, warn and return -1. + */ +static int +directory_request_set_dir_from_routerstatus(directory_request_t *req) + +{ + const routerstatus_t *status = req->routerstatus; + if (BUG(status == NULL)) + return -1; + const or_options_t *options = get_options(); + const node_t *node; + tor_addr_port_t use_or_ap, use_dir_ap; + const int anonymized_connection = dirind_is_anon(req->indirection); + + tor_assert(status != NULL); + + node = node_get_by_id(status->identity_digest); + + /* XXX The below check is wrong: !node means it's not in the consensus, + * but we haven't checked if we have a descriptor for it -- and also, + * we only care about the descriptor if it's a begindir-style anonymized + * connection. */ + if (!node && anonymized_connection) { + log_info(LD_DIR, "Not sending anonymized request to directory '%s'; we " + "don't have its router descriptor.", + routerstatus_describe(status)); + return -1; + } + + if (options->ExcludeNodes && options->StrictNodes && + routerset_contains_routerstatus(options->ExcludeNodes, status, -1)) { + log_warn(LD_DIR, "Wanted to contact directory mirror %s for %s, but " + "it's in our ExcludedNodes list and StrictNodes is set. " + "Skipping. This choice might make your Tor not work.", + routerstatus_describe(status), + dir_conn_purpose_to_string(req->dir_purpose)); + return -1; + } + + /* At this point, if we are a client making a direct connection to a + * directory server, we have selected a server that has at least one address + * allowed by ClientUseIPv4/6 and Reachable{"",OR,Dir}Addresses. This + * selection uses the preference in ClientPreferIPv6{OR,Dir}Port, if + * possible. (If UseBridges is set, clients always use IPv6, and prefer it + * by default.) + * + * Now choose an address that we can use to connect to the directory server. + */ + if (directory_choose_address_routerstatus(status, + req->indirection, &use_or_ap, + &use_dir_ap) < 0) { + return -1; + } + + directory_request_set_or_addr_port(req, &use_or_ap); + directory_request_set_dir_addr_port(req, &use_dir_ap); + directory_request_set_directory_id_digest(req, status->identity_digest); + return 0; +} + +/** + * Launch the provided directory request, configured in <b>request</b>. + * After this function is called, you can free <b>request</b>. + */ +MOCK_IMPL(void, +directory_initiate_request,(directory_request_t *request)) +{ + tor_assert(request); + if (request->routerstatus) { + tor_assert_nonfatal( + ! directory_request_dir_contact_info_specified(request)); + if (directory_request_set_dir_from_routerstatus(request) < 0) { + return; + } + } + + const tor_addr_port_t *or_addr_port = &request->or_addr_port; + const tor_addr_port_t *dir_addr_port = &request->dir_addr_port; + const char *digest = request->digest; + const uint8_t dir_purpose = request->dir_purpose; + const uint8_t router_purpose = request->router_purpose; + const dir_indirection_t indirection = request->indirection; + const char *resource = request->resource; + const rend_data_t *rend_query = request->rend_query; + const hs_ident_dir_conn_t *hs_ident = request->hs_ident; + circuit_guard_state_t *guard_state = request->guard_state; + + tor_assert(or_addr_port->port || dir_addr_port->port); + tor_assert(digest); + + dir_connection_t *conn; + const or_options_t *options = get_options(); + int socket_error = 0; + const char *begindir_reason = NULL; + /* Should the connection be to a relay's OR port (and inside that we will + * send our directory request)? */ + const int use_begindir = + directory_command_should_use_begindir(options, request, &begindir_reason); + + /* Will the connection go via a three-hop Tor circuit? Note that this + * is separate from whether it will use_begindir. */ + const int anonymized_connection = dirind_is_anon(indirection); + + /* What is the address we want to make the directory request to? If + * we're making a begindir request this is the ORPort of the relay + * we're contacting; if not a begindir request, this is its DirPort. + * Note that if anonymized_connection is true, we won't be initiating + * a connection directly to this address. */ + tor_addr_t addr; + tor_addr_copy(&addr, &(use_begindir ? or_addr_port : dir_addr_port)->addr); + uint16_t port = (use_begindir ? or_addr_port : dir_addr_port)->port; + + log_debug(LD_DIR, "anonymized %d, use_begindir %d.", + anonymized_connection, use_begindir); + + log_debug(LD_DIR, "Initiating %s", dir_conn_purpose_to_string(dir_purpose)); + + if (purpose_needs_anonymity(dir_purpose, router_purpose, resource)) { + tor_assert(anonymized_connection || + rend_non_anonymous_mode_enabled(options)); + } + + /* use encrypted begindir connections for everything except relays + * this provides better protection for directory fetches */ + if (!use_begindir && directory_must_use_begindir(options)) { + log_warn(LD_BUG, "Client could not use begindir connection: %s", + begindir_reason ? begindir_reason : "(NULL)"); + return; + } + + /* ensure that we don't make direct connections when a SOCKS server is + * configured. */ + if (!anonymized_connection && !use_begindir && !options->HTTPProxy && + (options->Socks4Proxy || options->Socks5Proxy)) { + log_warn(LD_DIR, "Cannot connect to a directory server through a " + "SOCKS proxy!"); + return; + } + + /* Make sure that the destination addr and port we picked is viable. */ + if (!port || tor_addr_is_null(&addr)) { + static int logged_backtrace = 0; + log_warn(LD_DIR, + "Cannot make an outgoing %sconnection without a remote %sPort.", + use_begindir ? "begindir " : "", + use_begindir ? "OR" : "Dir"); + if (!logged_backtrace) { + log_backtrace(LOG_INFO, LD_BUG, "Address came from"); + logged_backtrace = 1; + } + return; + } + + conn = dir_connection_new(tor_addr_family(&addr)); + + /* set up conn so it's got all the data we need to remember */ + tor_addr_copy(&conn->base_.addr, &addr); + conn->base_.port = port; + conn->base_.address = tor_addr_to_str_dup(&addr); + memcpy(conn->identity_digest, digest, DIGEST_LEN); + + conn->base_.purpose = dir_purpose; + conn->router_purpose = router_purpose; + + /* give it an initial state */ + conn->base_.state = DIR_CONN_STATE_CONNECTING; + + /* decide whether we can learn our IP address from this conn */ + /* XXXX This is a bad name for this field now. */ + conn->dirconn_direct = !anonymized_connection; + + /* copy rendezvous data, if any */ + if (rend_query) { + /* We can't have both v2 and v3+ identifier. */ + tor_assert_nonfatal(!hs_ident); + conn->rend_data = rend_data_dup(rend_query); + } + if (hs_ident) { + /* We can't have both v2 and v3+ identifier. */ + tor_assert_nonfatal(!rend_query); + conn->hs_ident = hs_ident_dir_conn_dup(hs_ident); + } + + if (!anonymized_connection && !use_begindir) { + /* then we want to connect to dirport directly */ + + if (options->HTTPProxy) { + tor_addr_copy(&addr, &options->HTTPProxyAddr); + port = options->HTTPProxyPort; + } + + // In this case we should not have picked a directory guard. + if (BUG(guard_state)) { + entry_guard_cancel(&guard_state); + } + + switch (connection_connect(TO_CONN(conn), conn->base_.address, &addr, + port, &socket_error)) { + case -1: + connection_mark_for_close(TO_CONN(conn)); + return; + case 1: + /* start flushing conn */ + conn->base_.state = DIR_CONN_STATE_CLIENT_SENDING; + /* fall through */ + case 0: + /* queue the command on the outbuf */ + directory_send_command(conn, 1, request); + connection_watch_events(TO_CONN(conn), READ_EVENT | WRITE_EVENT); + /* writable indicates finish, readable indicates broken link, + error indicates broken link in windowsland. */ + } + } else { + /* We will use a Tor circuit (maybe 1-hop, maybe 3-hop, maybe with + * begindir, maybe not with begindir) */ + + entry_connection_t *linked_conn; + + /* Anonymized tunneled connections can never share a circuit. + * One-hop directory connections can share circuits with each other + * but nothing else. */ + int iso_flags = anonymized_connection ? ISO_STREAM : ISO_SESSIONGRP; + + /* If it's an anonymized connection, remember the fact that we + * wanted it for later: maybe we'll want it again soon. */ + if (anonymized_connection && use_begindir) + rep_hist_note_used_internal(time(NULL), 0, 1); + else if (anonymized_connection && !use_begindir) + rep_hist_note_used_port(time(NULL), conn->base_.port); + + // In this case we should not have a directory guard; we'll + // get a regular guard later when we build the circuit. + if (BUG(anonymized_connection && guard_state)) { + entry_guard_cancel(&guard_state); + } + + conn->guard_state = guard_state; + + /* make an AP connection + * populate it and add it at the right state + * hook up both sides + */ + linked_conn = + connection_ap_make_link(TO_CONN(conn), + conn->base_.address, conn->base_.port, + digest, + SESSION_GROUP_DIRCONN, iso_flags, + use_begindir, !anonymized_connection); + if (!linked_conn) { + log_warn(LD_NET,"Making tunnel to dirserver failed."); + connection_mark_for_close(TO_CONN(conn)); + return; + } + + if (connection_add(TO_CONN(conn)) < 0) { + log_warn(LD_NET,"Unable to add connection for link to dirserver."); + connection_mark_for_close(TO_CONN(conn)); + return; + } + conn->base_.state = DIR_CONN_STATE_CLIENT_SENDING; + /* queue the command on the outbuf */ + directory_send_command(conn, 0, request); + + connection_watch_events(TO_CONN(conn), READ_EVENT|WRITE_EVENT); + connection_start_reading(ENTRY_TO_CONN(linked_conn)); + } +} + +/** Return true iff anything we say on <b>conn</b> is being encrypted before + * we send it to the client/server. */ +int +connection_dir_is_encrypted(const dir_connection_t *conn) +{ + /* Right now it's sufficient to see if conn is or has been linked, since + * the only thing it could be linked to is an edge connection on a + * circuit, and the only way it could have been unlinked is at the edge + * connection getting closed. + */ + return TO_CONN(conn)->linked; +} + +/** Helper for sorting + * + * sort strings alphabetically + */ +static int +compare_strs_(const void **a, const void **b) +{ + const char *s1 = *a, *s2 = *b; + return strcmp(s1, s2); +} + +#define CONDITIONAL_CONSENSUS_FPR_LEN 3 +#if (CONDITIONAL_CONSENSUS_FPR_LEN > DIGEST_LEN) +#error "conditional consensus fingerprint length is larger than digest length" +#endif + +/** Return the URL we should use for a consensus download. + * + * Use the "conditional consensus downloading" feature described in + * dir-spec.txt, i.e. + * GET .../consensus/<b>fpr</b>+<b>fpr</b>+<b>fpr</b> + * + * If 'resource' is provided, it is the name of a consensus flavor to request. + */ +static char * +directory_get_consensus_url(const char *resource) +{ + char *url = NULL; + const char *hyphen, *flavor; + if (resource==NULL || strcmp(resource, "ns")==0) { + flavor = ""; /* Request ns consensuses as "", so older servers will work*/ + hyphen = ""; + } else { + flavor = resource; + hyphen = "-"; + } + + { + char *authority_id_list; + smartlist_t *authority_digests = smartlist_new(); + + SMARTLIST_FOREACH_BEGIN(router_get_trusted_dir_servers(), + dir_server_t *, ds) { + char *hex; + if (!(ds->type & V3_DIRINFO)) + continue; + + hex = tor_malloc(2*CONDITIONAL_CONSENSUS_FPR_LEN+1); + base16_encode(hex, 2*CONDITIONAL_CONSENSUS_FPR_LEN+1, + ds->v3_identity_digest, CONDITIONAL_CONSENSUS_FPR_LEN); + smartlist_add(authority_digests, hex); + } SMARTLIST_FOREACH_END(ds); + smartlist_sort(authority_digests, compare_strs_); + authority_id_list = smartlist_join_strings(authority_digests, + "+", 0, NULL); + + tor_asprintf(&url, "/tor/status-vote/current/consensus%s%s/%s.z", + hyphen, flavor, authority_id_list); + + SMARTLIST_FOREACH(authority_digests, char *, cp, tor_free(cp)); + smartlist_free(authority_digests); + tor_free(authority_id_list); + } + return url; +} + +/** + * Copies the ipv6 from source to destination, subject to buffer size limit + * size. If decorate is true, makes sure the copied address is decorated. + */ +static void +copy_ipv6_address(char* destination, const char* source, size_t len, + int decorate) { + tor_assert(destination); + tor_assert(source); + + if (decorate && source[0] != '[') { + tor_snprintf(destination, len, "[%s]", source); + } else { + strlcpy(destination, source, len); + } +} + +/** Queue an appropriate HTTP command for <b>request</b> on + * <b>conn</b>-\>outbuf. If <b>direct</b> is true, we're making a + * non-anonymized connection to the dirport. + */ +static void +directory_send_command(dir_connection_t *conn, + const int direct, + const directory_request_t *req) +{ + tor_assert(req); + const int purpose = req->dir_purpose; + const char *resource = req->resource; + const char *payload = req->payload; + const size_t payload_len = req->payload_len; + const time_t if_modified_since = req->if_modified_since; + const int anonymized_connection = dirind_is_anon(req->indirection); + + char proxystring[256]; + char hoststring[128]; + /* NEEDS to be the same size hoststring. + Will be decorated with brackets around it if it is ipv6. */ + char decorated_address[128]; + smartlist_t *headers = smartlist_new(); + char *url; + char *accept_encoding; + size_t url_len; + char request[8192]; + size_t request_len, total_request_len = 0; + const char *httpcommand = NULL; + + tor_assert(conn); + tor_assert(conn->base_.type == CONN_TYPE_DIR); + + tor_free(conn->requested_resource); + if (resource) + conn->requested_resource = tor_strdup(resource); + + /* decorate the ip address if it is ipv6 */ + if (strchr(conn->base_.address, ':')) { + copy_ipv6_address(decorated_address, conn->base_.address, + sizeof(decorated_address), 1); + } else { + strlcpy(decorated_address, conn->base_.address, sizeof(decorated_address)); + } + + /* come up with a string for which Host: we want */ + if (conn->base_.port == 80) { + strlcpy(hoststring, decorated_address, sizeof(hoststring)); + } else { + tor_snprintf(hoststring, sizeof(hoststring), "%s:%d", + decorated_address, conn->base_.port); + } + + /* Format if-modified-since */ + if (if_modified_since) { + char b[RFC1123_TIME_LEN+1]; + format_rfc1123_time(b, if_modified_since); + smartlist_add_asprintf(headers, "If-Modified-Since: %s\r\n", b); + } + + /* come up with some proxy lines, if we're using one. */ + if (direct && get_options()->HTTPProxy) { + char *base64_authenticator=NULL; + const char *authenticator = get_options()->HTTPProxyAuthenticator; + + tor_snprintf(proxystring, sizeof(proxystring),"http://%s", hoststring); + if (authenticator) { + base64_authenticator = alloc_http_authenticator(authenticator); + if (!base64_authenticator) + log_warn(LD_BUG, "Encoding http authenticator failed"); + } + if (base64_authenticator) { + smartlist_add_asprintf(headers, + "Proxy-Authorization: Basic %s\r\n", + base64_authenticator); + tor_free(base64_authenticator); + } + } else { + proxystring[0] = 0; + } + + if (! anonymized_connection) { + /* Add Accept-Encoding. */ + accept_encoding = accept_encoding_header(); + smartlist_add_asprintf(headers, "Accept-Encoding: %s\r\n", + accept_encoding); + tor_free(accept_encoding); + } + + /* Add additional headers, if any */ + { + config_line_t *h; + for (h = req->additional_headers; h; h = h->next) { + smartlist_add_asprintf(headers, "%s%s\r\n", h->key, h->value); + } + } + + switch (purpose) { + case DIR_PURPOSE_FETCH_CONSENSUS: + /* resource is optional. If present, it's a flavor name */ + tor_assert(!payload); + httpcommand = "GET"; + url = directory_get_consensus_url(resource); + log_info(LD_DIR, "Downloading consensus from %s using %s", + hoststring, url); + break; + case DIR_PURPOSE_FETCH_CERTIFICATE: + tor_assert(resource); + tor_assert(!payload); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/keys/%s", resource); + break; + case DIR_PURPOSE_FETCH_STATUS_VOTE: + tor_assert(resource); + tor_assert(!payload); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/status-vote/next/%s.z", resource); + break; + case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: + tor_assert(!resource); + tor_assert(!payload); + httpcommand = "GET"; + url = tor_strdup("/tor/status-vote/next/consensus-signatures.z"); + break; + case DIR_PURPOSE_FETCH_SERVERDESC: + tor_assert(resource); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/server/%s", resource); + break; + case DIR_PURPOSE_FETCH_EXTRAINFO: + tor_assert(resource); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/extra/%s", resource); + break; + case DIR_PURPOSE_FETCH_MICRODESC: + tor_assert(resource); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/micro/%s", resource); + break; + case DIR_PURPOSE_UPLOAD_DIR: { + const char *why = router_get_descriptor_gen_reason(); + tor_assert(!resource); + tor_assert(payload); + httpcommand = "POST"; + url = tor_strdup("/tor/"); + if (!why) { + why = "for no reason at all"; + } + smartlist_add_asprintf(headers, "X-Desc-Gen-Reason: %s\r\n", why); + break; + } + case DIR_PURPOSE_UPLOAD_VOTE: + tor_assert(!resource); + tor_assert(payload); + httpcommand = "POST"; + url = tor_strdup("/tor/post/vote"); + break; + case DIR_PURPOSE_UPLOAD_SIGNATURES: + tor_assert(!resource); + tor_assert(payload); + httpcommand = "POST"; + url = tor_strdup("/tor/post/consensus-signature"); + break; + case DIR_PURPOSE_FETCH_RENDDESC_V2: + tor_assert(resource); + tor_assert(strlen(resource) <= REND_DESC_ID_V2_LEN_BASE32); + tor_assert(!payload); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/rendezvous2/%s", resource); + break; + case DIR_PURPOSE_FETCH_HSDESC: + tor_assert(resource); + tor_assert(strlen(resource) <= ED25519_BASE64_LEN); + tor_assert(!payload); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/hs/3/%s", resource); + break; + case DIR_PURPOSE_UPLOAD_RENDDESC_V2: + tor_assert(!resource); + tor_assert(payload); + httpcommand = "POST"; + url = tor_strdup("/tor/rendezvous2/publish"); + break; + case DIR_PURPOSE_UPLOAD_HSDESC: + tor_assert(resource); + tor_assert(payload); + httpcommand = "POST"; + tor_asprintf(&url, "/tor/hs/%s/publish", resource); + break; + default: + tor_assert(0); + return; + } + + /* warn in the non-tunneled case */ + if (direct && (strlen(proxystring) + strlen(url) >= 4096)) { + log_warn(LD_BUG, + "Squid does not like URLs longer than 4095 bytes, and this " + "one is %d bytes long: %s%s", + (int)(strlen(proxystring) + strlen(url)), proxystring, url); + } + + tor_snprintf(request, sizeof(request), "%s %s", httpcommand, proxystring); + + request_len = strlen(request); + total_request_len += request_len; + connection_buf_add(request, request_len, TO_CONN(conn)); + + url_len = strlen(url); + total_request_len += url_len; + connection_buf_add(url, url_len, TO_CONN(conn)); + tor_free(url); + + if (!strcmp(httpcommand, "POST") || payload) { + smartlist_add_asprintf(headers, "Content-Length: %lu\r\n", + payload ? (unsigned long)payload_len : 0); + } + + { + char *header = smartlist_join_strings(headers, "", 0, NULL); + tor_snprintf(request, sizeof(request), " HTTP/1.0\r\nHost: %s\r\n%s\r\n", + hoststring, header); + tor_free(header); + } + + request_len = strlen(request); + total_request_len += request_len; + connection_buf_add(request, request_len, TO_CONN(conn)); + + if (payload) { + /* then send the payload afterwards too */ + connection_buf_add(payload, payload_len, TO_CONN(conn)); + total_request_len += payload_len; + } + + SMARTLIST_FOREACH(headers, char *, h, tor_free(h)); + smartlist_free(headers); + + log_debug(LD_DIR, + "Sent request to directory server '%s:%d': " + "(purpose: %d, request size: %"TOR_PRIuSZ", " + "payload size: %"TOR_PRIuSZ")", + conn->base_.address, conn->base_.port, + conn->base_.purpose, + (total_request_len), + (payload ? payload_len : 0)); +} + +/** Parse an HTTP request string <b>headers</b> of the form + * \verbatim + * "\%s [http[s]://]\%s HTTP/1..." + * \endverbatim + * If it's well-formed, strdup the second \%s into *<b>url</b>, and + * nul-terminate it. If the url doesn't start with "/tor/", rewrite it + * so it does. Return 0. + * Otherwise, return -1. + */ +STATIC int +parse_http_url(const char *headers, char **url) +{ + char *command = NULL; + if (parse_http_command(headers, &command, url) < 0) { + return -1; + } + if (strcmpstart(*url, "/tor/")) { + char *new_url = NULL; + tor_asprintf(&new_url, "/tor%s%s", + *url[0] == '/' ? "" : "/", + *url); + tor_free(*url); + *url = new_url; + } + tor_free(command); + return 0; +} + +/** Parse an HTTP request line at the start of a headers string. On failure, + * return -1. On success, set *<b>command_out</b> to a copy of the HTTP + * command ("get", "post", etc), set *<b>url_out</b> to a copy of the URL, and + * return 0. */ +int +parse_http_command(const char *headers, char **command_out, char **url_out) +{ + const char *command, *end_of_command; + char *s, *start, *tmp; + + s = (char *)eat_whitespace_no_nl(headers); + if (!*s) return -1; + command = s; + s = (char *)find_whitespace(s); /* get past GET/POST */ + if (!*s) return -1; + end_of_command = s; + s = (char *)eat_whitespace_no_nl(s); + if (!*s) return -1; + start = s; /* this is the URL, assuming it's valid */ + s = (char *)find_whitespace(start); + if (!*s) return -1; + + /* tolerate the http[s] proxy style of putting the hostname in the url */ + if (s-start >= 4 && !strcmpstart(start,"http")) { + tmp = start + 4; + if (*tmp == 's') + tmp++; + if (s-tmp >= 3 && !strcmpstart(tmp,"://")) { + tmp = strchr(tmp+3, '/'); + if (tmp && tmp < s) { + log_debug(LD_DIR,"Skipping over 'http[s]://hostname/' string"); + start = tmp; + } + } + } + + /* Check if the header is well formed (next sequence + * should be HTTP/1.X\r\n). Assumes we're supporting 1.0? */ + { + unsigned minor_ver; + char ch; + char *e = (char *)eat_whitespace_no_nl(s); + if (2 != tor_sscanf(e, "HTTP/1.%u%c", &minor_ver, &ch)) { + return -1; + } + if (ch != '\r') + return -1; + } + + *url_out = tor_memdup_nulterm(start, s-start); + *command_out = tor_memdup_nulterm(command, end_of_command - command); + return 0; +} + +/** Return a copy of the first HTTP header in <b>headers</b> whose key is + * <b>which</b>. The key should be given with a terminating colon and space; + * this function copies everything after, up to but not including the + * following \\r\\n. */ +char * +http_get_header(const char *headers, const char *which) +{ + const char *cp = headers; + while (cp) { + if (!strcasecmpstart(cp, which)) { + char *eos; + cp += strlen(which); + if ((eos = strchr(cp,'\r'))) + return tor_strndup(cp, eos-cp); + else + return tor_strdup(cp); + } + cp = strchr(cp, '\n'); + if (cp) + ++cp; + } + return NULL; +} + +/** If <b>headers</b> indicates that a proxy was involved, then rewrite + * <b>conn</b>-\>address to describe our best guess of the address that + * originated this HTTP request. */ +static void +http_set_address_origin(const char *headers, connection_t *conn) +{ + char *fwd; + + fwd = http_get_header(headers, "Forwarded-For: "); + if (!fwd) + fwd = http_get_header(headers, "X-Forwarded-For: "); + if (fwd) { + tor_addr_t toraddr; + if (tor_addr_parse(&toraddr,fwd) == -1 || + tor_addr_is_internal(&toraddr,0)) { + log_debug(LD_DIR, "Ignoring local/internal IP %s", escaped(fwd)); + tor_free(fwd); + return; + } + + tor_free(conn->address); + conn->address = tor_strdup(fwd); + tor_free(fwd); + } +} + +/** Parse an HTTP response string <b>headers</b> of the form + * \verbatim + * "HTTP/1.\%d \%d\%s\r\n...". + * \endverbatim + * + * If it's well-formed, assign the status code to *<b>code</b> and + * return 0. Otherwise, return -1. + * + * On success: If <b>date</b> is provided, set *date to the Date + * header in the http headers, or 0 if no such header is found. If + * <b>compression</b> is provided, set *<b>compression</b> to the + * compression method given in the Content-Encoding header, or 0 if no + * such header is found, or -1 if the value of the header is not + * recognized. If <b>reason</b> is provided, strdup the reason string + * into it. + */ +int +parse_http_response(const char *headers, int *code, time_t *date, + compress_method_t *compression, char **reason) +{ + unsigned n1, n2; + char datestr[RFC1123_TIME_LEN+1]; + smartlist_t *parsed_headers; + tor_assert(headers); + tor_assert(code); + + while (TOR_ISSPACE(*headers)) headers++; /* tolerate leading whitespace */ + + if (tor_sscanf(headers, "HTTP/1.%u %u", &n1, &n2) < 2 || + (n1 != 0 && n1 != 1) || + (n2 < 100 || n2 >= 600)) { + log_warn(LD_HTTP,"Failed to parse header %s",escaped(headers)); + return -1; + } + *code = n2; + + parsed_headers = smartlist_new(); + smartlist_split_string(parsed_headers, headers, "\n", + SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, -1); + if (reason) { + smartlist_t *status_line_elements = smartlist_new(); + tor_assert(smartlist_len(parsed_headers)); + smartlist_split_string(status_line_elements, + smartlist_get(parsed_headers, 0), + " ", SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 3); + tor_assert(smartlist_len(status_line_elements) <= 3); + if (smartlist_len(status_line_elements) == 3) { + *reason = smartlist_get(status_line_elements, 2); + smartlist_set(status_line_elements, 2, NULL); /* Prevent free */ + } + SMARTLIST_FOREACH(status_line_elements, char *, cp, tor_free(cp)); + smartlist_free(status_line_elements); + } + if (date) { + *date = 0; + SMARTLIST_FOREACH(parsed_headers, const char *, s, + if (!strcmpstart(s, "Date: ")) { + strlcpy(datestr, s+6, sizeof(datestr)); + /* This will do nothing on failure, so we don't need to check + the result. We shouldn't warn, since there are many other valid + date formats besides the one we use. */ + parse_rfc1123_time(datestr, date); + break; + }); + } + if (compression) { + const char *enc = NULL; + SMARTLIST_FOREACH(parsed_headers, const char *, s, + if (!strcmpstart(s, "Content-Encoding: ")) { + enc = s+18; break; + }); + + if (enc == NULL) + *compression = NO_METHOD; + else { + *compression = compression_method_get_by_name(enc); + + if (*compression == UNKNOWN_METHOD) + log_info(LD_HTTP, "Unrecognized content encoding: %s. Trying to deal.", + escaped(enc)); + } + } + SMARTLIST_FOREACH(parsed_headers, char *, s, tor_free(s)); + smartlist_free(parsed_headers); + + return 0; +} + +/** Return true iff <b>body</b> doesn't start with a plausible router or + * network-status or microdescriptor opening. This is a sign of possible + * compression. */ +static int +body_is_plausible(const char *body, size_t len, int purpose) +{ + int i; + if (len == 0) + return 1; /* empty bodies don't need decompression */ + if (len < 32) + return 0; + if (purpose == DIR_PURPOSE_FETCH_MICRODESC) { + return (!strcmpstart(body,"onion-key")); + } + + if (!strcmpstart(body,"router") || + !strcmpstart(body,"network-status")) + return 1; + for (i=0;i<32;++i) { + if (!TOR_ISPRINT(body[i]) && !TOR_ISSPACE(body[i])) + return 0; + } + + return 1; +} + +/** Called when we've just fetched a bunch of router descriptors in + * <b>body</b>. The list <b>which</b>, if present, holds digests for + * descriptors we requested: descriptor digests if <b>descriptor_digests</b> + * is true, or identity digests otherwise. Parse the descriptors, validate + * them, and annotate them as having purpose <b>purpose</b> and as having been + * downloaded from <b>source</b>. + * + * Return the number of routers actually added. */ +static int +load_downloaded_routers(const char *body, smartlist_t *which, + int descriptor_digests, + int router_purpose, + const char *source) +{ + char buf[256]; + char time_buf[ISO_TIME_LEN+1]; + int added = 0; + int general = router_purpose == ROUTER_PURPOSE_GENERAL; + format_iso_time(time_buf, time(NULL)); + tor_assert(source); + + if (tor_snprintf(buf, sizeof(buf), + "@downloaded-at %s\n" + "@source %s\n" + "%s%s%s", time_buf, escaped(source), + !general ? "@purpose " : "", + !general ? router_purpose_to_string(router_purpose) : "", + !general ? "\n" : "")<0) + return added; + + added = router_load_routers_from_string(body, NULL, SAVED_NOWHERE, which, + descriptor_digests, buf); + if (added && general) + control_event_bootstrap(BOOTSTRAP_STATUS_LOADING_DESCRIPTORS, + count_loading_descriptors_progress()); + return added; +} + +static int handle_response_fetch_certificate(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_fetch_status_vote(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_fetch_detached_signatures(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_fetch_desc(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_upload_dir(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_upload_vote(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_upload_signatures(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_fetch_renddesc_v2(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_upload_renddesc_v2(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_upload_hsdesc(dir_connection_t *, + const response_handler_args_t *); + +static int +dir_client_decompress_response_body(char **bodyp, size_t *bodylenp, + dir_connection_t *conn, + compress_method_t compression, + int anonymized_connection) +{ + int rv = 0; + const char *body = *bodyp; + size_t body_len = *bodylenp; + int allow_partial = (conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC || + conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO || + conn->base_.purpose == DIR_PURPOSE_FETCH_MICRODESC); + + int plausible = body_is_plausible(body, body_len, conn->base_.purpose); + + if (plausible && compression == NO_METHOD) { + return 0; + } + + int severity = LOG_DEBUG; + char *new_body = NULL; + size_t new_len = 0; + const char *description1, *description2; + int want_to_try_both = 0; + int tried_both = 0; + compress_method_t guessed = detect_compression_method(body, body_len); + + description1 = compression_method_get_human_name(compression); + + if (BUG(description1 == NULL)) + description1 = compression_method_get_human_name(UNKNOWN_METHOD); + + if (guessed == UNKNOWN_METHOD && !plausible) + description2 = "confusing binary junk"; + else + description2 = compression_method_get_human_name(guessed); + + /* Tell the user if we don't believe what we're told about compression.*/ + want_to_try_both = (compression == UNKNOWN_METHOD || + guessed != compression); + if (want_to_try_both) { + severity = LOG_PROTOCOL_WARN; + } + + tor_log(severity, LD_HTTP, + "HTTP body from server '%s:%d' was labeled as %s, " + "%s it seems to be %s.%s", + conn->base_.address, conn->base_.port, description1, + guessed != compression?"but":"and", + description2, + (compression>0 && guessed>0 && want_to_try_both)? + " Trying both.":""); + + /* Try declared compression first if we can. + * tor_compress_supports_method() also returns true for NO_METHOD. + * Ensure that the server is not sending us data compressed using a + * compression method that is not allowed for anonymous connections. */ + if (anonymized_connection && + ! allowed_anonymous_connection_compression_method(compression)) { + warn_disallowed_anonymous_compression_method(compression); + rv = -1; + goto done; + } + + if (tor_compress_supports_method(compression)) { + tor_uncompress(&new_body, &new_len, body, body_len, compression, + !allow_partial, LOG_PROTOCOL_WARN); + if (new_body) { + /* We succeeded with the declared compression method. Great! */ + rv = 0; + goto done; + } + } + + /* Okay, if that didn't work, and we think that it was compressed + * differently, try that. */ + if (anonymized_connection && + ! allowed_anonymous_connection_compression_method(guessed)) { + warn_disallowed_anonymous_compression_method(guessed); + rv = -1; + goto done; + } + + if (tor_compress_supports_method(guessed) && + compression != guessed) { + tor_uncompress(&new_body, &new_len, body, body_len, guessed, + !allow_partial, LOG_INFO); + tried_both = 1; + } + /* If we're pretty sure that we have a compressed directory, and + * we didn't manage to uncompress it, then warn and bail. */ + if (!plausible && !new_body) { + log_fn(LOG_PROTOCOL_WARN, LD_HTTP, + "Unable to decompress HTTP body (tried %s%s%s, server '%s:%d').", + description1, + tried_both?" and ":"", + tried_both?description2:"", + conn->base_.address, conn->base_.port); + rv = -1; + goto done; + } + + done: + if (new_body) { + if (rv == 0) { + /* success! */ + tor_free(*bodyp); + *bodyp = new_body; + *bodylenp = new_len; + } else { + tor_free(new_body); + } + } + + return rv; +} + +/** We are a client, and we've finished reading the server's + * response. Parse it and act appropriately. + * + * If we're still happy with using this directory server in the future, return + * 0. Otherwise return -1; and the caller should consider trying the request + * again. + * + * The caller will take care of marking the connection for close. + */ +static int +connection_dir_client_reached_eof(dir_connection_t *conn) +{ + char *body = NULL; + char *headers = NULL; + char *reason = NULL; + size_t body_len = 0; + int status_code; + time_t date_header = 0; + long apparent_skew; + compress_method_t compression; + int skewed = 0; + int rv; + int allow_partial = (conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC || + conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO || + conn->base_.purpose == DIR_PURPOSE_FETCH_MICRODESC); + size_t received_bytes; + const int anonymized_connection = + purpose_needs_anonymity(conn->base_.purpose, + conn->router_purpose, + conn->requested_resource); + + received_bytes = connection_get_inbuf_len(TO_CONN(conn)); + + switch (connection_fetch_from_buf_http(TO_CONN(conn), + &headers, MAX_HEADERS_SIZE, + &body, &body_len, MAX_DIR_DL_SIZE, + allow_partial)) { + case -1: /* overflow */ + log_warn(LD_PROTOCOL, + "'fetch' response too large (server '%s:%d'). Closing.", + conn->base_.address, conn->base_.port); + return -1; + case 0: + log_info(LD_HTTP, + "'fetch' response not all here, but we're at eof. Closing."); + return -1; + /* case 1, fall through */ + } + + if (parse_http_response(headers, &status_code, &date_header, + &compression, &reason) < 0) { + log_warn(LD_HTTP,"Unparseable headers (server '%s:%d'). Closing.", + conn->base_.address, conn->base_.port); + + rv = -1; + goto done; + } + if (!reason) reason = tor_strdup("[no reason given]"); + + tor_log(LOG_DEBUG, LD_DIR, + "Received response from directory server '%s:%d': %d %s " + "(purpose: %d, response size: %"TOR_PRIuSZ +#ifdef MEASUREMENTS_21206 + ", data cells received: %d, data cells sent: %d" +#endif + ", compression: %d)", + conn->base_.address, conn->base_.port, status_code, + escaped(reason), conn->base_.purpose, + (received_bytes), +#ifdef MEASUREMENTS_21206 + conn->data_cells_received, conn->data_cells_sent, +#endif + compression); + + if (conn->guard_state) { + /* we count the connection as successful once we can read from it. We do + * not, however, delay use of the circuit here, since it's just for a + * one-hop directory request. */ + /* XXXXprop271 note that this will not do the right thing for other + * waiting circuits that would be triggered by this circuit becoming + * complete/usable. But that's ok, I think. + */ + entry_guard_succeeded(&conn->guard_state); + circuit_guard_state_free(conn->guard_state); + conn->guard_state = NULL; + } + + /* now check if it's got any hints for us about our IP address. */ + if (conn->dirconn_direct) { + char *guess = http_get_header(headers, X_ADDRESS_HEADER); + if (guess) { + router_new_address_suggestion(guess, conn); + tor_free(guess); + } + } + + if (date_header > 0) { + /* The date header was written very soon after we sent our request, + * so compute the skew as the difference between sending the request + * and the date header. (We used to check now-date_header, but that's + * inaccurate if we spend a lot of time downloading.) + */ + apparent_skew = conn->base_.timestamp_last_write_allowed - date_header; + if (labs(apparent_skew)>ALLOW_DIRECTORY_TIME_SKEW) { + int trusted = router_digest_is_trusted_dir(conn->identity_digest); + clock_skew_warning(TO_CONN(conn), apparent_skew, trusted, LD_HTTP, + "directory", "DIRSERV"); + skewed = 1; /* don't check the recommended-versions line */ + } else { + log_debug(LD_HTTP, "Time on received directory is within tolerance; " + "we are %ld seconds skewed. (That's okay.)", apparent_skew); + } + } + (void) skewed; /* skewed isn't used yet. */ + + if (status_code == 503) { + routerstatus_t *rs; + dir_server_t *ds; + const char *id_digest = conn->identity_digest; + log_info(LD_DIR,"Received http status code %d (%s) from server " + "'%s:%d'. I'll try again soon.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + time_t now = approx_time(); + if ((rs = router_get_mutable_consensus_status_by_id(id_digest))) + rs->last_dir_503_at = now; + if ((ds = router_get_fallback_dirserver_by_digest(id_digest))) + ds->fake_status.last_dir_503_at = now; + + rv = -1; + goto done; + } + + if (dir_client_decompress_response_body(&body, &body_len, + conn, compression, anonymized_connection) < 0) { + rv = -1; + goto done; + } + + response_handler_args_t args; + memset(&args, 0, sizeof(args)); + args.status_code = status_code; + args.reason = reason; + args.body = body; + args.body_len = body_len; + args.headers = headers; + + switch (conn->base_.purpose) { + case DIR_PURPOSE_FETCH_CONSENSUS: + rv = handle_response_fetch_consensus(conn, &args); + break; + case DIR_PURPOSE_FETCH_CERTIFICATE: + rv = handle_response_fetch_certificate(conn, &args); + break; + case DIR_PURPOSE_FETCH_STATUS_VOTE: + rv = handle_response_fetch_status_vote(conn, &args); + break; + case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: + rv = handle_response_fetch_detached_signatures(conn, &args); + break; + case DIR_PURPOSE_FETCH_SERVERDESC: + case DIR_PURPOSE_FETCH_EXTRAINFO: + rv = handle_response_fetch_desc(conn, &args); + break; + case DIR_PURPOSE_FETCH_MICRODESC: + rv = handle_response_fetch_microdesc(conn, &args); + break; + case DIR_PURPOSE_FETCH_RENDDESC_V2: + rv = handle_response_fetch_renddesc_v2(conn, &args); + break; + case DIR_PURPOSE_UPLOAD_DIR: + rv = handle_response_upload_dir(conn, &args); + break; + case DIR_PURPOSE_UPLOAD_SIGNATURES: + rv = handle_response_upload_signatures(conn, &args); + break; + case DIR_PURPOSE_UPLOAD_VOTE: + rv = handle_response_upload_vote(conn, &args); + break; + case DIR_PURPOSE_UPLOAD_RENDDESC_V2: + rv = handle_response_upload_renddesc_v2(conn, &args); + break; + case DIR_PURPOSE_UPLOAD_HSDESC: + rv = handle_response_upload_hsdesc(conn, &args); + break; + case DIR_PURPOSE_FETCH_HSDESC: + rv = handle_response_fetch_hsdesc_v3(conn, &args); + break; + default: + tor_assert_nonfatal_unreached(); + rv = -1; + break; + } + + done: + tor_free(body); + tor_free(headers); + tor_free(reason); + return rv; +} + +/** + * Handler function: processes a response to a request for a networkstatus + * consensus document by checking the consensus, storing it, and marking + * router requests as reachable. + **/ +STATIC int +handle_response_fetch_consensus(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_CONSENSUS); + const int status_code = args->status_code; + const char *body = args->body; + const size_t body_len = args->body_len; + const char *reason = args->reason; + const time_t now = approx_time(); + + const char *consensus; + char *new_consensus = NULL; + const char *sourcename; + + int r; + const char *flavname = conn->requested_resource; + if (status_code != 200) { + int severity = (status_code == 304) ? LOG_INFO : LOG_WARN; + tor_log(severity, LD_DIR, + "Received http status code %d (%s) from server " + "'%s:%d' while fetching consensus directory.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + networkstatus_consensus_download_failed(status_code, flavname); + return -1; + } + + if (looks_like_a_consensus_diff(body, body_len)) { + /* First find our previous consensus. Maybe it's in ram, maybe not. */ + cached_dir_t *cd = dirserv_get_consensus(flavname); + const char *consensus_body; + char *owned_consensus = NULL; + if (cd) { + consensus_body = cd->dir; + } else { + owned_consensus = networkstatus_read_cached_consensus(flavname); + consensus_body = owned_consensus; + } + if (!consensus_body) { + log_warn(LD_DIR, "Received a consensus diff, but we can't find " + "any %s-flavored consensus in our current cache.",flavname); + networkstatus_consensus_download_failed(0, flavname); + // XXXX if this happens too much, see below + return -1; + } + + new_consensus = consensus_diff_apply(consensus_body, body); + tor_free(owned_consensus); + if (new_consensus == NULL) { + log_warn(LD_DIR, "Could not apply consensus diff received from server " + "'%s:%d'", conn->base_.address, conn->base_.port); + // XXXX If this happens too many times, we should maybe not use + // XXXX this directory for diffs any more? + networkstatus_consensus_download_failed(0, flavname); + return -1; + } + log_info(LD_DIR, "Applied consensus diff (size %d) from server " + "'%s:%d', resulting in a new consensus document (size %d).", + (int)body_len, conn->base_.address, conn->base_.port, + (int)strlen(new_consensus)); + consensus = new_consensus; + sourcename = "generated based on a diff"; + } else { + log_info(LD_DIR,"Received consensus directory (body size %d) from server " + "'%s:%d'", (int)body_len, conn->base_.address, conn->base_.port); + consensus = body; + sourcename = "downloaded"; + } + + if ((r=networkstatus_set_current_consensus(consensus, flavname, 0, + conn->identity_digest))<0) { + log_fn(r<-1?LOG_WARN:LOG_INFO, LD_DIR, + "Unable to load %s consensus directory %s from " + "server '%s:%d'. I'll try again soon.", + flavname, sourcename, conn->base_.address, conn->base_.port); + networkstatus_consensus_download_failed(0, flavname); + tor_free(new_consensus); + return -1; + } + + /* If we launched other fetches for this consensus, cancel them. */ + connection_dir_close_consensus_fetches(conn, flavname); + + /* update the list of routers and directory guards */ + routers_update_all_from_networkstatus(now, 3); + update_microdescs_from_networkstatus(now); + directory_info_has_arrived(now, 0, 0); + + if (authdir_mode_v3(get_options())) { + sr_act_post_consensus( + networkstatus_get_latest_consensus_by_flavor(FLAV_NS)); + } + log_info(LD_DIR, "Successfully loaded consensus."); + + tor_free(new_consensus); + return 0; +} + +/** + * Handler function: processes a response to a request for one or more + * authority certificates + **/ +static int +handle_response_fetch_certificate(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_CERTIFICATE); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + if (status_code != 200) { + log_warn(LD_DIR, + "Received http status code %d (%s) from server " + "'%s:%d' while fetching \"/tor/keys/%s\".", + status_code, escaped(reason), conn->base_.address, + conn->base_.port, conn->requested_resource); + connection_dir_download_cert_failed(conn, status_code); + return -1; + } + log_info(LD_DIR,"Received authority certificates (body size %d) from " + "server '%s:%d'", + (int)body_len, conn->base_.address, conn->base_.port); + + /* + * Tell trusted_dirs_load_certs_from_string() whether it was by fp + * or fp-sk pair. + */ + int src_code = -1; + if (!strcmpstart(conn->requested_resource, "fp/")) { + src_code = TRUSTED_DIRS_CERTS_SRC_DL_BY_ID_DIGEST; + } else if (!strcmpstart(conn->requested_resource, "fp-sk/")) { + src_code = TRUSTED_DIRS_CERTS_SRC_DL_BY_ID_SK_DIGEST; + } + + if (src_code != -1) { + if (trusted_dirs_load_certs_from_string(body, src_code, 1, + conn->identity_digest)<0) { + log_warn(LD_DIR, "Unable to parse fetched certificates"); + /* if we fetched more than one and only some failed, the successful + * ones got flushed to disk so it's safe to call this on them */ + connection_dir_download_cert_failed(conn, status_code); + } else { + time_t now = approx_time(); + directory_info_has_arrived(now, 0, 0); + log_info(LD_DIR, "Successfully loaded certificates from fetch."); + } + } else { + log_warn(LD_DIR, + "Couldn't figure out what to do with fetched certificates for " + "unknown resource %s", + conn->requested_resource); + connection_dir_download_cert_failed(conn, status_code); + } + return 0; +} + +/** + * Handler function: processes a response to a request for an authority's + * current networkstatus vote. + **/ +static int +handle_response_fetch_status_vote(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_STATUS_VOTE); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + const char *msg; + int st; + log_info(LD_DIR,"Got votes (body size %d) from server %s:%d", + (int)body_len, conn->base_.address, conn->base_.port); + if (status_code != 200) { + log_warn(LD_DIR, + "Received http status code %d (%s) from server " + "'%s:%d' while fetching \"/tor/status-vote/next/%s.z\".", + status_code, escaped(reason), conn->base_.address, + conn->base_.port, conn->requested_resource); + return -1; + } + dirvote_add_vote(body, &msg, &st); + if (st > 299) { + log_warn(LD_DIR, "Error adding retrieved vote: %s", msg); + } else { + log_info(LD_DIR, "Added vote(s) successfully [msg: %s]", msg); + } + + return 0; +} + +/** + * Handler function: processes a response to a request for the signatures + * that an authority knows about on a given consensus. + **/ +static int +handle_response_fetch_detached_signatures(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_DETACHED_SIGNATURES); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + const char *msg = NULL; + log_info(LD_DIR,"Got detached signatures (body size %d) from server %s:%d", + (int)body_len, conn->base_.address, conn->base_.port); + if (status_code != 200) { + log_warn(LD_DIR, + "Received http status code %d (%s) from server '%s:%d' while fetching " + "\"/tor/status-vote/next/consensus-signatures.z\".", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + return -1; + } + if (dirvote_add_signatures(body, conn->base_.address, &msg)<0) { + log_warn(LD_DIR, "Problem adding detached signatures from %s:%d: %s", + conn->base_.address, conn->base_.port, msg?msg:"???"); + } + + return 0; +} + +/** + * Handler function: processes a response to a request for a group of server + * descriptors or an extrainfo documents. + **/ +static int +handle_response_fetch_desc(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC || + conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + int was_ei = conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO; + smartlist_t *which = NULL; + int n_asked_for = 0; + int descriptor_digests = conn->requested_resource && + !strcmpstart(conn->requested_resource,"d/"); + log_info(LD_DIR,"Received %s (body size %d) from server '%s:%d'", + was_ei ? "extra server info" : "server info", + (int)body_len, conn->base_.address, conn->base_.port); + if (conn->requested_resource && + (!strcmpstart(conn->requested_resource,"d/") || + !strcmpstart(conn->requested_resource,"fp/"))) { + which = smartlist_new(); + dir_split_resource_into_fingerprints(conn->requested_resource + + (descriptor_digests ? 2 : 3), + which, NULL, 0); + n_asked_for = smartlist_len(which); + } + if (status_code != 200) { + int dir_okay = status_code == 404 || + (status_code == 400 && !strcmp(reason, "Servers unavailable.")); + /* 404 means that it didn't have them; no big deal. + * Older (pre-0.1.1.8) servers said 400 Servers unavailable instead. */ + log_fn(dir_okay ? LOG_INFO : LOG_WARN, LD_DIR, + "Received http status code %d (%s) from server '%s:%d' " + "while fetching \"/tor/server/%s\". I'll try again soon.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port, conn->requested_resource); + if (!which) { + connection_dir_download_routerdesc_failed(conn); + } else { + dir_routerdesc_download_failed(which, status_code, + conn->router_purpose, + was_ei, descriptor_digests); + SMARTLIST_FOREACH(which, char *, cp, tor_free(cp)); + smartlist_free(which); + } + return dir_okay ? 0 : -1; + } + /* Learn the routers, assuming we requested by fingerprint or "all" + * or "authority". + * + * We use "authority" to fetch our own descriptor for + * testing, and to fetch bridge descriptors for bootstrapping. Ignore + * the output of "authority" requests unless we are using bridges, + * since otherwise they'll be the response from reachability tests, + * and we don't really want to add that to our routerlist. */ + if (which || (conn->requested_resource && + (!strcmpstart(conn->requested_resource, "all") || + (!strcmpstart(conn->requested_resource, "authority") && + get_options()->UseBridges)))) { + /* as we learn from them, we remove them from 'which' */ + if (was_ei) { + router_load_extrainfo_from_string(body, NULL, SAVED_NOWHERE, which, + descriptor_digests); + } else { + //router_load_routers_from_string(body, NULL, SAVED_NOWHERE, which, + // descriptor_digests, conn->router_purpose); + if (load_downloaded_routers(body, which, descriptor_digests, + conn->router_purpose, + conn->base_.address)) { + time_t now = approx_time(); + directory_info_has_arrived(now, 0, 1); + } + } + } + if (which) { /* mark remaining ones as failed */ + log_info(LD_DIR, "Received %d/%d %s requested from %s:%d", + n_asked_for-smartlist_len(which), n_asked_for, + was_ei ? "extra-info documents" : "router descriptors", + conn->base_.address, (int)conn->base_.port); + if (smartlist_len(which)) { + dir_routerdesc_download_failed(which, status_code, + conn->router_purpose, + was_ei, descriptor_digests); + } + SMARTLIST_FOREACH(which, char *, cp, tor_free(cp)); + smartlist_free(which); + } + if (directory_conn_is_self_reachability_test(conn)) + router_dirport_found_reachable(); + + return 0; +} + +/** + * Handler function: processes a response to a request for a group of + * microdescriptors + **/ +STATIC int +handle_response_fetch_microdesc(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_MICRODESC); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + smartlist_t *which = NULL; + log_info(LD_DIR,"Received answer to microdescriptor request (status %d, " + "body size %d) from server '%s:%d'", + status_code, (int)body_len, conn->base_.address, + conn->base_.port); + tor_assert(conn->requested_resource && + !strcmpstart(conn->requested_resource, "d/")); + tor_assert_nonfatal(!tor_mem_is_zero(conn->identity_digest, DIGEST_LEN)); + which = smartlist_new(); + dir_split_resource_into_fingerprints(conn->requested_resource+2, + which, NULL, + DSR_DIGEST256|DSR_BASE64); + if (status_code != 200) { + log_info(LD_DIR, "Received status code %d (%s) from server " + "'%s:%d' while fetching \"/tor/micro/%s\". I'll try again " + "soon.", + status_code, escaped(reason), conn->base_.address, + (int)conn->base_.port, conn->requested_resource); + dir_microdesc_download_failed(which, status_code, conn->identity_digest); + SMARTLIST_FOREACH(which, char *, cp, tor_free(cp)); + smartlist_free(which); + return 0; + } else { + smartlist_t *mds; + time_t now = approx_time(); + mds = microdescs_add_to_cache(get_microdesc_cache(), + body, body+body_len, SAVED_NOWHERE, 0, + now, which); + if (smartlist_len(which)) { + /* Mark remaining ones as failed. */ + dir_microdesc_download_failed(which, status_code, conn->identity_digest); + } + if (mds && smartlist_len(mds)) { + control_event_bootstrap(BOOTSTRAP_STATUS_LOADING_DESCRIPTORS, + count_loading_descriptors_progress()); + directory_info_has_arrived(now, 0, 1); + } + SMARTLIST_FOREACH(which, char *, cp, tor_free(cp)); + smartlist_free(which); + smartlist_free(mds); + } + + return 0; +} + +/** + * Handler function: processes a response to a POST request to upload our + * router descriptor. + **/ +static int +handle_response_upload_dir(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_UPLOAD_DIR); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *headers = args->headers; + + switch (status_code) { + case 200: { + dir_server_t *ds = + router_get_trusteddirserver_by_digest(conn->identity_digest); + char *rejected_hdr = http_get_header(headers, + "X-Descriptor-Not-New: "); + if (rejected_hdr) { + if (!strcmp(rejected_hdr, "Yes")) { + log_info(LD_GENERAL, + "Authority '%s' declined our descriptor (not new)", + ds->nickname); + /* XXXX use this information; be sure to upload next one + * sooner. -NM */ + /* XXXX++ On further thought, the task above implies that we're + * basing our regenerate-descriptor time on when we uploaded the + * last descriptor, not on the published time of the last + * descriptor. If those are different, that's a bad thing to + * do. -NM */ + } + tor_free(rejected_hdr); + } + log_info(LD_GENERAL,"eof (status 200) after uploading server " + "descriptor: finished."); + control_event_server_status( + LOG_NOTICE, "ACCEPTED_SERVER_DESCRIPTOR DIRAUTH=%s:%d", + conn->base_.address, conn->base_.port); + + ds->has_accepted_serverdesc = 1; + if (directories_have_accepted_server_descriptor()) + control_event_server_status(LOG_NOTICE, "GOOD_SERVER_DESCRIPTOR"); + } + break; + case 400: + log_warn(LD_GENERAL,"http status 400 (%s) response from " + "dirserver '%s:%d'. Please correct.", + escaped(reason), conn->base_.address, conn->base_.port); + control_event_server_status(LOG_WARN, + "BAD_SERVER_DESCRIPTOR DIRAUTH=%s:%d REASON=\"%s\"", + conn->base_.address, conn->base_.port, escaped(reason)); + break; + default: + log_warn(LD_GENERAL, + "HTTP status %d (%s) was unexpected while uploading " + "descriptor to server '%s:%d'. Possibly the server is " + "misconfigured?", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + break; + } + /* return 0 in all cases, since we don't want to mark any + * dirservers down just because they don't like us. */ + + return 0; +} + +/** + * Handler function: processes a response to POST request to upload our + * own networkstatus vote. + **/ +static int +handle_response_upload_vote(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_UPLOAD_VOTE); + const int status_code = args->status_code; + const char *reason = args->reason; + + switch (status_code) { + case 200: { + log_notice(LD_DIR,"Uploaded a vote to dirserver %s:%d", + conn->base_.address, conn->base_.port); + } + break; + case 400: + log_warn(LD_DIR,"http status 400 (%s) response after uploading " + "vote to dirserver '%s:%d'. Please correct.", + escaped(reason), conn->base_.address, conn->base_.port); + break; + default: + log_warn(LD_GENERAL, + "HTTP status %d (%s) was unexpected while uploading " + "vote to server '%s:%d'.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + break; + } + /* return 0 in all cases, since we don't want to mark any + * dirservers down just because they don't like us. */ + return 0; +} + +/** + * Handler function: processes a response to POST request to upload our + * view of the signatures on the current consensus. + **/ +static int +handle_response_upload_signatures(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_UPLOAD_SIGNATURES); + const int status_code = args->status_code; + const char *reason = args->reason; + + switch (status_code) { + case 200: { + log_notice(LD_DIR,"Uploaded signature(s) to dirserver %s:%d", + conn->base_.address, conn->base_.port); + } + break; + case 400: + log_warn(LD_DIR,"http status 400 (%s) response after uploading " + "signatures to dirserver '%s:%d'. Please correct.", + escaped(reason), conn->base_.address, conn->base_.port); + break; + default: + log_warn(LD_GENERAL, + "HTTP status %d (%s) was unexpected while uploading " + "signatures to server '%s:%d'.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + break; + } + /* return 0 in all cases, since we don't want to mark any + * dirservers down just because they don't like us. */ + + return 0; +} + +/** + * Handler function: processes a response to a request for a v3 hidden service + * descriptor. + **/ +STATIC int +handle_response_fetch_hsdesc_v3(dir_connection_t *conn, + const response_handler_args_t *args) +{ + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + tor_assert(conn->hs_ident); + + log_info(LD_REND,"Received v3 hsdesc (body size %d, status %d (%s))", + (int)body_len, status_code, escaped(reason)); + + switch (status_code) { + case 200: + /* We got something: Try storing it in the cache. */ + if (hs_cache_store_as_client(body, &conn->hs_ident->identity_pk) < 0) { + log_warn(LD_REND, "Failed to store hidden service descriptor"); + /* Fire control port FAILED event. */ + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "BAD_DESC"); + hs_control_desc_event_content(conn->hs_ident, conn->identity_digest, + NULL); + } else { + log_info(LD_REND, "Stored hidden service descriptor successfully."); + TO_CONN(conn)->purpose = DIR_PURPOSE_HAS_FETCHED_HSDESC; + hs_client_desc_has_arrived(conn->hs_ident); + /* Fire control port RECEIVED event. */ + hs_control_desc_event_received(conn->hs_ident, conn->identity_digest); + hs_control_desc_event_content(conn->hs_ident, conn->identity_digest, + body); + } + break; + case 404: + /* Not there. We'll retry when connection_about_to_close_connection() + * tries to clean this conn up. */ + log_info(LD_REND, "Fetching hidden service v3 descriptor not found: " + "Retrying at another directory."); + /* Fire control port FAILED event. */ + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "NOT_FOUND"); + hs_control_desc_event_content(conn->hs_ident, conn->identity_digest, + NULL); + break; + case 400: + log_warn(LD_REND, "Fetching v3 hidden service descriptor failed: " + "http status 400 (%s). Dirserver didn't like our " + "query? Retrying at another directory.", + escaped(reason)); + /* Fire control port FAILED event. */ + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "QUERY_REJECTED"); + hs_control_desc_event_content(conn->hs_ident, conn->identity_digest, + NULL); + break; + default: + log_warn(LD_REND, "Fetching v3 hidden service descriptor failed: " + "http status %d (%s) response unexpected from HSDir server " + "'%s:%d'. Retrying at another directory.", + status_code, escaped(reason), TO_CONN(conn)->address, + TO_CONN(conn)->port); + /* Fire control port FAILED event. */ + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "UNEXPECTED"); + hs_control_desc_event_content(conn->hs_ident, conn->identity_digest, + NULL); + break; + } + + return 0; +} + +/** + * Handler function: processes a response to a request for a v2 hidden service + * descriptor. + **/ +static int +handle_response_fetch_renddesc_v2(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_RENDDESC_V2); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + +#define SEND_HS_DESC_FAILED_EVENT(reason) \ + (control_event_hsv2_descriptor_failed(conn->rend_data, \ + conn->identity_digest, \ + reason)) +#define SEND_HS_DESC_FAILED_CONTENT() \ + (control_event_hs_descriptor_content( \ + rend_data_get_address(conn->rend_data), \ + conn->requested_resource, \ + conn->identity_digest, \ + NULL)) + + tor_assert(conn->rend_data); + log_info(LD_REND,"Received rendezvous descriptor (body size %d, status %d " + "(%s))", + (int)body_len, status_code, escaped(reason)); + switch (status_code) { + case 200: + { + rend_cache_entry_t *entry = NULL; + + if (rend_cache_store_v2_desc_as_client(body, + conn->requested_resource, + conn->rend_data, &entry) < 0) { + log_warn(LD_REND,"Fetching v2 rendezvous descriptor failed. " + "Retrying at another directory."); + /* We'll retry when connection_about_to_close_connection() + * cleans this dir conn up. */ + SEND_HS_DESC_FAILED_EVENT("BAD_DESC"); + SEND_HS_DESC_FAILED_CONTENT(); + } else { + char service_id[REND_SERVICE_ID_LEN_BASE32 + 1]; + /* Should never be NULL here if we found the descriptor. */ + tor_assert(entry); + rend_get_service_id(entry->parsed->pk, service_id); + + /* success. notify pending connections about this. */ + log_info(LD_REND, "Successfully fetched v2 rendezvous " + "descriptor."); + control_event_hsv2_descriptor_received(service_id, + conn->rend_data, + conn->identity_digest); + control_event_hs_descriptor_content(service_id, + conn->requested_resource, + conn->identity_digest, + body); + conn->base_.purpose = DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2; + rend_client_desc_trynow(service_id); + memwipe(service_id, 0, sizeof(service_id)); + } + break; + } + case 404: + /* Not there. We'll retry when + * connection_about_to_close_connection() cleans this conn up. */ + log_info(LD_REND,"Fetching v2 rendezvous descriptor failed: " + "Retrying at another directory."); + SEND_HS_DESC_FAILED_EVENT("NOT_FOUND"); + SEND_HS_DESC_FAILED_CONTENT(); + break; + case 400: + log_warn(LD_REND, "Fetching v2 rendezvous descriptor failed: " + "http status 400 (%s). Dirserver didn't like our " + "v2 rendezvous query? Retrying at another directory.", + escaped(reason)); + SEND_HS_DESC_FAILED_EVENT("QUERY_REJECTED"); + SEND_HS_DESC_FAILED_CONTENT(); + break; + default: + log_warn(LD_REND, "Fetching v2 rendezvous descriptor failed: " + "http status %d (%s) response unexpected while " + "fetching v2 hidden service descriptor (server '%s:%d'). " + "Retrying at another directory.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + SEND_HS_DESC_FAILED_EVENT("UNEXPECTED"); + SEND_HS_DESC_FAILED_CONTENT(); + break; + } + + return 0; +} + +/** + * Handler function: processes a response to a POST request to upload a v2 + * hidden service descriptor. + **/ +static int +handle_response_upload_renddesc_v2(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_UPLOAD_RENDDESC_V2); + const int status_code = args->status_code; + const char *reason = args->reason; + +#define SEND_HS_DESC_UPLOAD_FAILED_EVENT(reason) \ + (control_event_hs_descriptor_upload_failed( \ + conn->identity_digest, \ + rend_data_get_address(conn->rend_data), \ + reason)) + + log_info(LD_REND,"Uploaded rendezvous descriptor (status %d " + "(%s))", + status_code, escaped(reason)); + /* Without the rend data, we'll have a problem identifying what has been + * uploaded for which service. */ + tor_assert(conn->rend_data); + switch (status_code) { + case 200: + log_info(LD_REND, + "Uploading rendezvous descriptor: finished with status " + "200 (%s)", escaped(reason)); + control_event_hs_descriptor_uploaded(conn->identity_digest, + rend_data_get_address(conn->rend_data)); + rend_service_desc_has_uploaded(conn->rend_data); + break; + case 400: + log_warn(LD_REND,"http status 400 (%s) response from dirserver " + "'%s:%d'. Malformed rendezvous descriptor?", + escaped(reason), conn->base_.address, conn->base_.port); + SEND_HS_DESC_UPLOAD_FAILED_EVENT("UPLOAD_REJECTED"); + break; + default: + log_warn(LD_REND,"http status %d (%s) response unexpected (server " + "'%s:%d').", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + SEND_HS_DESC_UPLOAD_FAILED_EVENT("UNEXPECTED"); + break; + } + + return 0; +} + +/** + * Handler function: processes a response to a POST request to upload an + * hidden service descriptor. + **/ +static int +handle_response_upload_hsdesc(dir_connection_t *conn, + const response_handler_args_t *args) +{ + const int status_code = args->status_code; + const char *reason = args->reason; + + tor_assert(conn); + tor_assert(conn->base_.purpose == DIR_PURPOSE_UPLOAD_HSDESC); + + log_info(LD_REND, "Uploaded hidden service descriptor (status %d " + "(%s))", + status_code, escaped(reason)); + /* For this directory response, it MUST have an hidden service identifier on + * this connection. */ + tor_assert(conn->hs_ident); + switch (status_code) { + case 200: + log_info(LD_REND, "Uploading hidden service descriptor: " + "finished with status 200 (%s)", escaped(reason)); + hs_control_desc_event_uploaded(conn->hs_ident, conn->identity_digest); + break; + case 400: + log_fn(LOG_PROTOCOL_WARN, LD_REND, + "Uploading hidden service descriptor: http " + "status 400 (%s) response from dirserver " + "'%s:%d'. Malformed hidden service descriptor?", + escaped(reason), conn->base_.address, conn->base_.port); + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "UPLOAD_REJECTED"); + break; + default: + log_warn(LD_REND, "Uploading hidden service descriptor: http " + "status %d (%s) response unexpected (server " + "'%s:%d').", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "UNEXPECTED"); + break; + } + + return 0; +} + +/** Called when a directory connection reaches EOF. */ +int +connection_dir_reached_eof(dir_connection_t *conn) +{ + int retval; + if (conn->base_.state != DIR_CONN_STATE_CLIENT_READING) { + log_info(LD_HTTP,"conn reached eof, not reading. [state=%d] Closing.", + conn->base_.state); + connection_close_immediate(TO_CONN(conn)); /* error: give up on flushing */ + connection_mark_for_close(TO_CONN(conn)); + return -1; + } + + retval = connection_dir_client_reached_eof(conn); + if (retval == 0) /* success */ + conn->base_.state = DIR_CONN_STATE_CLIENT_FINISHED; + connection_mark_for_close(TO_CONN(conn)); + return retval; +} + +/** If any directory object is arriving, and it's over 10MB large, we're + * getting DoS'd. (As of 0.1.2.x, raw directories are about 1MB, and we never + * ask for more than 96 router descriptors at a time.) + */ +#define MAX_DIRECTORY_OBJECT_SIZE (10*(1<<20)) + +#define MAX_VOTE_DL_SIZE (MAX_DIRECTORY_OBJECT_SIZE * 5) + +/** Read handler for directory connections. (That's connections <em>to</em> + * directory servers and connections <em>at</em> directory servers.) + */ +int +connection_dir_process_inbuf(dir_connection_t *conn) +{ + size_t max_size; + tor_assert(conn); + tor_assert(conn->base_.type == CONN_TYPE_DIR); + + /* Directory clients write, then read data until they receive EOF; + * directory servers read data until they get an HTTP command, then + * write their response (when it's finished flushing, they mark for + * close). + */ + + /* If we're on the dirserver side, look for a command. */ + if (conn->base_.state == DIR_CONN_STATE_SERVER_COMMAND_WAIT) { + if (directory_handle_command(conn) < 0) { + connection_mark_for_close(TO_CONN(conn)); + return -1; + } + return 0; + } + + max_size = + (TO_CONN(conn)->purpose == DIR_PURPOSE_FETCH_STATUS_VOTE) ? + MAX_VOTE_DL_SIZE : MAX_DIRECTORY_OBJECT_SIZE; + + if (connection_get_inbuf_len(TO_CONN(conn)) > max_size) { + log_warn(LD_HTTP, + "Too much data received from directory connection (%s): " + "denial of service attempt, or you need to upgrade?", + conn->base_.address); + connection_mark_for_close(TO_CONN(conn)); + return -1; + } + + if (!conn->base_.inbuf_reached_eof) + log_debug(LD_HTTP,"Got data, not eof. Leaving on inbuf."); + return 0; +} + +/** We are closing a dir connection: If <b>dir_conn</b> is a dir connection + * that tried to fetch an HS descriptor, check if it successfully fetched it, + * or if we need to try again. */ +static void +refetch_hsdesc_if_needed(dir_connection_t *dir_conn) +{ + connection_t *conn = TO_CONN(dir_conn); + + /* If we were trying to fetch a v2 rend desc and did not succeed, retry as + * needed. (If a fetch is successful, the connection state is changed to + * DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2 or DIR_PURPOSE_HAS_FETCHED_HSDESC to + * mark that refetching is unnecessary.) */ + if (conn->purpose == DIR_PURPOSE_FETCH_RENDDESC_V2 && + dir_conn->rend_data && + rend_valid_v2_service_id( + rend_data_get_address(dir_conn->rend_data))) { + rend_client_refetch_v2_renddesc(dir_conn->rend_data); + } + + /* Check for v3 rend desc fetch */ + if (conn->purpose == DIR_PURPOSE_FETCH_HSDESC && + dir_conn->hs_ident && + !ed25519_public_key_is_zero(&dir_conn->hs_ident->identity_pk)) { + hs_client_refetch_hsdesc(&dir_conn->hs_ident->identity_pk); + } +} + +/** Called when we're about to finally unlink and free a directory connection: + * perform necessary accounting and cleanup */ +void +connection_dir_about_to_close(dir_connection_t *dir_conn) +{ + connection_t *conn = TO_CONN(dir_conn); + + if (conn->state < DIR_CONN_STATE_CLIENT_FINISHED) { + /* It's a directory connection and connecting or fetching + * failed: forget about this router, and maybe try again. */ + connection_dir_request_failed(dir_conn); + } + + refetch_hsdesc_if_needed(dir_conn); +} + +/** Create an http response for the client <b>conn</b> out of + * <b>status</b> and <b>reason_phrase</b>. Write it to <b>conn</b>. + */ +static void +write_short_http_response(dir_connection_t *conn, int status, + const char *reason_phrase) +{ + char *buf = NULL; + char *datestring = NULL; + + IF_BUG_ONCE(!reason_phrase) { /* bullet-proofing */ + reason_phrase = "unspecified"; + } + + if (server_mode(get_options())) { + /* include the Date: header, but only if we're a relay or bridge */ + char datebuf[RFC1123_TIME_LEN+1]; + format_rfc1123_time(datebuf, time(NULL)); + tor_asprintf(&datestring, "Date: %s\r\n", datebuf); + } + + tor_asprintf(&buf, "HTTP/1.0 %d %s\r\n%s\r\n", + status, reason_phrase, datestring?datestring:""); + + log_debug(LD_DIRSERV,"Wrote status 'HTTP/1.0 %d %s'", status, reason_phrase); + connection_buf_add(buf, strlen(buf), TO_CONN(conn)); + + tor_free(datestring); + tor_free(buf); +} + +/** Write the header for an HTTP/1.0 response onto <b>conn</b>-\>outbuf, + * with <b>type</b> as the Content-Type. + * + * If <b>length</b> is nonnegative, it is the Content-Length. + * If <b>encoding</b> is provided, it is the Content-Encoding. + * If <b>cache_lifetime</b> is greater than 0, the content may be cached for + * up to cache_lifetime seconds. Otherwise, the content may not be cached. */ +static void +write_http_response_header_impl(dir_connection_t *conn, ssize_t length, + const char *type, const char *encoding, + const char *extra_headers, + long cache_lifetime) +{ + char date[RFC1123_TIME_LEN+1]; + time_t now = time(NULL); + buf_t *buf = buf_new_with_capacity(1024); + + tor_assert(conn); + + format_rfc1123_time(date, now); + + buf_add_printf(buf, "HTTP/1.0 200 OK\r\nDate: %s\r\n", date); + if (type) { + buf_add_printf(buf, "Content-Type: %s\r\n", type); + } + if (!is_local_addr(&conn->base_.addr)) { + /* Don't report the source address for a nearby/private connection. + * Otherwise we tend to mis-report in cases where incoming ports are + * being forwarded to a Tor server running behind the firewall. */ + buf_add_printf(buf, X_ADDRESS_HEADER "%s\r\n", conn->base_.address); + } + if (encoding) { + buf_add_printf(buf, "Content-Encoding: %s\r\n", encoding); + } + if (length >= 0) { + buf_add_printf(buf, "Content-Length: %ld\r\n", (long)length); + } + if (cache_lifetime > 0) { + char expbuf[RFC1123_TIME_LEN+1]; + format_rfc1123_time(expbuf, (time_t)(now + cache_lifetime)); + /* We could say 'Cache-control: max-age=%d' here if we start doing + * http/1.1 */ + buf_add_printf(buf, "Expires: %s\r\n", expbuf); + } else if (cache_lifetime == 0) { + /* We could say 'Cache-control: no-cache' here if we start doing + * http/1.1 */ + buf_add_string(buf, "Pragma: no-cache\r\n"); + } + if (extra_headers) { + buf_add_string(buf, extra_headers); + } + buf_add_string(buf, "\r\n"); + + connection_buf_add_buf(TO_CONN(conn), buf); + buf_free(buf); +} + +/** As write_http_response_header_impl, but sets encoding and content-typed + * based on whether the response will be <b>compressed</b> or not. */ +static void +write_http_response_headers(dir_connection_t *conn, ssize_t length, + compress_method_t method, + const char *extra_headers, long cache_lifetime) +{ + const char *methodname = compression_method_get_name(method); + const char *doctype; + if (method == NO_METHOD) + doctype = "text/plain"; + else + doctype = "application/octet-stream"; + write_http_response_header_impl(conn, length, + doctype, + methodname, + extra_headers, + cache_lifetime); +} + +/** As write_http_response_headers, but assumes extra_headers is NULL */ +static void +write_http_response_header(dir_connection_t *conn, ssize_t length, + compress_method_t method, + long cache_lifetime) +{ + write_http_response_headers(conn, length, method, NULL, cache_lifetime); +} + +/** Array of compression methods to use (if supported) for serving + * precompressed data, ordered from best to worst. */ +static compress_method_t srv_meth_pref_precompressed[] = { + LZMA_METHOD, + ZSTD_METHOD, + ZLIB_METHOD, + GZIP_METHOD, + NO_METHOD +}; + +/** Array of compression methods to use (if supported) for serving + * streamed data, ordered from best to worst. */ +static compress_method_t srv_meth_pref_streaming_compression[] = { + ZSTD_METHOD, + ZLIB_METHOD, + GZIP_METHOD, + NO_METHOD +}; + +/** Array of allowed compression methods to use (if supported) when receiving a + * response from a request that was required to be anonymous. */ +static compress_method_t client_meth_allowed_anonymous_compression[] = { + ZLIB_METHOD, + GZIP_METHOD, + NO_METHOD +}; + +/** Parse the compression methods listed in an Accept-Encoding header <b>h</b>, + * and convert them to a bitfield where compression method x is supported if + * and only if 1 << x is set in the bitfield. */ +STATIC unsigned +parse_accept_encoding_header(const char *h) +{ + unsigned result = (1u << NO_METHOD); + smartlist_t *methods = smartlist_new(); + smartlist_split_string(methods, h, ",", + SPLIT_SKIP_SPACE|SPLIT_STRIP_SPACE|SPLIT_IGNORE_BLANK, 0); + + SMARTLIST_FOREACH_BEGIN(methods, const char *, m) { + compress_method_t method = compression_method_get_by_name(m); + if (method != UNKNOWN_METHOD) { + tor_assert(((unsigned)method) < 8*sizeof(unsigned)); + result |= (1u << method); + } + } SMARTLIST_FOREACH_END(m); + SMARTLIST_FOREACH_BEGIN(methods, char *, m) { + tor_free(m); + } SMARTLIST_FOREACH_END(m); + smartlist_free(methods); + return result; +} + +/** Array of compression methods to use (if supported) for requesting + * compressed data, ordered from best to worst. */ +static compress_method_t client_meth_pref[] = { + LZMA_METHOD, + ZSTD_METHOD, + ZLIB_METHOD, + GZIP_METHOD, + NO_METHOD +}; + +/** Return a newly allocated string containing a comma separated list of + * supported encodings. */ +STATIC char * +accept_encoding_header(void) +{ + smartlist_t *methods = smartlist_new(); + char *header = NULL; + compress_method_t method; + unsigned i; + + for (i = 0; i < ARRAY_LENGTH(client_meth_pref); ++i) { + method = client_meth_pref[i]; + if (tor_compress_supports_method(method)) + smartlist_add(methods, (char *)compression_method_get_name(method)); + } + + header = smartlist_join_strings(methods, ", ", 0, NULL); + smartlist_free(methods); + + return header; +} + +/** Decide whether a client would accept the consensus we have. + * + * Clients can say they only want a consensus if it's signed by more + * than half the authorities in a list. They pass this list in + * the url as "...consensus/<b>fpr</b>+<b>fpr</b>+<b>fpr</b>". + * + * <b>fpr</b> may be an abbreviated fingerprint, i.e. only a left substring + * of the full authority identity digest. (Only strings of even length, + * i.e. encodings of full bytes, are handled correctly. In the case + * of an odd number of hex digits the last one is silently ignored.) + * + * Returns 1 if more than half of the requested authorities signed the + * consensus, 0 otherwise. + */ +int +client_likes_consensus(const struct consensus_cache_entry_t *ent, + const char *want_url) +{ + smartlist_t *voters = smartlist_new(); + int need_at_least; + int have = 0; + + if (consensus_cache_entry_get_voter_id_digests(ent, voters) != 0) { + smartlist_free(voters); + return 1; // We don't know the voters; assume the client won't mind. */ + } + + smartlist_t *want_authorities = smartlist_new(); + dir_split_resource_into_fingerprints(want_url, want_authorities, NULL, 0); + need_at_least = smartlist_len(want_authorities)/2+1; + + SMARTLIST_FOREACH_BEGIN(want_authorities, const char *, want_digest) { + + SMARTLIST_FOREACH_BEGIN(voters, const char *, digest) { + if (!strcasecmpstart(digest, want_digest)) { + have++; + break; + }; + } SMARTLIST_FOREACH_END(digest); + + /* early exit, if we already have enough */ + if (have >= need_at_least) + break; + } SMARTLIST_FOREACH_END(want_digest); + + SMARTLIST_FOREACH(want_authorities, char *, d, tor_free(d)); + smartlist_free(want_authorities); + SMARTLIST_FOREACH(voters, char *, cp, tor_free(cp)); + smartlist_free(voters); + return (have >= need_at_least); +} + +/** Return the compression level we should use for sending a compressed + * response of size <b>n_bytes</b>. */ +STATIC compression_level_t +choose_compression_level(ssize_t n_bytes) +{ + if (! have_been_under_memory_pressure()) { + return HIGH_COMPRESSION; /* we have plenty of RAM. */ + } else if (n_bytes < 0) { + return HIGH_COMPRESSION; /* unknown; might be big. */ + } else if (n_bytes < 1024) { + return LOW_COMPRESSION; + } else if (n_bytes < 2048) { + return MEDIUM_COMPRESSION; + } else { + return HIGH_COMPRESSION; + } +} + +/** Information passed to handle a GET request. */ +typedef struct get_handler_args_t { + /** Bitmask of compression methods that the client said (or implied) it + * supported. */ + unsigned compression_supported; + /** If nonzero, the time included an if-modified-since header with this + * value. */ + time_t if_modified_since; + /** String containing the requested URL or resource. */ + const char *url; + /** String containing the HTTP headers */ + const char *headers; +} get_handler_args_t; + +/** Entry for handling an HTTP GET request. + * + * This entry matches a request if "string" is equal to the requested + * resource, or if "is_prefix" is true and "string" is a prefix of the + * requested resource. + * + * The 'handler' function is called to handle the request. It receives + * an arguments structure, and must return 0 on success or -1 if we should + * close the connection. + **/ +typedef struct url_table_ent_s { + const char *string; + int is_prefix; + int (*handler)(dir_connection_t *conn, const get_handler_args_t *args); +} url_table_ent_t; + +static int handle_get_frontpage(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_current_consensus(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_status_vote(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_microdesc(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_descriptor(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_keys(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_hs_descriptor_v2(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_robots(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_networkstatus_bridges(dir_connection_t *conn, + const get_handler_args_t *args); + +/** Table for handling GET requests. */ +static const url_table_ent_t url_table[] = { + { "/tor/", 0, handle_get_frontpage }, + { "/tor/status-vote/current/consensus", 1, handle_get_current_consensus }, + { "/tor/status-vote/current/", 1, handle_get_status_vote }, + { "/tor/status-vote/next/", 1, handle_get_status_vote }, + { "/tor/micro/d/", 1, handle_get_microdesc }, + { "/tor/server/", 1, handle_get_descriptor }, + { "/tor/extra/", 1, handle_get_descriptor }, + { "/tor/keys/", 1, handle_get_keys }, + { "/tor/rendezvous2/", 1, handle_get_hs_descriptor_v2 }, + { "/tor/hs/3/", 1, handle_get_hs_descriptor_v3 }, + { "/tor/robots.txt", 0, handle_get_robots }, + { "/tor/networkstatus-bridges", 0, handle_get_networkstatus_bridges }, + { NULL, 0, NULL }, +}; + +/** Helper function: called when a dirserver gets a complete HTTP GET + * request. Look for a request for a directory or for a rendezvous + * service descriptor. On finding one, write a response into + * conn-\>outbuf. If the request is unrecognized, send a 404. + * Return 0 if we handled this successfully, or -1 if we need to close + * the connection. */ +MOCK_IMPL(STATIC int, +directory_handle_command_get,(dir_connection_t *conn, const char *headers, + const char *req_body, size_t req_body_len)) +{ + char *url, *url_mem, *header; + time_t if_modified_since = 0; + int zlib_compressed_in_url; + unsigned compression_methods_supported; + + /* We ignore the body of a GET request. */ + (void)req_body; + (void)req_body_len; + + log_debug(LD_DIRSERV,"Received GET command."); + + conn->base_.state = DIR_CONN_STATE_SERVER_WRITING; + + if (parse_http_url(headers, &url) < 0) { + write_short_http_response(conn, 400, "Bad request"); + return 0; + } + if ((header = http_get_header(headers, "If-Modified-Since: "))) { + struct tm tm; + if (parse_http_time(header, &tm) == 0) { + if (tor_timegm(&tm, &if_modified_since)<0) { + if_modified_since = 0; + } else { + log_debug(LD_DIRSERV, "If-Modified-Since is '%s'.", escaped(header)); + } + } + /* The correct behavior on a malformed If-Modified-Since header is to + * act as if no If-Modified-Since header had been given. */ + tor_free(header); + } + log_debug(LD_DIRSERV,"rewritten url as '%s'.", escaped(url)); + + url_mem = url; + { + size_t url_len = strlen(url); + + zlib_compressed_in_url = url_len > 2 && !strcmp(url+url_len-2, ".z"); + if (zlib_compressed_in_url) { + url[url_len-2] = '\0'; + } + } + + if ((header = http_get_header(headers, "Accept-Encoding: "))) { + compression_methods_supported = parse_accept_encoding_header(header); + tor_free(header); + } else { + compression_methods_supported = (1u << NO_METHOD); + } + if (zlib_compressed_in_url) { + compression_methods_supported |= (1u << ZLIB_METHOD); + } + + /* Remove all methods that we don't both support. */ + compression_methods_supported &= tor_compress_get_supported_method_bitmask(); + + get_handler_args_t args; + args.url = url; + args.headers = headers; + args.if_modified_since = if_modified_since; + args.compression_supported = compression_methods_supported; + + int i, result = -1; + for (i = 0; url_table[i].string; ++i) { + int match; + if (url_table[i].is_prefix) { + match = !strcmpstart(url, url_table[i].string); + } else { + match = !strcmp(url, url_table[i].string); + } + if (match) { + result = url_table[i].handler(conn, &args); + goto done; + } + } + + /* we didn't recognize the url */ + write_short_http_response(conn, 404, "Not found"); + result = 0; + + done: + tor_free(url_mem); + return result; +} + +/** Helper function for GET / or GET /tor/ + */ +static int +handle_get_frontpage(dir_connection_t *conn, const get_handler_args_t *args) +{ + (void) args; /* unused */ + const char *frontpage = get_dirportfrontpage(); + + if (frontpage) { + size_t dlen; + dlen = strlen(frontpage); + /* Let's return a disclaimer page (users shouldn't use V1 anymore, + and caches don't fetch '/', so this is safe). */ + + /* [We don't check for write_bucket_low here, since we want to serve + * this page no matter what.] */ + write_http_response_header_impl(conn, dlen, "text/html", "identity", + NULL, DIRPORTFRONTPAGE_CACHE_LIFETIME); + connection_buf_add(frontpage, dlen, TO_CONN(conn)); + } else { + write_short_http_response(conn, 404, "Not found"); + } + return 0; +} + +/** Warn that the cached consensus <b>consensus</b> of type + * <b>flavor</b> is too old and will not be served to clients. Rate-limit the + * warning to avoid logging an entry on every request. + */ +static void +warn_consensus_is_too_old(const struct consensus_cache_entry_t *consensus, + const char *flavor, time_t now) +{ +#define TOO_OLD_WARNING_INTERVAL (60*60) + static ratelim_t warned = RATELIM_INIT(TOO_OLD_WARNING_INTERVAL); + char timestamp[ISO_TIME_LEN+1]; + time_t valid_until; + char *dupes; + + if (consensus_cache_entry_get_valid_until(consensus, &valid_until)) + return; + + if ((dupes = rate_limit_log(&warned, now))) { + format_local_iso_time(timestamp, valid_until); + log_warn(LD_DIRSERV, "Our %s%sconsensus is too old, so we will not " + "serve it to clients. It was valid until %s local time and we " + "continued to serve it for up to 24 hours after it expired.%s", + flavor ? flavor : "", flavor ? " " : "", timestamp, dupes); + tor_free(dupes); + } +} + +/** + * Parse a single hex-encoded sha3-256 digest from <b>hex</b> into + * <b>digest</b>. Return 0 on success. On failure, report that the hash came + * from <b>location</b>, report that we are taking <b>action</b> with it, and + * return -1. + */ +static int +parse_one_diff_hash(uint8_t *digest, const char *hex, const char *location, + const char *action) +{ + if (base16_decode((char*)digest, DIGEST256_LEN, hex, strlen(hex)) == + DIGEST256_LEN) { + return 0; + } else { + log_fn(LOG_PROTOCOL_WARN, LD_DIR, + "%s contained bogus digest %s; %s.", + location, escaped(hex), action); + return -1; + } +} + +/** If there is an X-Or-Diff-From-Consensus header included in <b>headers</b>, + * set <b>digest_out<b> to a new smartlist containing every 256-bit + * hex-encoded digest listed in that header and return 0. Otherwise return + * -1. */ +static int +parse_or_diff_from_header(smartlist_t **digests_out, const char *headers) +{ + char *hdr = http_get_header(headers, X_OR_DIFF_FROM_CONSENSUS_HEADER); + if (hdr == NULL) { + return -1; + } + smartlist_t *hex_digests = smartlist_new(); + *digests_out = smartlist_new(); + smartlist_split_string(hex_digests, hdr, " ", + SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, -1); + SMARTLIST_FOREACH_BEGIN(hex_digests, const char *, hex) { + uint8_t digest[DIGEST256_LEN]; + if (!parse_one_diff_hash(digest, hex, "X-Or-Diff-From-Consensus header", + "ignoring")) { + smartlist_add(*digests_out, tor_memdup(digest, sizeof(digest))); + } + } SMARTLIST_FOREACH_END(hex); + SMARTLIST_FOREACH(hex_digests, char *, cp, tor_free(cp)); + smartlist_free(hex_digests); + tor_free(hdr); + return 0; +} + +/** Fallback compression method. The fallback compression method is used in + * case a client requests a non-compressed document. We only store compressed + * documents, so we use this compression method to fetch the document and let + * the spooling system do the streaming decompression. + */ +#define FALLBACK_COMPRESS_METHOD ZLIB_METHOD + +/** + * Try to find the best consensus diff possible in order to serve a client + * request for a diff from one of the consensuses in <b>digests</b> to the + * current consensus of flavor <b>flav</b>. The client supports the + * compression methods listed in the <b>compression_methods</b> bitfield: + * place the method chosen (if any) into <b>compression_used_out</b>. + */ +static struct consensus_cache_entry_t * +find_best_diff(const smartlist_t *digests, int flav, + unsigned compression_methods, + compress_method_t *compression_used_out) +{ + struct consensus_cache_entry_t *result = NULL; + + SMARTLIST_FOREACH_BEGIN(digests, const uint8_t *, diff_from) { + unsigned u; + for (u = 0; u < ARRAY_LENGTH(srv_meth_pref_precompressed); ++u) { + compress_method_t method = srv_meth_pref_precompressed[u]; + if (0 == (compression_methods & (1u<<method))) + continue; // client doesn't like this one, or we don't have it. + if (consdiffmgr_find_diff_from(&result, flav, DIGEST_SHA3_256, + diff_from, DIGEST256_LEN, + method) == CONSDIFF_AVAILABLE) { + tor_assert_nonfatal(result); + *compression_used_out = method; + return result; + } + } + } SMARTLIST_FOREACH_END(diff_from); + + SMARTLIST_FOREACH_BEGIN(digests, const uint8_t *, diff_from) { + if (consdiffmgr_find_diff_from(&result, flav, DIGEST_SHA3_256, diff_from, + DIGEST256_LEN, FALLBACK_COMPRESS_METHOD) == CONSDIFF_AVAILABLE) { + tor_assert_nonfatal(result); + *compression_used_out = FALLBACK_COMPRESS_METHOD; + return result; + } + } SMARTLIST_FOREACH_END(diff_from); + + return NULL; +} + +/** Lookup the cached consensus document by the flavor found in <b>flav</b>. + * The preferred set of compression methods should be listed in the + * <b>compression_methods</b> bitfield. The compression method chosen (if any) + * is stored in <b>compression_used_out</b>. */ +static struct consensus_cache_entry_t * +find_best_consensus(int flav, + unsigned compression_methods, + compress_method_t *compression_used_out) +{ + struct consensus_cache_entry_t *result = NULL; + unsigned u; + + for (u = 0; u < ARRAY_LENGTH(srv_meth_pref_precompressed); ++u) { + compress_method_t method = srv_meth_pref_precompressed[u]; + + if (0 == (compression_methods & (1u<<method))) + continue; + + if (consdiffmgr_find_consensus(&result, flav, + method) == CONSDIFF_AVAILABLE) { + tor_assert_nonfatal(result); + *compression_used_out = method; + return result; + } + } + + if (consdiffmgr_find_consensus(&result, flav, + FALLBACK_COMPRESS_METHOD) == CONSDIFF_AVAILABLE) { + tor_assert_nonfatal(result); + *compression_used_out = FALLBACK_COMPRESS_METHOD; + return result; + } + + return NULL; +} + +/** Try to find the best supported compression method possible from a given + * <b>compression_methods</b>. Return NO_METHOD if no mutually supported + * compression method could be found. */ +static compress_method_t +find_best_compression_method(unsigned compression_methods, int stream) +{ + unsigned u; + compress_method_t *methods; + size_t length; + + if (stream) { + methods = srv_meth_pref_streaming_compression; + length = ARRAY_LENGTH(srv_meth_pref_streaming_compression); + } else { + methods = srv_meth_pref_precompressed; + length = ARRAY_LENGTH(srv_meth_pref_precompressed); + } + + for (u = 0; u < length; ++u) { + compress_method_t method = methods[u]; + if (compression_methods & (1u<<method)) + return method; + } + + return NO_METHOD; +} + +/** Check if any of the digests in <b>digests</b> matches the latest consensus + * flavor (given in <b>flavor</b>) that we have available. */ +static int +digest_list_contains_best_consensus(consensus_flavor_t flavor, + const smartlist_t *digests) +{ + const networkstatus_t *ns = NULL; + + if (digests == NULL) + return 0; + + ns = networkstatus_get_latest_consensus_by_flavor(flavor); + + if (ns == NULL) + return 0; + + SMARTLIST_FOREACH_BEGIN(digests, const uint8_t *, digest) { + if (tor_memeq(ns->digest_sha3_as_signed, digest, DIGEST256_LEN)) + return 1; + } SMARTLIST_FOREACH_END(digest); + + return 0; +} + +/** Check if the given compression method is allowed for a connection that is + * supposed to be anonymous. Returns 1 if the compression method is allowed, + * otherwise 0. */ +STATIC int +allowed_anonymous_connection_compression_method(compress_method_t method) +{ + unsigned u; + + for (u = 0; u < ARRAY_LENGTH(client_meth_allowed_anonymous_compression); + ++u) { + compress_method_t allowed_method = + client_meth_allowed_anonymous_compression[u]; + + if (! tor_compress_supports_method(allowed_method)) + continue; + + if (method == allowed_method) + return 1; + } + + return 0; +} + +/** Log a warning when a remote server has sent us a document using a + * compression method that is not allowed for anonymous directory requests. */ +STATIC void +warn_disallowed_anonymous_compression_method(compress_method_t method) +{ + log_fn(LOG_PROTOCOL_WARN, LD_HTTP, + "Received a %s HTTP response, which is not " + "allowed for anonymous directory requests.", + compression_method_get_human_name(method)); +} + +/** Encodes the results of parsing a consensus request to figure out what + * consensus, and possibly what diffs, the user asked for. */ +typedef struct { + /** name of the flavor to retrieve. */ + char *flavor; + /** flavor to retrive, as enum. */ + consensus_flavor_t flav; + /** plus-separated list of authority fingerprints; see + * client_likes_consensus(). Aliases the URL in the request passed to + * parse_consensus_request(). */ + const char *want_fps; + /** Optionally, a smartlist of sha3 digests-as-signed of the consensuses + * to return a diff from. */ + smartlist_t *diff_from_digests; + /** If true, never send a full consensus. If there is no diff, send + * a 404 instead. */ + int diff_only; +} parsed_consensus_request_t; + +/** Remove all data held in <b>req</b>. Do not free <b>req</b> itself, since + * it is stack-allocated. */ +static void +parsed_consensus_request_clear(parsed_consensus_request_t *req) +{ + if (!req) + return; + tor_free(req->flavor); + if (req->diff_from_digests) { + SMARTLIST_FOREACH(req->diff_from_digests, uint8_t *, d, tor_free(d)); + smartlist_free(req->diff_from_digests); + } + memset(req, 0, sizeof(parsed_consensus_request_t)); +} + +/** + * Parse the URL and relevant headers of <b>args</b> for a current-consensus + * request to learn what flavor of consensus we want, what keys it must be + * signed with, and what diffs we would accept (or demand) instead. Return 0 + * on success and -1 on failure. + */ +static int +parse_consensus_request(parsed_consensus_request_t *out, + const get_handler_args_t *args) +{ + const char *url = args->url; + memset(out, 0, sizeof(parsed_consensus_request_t)); + out->flav = FLAV_NS; + + const char CONSENSUS_URL_PREFIX[] = "/tor/status-vote/current/consensus/"; + const char CONSENSUS_FLAVORED_PREFIX[] = + "/tor/status-vote/current/consensus-"; + + /* figure out the flavor if any, and who we wanted to sign the thing */ + const char *after_flavor = NULL; + + if (!strcmpstart(url, CONSENSUS_FLAVORED_PREFIX)) { + const char *f, *cp; + f = url + strlen(CONSENSUS_FLAVORED_PREFIX); + cp = strchr(f, '/'); + if (cp) { + after_flavor = cp+1; + out->flavor = tor_strndup(f, cp-f); + } else { + out->flavor = tor_strdup(f); + } + int flav = networkstatus_parse_flavor_name(out->flavor); + if (flav < 0) + flav = FLAV_NS; + out->flav = flav; + } else { + if (!strcmpstart(url, CONSENSUS_URL_PREFIX)) + after_flavor = url+strlen(CONSENSUS_URL_PREFIX); + } + + /* see whether we've been asked explicitly for a diff from an older + * consensus. (The user might also have said that a diff would be okay, + * via X-Or-Diff-From-Consensus */ + const char DIFF_COMPONENT[] = "diff/"; + char *diff_hash_in_url = NULL; + if (after_flavor && !strcmpstart(after_flavor, DIFF_COMPONENT)) { + after_flavor += strlen(DIFF_COMPONENT); + const char *cp = strchr(after_flavor, '/'); + if (cp) { + diff_hash_in_url = tor_strndup(after_flavor, cp-after_flavor); + out->want_fps = cp+1; + } else { + diff_hash_in_url = tor_strdup(after_flavor); + out->want_fps = NULL; + } + } else { + out->want_fps = after_flavor; + } + + if (diff_hash_in_url) { + uint8_t diff_from[DIGEST256_LEN]; + out->diff_from_digests = smartlist_new(); + out->diff_only = 1; + int ok = !parse_one_diff_hash(diff_from, diff_hash_in_url, "URL", + "rejecting"); + tor_free(diff_hash_in_url); + if (ok) { + smartlist_add(out->diff_from_digests, + tor_memdup(diff_from, DIGEST256_LEN)); + } else { + return -1; + } + } else { + parse_or_diff_from_header(&out->diff_from_digests, args->headers); + } + + return 0; +} + +/** Helper function for GET /tor/status-vote/current/consensus + */ +static int +handle_get_current_consensus(dir_connection_t *conn, + const get_handler_args_t *args) +{ + const compress_method_t compress_method = + find_best_compression_method(args->compression_supported, 0); + const time_t if_modified_since = args->if_modified_since; + int clear_spool = 0; + + /* v3 network status fetch. */ + long lifetime = NETWORKSTATUS_CACHE_LIFETIME; + + time_t now = time(NULL); + parsed_consensus_request_t req; + + if (parse_consensus_request(&req, args) < 0) { + write_short_http_response(conn, 404, "Couldn't parse request"); + goto done; + } + + if (digest_list_contains_best_consensus(req.flav, + req.diff_from_digests)) { + write_short_http_response(conn, 304, "Not modified"); + geoip_note_ns_response(GEOIP_REJECT_NOT_MODIFIED); + goto done; + } + + struct consensus_cache_entry_t *cached_consensus = NULL; + + compress_method_t compression_used = NO_METHOD; + if (req.diff_from_digests) { + cached_consensus = find_best_diff(req.diff_from_digests, req.flav, + args->compression_supported, + &compression_used); + } + + if (req.diff_only && !cached_consensus) { + write_short_http_response(conn, 404, "No such diff available"); + // XXXX warn_consensus_is_too_old(v, req.flavor, now); + geoip_note_ns_response(GEOIP_REJECT_NOT_FOUND); + goto done; + } + + if (! cached_consensus) { + cached_consensus = find_best_consensus(req.flav, + args->compression_supported, + &compression_used); + } + + time_t fresh_until, valid_until; + int have_fresh_until = 0, have_valid_until = 0; + if (cached_consensus) { + have_fresh_until = + !consensus_cache_entry_get_fresh_until(cached_consensus, &fresh_until); + have_valid_until = + !consensus_cache_entry_get_valid_until(cached_consensus, &valid_until); + } + + if (cached_consensus && have_valid_until && + !networkstatus_valid_until_is_reasonably_live(valid_until, now)) { + write_short_http_response(conn, 404, "Consensus is too old"); + warn_consensus_is_too_old(cached_consensus, req.flavor, now); + geoip_note_ns_response(GEOIP_REJECT_NOT_FOUND); + goto done; + } + + if (cached_consensus && req.want_fps && + !client_likes_consensus(cached_consensus, req.want_fps)) { + write_short_http_response(conn, 404, "Consensus not signed by sufficient " + "number of requested authorities"); + geoip_note_ns_response(GEOIP_REJECT_NOT_ENOUGH_SIGS); + goto done; + } + + conn->spool = smartlist_new(); + clear_spool = 1; + { + spooled_resource_t *spooled; + if (cached_consensus) { + spooled = spooled_resource_new_from_cache_entry(cached_consensus); + smartlist_add(conn->spool, spooled); + } + } + + lifetime = (have_fresh_until && fresh_until > now) ? fresh_until - now : 0; + + size_t size_guess = 0; + int n_expired = 0; + dirserv_spool_remove_missing_and_guess_size(conn, if_modified_since, + compress_method != NO_METHOD, + &size_guess, + &n_expired); + + if (!smartlist_len(conn->spool) && !n_expired) { + write_short_http_response(conn, 404, "Not found"); + geoip_note_ns_response(GEOIP_REJECT_NOT_FOUND); + goto done; + } else if (!smartlist_len(conn->spool)) { + write_short_http_response(conn, 304, "Not modified"); + geoip_note_ns_response(GEOIP_REJECT_NOT_MODIFIED); + goto done; + } + + if (global_write_bucket_low(TO_CONN(conn), size_guess, 2)) { + log_debug(LD_DIRSERV, + "Client asked for network status lists, but we've been " + "writing too many bytes lately. Sending 503 Dir busy."); + write_short_http_response(conn, 503, "Directory busy, try again later"); + geoip_note_ns_response(GEOIP_REJECT_BUSY); + goto done; + } + + tor_addr_t addr; + if (tor_addr_parse(&addr, (TO_CONN(conn))->address) >= 0) { + geoip_note_client_seen(GEOIP_CLIENT_NETWORKSTATUS, + &addr, NULL, + time(NULL)); + geoip_note_ns_response(GEOIP_SUCCESS); + /* Note that a request for a network status has started, so that we + * can measure the download time later on. */ + if (conn->dirreq_id) + geoip_start_dirreq(conn->dirreq_id, size_guess, DIRREQ_TUNNELED); + else + geoip_start_dirreq(TO_CONN(conn)->global_identifier, size_guess, + DIRREQ_DIRECT); + } + + /* Use this header to tell caches that the response depends on the + * X-Or-Diff-From-Consensus header (or lack thereof). */ + const char vary_header[] = "Vary: X-Or-Diff-From-Consensus\r\n"; + + clear_spool = 0; + + // The compress_method might have been NO_METHOD, but we store the data + // compressed. Decompress them using `compression_used`. See fallback code in + // find_best_consensus() and find_best_diff(). + write_http_response_headers(conn, -1, + compress_method == NO_METHOD ? + NO_METHOD : compression_used, + vary_header, + smartlist_len(conn->spool) == 1 ? lifetime : 0); + + if (compress_method == NO_METHOD && smartlist_len(conn->spool)) + conn->compress_state = tor_compress_new(0, compression_used, + HIGH_COMPRESSION); + + /* Prime the connection with some data. */ + const int initial_flush_result = connection_dirserv_flushed_some(conn); + tor_assert_nonfatal(initial_flush_result == 0); + goto done; + + done: + parsed_consensus_request_clear(&req); + if (clear_spool) { + dir_conn_clear_spool(conn); + } + return 0; +} + +/** Helper function for GET /tor/status-vote/{current,next}/... + */ +static int +handle_get_status_vote(dir_connection_t *conn, const get_handler_args_t *args) +{ + const char *url = args->url; + { + ssize_t body_len = 0; + ssize_t estimated_len = 0; + int lifetime = 60; /* XXXX?? should actually use vote intervals. */ + /* This smartlist holds strings that we can compress on the fly. */ + smartlist_t *items = smartlist_new(); + /* This smartlist holds cached_dir_t objects that have a precompressed + * deflated version. */ + smartlist_t *dir_items = smartlist_new(); + dirvote_dirreq_get_status_vote(url, items, dir_items); + if (!smartlist_len(dir_items) && !smartlist_len(items)) { + write_short_http_response(conn, 404, "Not found"); + goto vote_done; + } + + /* We're sending items from at most one kind of source */ + tor_assert_nonfatal(smartlist_len(items) == 0 || + smartlist_len(dir_items) == 0); + + int streaming; + unsigned mask; + if (smartlist_len(items)) { + /* We're taking strings and compressing them on the fly. */ + streaming = 1; + mask = ~0u; + } else { + /* We're taking cached_dir_t objects. We only have them uncompressed + * or deflated. */ + streaming = 0; + mask = (1u<<NO_METHOD) | (1u<<ZLIB_METHOD); + } + const compress_method_t compress_method = find_best_compression_method( + args->compression_supported&mask, streaming); + + SMARTLIST_FOREACH(dir_items, cached_dir_t *, d, + body_len += compress_method != NO_METHOD ? + d->dir_compressed_len : d->dir_len); + estimated_len += body_len; + SMARTLIST_FOREACH(items, const char *, item, { + size_t ln = strlen(item); + if (compress_method != NO_METHOD) { + estimated_len += ln/2; + } else { + body_len += ln; estimated_len += ln; + } + }); + + if (global_write_bucket_low(TO_CONN(conn), estimated_len, 2)) { + write_short_http_response(conn, 503, "Directory busy, try again later"); + goto vote_done; + } + write_http_response_header(conn, body_len ? body_len : -1, + compress_method, + lifetime); + + if (smartlist_len(items)) { + if (compress_method != NO_METHOD) { + conn->compress_state = tor_compress_new(1, compress_method, + choose_compression_level(estimated_len)); + SMARTLIST_FOREACH(items, const char *, c, + connection_buf_add_compress(c, strlen(c), conn, 0)); + connection_buf_add_compress("", 0, conn, 1); + } else { + SMARTLIST_FOREACH(items, const char *, c, + connection_buf_add(c, strlen(c), TO_CONN(conn))); + } + } else { + SMARTLIST_FOREACH(dir_items, cached_dir_t *, d, + connection_buf_add(compress_method != NO_METHOD ? + d->dir_compressed : d->dir, + compress_method != NO_METHOD ? + d->dir_compressed_len : d->dir_len, + TO_CONN(conn))); + } + vote_done: + smartlist_free(items); + smartlist_free(dir_items); + goto done; + } + done: + return 0; +} + +/** Helper function for GET /tor/micro/d/... + */ +static int +handle_get_microdesc(dir_connection_t *conn, const get_handler_args_t *args) +{ + const char *url = args->url; + const compress_method_t compress_method = + find_best_compression_method(args->compression_supported, 1); + int clear_spool = 1; + { + conn->spool = smartlist_new(); + + dir_split_resource_into_spoolable(url+strlen("/tor/micro/d/"), + DIR_SPOOL_MICRODESC, + conn->spool, NULL, + DSR_DIGEST256|DSR_BASE64|DSR_SORT_UNIQ); + + size_t size_guess = 0; + dirserv_spool_remove_missing_and_guess_size(conn, 0, + compress_method != NO_METHOD, + &size_guess, NULL); + if (smartlist_len(conn->spool) == 0) { + write_short_http_response(conn, 404, "Not found"); + goto done; + } + if (global_write_bucket_low(TO_CONN(conn), size_guess, 2)) { + log_info(LD_DIRSERV, + "Client asked for server descriptors, but we've been " + "writing too many bytes lately. Sending 503 Dir busy."); + write_short_http_response(conn, 503, "Directory busy, try again later"); + goto done; + } + + clear_spool = 0; + write_http_response_header(conn, -1, + compress_method, + MICRODESC_CACHE_LIFETIME); + + if (compress_method != NO_METHOD) + conn->compress_state = tor_compress_new(1, compress_method, + choose_compression_level(size_guess)); + + const int initial_flush_result = connection_dirserv_flushed_some(conn); + tor_assert_nonfatal(initial_flush_result == 0); + goto done; + } + + done: + if (clear_spool) { + dir_conn_clear_spool(conn); + } + return 0; +} + +/** Helper function for GET /tor/{server,extra}/... + */ +static int +handle_get_descriptor(dir_connection_t *conn, const get_handler_args_t *args) +{ + const char *url = args->url; + const compress_method_t compress_method = + find_best_compression_method(args->compression_supported, 1); + const or_options_t *options = get_options(); + int clear_spool = 1; + if (!strcmpstart(url,"/tor/server/") || + (!options->BridgeAuthoritativeDir && + !options->BridgeRelay && !strcmpstart(url,"/tor/extra/"))) { + int res; + const char *msg = NULL; + int cache_lifetime = 0; + int is_extra = !strcmpstart(url,"/tor/extra/"); + url += is_extra ? strlen("/tor/extra/") : strlen("/tor/server/"); + dir_spool_source_t source; + time_t publish_cutoff = 0; + if (!strcmpstart(url, "d/")) { + source = + is_extra ? DIR_SPOOL_EXTRA_BY_DIGEST : DIR_SPOOL_SERVER_BY_DIGEST; + } else { + source = + is_extra ? DIR_SPOOL_EXTRA_BY_FP : DIR_SPOOL_SERVER_BY_FP; + /* We only want to apply a publish cutoff when we're requesting + * resources by fingerprint. */ + publish_cutoff = time(NULL) - ROUTER_MAX_AGE_TO_PUBLISH; + } + + conn->spool = smartlist_new(); + res = dirserv_get_routerdesc_spool(conn->spool, url, + source, + connection_dir_is_encrypted(conn), + &msg); + + if (!strcmpstart(url, "all")) { + cache_lifetime = FULL_DIR_CACHE_LIFETIME; + } else if (smartlist_len(conn->spool) == 1) { + cache_lifetime = ROUTERDESC_BY_DIGEST_CACHE_LIFETIME; + } + + size_t size_guess = 0; + int n_expired = 0; + dirserv_spool_remove_missing_and_guess_size(conn, publish_cutoff, + compress_method != NO_METHOD, + &size_guess, &n_expired); + + /* If we are the bridge authority and the descriptor is a bridge + * descriptor, remember that we served this descriptor for desc stats. */ + /* XXXX it's a bit of a kludge to have this here. */ + if (get_options()->BridgeAuthoritativeDir && + source == DIR_SPOOL_SERVER_BY_FP) { + SMARTLIST_FOREACH_BEGIN(conn->spool, spooled_resource_t *, spooled) { + const routerinfo_t *router = + router_get_by_id_digest((const char *)spooled->digest); + /* router can be NULL here when the bridge auth is asked for its own + * descriptor. */ + if (router && router->purpose == ROUTER_PURPOSE_BRIDGE) + rep_hist_note_desc_served(router->cache_info.identity_digest); + } SMARTLIST_FOREACH_END(spooled); + } + + if (res < 0 || size_guess == 0 || smartlist_len(conn->spool) == 0) { + if (msg == NULL) + msg = "Not found"; + write_short_http_response(conn, 404, msg); + } else { + if (global_write_bucket_low(TO_CONN(conn), size_guess, 2)) { + log_info(LD_DIRSERV, + "Client asked for server descriptors, but we've been " + "writing too many bytes lately. Sending 503 Dir busy."); + write_short_http_response(conn, 503, + "Directory busy, try again later"); + dir_conn_clear_spool(conn); + goto done; + } + write_http_response_header(conn, -1, compress_method, cache_lifetime); + if (compress_method != NO_METHOD) + conn->compress_state = tor_compress_new(1, compress_method, + choose_compression_level(size_guess)); + clear_spool = 0; + /* Prime the connection with some data. */ + int initial_flush_result = connection_dirserv_flushed_some(conn); + tor_assert_nonfatal(initial_flush_result == 0); + } + goto done; + } + done: + if (clear_spool) + dir_conn_clear_spool(conn); + return 0; +} + +/** Helper function for GET /tor/keys/... + */ +static int +handle_get_keys(dir_connection_t *conn, const get_handler_args_t *args) +{ + const char *url = args->url; + const compress_method_t compress_method = + find_best_compression_method(args->compression_supported, 1); + const time_t if_modified_since = args->if_modified_since; + { + smartlist_t *certs = smartlist_new(); + ssize_t len = -1; + if (!strcmp(url, "/tor/keys/all")) { + authority_cert_get_all(certs); + } else if (!strcmp(url, "/tor/keys/authority")) { + authority_cert_t *cert = get_my_v3_authority_cert(); + if (cert) + smartlist_add(certs, cert); + } else if (!strcmpstart(url, "/tor/keys/fp/")) { + smartlist_t *fps = smartlist_new(); + dir_split_resource_into_fingerprints(url+strlen("/tor/keys/fp/"), + fps, NULL, + DSR_HEX|DSR_SORT_UNIQ); + SMARTLIST_FOREACH(fps, char *, d, { + authority_cert_t *c = authority_cert_get_newest_by_id(d); + if (c) smartlist_add(certs, c); + tor_free(d); + }); + smartlist_free(fps); + } else if (!strcmpstart(url, "/tor/keys/sk/")) { + smartlist_t *fps = smartlist_new(); + dir_split_resource_into_fingerprints(url+strlen("/tor/keys/sk/"), + fps, NULL, + DSR_HEX|DSR_SORT_UNIQ); + SMARTLIST_FOREACH(fps, char *, d, { + authority_cert_t *c = authority_cert_get_by_sk_digest(d); + if (c) smartlist_add(certs, c); + tor_free(d); + }); + smartlist_free(fps); + } else if (!strcmpstart(url, "/tor/keys/fp-sk/")) { + smartlist_t *fp_sks = smartlist_new(); + dir_split_resource_into_fingerprint_pairs(url+strlen("/tor/keys/fp-sk/"), + fp_sks); + SMARTLIST_FOREACH(fp_sks, fp_pair_t *, pair, { + authority_cert_t *c = authority_cert_get_by_digests(pair->first, + pair->second); + if (c) smartlist_add(certs, c); + tor_free(pair); + }); + smartlist_free(fp_sks); + } else { + write_short_http_response(conn, 400, "Bad request"); + goto keys_done; + } + if (!smartlist_len(certs)) { + write_short_http_response(conn, 404, "Not found"); + goto keys_done; + } + SMARTLIST_FOREACH(certs, authority_cert_t *, c, + if (c->cache_info.published_on < if_modified_since) + SMARTLIST_DEL_CURRENT(certs, c)); + if (!smartlist_len(certs)) { + write_short_http_response(conn, 304, "Not modified"); + goto keys_done; + } + len = 0; + SMARTLIST_FOREACH(certs, authority_cert_t *, c, + len += c->cache_info.signed_descriptor_len); + + if (global_write_bucket_low(TO_CONN(conn), + compress_method != NO_METHOD ? len/2 : len, + 2)) { + write_short_http_response(conn, 503, "Directory busy, try again later"); + goto keys_done; + } + + write_http_response_header(conn, + compress_method != NO_METHOD ? -1 : len, + compress_method, + 60*60); + if (compress_method != NO_METHOD) { + conn->compress_state = tor_compress_new(1, compress_method, + choose_compression_level(len)); + SMARTLIST_FOREACH(certs, authority_cert_t *, c, + connection_buf_add_compress( + c->cache_info.signed_descriptor_body, + c->cache_info.signed_descriptor_len, + conn, 0)); + connection_buf_add_compress("", 0, conn, 1); + } else { + SMARTLIST_FOREACH(certs, authority_cert_t *, c, + connection_buf_add(c->cache_info.signed_descriptor_body, + c->cache_info.signed_descriptor_len, + TO_CONN(conn))); + } + keys_done: + smartlist_free(certs); + goto done; + } + done: + return 0; +} + +/** Helper function for GET /tor/rendezvous2/ + */ +static int +handle_get_hs_descriptor_v2(dir_connection_t *conn, + const get_handler_args_t *args) +{ + const char *url = args->url; + if (connection_dir_is_encrypted(conn)) { + /* Handle v2 rendezvous descriptor fetch request. */ + const char *descp; + const char *query = url + strlen("/tor/rendezvous2/"); + if (rend_valid_descriptor_id(query)) { + log_info(LD_REND, "Got a v2 rendezvous descriptor request for ID '%s'", + safe_str(escaped(query))); + switch (rend_cache_lookup_v2_desc_as_dir(query, &descp)) { + case 1: /* valid */ + write_http_response_header(conn, strlen(descp), NO_METHOD, 0); + connection_buf_add(descp, strlen(descp), TO_CONN(conn)); + break; + case 0: /* well-formed but not present */ + write_short_http_response(conn, 404, "Not found"); + break; + case -1: /* not well-formed */ + write_short_http_response(conn, 400, "Bad request"); + break; + } + } else { /* not well-formed */ + write_short_http_response(conn, 400, "Bad request"); + } + goto done; + } else { + /* Not encrypted! */ + write_short_http_response(conn, 404, "Not found"); + } + done: + return 0; +} + +/** Helper function for GET /tor/hs/3/<z>. Only for version 3. + */ +STATIC int +handle_get_hs_descriptor_v3(dir_connection_t *conn, + const get_handler_args_t *args) +{ + int retval; + const char *desc_str = NULL; + const char *pubkey_str = NULL; + const char *url = args->url; + + /* Reject unencrypted dir connections */ + if (!connection_dir_is_encrypted(conn)) { + write_short_http_response(conn, 404, "Not found"); + goto done; + } + + /* After the path prefix follows the base64 encoded blinded pubkey which we + * use to get the descriptor from the cache. Skip the prefix and get the + * pubkey. */ + tor_assert(!strcmpstart(url, "/tor/hs/3/")); + pubkey_str = url + strlen("/tor/hs/3/"); + retval = hs_cache_lookup_as_dir(HS_VERSION_THREE, + pubkey_str, &desc_str); + if (retval <= 0 || desc_str == NULL) { + write_short_http_response(conn, 404, "Not found"); + goto done; + } + + /* Found requested descriptor! Pass it to this nice client. */ + write_http_response_header(conn, strlen(desc_str), NO_METHOD, 0); + connection_buf_add(desc_str, strlen(desc_str), TO_CONN(conn)); + + done: + return 0; +} + +/** Helper function for GET /tor/networkstatus-bridges + */ +static int +handle_get_networkstatus_bridges(dir_connection_t *conn, + const get_handler_args_t *args) +{ + const char *headers = args->headers; + + const or_options_t *options = get_options(); + if (options->BridgeAuthoritativeDir && + options->BridgePassword_AuthDigest_ && + connection_dir_is_encrypted(conn)) { + char *status; + char digest[DIGEST256_LEN]; + + char *header = http_get_header(headers, "Authorization: Basic "); + if (header) + crypto_digest256(digest, header, strlen(header), DIGEST_SHA256); + + /* now make sure the password is there and right */ + if (!header || + tor_memneq(digest, + options->BridgePassword_AuthDigest_, DIGEST256_LEN)) { + write_short_http_response(conn, 404, "Not found"); + tor_free(header); + goto done; + } + tor_free(header); + + /* all happy now. send an answer. */ + status = networkstatus_getinfo_by_purpose("bridge", time(NULL)); + size_t dlen = strlen(status); + write_http_response_header(conn, dlen, NO_METHOD, 0); + connection_buf_add(status, dlen, TO_CONN(conn)); + tor_free(status); + goto done; + } + done: + return 0; +} + +/** Helper function for GET robots.txt or /tor/robots.txt */ +static int +handle_get_robots(dir_connection_t *conn, const get_handler_args_t *args) +{ + (void)args; + { + const char robots[] = "User-agent: *\r\nDisallow: /\r\n"; + size_t len = strlen(robots); + write_http_response_header(conn, len, NO_METHOD, ROBOTS_CACHE_LIFETIME); + connection_buf_add(robots, len, TO_CONN(conn)); + } + return 0; +} + +/* Given the <b>url</b> from a POST request, try to extract the version number + * using the provided <b>prefix</b>. The version should be after the prefix and + * ending with the separator "/". For instance: + * /tor/hs/3/publish + * + * On success, <b>end_pos</b> points to the position right after the version + * was found. On error, it is set to NULL. + * + * Return version on success else negative value. */ +STATIC int +parse_hs_version_from_post(const char *url, const char *prefix, + const char **end_pos) +{ + int ok; + unsigned long version; + const char *start; + char *end = NULL; + + tor_assert(url); + tor_assert(prefix); + tor_assert(end_pos); + + /* Check if the prefix does start the url. */ + if (strcmpstart(url, prefix)) { + goto err; + } + /* Move pointer to the end of the prefix string. */ + start = url + strlen(prefix); + /* Try this to be the HS version and if we are still at the separator, next + * will be move to the right value. */ + version = tor_parse_long(start, 10, 0, INT_MAX, &ok, &end); + if (!ok) { + goto err; + } + + *end_pos = end; + return (int) version; + err: + *end_pos = NULL; + return -1; +} + +/* Handle the POST request for a hidden service descripror. The request is in + * <b>url</b>, the body of the request is in <b>body</b>. Return 200 on success + * else return 400 indicating a bad request. */ +STATIC int +handle_post_hs_descriptor(const char *url, const char *body) +{ + int version; + const char *end_pos; + + tor_assert(url); + tor_assert(body); + + version = parse_hs_version_from_post(url, "/tor/hs/", &end_pos); + if (version < 0) { + goto err; + } + + /* We have a valid version number, now make sure it's a publish request. Use + * the end position just after the version and check for the command. */ + if (strcmpstart(end_pos, "/publish")) { + goto err; + } + + switch (version) { + case HS_VERSION_THREE: + if (hs_cache_store_as_dir(body) < 0) { + goto err; + } + log_info(LD_REND, "Publish request for HS descriptor handled " + "successfully."); + break; + default: + /* Unsupported version, return a bad request. */ + goto err; + } + + return 200; + err: + /* Bad request. */ + return 400; +} + +/** Helper function: called when a dirserver gets a complete HTTP POST + * request. Look for an uploaded server descriptor or rendezvous + * service descriptor. On finding one, process it and write a + * response into conn-\>outbuf. If the request is unrecognized, send a + * 400. Always return 0. */ +MOCK_IMPL(STATIC int, +directory_handle_command_post,(dir_connection_t *conn, const char *headers, + const char *body, size_t body_len)) +{ + char *url = NULL; + const or_options_t *options = get_options(); + + log_debug(LD_DIRSERV,"Received POST command."); + + conn->base_.state = DIR_CONN_STATE_SERVER_WRITING; + + if (!public_server_mode(options)) { + log_info(LD_DIR, "Rejected dir post request from %s " + "since we're not a public relay.", conn->base_.address); + write_short_http_response(conn, 503, "Not acting as a public relay"); + goto done; + } + + if (parse_http_url(headers, &url) < 0) { + write_short_http_response(conn, 400, "Bad request"); + return 0; + } + log_debug(LD_DIRSERV,"rewritten url as '%s'.", escaped(url)); + + /* Handle v2 rendezvous service publish request. */ + if (connection_dir_is_encrypted(conn) && + !strcmpstart(url,"/tor/rendezvous2/publish")) { + if (rend_cache_store_v2_desc_as_dir(body) < 0) { + log_warn(LD_REND, "Rejected v2 rend descriptor (body size %d) from %s.", + (int)body_len, conn->base_.address); + write_short_http_response(conn, 400, + "Invalid v2 service descriptor rejected"); + } else { + write_short_http_response(conn, 200, "Service descriptor (v2) stored"); + log_info(LD_REND, "Handled v2 rendezvous descriptor post: accepted"); + } + goto done; + } + + /* Handle HS descriptor publish request. */ + /* XXX: This should be disabled with a consensus param until we want to + * the prop224 be deployed and thus use. */ + if (connection_dir_is_encrypted(conn) && !strcmpstart(url, "/tor/hs/")) { + const char *msg = "HS descriptor stored successfully."; + + /* We most probably have a publish request for an HS descriptor. */ + int code = handle_post_hs_descriptor(url, body); + if (code != 200) { + msg = "Invalid HS descriptor. Rejected."; + } + write_short_http_response(conn, code, msg); + goto done; + } + + if (!authdir_mode(options)) { + /* we just provide cached directories; we don't want to + * receive anything. */ + write_short_http_response(conn, 400, "Nonauthoritative directory does not " + "accept posted server descriptors"); + goto done; + } + + if (authdir_mode(options) && + !strcmp(url,"/tor/")) { /* server descriptor post */ + const char *msg = "[None]"; + uint8_t purpose = authdir_mode_bridge(options) ? + ROUTER_PURPOSE_BRIDGE : ROUTER_PURPOSE_GENERAL; + was_router_added_t r = dirserv_add_multiple_descriptors(body, purpose, + conn->base_.address, &msg); + tor_assert(msg); + + if (r == ROUTER_ADDED_SUCCESSFULLY) { + write_short_http_response(conn, 200, msg); + } else if (WRA_WAS_OUTDATED(r)) { + write_http_response_header_impl(conn, -1, NULL, NULL, + "X-Descriptor-Not-New: Yes\r\n", -1); + } else { + log_info(LD_DIRSERV, + "Rejected router descriptor or extra-info from %s " + "(\"%s\").", + conn->base_.address, msg); + write_short_http_response(conn, 400, msg); + } + goto done; + } + + if (authdir_mode_v3(options) && + !strcmp(url,"/tor/post/vote")) { /* v3 networkstatus vote */ + const char *msg = "OK"; + int status; + if (dirvote_add_vote(body, &msg, &status)) { + write_short_http_response(conn, status, "Vote stored"); + } else { + tor_assert(msg); + log_warn(LD_DIRSERV, "Rejected vote from %s (\"%s\").", + conn->base_.address, msg); + write_short_http_response(conn, status, msg); + } + goto done; + } + + if (authdir_mode_v3(options) && + !strcmp(url,"/tor/post/consensus-signature")) { /* sigs on consensus. */ + const char *msg = NULL; + if (dirvote_add_signatures(body, conn->base_.address, &msg)>=0) { + write_short_http_response(conn, 200, msg?msg:"Signatures stored"); + } else { + log_warn(LD_DIR, "Unable to store signatures posted by %s: %s", + conn->base_.address, msg?msg:"???"); + write_short_http_response(conn, 400, + msg?msg:"Unable to store signatures"); + } + goto done; + } + + /* we didn't recognize the url */ + write_short_http_response(conn, 404, "Not found"); + + done: + tor_free(url); + return 0; +} + +/** Called when a dirserver receives data on a directory connection; + * looks for an HTTP request. If the request is complete, remove it + * from the inbuf, try to process it; otherwise, leave it on the + * buffer. Return a 0 on success, or -1 on error. + */ +STATIC int +directory_handle_command(dir_connection_t *conn) +{ + char *headers=NULL, *body=NULL; + size_t body_len=0; + int r; + + tor_assert(conn); + tor_assert(conn->base_.type == CONN_TYPE_DIR); + + switch (connection_fetch_from_buf_http(TO_CONN(conn), + &headers, MAX_HEADERS_SIZE, + &body, &body_len, MAX_DIR_UL_SIZE, 0)) { + case -1: /* overflow */ + log_warn(LD_DIRSERV, + "Request too large from address '%s' to DirPort. Closing.", + safe_str(conn->base_.address)); + return -1; + case 0: + log_debug(LD_DIRSERV,"command not all here yet."); + return 0; + /* case 1, fall through */ + } + + http_set_address_origin(headers, TO_CONN(conn)); + // we should escape headers here as well, + // but we can't call escaped() twice, as it uses the same buffer + //log_debug(LD_DIRSERV,"headers %s, body %s.", headers, escaped(body)); + + if (!strncasecmp(headers,"GET",3)) + r = directory_handle_command_get(conn, headers, body, body_len); + else if (!strncasecmp(headers,"POST",4)) + r = directory_handle_command_post(conn, headers, body, body_len); + else { + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, + "Got headers %s with unknown command. Closing.", + escaped(headers)); + r = -1; + } + + tor_free(headers); tor_free(body); + return r; +} + +/** Write handler for directory connections; called when all data has + * been flushed. Close the connection or wait for a response as + * appropriate. + */ +int +connection_dir_finished_flushing(dir_connection_t *conn) +{ + tor_assert(conn); + tor_assert(conn->base_.type == CONN_TYPE_DIR); + + /* Note that we have finished writing the directory response. For direct + * connections this means we're done; for tunneled connections it's only + * an intermediate step. */ + if (conn->dirreq_id) + geoip_change_dirreq_state(conn->dirreq_id, DIRREQ_TUNNELED, + DIRREQ_FLUSHING_DIR_CONN_FINISHED); + else + geoip_change_dirreq_state(TO_CONN(conn)->global_identifier, + DIRREQ_DIRECT, + DIRREQ_FLUSHING_DIR_CONN_FINISHED); + switch (conn->base_.state) { + case DIR_CONN_STATE_CONNECTING: + case DIR_CONN_STATE_CLIENT_SENDING: + log_debug(LD_DIR,"client finished sending command."); + conn->base_.state = DIR_CONN_STATE_CLIENT_READING; + return 0; + case DIR_CONN_STATE_SERVER_WRITING: + if (conn->spool) { + log_warn(LD_BUG, "Emptied a dirserv buffer, but it's still spooling!"); + connection_mark_for_close(TO_CONN(conn)); + } else { + log_debug(LD_DIRSERV, "Finished writing server response. Closing."); + connection_mark_for_close(TO_CONN(conn)); + } + return 0; + default: + log_warn(LD_BUG,"called in unexpected state %d.", + conn->base_.state); + tor_fragile_assert(); + return -1; + } + return 0; +} + +/* We just got a new consensus! If there are other in-progress requests + * for this consensus flavor (for example because we launched several in + * parallel), cancel them. + * + * We do this check here (not just in + * connection_ap_handshake_attach_circuit()) to handle the edge case where + * a consensus fetch begins and ends before some other one tries to attach to + * a circuit, in which case the other one won't know that we're all happy now. + * + * Don't mark the conn that just gave us the consensus -- otherwise we + * would end up double-marking it when it cleans itself up. + */ +static void +connection_dir_close_consensus_fetches(dir_connection_t *except_this_one, + const char *resource) +{ + smartlist_t *conns_to_close = + connection_dir_list_by_purpose_and_resource(DIR_PURPOSE_FETCH_CONSENSUS, + resource); + SMARTLIST_FOREACH_BEGIN(conns_to_close, dir_connection_t *, d) { + if (d == except_this_one) + continue; + log_info(LD_DIR, "Closing consensus fetch (to %s) since one " + "has just arrived.", TO_CONN(d)->address); + connection_mark_for_close(TO_CONN(d)); + } SMARTLIST_FOREACH_END(d); + smartlist_free(conns_to_close); +} + +/** Connected handler for directory connections: begin sending data to the + * server, and return 0. + * Only used when connections don't immediately connect. */ +int +connection_dir_finished_connecting(dir_connection_t *conn) +{ + tor_assert(conn); + tor_assert(conn->base_.type == CONN_TYPE_DIR); + tor_assert(conn->base_.state == DIR_CONN_STATE_CONNECTING); + + log_debug(LD_HTTP,"Dir connection to router %s:%u established.", + conn->base_.address,conn->base_.port); + + /* start flushing conn */ + conn->base_.state = DIR_CONN_STATE_CLIENT_SENDING; + return 0; +} + +/** Decide which download schedule we want to use based on descriptor type + * in <b>dls</b> and <b>options</b>. + * + * Then, return the initial delay for that download schedule, in seconds. + * + * Helper function for download_status_increment_failure(), + * download_status_reset(), and download_status_increment_attempt(). */ +STATIC int +find_dl_min_delay(const download_status_t *dls, const or_options_t *options) +{ + tor_assert(dls); + tor_assert(options); + + switch (dls->schedule) { + case DL_SCHED_GENERIC: + /* Any other directory document */ + if (dir_server_mode(options)) { + /* A directory authority or directory mirror */ + return options->TestingServerDownloadInitialDelay; + } else { + return options->TestingClientDownloadInitialDelay; + } + case DL_SCHED_CONSENSUS: + if (!networkstatus_consensus_can_use_multiple_directories(options)) { + /* A public relay */ + return options->TestingServerConsensusDownloadInitialDelay; + } else { + /* A client or bridge */ + if (networkstatus_consensus_is_bootstrapping(time(NULL))) { + /* During bootstrapping */ + if (!networkstatus_consensus_can_use_extra_fallbacks(options)) { + /* A bootstrapping client without extra fallback directories */ + return options-> + ClientBootstrapConsensusAuthorityOnlyDownloadInitialDelay; + } else if (dls->want_authority) { + /* A bootstrapping client with extra fallback directories, but + * connecting to an authority */ + return + options->ClientBootstrapConsensusAuthorityDownloadInitialDelay; + } else { + /* A bootstrapping client connecting to extra fallback directories + */ + return + options->ClientBootstrapConsensusFallbackDownloadInitialDelay; + } + } else { + /* A client with a reasonably live consensus, with or without + * certificates */ + return options->TestingClientConsensusDownloadInitialDelay; + } + } + case DL_SCHED_BRIDGE: + if (options->UseBridges && num_bridges_usable(0) > 0) { + /* A bridge client that is sure that one or more of its bridges are + * running can afford to wait longer to update bridge descriptors. */ + return options->TestingBridgeDownloadInitialDelay; + } else { + /* A bridge client which might have no running bridges, must try to + * get bridge descriptors straight away. */ + return options->TestingBridgeBootstrapDownloadInitialDelay; + } + default: + tor_assert(0); + } + + /* Impossible, but gcc will fail with -Werror without a `return`. */ + return 0; +} + +/** As next_random_exponential_delay() below, but does not compute a random + * value. Instead, compute the range of values that + * next_random_exponential_delay() should use when computing its random value. + * Store the low bound into *<b>low_bound_out</b>, and the high bound into + * *<b>high_bound_out</b>. Guarantees that the low bound is strictly less + * than the high bound. */ +STATIC void +next_random_exponential_delay_range(int *low_bound_out, + int *high_bound_out, + int delay, + int base_delay) +{ + // This is the "decorrelated jitter" approach, from + // https://www.awsarchitectureblog.com/2015/03/backoff.html + // The formula is + // sleep = min(cap, random_between(base, sleep * 3)) + + const int delay_times_3 = delay < INT_MAX/3 ? delay * 3 : INT_MAX; + *low_bound_out = base_delay; + if (delay_times_3 > base_delay) { + *high_bound_out = delay_times_3; + } else { + *high_bound_out = base_delay+1; + } +} + +/** Advance one delay step. The algorithm will generate a random delay, + * such that each failure is possibly (random) longer than the ones before. + * + * We then clamp that value to be no larger than max_delay, and return it. + * + * The <b>base_delay</b> parameter is lowest possible delay time (can't be + * zero); the <b>backoff_position</b> parameter is the number of times we've + * generated a delay; and the <b>delay</b> argument is the most recently used + * delay. + */ +STATIC int +next_random_exponential_delay(int delay, + int base_delay) +{ + /* Check preconditions */ + if (BUG(delay < 0)) + delay = 0; + + if (base_delay < 1) + base_delay = 1; + + int low_bound=0, high_bound=INT_MAX; + + next_random_exponential_delay_range(&low_bound, &high_bound, + delay, base_delay); + + return crypto_rand_int_range(low_bound, high_bound); +} + +/** Find the current delay for dls based on min_delay. + * + * This function sets dls->next_attempt_at based on now, and returns the delay. + * Helper for download_status_increment_failure and + * download_status_increment_attempt. */ +STATIC int +download_status_schedule_get_delay(download_status_t *dls, + int min_delay, + time_t now) +{ + tor_assert(dls); + /* If we're using random exponential backoff, we do need min/max delay */ + tor_assert(min_delay >= 0); + + int delay = INT_MAX; + uint8_t dls_schedule_position = (dls->increment_on + == DL_SCHED_INCREMENT_ATTEMPT + ? dls->n_download_attempts + : dls->n_download_failures); + + /* Check if we missed a reset somehow */ + IF_BUG_ONCE(dls->last_backoff_position > dls_schedule_position) { + dls->last_backoff_position = 0; + dls->last_delay_used = 0; + } + + if (dls_schedule_position > 0) { + delay = dls->last_delay_used; + + while (dls->last_backoff_position < dls_schedule_position) { + /* Do one increment step */ + delay = next_random_exponential_delay(delay, min_delay); + /* Update our position */ + ++(dls->last_backoff_position); + } + } else { + /* If we're just starting out, use the minimum delay */ + delay = min_delay; + } + + /* Clamp it within min/max if we have them */ + if (min_delay >= 0 && delay < min_delay) delay = min_delay; + + /* Store it for next time */ + dls->last_backoff_position = dls_schedule_position; + dls->last_delay_used = delay; + + /* A negative delay makes no sense. Knowing that delay is + * non-negative allows us to safely do the wrapping check below. */ + tor_assert(delay >= 0); + + /* Avoid now+delay overflowing TIME_MAX, by comparing with a subtraction + * that won't overflow (since delay is non-negative). */ + if (delay < INT_MAX && now <= TIME_MAX - delay) { + dls->next_attempt_at = now+delay; + } else { + dls->next_attempt_at = TIME_MAX; + } + + return delay; +} + +/* Log a debug message about item, which increments on increment_action, has + * incremented dls_n_download_increments times. The message varies based on + * was_schedule_incremented (if not, not_incremented_response is logged), and + * the values of increment, dls_next_attempt_at, and now. + * Helper for download_status_increment_failure and + * download_status_increment_attempt. */ +static void +download_status_log_helper(const char *item, int was_schedule_incremented, + const char *increment_action, + const char *not_incremented_response, + uint8_t dls_n_download_increments, int increment, + time_t dls_next_attempt_at, time_t now) +{ + if (item) { + if (!was_schedule_incremented) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again %s.", + item, increment_action, (int)dls_n_download_increments, + not_incremented_response); + else if (increment == 0) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again immediately.", + item, increment_action, (int)dls_n_download_increments); + else if (dls_next_attempt_at < TIME_MAX) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again in %d seconds.", + item, increment_action, (int)dls_n_download_increments, + (int)(dls_next_attempt_at-now)); + else + log_debug(LD_DIR, "%s %s %d time(s); Giving up for a while.", + item, increment_action, (int)dls_n_download_increments); + } +} + +/** Determine when a failed download attempt should be retried. + * Called when an attempt to download <b>dls</b> has failed with HTTP status + * <b>status_code</b>. Increment the failure count (if the code indicates a + * real failure, or if we're a server) and set <b>dls</b>-\>next_attempt_at to + * an appropriate time in the future and return it. + * If <b>dls->increment_on</b> is DL_SCHED_INCREMENT_ATTEMPT, increment the + * failure count, and return a time in the far future for the next attempt (to + * avoid an immediate retry). */ +time_t +download_status_increment_failure(download_status_t *dls, int status_code, + const char *item, int server, time_t now) +{ + (void) status_code; // XXXX no longer used. + (void) server; // XXXX no longer used. + int increment = -1; + int min_delay = 0; + + tor_assert(dls); + + /* dls wasn't reset before it was used */ + if (dls->next_attempt_at == 0) { + download_status_reset(dls); + } + + /* count the failure */ + if (dls->n_download_failures < IMPOSSIBLE_TO_DOWNLOAD-1) { + ++dls->n_download_failures; + } + + if (dls->increment_on == DL_SCHED_INCREMENT_FAILURE) { + /* We don't find out that a failure-based schedule has attempted a + * connection until that connection fails. + * We'll never find out about successful connections, but this doesn't + * matter, because schedules are reset after a successful download. + */ + if (dls->n_download_attempts < IMPOSSIBLE_TO_DOWNLOAD-1) + ++dls->n_download_attempts; + + /* only return a failure retry time if this schedule increments on failures + */ + min_delay = find_dl_min_delay(dls, get_options()); + increment = download_status_schedule_get_delay(dls, min_delay, now); + } + + download_status_log_helper(item, !dls->increment_on, "failed", + "concurrently", dls->n_download_failures, + increment, + download_status_get_next_attempt_at(dls), + now); + + if (dls->increment_on == DL_SCHED_INCREMENT_ATTEMPT) { + /* stop this schedule retrying on failure, it will launch concurrent + * connections instead */ + return TIME_MAX; + } else { + return download_status_get_next_attempt_at(dls); + } +} + +/** Determine when the next download attempt should be made when using an + * attempt-based (potentially concurrent) download schedule. + * Called when an attempt to download <b>dls</b> is being initiated. + * Increment the attempt count and set <b>dls</b>-\>next_attempt_at to an + * appropriate time in the future and return it. + * If <b>dls->increment_on</b> is DL_SCHED_INCREMENT_FAILURE, don't increment + * the attempts, and return a time in the far future (to avoid launching a + * concurrent attempt). */ +time_t +download_status_increment_attempt(download_status_t *dls, const char *item, + time_t now) +{ + int delay = -1; + int min_delay = 0; + + tor_assert(dls); + + /* dls wasn't reset before it was used */ + if (dls->next_attempt_at == 0) { + download_status_reset(dls); + } + + if (dls->increment_on == DL_SCHED_INCREMENT_FAILURE) { + /* this schedule should retry on failure, and not launch any concurrent + attempts */ + log_warn(LD_BUG, "Tried to launch an attempt-based connection on a " + "failure-based schedule."); + return TIME_MAX; + } + + if (dls->n_download_attempts < IMPOSSIBLE_TO_DOWNLOAD-1) + ++dls->n_download_attempts; + + min_delay = find_dl_min_delay(dls, get_options()); + delay = download_status_schedule_get_delay(dls, min_delay, now); + + download_status_log_helper(item, dls->increment_on, "attempted", + "on failure", dls->n_download_attempts, + delay, download_status_get_next_attempt_at(dls), + now); + + return download_status_get_next_attempt_at(dls); +} + +static time_t +download_status_get_initial_delay_from_now(const download_status_t *dls) +{ + /* We use constant initial delays, even in exponential backoff + * schedules. */ + return time(NULL) + find_dl_min_delay(dls, get_options()); +} + +/** Reset <b>dls</b> so that it will be considered downloadable + * immediately, and/or to show that we don't need it anymore. + * + * Must be called to initialise a download schedule, otherwise the zeroth item + * in the schedule will never be used. + * + * (We find the zeroth element of the download schedule, and set + * next_attempt_at to be the appropriate offset from 'now'. In most + * cases this means setting it to 'now', so the item will be immediately + * downloadable; when using authorities with fallbacks, there is a few seconds' + * delay.) */ +void +download_status_reset(download_status_t *dls) +{ + if (dls->n_download_failures == IMPOSSIBLE_TO_DOWNLOAD + || dls->n_download_attempts == IMPOSSIBLE_TO_DOWNLOAD) + return; /* Don't reset this. */ + + dls->n_download_failures = 0; + dls->n_download_attempts = 0; + dls->next_attempt_at = download_status_get_initial_delay_from_now(dls); + dls->last_backoff_position = 0; + dls->last_delay_used = 0; + /* Don't reset dls->want_authority or dls->increment_on */ +} + +/** Return true iff, as of <b>now</b>, the resource tracked by <b>dls</b> is + * ready to get its download reattempted. */ +int +download_status_is_ready(download_status_t *dls, time_t now) +{ + /* dls wasn't reset before it was used */ + if (dls->next_attempt_at == 0) { + download_status_reset(dls); + } + + return download_status_get_next_attempt_at(dls) <= now; +} + +/** Mark <b>dl</b> as never downloadable. */ +void +download_status_mark_impossible(download_status_t *dl) +{ + dl->n_download_failures = IMPOSSIBLE_TO_DOWNLOAD; + dl->n_download_attempts = IMPOSSIBLE_TO_DOWNLOAD; +} + +/** Return the number of failures on <b>dls</b> since the last success (if + * any). */ +int +download_status_get_n_failures(const download_status_t *dls) +{ + return dls->n_download_failures; +} + +/** Return the number of attempts to download <b>dls</b> since the last success + * (if any). This can differ from download_status_get_n_failures() due to + * outstanding concurrent attempts. */ +int +download_status_get_n_attempts(const download_status_t *dls) +{ + return dls->n_download_attempts; +} + +/** Return the next time to attempt to download <b>dls</b>. */ +time_t +download_status_get_next_attempt_at(const download_status_t *dls) +{ + /* dls wasn't reset before it was used */ + if (dls->next_attempt_at == 0) { + /* so give the answer we would have given if it had been */ + return download_status_get_initial_delay_from_now(dls); + } + + return dls->next_attempt_at; +} + +/** Called when one or more routerdesc (or extrainfo, if <b>was_extrainfo</b>) + * fetches have failed (with uppercase fingerprints listed in <b>failed</b>, + * either as descriptor digests or as identity digests based on + * <b>was_descriptor_digests</b>). + */ +static void +dir_routerdesc_download_failed(smartlist_t *failed, int status_code, + int router_purpose, + int was_extrainfo, int was_descriptor_digests) +{ + char digest[DIGEST_LEN]; + time_t now = time(NULL); + int server = directory_fetches_from_authorities(get_options()); + if (!was_descriptor_digests) { + if (router_purpose == ROUTER_PURPOSE_BRIDGE) { + tor_assert(!was_extrainfo); + connection_dir_retry_bridges(failed); + } + return; /* FFFF should implement for other-than-router-purpose someday */ + } + SMARTLIST_FOREACH_BEGIN(failed, const char *, cp) { + download_status_t *dls = NULL; + if (base16_decode(digest, DIGEST_LEN, cp, strlen(cp)) != DIGEST_LEN) { + log_warn(LD_BUG, "Malformed fingerprint in list: %s", escaped(cp)); + continue; + } + if (was_extrainfo) { + signed_descriptor_t *sd = + router_get_by_extrainfo_digest(digest); + if (sd) + dls = &sd->ei_dl_status; + } else { + dls = router_get_dl_status_by_descriptor_digest(digest); + } + if (!dls) + continue; + download_status_increment_failure(dls, status_code, cp, server, now); + } SMARTLIST_FOREACH_END(cp); + + /* No need to relaunch descriptor downloads here: we already do it + * every 10 or 60 seconds (FOO_DESCRIPTOR_RETRY_INTERVAL) in main.c. */ +} + +/** Called when a connection to download microdescriptors from relay with + * <b>dir_id</b> has failed in whole or in part. <b>failed</b> is a list + * of every microdesc digest we didn't get. <b>status_code</b> is the http + * status code we received. Reschedule the microdesc downloads as + * appropriate. */ +static void +dir_microdesc_download_failed(smartlist_t *failed, + int status_code, const char *dir_id) +{ + networkstatus_t *consensus + = networkstatus_get_latest_consensus_by_flavor(FLAV_MICRODESC); + routerstatus_t *rs; + download_status_t *dls; + time_t now = time(NULL); + int server = directory_fetches_from_authorities(get_options()); + + if (! consensus) + return; + + /* We failed to fetch a microdescriptor from 'dir_id', note it down + * so that we don't try the same relay next time... */ + microdesc_note_outdated_dirserver(dir_id); + + SMARTLIST_FOREACH_BEGIN(failed, const char *, d) { + rs = router_get_mutable_consensus_status_by_descriptor_digest(consensus,d); + if (!rs) + continue; + dls = &rs->dl_status; + + { /* Increment the failure count for this md fetch */ + char buf[BASE64_DIGEST256_LEN+1]; + digest256_to_base64(buf, d); + log_info(LD_DIR, "Failed to download md %s from %s", + buf, hex_str(dir_id, DIGEST_LEN)); + download_status_increment_failure(dls, status_code, buf, + server, now); + } + } SMARTLIST_FOREACH_END(d); +} + +/** Helper. Compare two fp_pair_t objects, and return negative, 0, or + * positive as appropriate. */ +static int +compare_pairs_(const void **a, const void **b) +{ + const fp_pair_t *fp1 = *a, *fp2 = *b; + int r; + if ((r = fast_memcmp(fp1->first, fp2->first, DIGEST_LEN))) + return r; + else + return fast_memcmp(fp1->second, fp2->second, DIGEST_LEN); +} + +/** Divide a string <b>res</b> of the form FP1-FP2+FP3-FP4...[.z], where each + * FP is a hex-encoded fingerprint, into a sequence of distinct sorted + * fp_pair_t. Skip malformed pairs. On success, return 0 and add those + * fp_pair_t into <b>pairs_out</b>. On failure, return -1. */ +int +dir_split_resource_into_fingerprint_pairs(const char *res, + smartlist_t *pairs_out) +{ + smartlist_t *pairs_tmp = smartlist_new(); + smartlist_t *pairs_result = smartlist_new(); + + smartlist_split_string(pairs_tmp, res, "+", 0, 0); + if (smartlist_len(pairs_tmp)) { + char *last = smartlist_get(pairs_tmp,smartlist_len(pairs_tmp)-1); + size_t last_len = strlen(last); + if (last_len > 2 && !strcmp(last+last_len-2, ".z")) { + last[last_len-2] = '\0'; + } + } + SMARTLIST_FOREACH_BEGIN(pairs_tmp, char *, cp) { + if (strlen(cp) != HEX_DIGEST_LEN*2+1) { + log_info(LD_DIR, + "Skipping digest pair %s with non-standard length.", escaped(cp)); + } else if (cp[HEX_DIGEST_LEN] != '-') { + log_info(LD_DIR, + "Skipping digest pair %s with missing dash.", escaped(cp)); + } else { + fp_pair_t pair; + if (base16_decode(pair.first, DIGEST_LEN, + cp, HEX_DIGEST_LEN) != DIGEST_LEN || + base16_decode(pair.second,DIGEST_LEN, + cp+HEX_DIGEST_LEN+1, HEX_DIGEST_LEN) != DIGEST_LEN) { + log_info(LD_DIR, "Skipping non-decodable digest pair %s", escaped(cp)); + } else { + smartlist_add(pairs_result, tor_memdup(&pair, sizeof(pair))); + } + } + tor_free(cp); + } SMARTLIST_FOREACH_END(cp); + smartlist_free(pairs_tmp); + + /* Uniq-and-sort */ + smartlist_sort(pairs_result, compare_pairs_); + smartlist_uniq(pairs_result, compare_pairs_, tor_free_); + + smartlist_add_all(pairs_out, pairs_result); + smartlist_free(pairs_result); + return 0; +} + +/** Given a directory <b>resource</b> request, containing zero + * or more strings separated by plus signs, followed optionally by ".z", store + * the strings, in order, into <b>fp_out</b>. If <b>compressed_out</b> is + * non-NULL, set it to 1 if the resource ends in ".z", else set it to 0. + * + * If (flags & DSR_HEX), then delete all elements that aren't hex digests, and + * decode the rest. If (flags & DSR_BASE64), then use "-" rather than "+" as + * a separator, delete all the elements that aren't base64-encoded digests, + * and decode the rest. If (flags & DSR_DIGEST256), these digests should be + * 256 bits long; else they should be 160. + * + * If (flags & DSR_SORT_UNIQ), then sort the list and remove all duplicates. + */ +int +dir_split_resource_into_fingerprints(const char *resource, + smartlist_t *fp_out, int *compressed_out, + int flags) +{ + const int decode_hex = flags & DSR_HEX; + const int decode_base64 = flags & DSR_BASE64; + const int digests_are_256 = flags & DSR_DIGEST256; + const int sort_uniq = flags & DSR_SORT_UNIQ; + + const int digest_len = digests_are_256 ? DIGEST256_LEN : DIGEST_LEN; + const int hex_digest_len = digests_are_256 ? + HEX_DIGEST256_LEN : HEX_DIGEST_LEN; + const int base64_digest_len = digests_are_256 ? + BASE64_DIGEST256_LEN : BASE64_DIGEST_LEN; + smartlist_t *fp_tmp = smartlist_new(); + + tor_assert(!(decode_hex && decode_base64)); + tor_assert(fp_out); + + smartlist_split_string(fp_tmp, resource, decode_base64?"-":"+", 0, 0); + if (compressed_out) + *compressed_out = 0; + if (smartlist_len(fp_tmp)) { + char *last = smartlist_get(fp_tmp,smartlist_len(fp_tmp)-1); + size_t last_len = strlen(last); + if (last_len > 2 && !strcmp(last+last_len-2, ".z")) { + last[last_len-2] = '\0'; + if (compressed_out) + *compressed_out = 1; + } + } + if (decode_hex || decode_base64) { + const size_t encoded_len = decode_hex ? hex_digest_len : base64_digest_len; + int i; + char *cp, *d = NULL; + for (i = 0; i < smartlist_len(fp_tmp); ++i) { + cp = smartlist_get(fp_tmp, i); + if (strlen(cp) != encoded_len) { + log_info(LD_DIR, + "Skipping digest %s with non-standard length.", escaped(cp)); + smartlist_del_keeporder(fp_tmp, i--); + goto again; + } + d = tor_malloc_zero(digest_len); + if (decode_hex ? + (base16_decode(d, digest_len, cp, hex_digest_len) != digest_len) : + (base64_decode(d, digest_len, cp, base64_digest_len) + != digest_len)) { + log_info(LD_DIR, "Skipping non-decodable digest %s", escaped(cp)); + smartlist_del_keeporder(fp_tmp, i--); + goto again; + } + smartlist_set(fp_tmp, i, d); + d = NULL; + again: + tor_free(cp); + tor_free(d); + } + } + if (sort_uniq) { + if (decode_hex || decode_base64) { + if (digests_are_256) { + smartlist_sort_digests256(fp_tmp); + smartlist_uniq_digests256(fp_tmp); + } else { + smartlist_sort_digests(fp_tmp); + smartlist_uniq_digests(fp_tmp); + } + } else { + smartlist_sort_strings(fp_tmp); + smartlist_uniq_strings(fp_tmp); + } + } + smartlist_add_all(fp_out, fp_tmp); + smartlist_free(fp_tmp); + return 0; +} + +/** As dir_split_resource_into_fingerprints, but instead fills + * <b>spool_out</b> with a list of spoolable_resource_t for the resource + * identified through <b>source</b>. */ +int +dir_split_resource_into_spoolable(const char *resource, + dir_spool_source_t source, + smartlist_t *spool_out, + int *compressed_out, + int flags) +{ + smartlist_t *fingerprints = smartlist_new(); + + tor_assert(flags & (DSR_HEX|DSR_BASE64)); + const size_t digest_len = + (flags & DSR_DIGEST256) ? DIGEST256_LEN : DIGEST_LEN; + + int r = dir_split_resource_into_fingerprints(resource, fingerprints, + compressed_out, flags); + /* This is not a very efficient implementation XXXX */ + SMARTLIST_FOREACH_BEGIN(fingerprints, uint8_t *, digest) { + spooled_resource_t *spooled = + spooled_resource_new(source, digest, digest_len); + if (spooled) + smartlist_add(spool_out, spooled); + tor_free(digest); + } SMARTLIST_FOREACH_END(digest); + + smartlist_free(fingerprints); + return r; +} diff --git a/src/feature/dircache/directory.h b/src/feature/dircache/directory.h new file mode 100644 index 0000000000..992ff618fb --- /dev/null +++ b/src/feature/dircache/directory.h @@ -0,0 +1,347 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file directory.h + * \brief Header file for directory.c. + **/ + +#ifndef TOR_DIRECTORY_H +#define TOR_DIRECTORY_H + +#include "or/hs_ident.h" +enum compress_method_t; + +dir_connection_t *TO_DIR_CONN(connection_t *c); + +#define DIR_CONN_STATE_MIN_ 1 +/** State for connection to directory server: waiting for connect(). */ +#define DIR_CONN_STATE_CONNECTING 1 +/** State for connection to directory server: sending HTTP request. */ +#define DIR_CONN_STATE_CLIENT_SENDING 2 +/** State for connection to directory server: reading HTTP response. */ +#define DIR_CONN_STATE_CLIENT_READING 3 +/** State for connection to directory server: happy and finished. */ +#define DIR_CONN_STATE_CLIENT_FINISHED 4 +/** State for connection at directory server: waiting for HTTP request. */ +#define DIR_CONN_STATE_SERVER_COMMAND_WAIT 5 +/** State for connection at directory server: sending HTTP response. */ +#define DIR_CONN_STATE_SERVER_WRITING 6 +#define DIR_CONN_STATE_MAX_ 6 + +#define DIR_PURPOSE_MIN_ 4 +/** A connection to a directory server: set after a v2 rendezvous + * descriptor is downloaded. */ +#define DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2 4 +/** A connection to a directory server: download one or more server + * descriptors. */ +#define DIR_PURPOSE_FETCH_SERVERDESC 6 +/** A connection to a directory server: download one or more extra-info + * documents. */ +#define DIR_PURPOSE_FETCH_EXTRAINFO 7 +/** A connection to a directory server: upload a server descriptor. */ +#define DIR_PURPOSE_UPLOAD_DIR 8 +/** A connection to a directory server: upload a v3 networkstatus vote. */ +#define DIR_PURPOSE_UPLOAD_VOTE 10 +/** A connection to a directory server: upload a v3 consensus signature */ +#define DIR_PURPOSE_UPLOAD_SIGNATURES 11 +/** A connection to a directory server: download one or more v3 networkstatus + * votes. */ +#define DIR_PURPOSE_FETCH_STATUS_VOTE 12 +/** A connection to a directory server: download a v3 detached signatures + * object for a consensus. */ +#define DIR_PURPOSE_FETCH_DETACHED_SIGNATURES 13 +/** A connection to a directory server: download a v3 networkstatus + * consensus. */ +#define DIR_PURPOSE_FETCH_CONSENSUS 14 +/** A connection to a directory server: download one or more directory + * authority certificates. */ +#define DIR_PURPOSE_FETCH_CERTIFICATE 15 + +/** Purpose for connection at a directory server. */ +#define DIR_PURPOSE_SERVER 16 +/** A connection to a hidden service directory server: upload a v2 rendezvous + * descriptor. */ +#define DIR_PURPOSE_UPLOAD_RENDDESC_V2 17 +/** A connection to a hidden service directory server: download a v2 rendezvous + * descriptor. */ +#define DIR_PURPOSE_FETCH_RENDDESC_V2 18 +/** A connection to a directory server: download a microdescriptor. */ +#define DIR_PURPOSE_FETCH_MICRODESC 19 +/** A connection to a hidden service directory: upload a v3 descriptor. */ +#define DIR_PURPOSE_UPLOAD_HSDESC 20 +/** A connection to a hidden service directory: fetch a v3 descriptor. */ +#define DIR_PURPOSE_FETCH_HSDESC 21 +/** A connection to a directory server: set after a hidden service descriptor + * is downloaded. */ +#define DIR_PURPOSE_HAS_FETCHED_HSDESC 22 +#define DIR_PURPOSE_MAX_ 22 + +/** True iff <b>p</b> is a purpose corresponding to uploading + * data to a directory server. */ +#define DIR_PURPOSE_IS_UPLOAD(p) \ + ((p)==DIR_PURPOSE_UPLOAD_DIR || \ + (p)==DIR_PURPOSE_UPLOAD_VOTE || \ + (p)==DIR_PURPOSE_UPLOAD_SIGNATURES || \ + (p)==DIR_PURPOSE_UPLOAD_RENDDESC_V2 || \ + (p)==DIR_PURPOSE_UPLOAD_HSDESC) + +int directories_have_accepted_server_descriptor(void); +void directory_post_to_dirservers(uint8_t dir_purpose, uint8_t router_purpose, + dirinfo_type_t type, const char *payload, + size_t payload_len, size_t extrainfo_len); +MOCK_DECL(void, directory_get_from_dirserver, ( + uint8_t dir_purpose, + uint8_t router_purpose, + const char *resource, + int pds_flags, + download_want_authority_t want_authority)); +void directory_get_from_all_authorities(uint8_t dir_purpose, + uint8_t router_purpose, + const char *resource); + +/** Enumeration of ways to connect to a directory server */ +typedef enum { + /** Default: connect over a one-hop Tor circuit. Relays fall back to direct + * DirPort connections, clients, onion services, and bridges do not */ + DIRIND_ONEHOP=0, + /** Connect over a multi-hop anonymizing Tor circuit */ + DIRIND_ANONYMOUS=1, + /** Connect to the DirPort directly */ + DIRIND_DIRECT_CONN, + /** Connect over a multi-hop anonymizing Tor circuit to our dirport */ + DIRIND_ANON_DIRPORT, +} dir_indirection_t; + +int directory_must_use_begindir(const or_options_t *options); + +/** + * A directory_request_t describes the information about a directory request + * at the client side. It describes what we're going to ask for, which + * directory we're going to ask for it, how we're going to contact that + * directory, and (in some cases) what to do with it when we're done. + */ +typedef struct directory_request_t directory_request_t; +directory_request_t *directory_request_new(uint8_t dir_purpose); +void directory_request_free_(directory_request_t *req); +#define directory_request_free(req) \ + FREE_AND_NULL(directory_request_t, directory_request_free_, (req)) +void directory_request_set_or_addr_port(directory_request_t *req, + const tor_addr_port_t *p); +void directory_request_set_dir_addr_port(directory_request_t *req, + const tor_addr_port_t *p); +void directory_request_set_directory_id_digest(directory_request_t *req, + const char *digest); +struct circuit_guard_state_t; +void directory_request_set_guard_state(directory_request_t *req, + struct circuit_guard_state_t *state); +void directory_request_set_router_purpose(directory_request_t *req, + uint8_t router_purpose); +void directory_request_set_indirection(directory_request_t *req, + dir_indirection_t indirection); +void directory_request_set_resource(directory_request_t *req, + const char *resource); +void directory_request_set_payload(directory_request_t *req, + const char *payload, + size_t payload_len); +void directory_request_set_if_modified_since(directory_request_t *req, + time_t if_modified_since); +void directory_request_set_rend_query(directory_request_t *req, + const rend_data_t *query); +void directory_request_upload_set_hs_ident(directory_request_t *req, + const hs_ident_dir_conn_t *ident); +void directory_request_fetch_set_hs_ident(directory_request_t *req, + const hs_ident_dir_conn_t *ident); + +void directory_request_set_routerstatus(directory_request_t *req, + const routerstatus_t *rs); +void directory_request_add_header(directory_request_t *req, + const char *key, + const char *val); +MOCK_DECL(void, directory_initiate_request, (directory_request_t *request)); + +int parse_http_response(const char *headers, int *code, time_t *date, + enum compress_method_t *compression, char **response); +int parse_http_command(const char *headers, + char **command_out, char **url_out); +char *http_get_header(const char *headers, const char *which); + +int connection_dir_is_encrypted(const dir_connection_t *conn); +int connection_dir_reached_eof(dir_connection_t *conn); +int connection_dir_process_inbuf(dir_connection_t *conn); +int connection_dir_finished_flushing(dir_connection_t *conn); +int connection_dir_finished_connecting(dir_connection_t *conn); +void connection_dir_about_to_close(dir_connection_t *dir_conn); + +#define DSR_HEX (1<<0) +#define DSR_BASE64 (1<<1) +#define DSR_DIGEST256 (1<<2) +#define DSR_SORT_UNIQ (1<<3) +int dir_split_resource_into_fingerprints(const char *resource, + smartlist_t *fp_out, int *compressed_out, + int flags); +enum dir_spool_source_t; +int dir_split_resource_into_spoolable(const char *resource, + enum dir_spool_source_t source, + smartlist_t *spool_out, + int *compressed_out, + int flags); +int dir_split_resource_into_fingerprint_pairs(const char *res, + smartlist_t *pairs_out); +char *directory_dump_request_log(void); +void note_request(const char *key, size_t bytes); +int router_supports_extrainfo(const char *identity_digest, int is_authority); + +time_t download_status_increment_failure(download_status_t *dls, + int status_code, const char *item, + int server, time_t now); +time_t download_status_increment_attempt(download_status_t *dls, + const char *item, time_t now); +/** Increment the failure count of the download_status_t <b>dls</b>, with + * the optional status code <b>sc</b>. */ +#define download_status_failed(dls, sc) \ + download_status_increment_failure((dls), (sc), NULL, \ + dir_server_mode(get_options()), \ + time(NULL)) + +void download_status_reset(download_status_t *dls); +int download_status_is_ready(download_status_t *dls, time_t now); +time_t download_status_get_next_attempt_at(const download_status_t *dls); +void download_status_mark_impossible(download_status_t *dl); + +int download_status_get_n_failures(const download_status_t *dls); +int download_status_get_n_attempts(const download_status_t *dls); + +int purpose_needs_anonymity(uint8_t dir_purpose, uint8_t router_purpose, + const char *resource); + +#ifdef DIRECTORY_PRIVATE + +/** A structure to hold arguments passed into each directory response + * handler */ +typedef struct response_handler_args_t { + int status_code; + const char *reason; + const char *body; + size_t body_len; + const char *headers; +} response_handler_args_t; + +struct directory_request_t { + /** + * These fields specify which directory we're contacting. Routerstatus, + * if present, overrides the other fields. + * + * @{ */ + tor_addr_port_t or_addr_port; + tor_addr_port_t dir_addr_port; + char digest[DIGEST_LEN]; + + const routerstatus_t *routerstatus; + /** @} */ + /** One of DIR_PURPOSE_* other than DIR_PURPOSE_SERVER. Describes what + * kind of operation we'll be doing (upload/download), and of what kind + * of document. */ + uint8_t dir_purpose; + /** One of ROUTER_PURPOSE_*; used for uploads and downloads of routerinfo + * and extrainfo docs. */ + uint8_t router_purpose; + /** Enum: determines whether to anonymize, and whether to use dirport or + * orport. */ + dir_indirection_t indirection; + /** Alias to the variable part of the URL for this request */ + const char *resource; + /** Alias to the payload to upload (if any) */ + const char *payload; + /** Number of bytes to upload from payload</b> */ + size_t payload_len; + /** Value to send in an if-modified-since header, or 0 for none. */ + time_t if_modified_since; + /** Hidden-service-specific information v2. */ + const rend_data_t *rend_query; + /** Extra headers to append to the request */ + struct config_line_t *additional_headers; + /** Hidden-service-specific information for v3+. */ + const hs_ident_dir_conn_t *hs_ident; + /** Used internally to directory.c: gets informed when the attempt to + * connect to the directory succeeds or fails, if that attempt bears on the + * directory's usability as a directory guard. */ + struct circuit_guard_state_t *guard_state; +}; + +struct get_handler_args_t; +STATIC int handle_get_hs_descriptor_v3(dir_connection_t *conn, + const struct get_handler_args_t *args); +STATIC int directory_handle_command(dir_connection_t *conn); +STATIC char *accept_encoding_header(void); +STATIC int allowed_anonymous_connection_compression_method( + enum compress_method_t); +STATIC void warn_disallowed_anonymous_compression_method( + enum compress_method_t); + +STATIC int handle_response_fetch_hsdesc_v3(dir_connection_t *conn, + const response_handler_args_t *args); +STATIC int handle_response_fetch_microdesc(dir_connection_t *conn, + const response_handler_args_t *args); + +STATIC int handle_response_fetch_consensus(dir_connection_t *conn, + const response_handler_args_t *args); + +#endif /* defined(DIRECTORY_PRIVATE) */ + +#ifdef TOR_UNIT_TESTS +/* Used only by test_dir.c and test_hs_cache.c */ + +STATIC int parse_http_url(const char *headers, char **url); +STATIC dirinfo_type_t dir_fetch_type(int dir_purpose, int router_purpose, + const char *resource); +MOCK_DECL(STATIC int, directory_handle_command_get,(dir_connection_t *conn, + const char *headers, + const char *req_body, + size_t req_body_len)); +MOCK_DECL(STATIC int, directory_handle_command_post,(dir_connection_t *conn, + const char *headers, + const char *body, + size_t body_len)); +STATIC int download_status_schedule_get_delay(download_status_t *dls, + int min_delay, + time_t now); + +STATIC int handle_post_hs_descriptor(const char *url, const char *body); + +STATIC char* authdir_type_to_string(dirinfo_type_t auth); +STATIC const char * dir_conn_purpose_to_string(int purpose); +STATIC int should_use_directory_guards(const or_options_t *options); +enum compression_level_t; +STATIC enum compression_level_t choose_compression_level(ssize_t n_bytes); +STATIC int find_dl_min_delay(const download_status_t *dls, + const or_options_t *options); + +STATIC int next_random_exponential_delay(int delay, + int base_delay); + +STATIC void next_random_exponential_delay_range(int *low_bound_out, + int *high_bound_out, + int delay, + int base_delay); + +STATIC int parse_hs_version_from_post(const char *url, const char *prefix, + const char **end_pos); + +STATIC unsigned parse_accept_encoding_header(const char *h); +#endif /* defined(TOR_UNIT_TESTS) */ + +#if defined(TOR_UNIT_TESTS) || defined(DIRECTORY_PRIVATE) +/* Used only by directory.c and test_dir.c */ + +/* no more than quadruple the previous delay (multiplier + 1) */ +#define DIR_DEFAULT_RANDOM_MULTIPLIER (3) +/* no more than triple the previous delay */ +#define DIR_TEST_NET_RANDOM_MULTIPLIER (2) + +#endif /* defined(TOR_UNIT_TESTS) || defined(DIRECTORY_PRIVATE) */ + +#endif /* !defined(TOR_DIRECTORY_H) */ diff --git a/src/feature/dircache/dirserv.c b/src/feature/dircache/dirserv.c new file mode 100644 index 0000000000..2980d63f0a --- /dev/null +++ b/src/feature/dircache/dirserv.c @@ -0,0 +1,3598 @@ +/* Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#define DIRSERV_PRIVATE +#include "or/or.h" +#include "lib/container/buffers.h" +#include "or/config.h" +#include "or/confparse.h" +#include "or/channel.h" +#include "or/channeltls.h" +#include "or/command.h" +#include "or/connection.h" +#include "or/connection_or.h" +#include "or/conscache.h" +#include "or/consdiffmgr.h" +#include "or/control.h" +#include "or/directory.h" +#include "or/dirserv.h" +#include "or/hibernate.h" +#include "or/keypin.h" +#include "or/main.h" +#include "or/microdesc.h" +#include "or/networkstatus.h" +#include "or/nodelist.h" +#include "or/policies.h" +#include "or/protover.h" +#include "or/rephist.h" +#include "or/router.h" +#include "or/routerlist.h" +#include "or/routerparse.h" +#include "or/routerset.h" +#include "or/torcert.h" +#include "or/voting_schedule.h" + +#include "or/dirauth/dirvote.h" + +#include "or/cached_dir_st.h" +#include "or/dir_connection_st.h" +#include "or/extrainfo_st.h" +#include "or/microdesc_st.h" +#include "or/node_st.h" +#include "or/routerinfo_st.h" +#include "or/routerlist_st.h" +#include "or/tor_version_st.h" +#include "or/vote_routerstatus_st.h" + +#include "lib/compress/compress.h" +#include "lib/container/order.h" +#include "lib/crypt_ops/crypto_format.h" +#include "lib/encoding/confline.h" + +/** + * \file dirserv.c + * \brief Directory server core implementation. Manages directory + * contents and generates directory documents. + * + * This module implements most of directory cache functionality, and some of + * the directory authority functionality. The directory.c module delegates + * here in order to handle incoming requests from clients, via + * connection_dirserv_flushed_some() and its kin. In order to save RAM, this + * module is responsible for spooling directory objects (in whole or in part) + * onto buf_t instances, and then closing the dir_connection_t once the + * objects are totally flushed. + * + * The directory.c module also delegates here for handling descriptor uploads + * via dirserv_add_multiple_descriptors(). + * + * Additionally, this module handles some aspects of voting, including: + * deciding how to vote on individual flags (based on decisions reached in + * rephist.c), of formatting routerstatus lines, and deciding what relays to + * include in an authority's vote. (TODO: Those functions could profitably be + * split off. They only live in this file because historically they were + * shared among the v1, v2, and v3 directory code.) + */ + +/** How far in the future do we allow a router to get? (seconds) */ +#define ROUTER_ALLOW_SKEW (60*60*12) +/** How many seconds do we wait before regenerating the directory? */ +#define DIR_REGEN_SLACK_TIME 30 +/** If we're a cache, keep this many networkstatuses around from non-trusted + * directory authorities. */ +#define MAX_UNTRUSTED_NETWORKSTATUSES 16 + +/** Total number of routers with measured bandwidth; this is set by + * dirserv_count_measured_bws() before the loop in + * dirserv_generate_networkstatus_vote_obj() and checked by + * dirserv_get_credible_bandwidth() and + * dirserv_compute_performance_thresholds() */ +static int routers_with_measured_bw = 0; + +static void directory_remove_invalid(void); +struct authdir_config_t; +static uint32_t +dirserv_get_status_impl(const char *fp, const char *nickname, + uint32_t addr, uint16_t or_port, + const char *platform, const char **msg, + int severity); +static void clear_cached_dir(cached_dir_t *d); +static const signed_descriptor_t *get_signed_descriptor_by_fp( + const uint8_t *fp, + int extrainfo); +static was_router_added_t dirserv_add_extrainfo(extrainfo_t *ei, + const char **msg); +static uint32_t dirserv_get_credible_bandwidth_kb(const routerinfo_t *ri); + +static int spooled_resource_lookup_body(const spooled_resource_t *spooled, + int conn_is_encrypted, + const uint8_t **body_out, + size_t *size_out, + time_t *published_out); +static cached_dir_t *spooled_resource_lookup_cached_dir( + const spooled_resource_t *spooled, + time_t *published_out); +static cached_dir_t *lookup_cached_dir_by_fp(const uint8_t *fp); + +/************** Fingerprint handling code ************/ + +/* 1 Historically used to indicate Named */ +#define FP_INVALID 2 /**< Believed invalid. */ +#define FP_REJECT 4 /**< We will not publish this router. */ +/* 8 Historically used to avoid using this as a dir. */ +#define FP_BADEXIT 16 /**< We'll tell clients not to use this as an exit. */ +/* 32 Historically used to indicade Unnamed */ + +/** Target of status_by_digest map. */ +typedef uint32_t router_status_t; + +static void add_fingerprint_to_dir(const char *fp, + struct authdir_config_t *list, + router_status_t add_status); + +/** List of nickname-\>identity fingerprint mappings for all the routers + * that we name. Used to prevent router impersonation. */ +typedef struct authdir_config_t { + strmap_t *fp_by_name; /**< Map from lc nickname to fingerprint. */ + digestmap_t *status_by_digest; /**< Map from digest to router_status_t. */ +} authdir_config_t; + +/** Should be static; exposed for testing. */ +static authdir_config_t *fingerprint_list = NULL; + +/** Allocate and return a new, empty, authdir_config_t. */ +static authdir_config_t * +authdir_config_new(void) +{ + authdir_config_t *list = tor_malloc_zero(sizeof(authdir_config_t)); + list->fp_by_name = strmap_new(); + list->status_by_digest = digestmap_new(); + return list; +} + +/** Add the fingerprint <b>fp</b> to the smartlist of fingerprint_entry_t's + * <b>list</b>, or-ing the currently set status flags with + * <b>add_status</b>. + */ +/* static */ void +add_fingerprint_to_dir(const char *fp, authdir_config_t *list, + router_status_t add_status) +{ + char *fingerprint; + char d[DIGEST_LEN]; + router_status_t *status; + tor_assert(fp); + tor_assert(list); + + fingerprint = tor_strdup(fp); + tor_strstrip(fingerprint, " "); + if (base16_decode(d, DIGEST_LEN, + fingerprint, strlen(fingerprint)) != DIGEST_LEN) { + log_warn(LD_DIRSERV, "Couldn't decode fingerprint \"%s\"", + escaped(fp)); + tor_free(fingerprint); + return; + } + + status = digestmap_get(list->status_by_digest, d); + if (!status) { + status = tor_malloc_zero(sizeof(router_status_t)); + digestmap_set(list->status_by_digest, d, status); + } + + tor_free(fingerprint); + *status |= add_status; + return; +} + +/** Add the fingerprint for this OR to the global list of recognized + * identity key fingerprints. */ +int +dirserv_add_own_fingerprint(crypto_pk_t *pk) +{ + char fp[FINGERPRINT_LEN+1]; + if (crypto_pk_get_fingerprint(pk, fp, 0)<0) { + log_err(LD_BUG, "Error computing fingerprint"); + return -1; + } + if (!fingerprint_list) + fingerprint_list = authdir_config_new(); + add_fingerprint_to_dir(fp, fingerprint_list, 0); + return 0; +} + +/** Load the nickname-\>fingerprint mappings stored in the approved-routers + * file. The file format is line-based, with each non-blank holding one + * nickname, some space, and a fingerprint for that nickname. On success, + * replace the current fingerprint list with the new list and return 0. On + * failure, leave the current fingerprint list untouched, and return -1. */ +int +dirserv_load_fingerprint_file(void) +{ + char *fname; + char *cf; + char *nickname, *fingerprint; + authdir_config_t *fingerprint_list_new; + int result; + config_line_t *front=NULL, *list; + + fname = get_datadir_fname("approved-routers"); + log_info(LD_GENERAL, + "Reloading approved fingerprints from \"%s\"...", fname); + + cf = read_file_to_str(fname, RFTS_IGNORE_MISSING, NULL); + if (!cf) { + log_warn(LD_FS, "Cannot open fingerprint file '%s'. That's ok.", fname); + tor_free(fname); + return 0; + } + tor_free(fname); + + result = config_get_lines(cf, &front, 0); + tor_free(cf); + if (result < 0) { + log_warn(LD_CONFIG, "Error reading from fingerprint file"); + return -1; + } + + fingerprint_list_new = authdir_config_new(); + + for (list=front; list; list=list->next) { + char digest_tmp[DIGEST_LEN]; + router_status_t add_status = 0; + nickname = list->key; fingerprint = list->value; + tor_strstrip(fingerprint, " "); /* remove spaces */ + if (strlen(fingerprint) != HEX_DIGEST_LEN || + base16_decode(digest_tmp, sizeof(digest_tmp), + fingerprint, HEX_DIGEST_LEN) != sizeof(digest_tmp)) { + log_notice(LD_CONFIG, + "Invalid fingerprint (nickname '%s', " + "fingerprint %s). Skipping.", + nickname, fingerprint); + continue; + } + if (!strcasecmp(nickname, "!reject")) { + add_status = FP_REJECT; + } else if (!strcasecmp(nickname, "!badexit")) { + add_status = FP_BADEXIT; + } else if (!strcasecmp(nickname, "!invalid")) { + add_status = FP_INVALID; + } + add_fingerprint_to_dir(fingerprint, fingerprint_list_new, add_status); + } + + config_free_lines(front); + dirserv_free_fingerprint_list(); + fingerprint_list = fingerprint_list_new; + /* Delete any routers whose fingerprints we no longer recognize */ + directory_remove_invalid(); + return 0; +} + +/* If this is set, then we don't allow routers that have advertised an Ed25519 + * identity to stop doing so. This is going to be essential for good identity + * security: otherwise anybody who can attack RSA-1024 but not Ed25519 could + * just sign fake descriptors missing the Ed25519 key. But we won't actually + * be able to prevent that kind of thing until we're confident that there isn't + * actually a legit reason to downgrade to 0.2.5. Now we are not recommending + * 0.2.5 anymore so there is no reason to keep the #undef. + */ + +#define DISABLE_DISABLING_ED25519 + +/** Check whether <b>router</b> has a nickname/identity key combination that + * we recognize from the fingerprint list, or an IP we automatically act on + * according to our configuration. Return the appropriate router status. + * + * If the status is 'FP_REJECT' and <b>msg</b> is provided, set + * *<b>msg</b> to an explanation of why. */ +uint32_t +dirserv_router_get_status(const routerinfo_t *router, const char **msg, + int severity) +{ + char d[DIGEST_LEN]; + const int key_pinning = get_options()->AuthDirPinKeys; + + if (crypto_pk_get_digest(router->identity_pkey, d)) { + log_warn(LD_BUG,"Error computing fingerprint"); + if (msg) + *msg = "Bug: Error computing fingerprint"; + return FP_REJECT; + } + + /* Check for the more usual versions to reject a router first. */ + const uint32_t r = dirserv_get_status_impl(d, router->nickname, + router->addr, router->or_port, + router->platform, msg, severity); + if (r) + return r; + + /* dirserv_get_status_impl already rejects versions older than 0.2.4.18-rc, + * and onion_curve25519_pkey was introduced in 0.2.4.8-alpha. + * But just in case a relay doesn't provide or lies about its version, or + * doesn't include an ntor key in its descriptor, check that it exists, + * and is non-zero (clients check that it's non-zero before using it). */ + if (!routerinfo_has_curve25519_onion_key(router)) { + log_fn(severity, LD_DIR, + "Descriptor from router %s is missing an ntor curve25519 onion " + "key.", router_describe(router)); + if (msg) + *msg = "Missing ntor curve25519 onion key. Please upgrade!"; + return FP_REJECT; + } + + if (router->cache_info.signing_key_cert) { + /* This has an ed25519 identity key. */ + if (KEYPIN_MISMATCH == + keypin_check((const uint8_t*)router->cache_info.identity_digest, + router->cache_info.signing_key_cert->signing_key.pubkey)) { + log_fn(severity, LD_DIR, + "Descriptor from router %s has an Ed25519 key, " + "but the <rsa,ed25519> keys don't match what they were before.", + router_describe(router)); + if (key_pinning) { + if (msg) { + *msg = "Ed25519 identity key or RSA identity key has changed."; + } + return FP_REJECT; + } + } + } else { + /* No ed25519 key */ + if (KEYPIN_MISMATCH == keypin_check_lone_rsa( + (const uint8_t*)router->cache_info.identity_digest)) { + log_fn(severity, LD_DIR, + "Descriptor from router %s has no Ed25519 key, " + "when we previously knew an Ed25519 for it. Ignoring for now, " + "since Ed25519 keys are fairly new.", + router_describe(router)); +#ifdef DISABLE_DISABLING_ED25519 + if (key_pinning) { + if (msg) { + *msg = "Ed25519 identity key has disappeared."; + } + return FP_REJECT; + } +#endif /* defined(DISABLE_DISABLING_ED25519) */ + } + } + + return 0; +} + +/** Return true if there is no point in downloading the router described by + * <b>rs</b> because this directory would reject it. */ +int +dirserv_would_reject_router(const routerstatus_t *rs) +{ + uint32_t res; + + res = dirserv_get_status_impl(rs->identity_digest, rs->nickname, + rs->addr, rs->or_port, + NULL, NULL, LOG_DEBUG); + + return (res & FP_REJECT) != 0; +} + +/** Helper: As dirserv_router_get_status, but takes the router fingerprint + * (hex, no spaces), nickname, address (used for logging only), IP address, OR + * port and platform (logging only) as arguments. + * + * Log messages at 'severity'. (There's not much point in + * logging that we're rejecting servers we'll not download.) + */ +static uint32_t +dirserv_get_status_impl(const char *id_digest, const char *nickname, + uint32_t addr, uint16_t or_port, + const char *platform, const char **msg, int severity) +{ + uint32_t result = 0; + router_status_t *status_by_digest; + + if (!fingerprint_list) + fingerprint_list = authdir_config_new(); + + log_debug(LD_DIRSERV, "%d fingerprints, %d digests known.", + strmap_size(fingerprint_list->fp_by_name), + digestmap_size(fingerprint_list->status_by_digest)); + + if (platform) { + tor_version_t ver_tmp; + if (tor_version_parse_platform(platform, &ver_tmp, 1) < 0) { + if (msg) { + *msg = "Malformed platform string."; + } + return FP_REJECT; + } + } + + /* Versions before Tor 0.2.4.18-rc are too old to support, and are + * missing some important security fixes too. Disable them. */ + if (platform && !tor_version_as_new_as(platform,"0.2.4.18-rc")) { + if (msg) + *msg = "Tor version is insecure or unsupported. Please upgrade!"; + return FP_REJECT; + } + + /* Tor 0.2.9.x where x<5 suffers from bug #20499, where relays don't + * keep their consensus up to date so they make bad guards. + * The simple fix is to just drop them from the network. */ + if (platform && + tor_version_as_new_as(platform,"0.2.9.0-alpha") && + !tor_version_as_new_as(platform,"0.2.9.5-alpha")) { + if (msg) + *msg = "Tor version contains bug 20499. Please upgrade!"; + return FP_REJECT; + } + + status_by_digest = digestmap_get(fingerprint_list->status_by_digest, + id_digest); + if (status_by_digest) + result |= *status_by_digest; + + if (result & FP_REJECT) { + if (msg) + *msg = "Fingerprint is marked rejected -- if you think this is a " + "mistake please set a valid email address in ContactInfo and " + "send an email to bad-relays@lists.torproject.org mentioning " + "your fingerprint(s)?"; + return FP_REJECT; + } else if (result & FP_INVALID) { + if (msg) + *msg = "Fingerprint is marked invalid"; + } + + if (authdir_policy_badexit_address(addr, or_port)) { + log_fn(severity, LD_DIRSERV, + "Marking '%s' as bad exit because of address '%s'", + nickname, fmt_addr32(addr)); + result |= FP_BADEXIT; + } + + if (!authdir_policy_permits_address(addr, or_port)) { + log_fn(severity, LD_DIRSERV, "Rejecting '%s' because of address '%s'", + nickname, fmt_addr32(addr)); + if (msg) + *msg = "Suspicious relay address range -- if you think this is a " + "mistake please set a valid email address in ContactInfo and " + "send an email to bad-relays@lists.torproject.org mentioning " + "your address(es) and fingerprint(s)?"; + return FP_REJECT; + } + if (!authdir_policy_valid_address(addr, or_port)) { + log_fn(severity, LD_DIRSERV, + "Not marking '%s' valid because of address '%s'", + nickname, fmt_addr32(addr)); + result |= FP_INVALID; + } + + return result; +} + +/** Clear the current fingerprint list. */ +void +dirserv_free_fingerprint_list(void) +{ + if (!fingerprint_list) + return; + + strmap_free(fingerprint_list->fp_by_name, tor_free_); + digestmap_free(fingerprint_list->status_by_digest, tor_free_); + tor_free(fingerprint_list); +} + +/* + * Descriptor list + */ + +/** Return -1 if <b>ri</b> has a private or otherwise bad address, + * unless we're configured to not care. Return 0 if all ok. */ +static int +dirserv_router_has_valid_address(routerinfo_t *ri) +{ + tor_addr_t addr; + if (get_options()->DirAllowPrivateAddresses) + return 0; /* whatever it is, we're fine with it */ + tor_addr_from_ipv4h(&addr, ri->addr); + + if (tor_addr_is_internal(&addr, 0)) { + log_info(LD_DIRSERV, + "Router %s published internal IP address. Refusing.", + router_describe(ri)); + return -1; /* it's a private IP, we should reject it */ + } + return 0; +} + +/** Check whether we, as a directory server, want to accept <b>ri</b>. If so, + * set its is_valid,running fields and return 0. Otherwise, return -1. + * + * If the router is rejected, set *<b>msg</b> to an explanation of why. + * + * If <b>complain</b> then explain at log-level 'notice' why we refused + * a descriptor; else explain at log-level 'info'. + */ +int +authdir_wants_to_reject_router(routerinfo_t *ri, const char **msg, + int complain, int *valid_out) +{ + /* Okay. Now check whether the fingerprint is recognized. */ + time_t now; + int severity = (complain && ri->contact_info) ? LOG_NOTICE : LOG_INFO; + uint32_t status = dirserv_router_get_status(ri, msg, severity); + tor_assert(msg); + if (status & FP_REJECT) + return -1; /* msg is already set. */ + + /* Is there too much clock skew? */ + now = time(NULL); + if (ri->cache_info.published_on > now+ROUTER_ALLOW_SKEW) { + log_fn(severity, LD_DIRSERV, "Publication time for %s is too " + "far (%d minutes) in the future; possible clock skew. Not adding " + "(%s)", + router_describe(ri), + (int)((ri->cache_info.published_on-now)/60), + esc_router_info(ri)); + *msg = "Rejected: Your clock is set too far in the future, or your " + "timezone is not correct."; + return -1; + } + if (ri->cache_info.published_on < now-ROUTER_MAX_AGE_TO_PUBLISH) { + log_fn(severity, LD_DIRSERV, + "Publication time for %s is too far " + "(%d minutes) in the past. Not adding (%s)", + router_describe(ri), + (int)((now-ri->cache_info.published_on)/60), + esc_router_info(ri)); + *msg = "Rejected: Server is expired, or your clock is too far in the past," + " or your timezone is not correct."; + return -1; + } + if (dirserv_router_has_valid_address(ri) < 0) { + log_fn(severity, LD_DIRSERV, + "Router %s has invalid address. Not adding (%s).", + router_describe(ri), + esc_router_info(ri)); + *msg = "Rejected: Address is a private address."; + return -1; + } + + *valid_out = ! (status & FP_INVALID); + + return 0; +} + +/** Update the relevant flags of <b>node</b> based on our opinion as a + * directory authority in <b>authstatus</b>, as returned by + * dirserv_router_get_status or equivalent. */ +void +dirserv_set_node_flags_from_authoritative_status(node_t *node, + uint32_t authstatus) +{ + node->is_valid = (authstatus & FP_INVALID) ? 0 : 1; + node->is_bad_exit = (authstatus & FP_BADEXIT) ? 1 : 0; +} + +/** True iff <b>a</b> is more severe than <b>b</b>. */ +static int +WRA_MORE_SEVERE(was_router_added_t a, was_router_added_t b) +{ + return a < b; +} + +/** As for dirserv_add_descriptor(), but accepts multiple documents, and + * returns the most severe error that occurred for any one of them. */ +was_router_added_t +dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose, + const char *source, + const char **msg) +{ + was_router_added_t r, r_tmp; + const char *msg_out; + smartlist_t *list; + const char *s; + int n_parsed = 0; + time_t now = time(NULL); + char annotation_buf[ROUTER_ANNOTATION_BUF_LEN]; + char time_buf[ISO_TIME_LEN+1]; + int general = purpose == ROUTER_PURPOSE_GENERAL; + tor_assert(msg); + + r=ROUTER_ADDED_SUCCESSFULLY; /*Least severe return value. */ + + format_iso_time(time_buf, now); + if (tor_snprintf(annotation_buf, sizeof(annotation_buf), + "@uploaded-at %s\n" + "@source %s\n" + "%s%s%s", time_buf, escaped(source), + !general ? "@purpose " : "", + !general ? router_purpose_to_string(purpose) : "", + !general ? "\n" : "")<0) { + *msg = "Couldn't format annotations"; + /* XXX Not cool: we return -1 below, but (was_router_added_t)-1 is + * ROUTER_BAD_EI, which isn't what's gone wrong here. :( */ + return -1; + } + + s = desc; + list = smartlist_new(); + if (!router_parse_list_from_string(&s, NULL, list, SAVED_NOWHERE, 0, 0, + annotation_buf, NULL)) { + SMARTLIST_FOREACH(list, routerinfo_t *, ri, { + msg_out = NULL; + tor_assert(ri->purpose == purpose); + r_tmp = dirserv_add_descriptor(ri, &msg_out, source); + if (WRA_MORE_SEVERE(r_tmp, r)) { + r = r_tmp; + *msg = msg_out; + } + }); + } + n_parsed += smartlist_len(list); + smartlist_clear(list); + + s = desc; + if (!router_parse_list_from_string(&s, NULL, list, SAVED_NOWHERE, 1, 0, + NULL, NULL)) { + SMARTLIST_FOREACH(list, extrainfo_t *, ei, { + msg_out = NULL; + + r_tmp = dirserv_add_extrainfo(ei, &msg_out); + if (WRA_MORE_SEVERE(r_tmp, r)) { + r = r_tmp; + *msg = msg_out; + } + }); + } + n_parsed += smartlist_len(list); + smartlist_free(list); + + if (! *msg) { + if (!n_parsed) { + *msg = "No descriptors found in your POST."; + if (WRA_WAS_ADDED(r)) + r = ROUTER_IS_ALREADY_KNOWN; + } else { + *msg = "(no message)"; + } + } + + return r; +} + +/** Examine the parsed server descriptor in <b>ri</b> and maybe insert it into + * the list of server descriptors. Set *<b>msg</b> to a message that should be + * passed back to the origin of this descriptor, or NULL if there is no such + * message. Use <b>source</b> to produce better log messages. + * + * If <b>ri</b> is not added to the list of server descriptors, free it. + * That means the caller must not access <b>ri</b> after this function + * returns, since it might have been freed. + * + * Return the status of the operation. + * + * This function is only called when fresh descriptors are posted, not when + * we re-load the cache. + */ +was_router_added_t +dirserv_add_descriptor(routerinfo_t *ri, const char **msg, const char *source) +{ + was_router_added_t r; + routerinfo_t *ri_old; + char *desc, *nickname; + const size_t desclen = ri->cache_info.signed_descriptor_len + + ri->cache_info.annotations_len; + const int key_pinning = get_options()->AuthDirPinKeys; + *msg = NULL; + + /* If it's too big, refuse it now. Otherwise we'll cache it all over the + * network and it'll clog everything up. */ + if (ri->cache_info.signed_descriptor_len > MAX_DESCRIPTOR_UPLOAD_SIZE) { + log_notice(LD_DIR, "Somebody attempted to publish a router descriptor '%s'" + " (source: %s) with size %d. Either this is an attack, or the " + "MAX_DESCRIPTOR_UPLOAD_SIZE (%d) constant is too low.", + ri->nickname, source, (int)ri->cache_info.signed_descriptor_len, + MAX_DESCRIPTOR_UPLOAD_SIZE); + *msg = "Router descriptor was too large."; + r = ROUTER_AUTHDIR_REJECTS; + goto fail; + } + + /* Check whether this descriptor is semantically identical to the last one + * from this server. (We do this here and not in router_add_to_routerlist + * because we want to be able to accept the newest router descriptor that + * another authority has, so we all converge on the same one.) */ + ri_old = router_get_mutable_by_digest(ri->cache_info.identity_digest); + if (ri_old && ri_old->cache_info.published_on < ri->cache_info.published_on + && router_differences_are_cosmetic(ri_old, ri) + && !router_is_me(ri)) { + log_info(LD_DIRSERV, + "Not replacing descriptor from %s (source: %s); " + "differences are cosmetic.", + router_describe(ri), source); + *msg = "Not replacing router descriptor; no information has changed since " + "the last one with this identity."; + r = ROUTER_IS_ALREADY_KNOWN; + goto fail; + } + + /* Do keypinning again ... this time, to add the pin if appropriate */ + int keypin_status; + if (ri->cache_info.signing_key_cert) { + ed25519_public_key_t *pkey = &ri->cache_info.signing_key_cert->signing_key; + /* First let's validate this pubkey before pinning it */ + if (ed25519_validate_pubkey(pkey) < 0) { + log_warn(LD_DIRSERV, "Received bad key from %s (source %s)", + router_describe(ri), source); + routerinfo_free(ri); + return ROUTER_AUTHDIR_REJECTS; + } + + /* Now pin it! */ + keypin_status = keypin_check_and_add( + (const uint8_t*)ri->cache_info.identity_digest, + pkey->pubkey, ! key_pinning); + } else { + keypin_status = keypin_check_lone_rsa( + (const uint8_t*)ri->cache_info.identity_digest); +#ifndef DISABLE_DISABLING_ED25519 + if (keypin_status == KEYPIN_MISMATCH) + keypin_status = KEYPIN_NOT_FOUND; +#endif + } + if (keypin_status == KEYPIN_MISMATCH && key_pinning) { + log_info(LD_DIRSERV, "Dropping descriptor from %s (source: %s) because " + "its key did not match an older RSA/Ed25519 keypair", + router_describe(ri), source); + *msg = "Looks like your keypair has changed? This authority previously " + "recorded a different RSA identity for this Ed25519 identity (or vice " + "versa.) Did you replace or copy some of your key files, but not " + "the others? You should either restore the expected keypair, or " + "delete your keys and restart Tor to start your relay with a new " + "identity."; + r = ROUTER_AUTHDIR_REJECTS; + goto fail; + } + + /* Make a copy of desc, since router_add_to_routerlist might free + * ri and its associated signed_descriptor_t. */ + desc = tor_strndup(ri->cache_info.signed_descriptor_body, desclen); + nickname = tor_strdup(ri->nickname); + + /* Tell if we're about to need to launch a test if we add this. */ + ri->needs_retest_if_added = + dirserv_should_launch_reachability_test(ri, ri_old); + + r = router_add_to_routerlist(ri, msg, 0, 0); + if (!WRA_WAS_ADDED(r)) { + /* unless the routerinfo was fine, just out-of-date */ + log_info(LD_DIRSERV, + "Did not add descriptor from '%s' (source: %s): %s.", + nickname, source, *msg ? *msg : "(no message)"); + } else { + smartlist_t *changed; + + changed = smartlist_new(); + smartlist_add(changed, ri); + routerlist_descriptors_added(changed, 0); + smartlist_free(changed); + if (!*msg) { + *msg = "Descriptor accepted"; + } + log_info(LD_DIRSERV, + "Added descriptor from '%s' (source: %s): %s.", + nickname, source, *msg); + } + tor_free(desc); + tor_free(nickname); + return r; + fail: + { + const char *desc_digest = ri->cache_info.signed_descriptor_digest; + download_status_t *dls = + router_get_dl_status_by_descriptor_digest(desc_digest); + if (dls) { + log_info(LD_GENERAL, "Marking router with descriptor %s as rejected, " + "and therefore undownloadable", + hex_str(desc_digest, DIGEST_LEN)); + download_status_mark_impossible(dls); + } + routerinfo_free(ri); + } + return r; +} + +/** As dirserv_add_descriptor, but for an extrainfo_t <b>ei</b>. */ +static was_router_added_t +dirserv_add_extrainfo(extrainfo_t *ei, const char **msg) +{ + routerinfo_t *ri; + int r; + was_router_added_t rv; + tor_assert(msg); + *msg = NULL; + + /* Needs to be mutable so routerinfo_incompatible_with_extrainfo + * can mess with some of the flags in ri->cache_info. */ + ri = router_get_mutable_by_digest(ei->cache_info.identity_digest); + if (!ri) { + *msg = "No corresponding router descriptor for extra-info descriptor"; + rv = ROUTER_BAD_EI; + goto fail; + } + + /* If it's too big, refuse it now. Otherwise we'll cache it all over the + * network and it'll clog everything up. */ + if (ei->cache_info.signed_descriptor_len > MAX_EXTRAINFO_UPLOAD_SIZE) { + log_notice(LD_DIR, "Somebody attempted to publish an extrainfo " + "with size %d. Either this is an attack, or the " + "MAX_EXTRAINFO_UPLOAD_SIZE (%d) constant is too low.", + (int)ei->cache_info.signed_descriptor_len, + MAX_EXTRAINFO_UPLOAD_SIZE); + *msg = "Extrainfo document was too large"; + rv = ROUTER_BAD_EI; + goto fail; + } + + if ((r = routerinfo_incompatible_with_extrainfo(ri->identity_pkey, ei, + &ri->cache_info, msg))) { + if (r<0) { + extrainfo_free(ei); + return ROUTER_IS_ALREADY_KNOWN; + } + rv = ROUTER_BAD_EI; + goto fail; + } + router_add_extrainfo_to_routerlist(ei, msg, 0, 0); + return ROUTER_ADDED_SUCCESSFULLY; + fail: + { + const char *d = ei->cache_info.signed_descriptor_digest; + signed_descriptor_t *sd = router_get_by_extrainfo_digest((char*)d); + if (sd) { + log_info(LD_GENERAL, "Marking extrainfo with descriptor %s as " + "rejected, and therefore undownloadable", + hex_str((char*)d,DIGEST_LEN)); + download_status_mark_impossible(&sd->ei_dl_status); + } + extrainfo_free(ei); + } + return rv; +} + +/** Remove all descriptors whose nicknames or fingerprints no longer + * are allowed by our fingerprint list. (Descriptors that used to be + * good can become bad when we reload the fingerprint list.) + */ +static void +directory_remove_invalid(void) +{ + routerlist_t *rl = router_get_routerlist(); + smartlist_t *nodes = smartlist_new(); + smartlist_add_all(nodes, nodelist_get_list()); + + SMARTLIST_FOREACH_BEGIN(nodes, node_t *, node) { + const char *msg = NULL; + const char *description; + routerinfo_t *ent = node->ri; + uint32_t r; + if (!ent) + continue; + r = dirserv_router_get_status(ent, &msg, LOG_INFO); + description = router_describe(ent); + if (r & FP_REJECT) { + log_info(LD_DIRSERV, "Router %s is now rejected: %s", + description, msg?msg:""); + routerlist_remove(rl, ent, 0, time(NULL)); + continue; + } + if (bool_neq((r & FP_INVALID), !node->is_valid)) { + log_info(LD_DIRSERV, "Router '%s' is now %svalid.", description, + (r&FP_INVALID) ? "in" : ""); + node->is_valid = (r&FP_INVALID)?0:1; + } + if (bool_neq((r & FP_BADEXIT), node->is_bad_exit)) { + log_info(LD_DIRSERV, "Router '%s' is now a %s exit", description, + (r & FP_BADEXIT) ? "bad" : "good"); + node->is_bad_exit = (r&FP_BADEXIT) ? 1: 0; + } + } SMARTLIST_FOREACH_END(node); + + routerlist_assert_ok(rl); + smartlist_free(nodes); +} + +/** + * Allocate and return a description of the status of the server <b>desc</b>, + * for use in a v1-style router-status line. The server is listed + * as running iff <b>is_live</b> is true. + * + * This is deprecated: it's only used for controllers that want outputs in + * the old format. + */ +static char * +list_single_server_status(const routerinfo_t *desc, int is_live) +{ + char buf[MAX_NICKNAME_LEN+HEX_DIGEST_LEN+4]; /* !nickname=$hexdigest\0 */ + char *cp; + const node_t *node; + + tor_assert(desc); + + cp = buf; + if (!is_live) { + *cp++ = '!'; + } + node = node_get_by_id(desc->cache_info.identity_digest); + if (node && node->is_valid) { + strlcpy(cp, desc->nickname, sizeof(buf)-(cp-buf)); + cp += strlen(cp); + *cp++ = '='; + } + *cp++ = '$'; + base16_encode(cp, HEX_DIGEST_LEN+1, desc->cache_info.identity_digest, + DIGEST_LEN); + return tor_strdup(buf); +} + +/* DOCDOC running_long_enough_to_decide_unreachable */ +int +running_long_enough_to_decide_unreachable(void) +{ + return time_of_process_start + + get_options()->TestingAuthDirTimeToLearnReachability < approx_time(); +} + +/** Each server needs to have passed a reachability test no more + * than this number of seconds ago, or it is listed as down in + * the directory. */ +#define REACHABLE_TIMEOUT (45*60) + +/** If we tested a router and found it reachable _at least this long_ after it + * declared itself hibernating, it is probably done hibernating and we just + * missed a descriptor from it. */ +#define HIBERNATION_PUBLICATION_SKEW (60*60) + +/** Treat a router as alive if + * - It's me, and I'm not hibernating. + * or - We've found it reachable recently. */ +void +dirserv_set_router_is_running(routerinfo_t *router, time_t now) +{ + /*XXXX This function is a mess. Separate out the part that calculates + whether it's reachable and the part that tells rephist that the router was + unreachable. + */ + int answer; + const or_options_t *options = get_options(); + node_t *node = node_get_mutable_by_id(router->cache_info.identity_digest); + tor_assert(node); + + if (router_is_me(router)) { + /* We always know if we are shutting down or hibernating ourselves. */ + answer = ! we_are_hibernating(); + } else if (router->is_hibernating && + (router->cache_info.published_on + + HIBERNATION_PUBLICATION_SKEW) > node->last_reachable) { + /* A hibernating router is down unless we (somehow) had contact with it + * since it declared itself to be hibernating. */ + answer = 0; + } else if (options->AssumeReachable) { + /* If AssumeReachable, everybody is up unless they say they are down! */ + answer = 1; + } else { + /* Otherwise, a router counts as up if we found all announced OR + ports reachable in the last REACHABLE_TIMEOUT seconds. + + XXX prop186 For now there's always one IPv4 and at most one + IPv6 OR port. + + If we're not on IPv6, don't consider reachability of potential + IPv6 OR port since that'd kill all dual stack relays until a + majority of the dir auths have IPv6 connectivity. */ + answer = (now < node->last_reachable + REACHABLE_TIMEOUT && + (options->AuthDirHasIPv6Connectivity != 1 || + tor_addr_is_null(&router->ipv6_addr) || + now < node->last_reachable6 + REACHABLE_TIMEOUT)); + } + + if (!answer && running_long_enough_to_decide_unreachable()) { + /* Not considered reachable. tell rephist about that. + + Because we launch a reachability test for each router every + REACHABILITY_TEST_CYCLE_PERIOD seconds, then the router has probably + been down since at least that time after we last successfully reached + it. + + XXX ipv6 + */ + time_t when = now; + if (node->last_reachable && + node->last_reachable + REACHABILITY_TEST_CYCLE_PERIOD < now) + when = node->last_reachable + REACHABILITY_TEST_CYCLE_PERIOD; + rep_hist_note_router_unreachable(router->cache_info.identity_digest, when); + } + + node->is_running = answer; +} + +/** Based on the routerinfo_ts in <b>routers</b>, allocate the + * contents of a v1-style router-status line, and store it in + * *<b>router_status_out</b>. Return 0 on success, -1 on failure. + * + * If for_controller is true, include the routers with very old descriptors. + * + * This is deprecated: it's only used for controllers that want outputs in + * the old format. + */ +int +list_server_status_v1(smartlist_t *routers, char **router_status_out, + int for_controller) +{ + /* List of entries in a router-status style: An optional !, then an optional + * equals-suffixed nickname, then a dollar-prefixed hexdigest. */ + smartlist_t *rs_entries; + time_t now = time(NULL); + time_t cutoff = now - ROUTER_MAX_AGE_TO_PUBLISH; + const or_options_t *options = get_options(); + /* We include v2 dir auths here too, because they need to answer + * controllers. Eventually we'll deprecate this whole function; + * see also networkstatus_getinfo_by_purpose(). */ + int authdir = authdir_mode_publishes_statuses(options); + tor_assert(router_status_out); + + rs_entries = smartlist_new(); + + SMARTLIST_FOREACH_BEGIN(routers, routerinfo_t *, ri) { + const node_t *node = node_get_by_id(ri->cache_info.identity_digest); + tor_assert(node); + if (authdir) { + /* Update router status in routerinfo_t. */ + dirserv_set_router_is_running(ri, now); + } + if (for_controller) { + char name_buf[MAX_VERBOSE_NICKNAME_LEN+2]; + char *cp = name_buf; + if (!node->is_running) + *cp++ = '!'; + router_get_verbose_nickname(cp, ri); + smartlist_add_strdup(rs_entries, name_buf); + } else if (ri->cache_info.published_on >= cutoff) { + smartlist_add(rs_entries, list_single_server_status(ri, + node->is_running)); + } + } SMARTLIST_FOREACH_END(ri); + + *router_status_out = smartlist_join_strings(rs_entries, " ", 0, NULL); + + SMARTLIST_FOREACH(rs_entries, char *, cp, tor_free(cp)); + smartlist_free(rs_entries); + + return 0; +} + +/** Return 1 if <b>ri</b>'s descriptor is "active" -- running, valid, + * not hibernating, having observed bw greater 0, and not too old. Else + * return 0. + */ +static int +router_is_active(const routerinfo_t *ri, const node_t *node, time_t now) +{ + time_t cutoff = now - ROUTER_MAX_AGE_TO_PUBLISH; + if (ri->cache_info.published_on < cutoff) { + return 0; + } + if (!node->is_running || !node->is_valid || ri->is_hibernating) { + return 0; + } + /* Only require bandwidth capacity in non-test networks, or + * if TestingTorNetwork, and TestingMinExitFlagThreshold is non-zero */ + if (!ri->bandwidthcapacity) { + if (get_options()->TestingTorNetwork) { + if (get_options()->TestingMinExitFlagThreshold > 0) { + /* If we're in a TestingTorNetwork, and TestingMinExitFlagThreshold is, + * then require bandwidthcapacity */ + return 0; + } + } else { + /* If we're not in a TestingTorNetwork, then require bandwidthcapacity */ + return 0; + } + } + return 1; +} + +/********************************************************************/ + +/* A set of functions to answer questions about how we'd like to behave + * as a directory mirror/client. */ + +/** Return 1 if we fetch our directory material directly from the + * authorities, rather than from a mirror. */ +int +directory_fetches_from_authorities(const or_options_t *options) +{ + const routerinfo_t *me; + uint32_t addr; + int refuseunknown; + if (options->FetchDirInfoEarly) + return 1; + if (options->BridgeRelay == 1) + return 0; + if (server_mode(options) && + router_pick_published_address(options, &addr, 1) < 0) + return 1; /* we don't know our IP address; ask an authority. */ + refuseunknown = ! router_my_exit_policy_is_reject_star() && + should_refuse_unknown_exits(options); + if (!dir_server_mode(options) && !refuseunknown) + return 0; + if (!server_mode(options) || !advertised_server_mode()) + return 0; + me = router_get_my_routerinfo(); + if (!me || (!me->supports_tunnelled_dir_requests && !refuseunknown)) + return 0; /* if we don't service directory requests, return 0 too */ + return 1; +} + +/** Return 1 if we should fetch new networkstatuses, descriptors, etc + * on the "mirror" schedule rather than the "client" schedule. + */ +int +directory_fetches_dir_info_early(const or_options_t *options) +{ + return directory_fetches_from_authorities(options); +} + +/** Return 1 if we should fetch new networkstatuses, descriptors, etc + * on a very passive schedule -- waiting long enough for ordinary clients + * to probably have the info we want. These would include bridge users, + * and maybe others in the future e.g. if a Tor client uses another Tor + * client as a directory guard. + */ +int +directory_fetches_dir_info_later(const or_options_t *options) +{ + return options->UseBridges != 0; +} + +/** Return true iff we want to serve certificates for authorities + * that we don't acknowledge as authorities ourself. + * Use we_want_to_fetch_unknown_auth_certs to check if we want to fetch + * and keep these certificates. + */ +int +directory_caches_unknown_auth_certs(const or_options_t *options) +{ + return dir_server_mode(options) || options->BridgeRelay; +} + +/** Return 1 if we want to fetch and serve descriptors, networkstatuses, etc + * Else return 0. + * Check options->DirPort_set and directory_permits_begindir_requests() + * to see if we are willing to serve these directory documents to others via + * the DirPort and begindir-over-ORPort, respectively. + * + * To check if we should fetch documents, use we_want_to_fetch_flavor and + * we_want_to_fetch_unknown_auth_certs instead of this function. + */ +int +directory_caches_dir_info(const or_options_t *options) +{ + if (options->BridgeRelay || dir_server_mode(options)) + return 1; + if (!server_mode(options) || !advertised_server_mode()) + return 0; + /* We need an up-to-date view of network info if we're going to try to + * block exit attempts from unknown relays. */ + return ! router_my_exit_policy_is_reject_star() && + should_refuse_unknown_exits(options); +} + +/** Return 1 if we want to allow remote clients to ask us directory + * requests via the "begin_dir" interface, which doesn't require + * having any separate port open. */ +int +directory_permits_begindir_requests(const or_options_t *options) +{ + return options->BridgeRelay != 0 || dir_server_mode(options); +} + +/** Return 1 if we have no need to fetch new descriptors. This generally + * happens when we're not a dir cache and we haven't built any circuits + * lately. + */ +int +directory_too_idle_to_fetch_descriptors(const or_options_t *options, + time_t now) +{ + return !directory_caches_dir_info(options) && + !options->FetchUselessDescriptors && + rep_hist_circbuilding_dormant(now); +} + +/********************************************************************/ + +/** Map from flavor name to the cached_dir_t for the v3 consensuses that we're + * currently serving. */ +static strmap_t *cached_consensuses = NULL; + +/** Decrement the reference count on <b>d</b>, and free it if it no longer has + * any references. */ +void +cached_dir_decref(cached_dir_t *d) +{ + if (!d || --d->refcnt > 0) + return; + clear_cached_dir(d); + tor_free(d); +} + +/** Allocate and return a new cached_dir_t containing the string <b>s</b>, + * published at <b>published</b>. */ +cached_dir_t * +new_cached_dir(char *s, time_t published) +{ + cached_dir_t *d = tor_malloc_zero(sizeof(cached_dir_t)); + d->refcnt = 1; + d->dir = s; + d->dir_len = strlen(s); + d->published = published; + if (tor_compress(&(d->dir_compressed), &(d->dir_compressed_len), + d->dir, d->dir_len, ZLIB_METHOD)) { + log_warn(LD_BUG, "Error compressing directory"); + } + return d; +} + +/** Remove all storage held in <b>d</b>, but do not free <b>d</b> itself. */ +static void +clear_cached_dir(cached_dir_t *d) +{ + tor_free(d->dir); + tor_free(d->dir_compressed); + memset(d, 0, sizeof(cached_dir_t)); +} + +/** Free all storage held by the cached_dir_t in <b>d</b>. */ +static void +free_cached_dir_(void *_d) +{ + cached_dir_t *d; + if (!_d) + return; + + d = (cached_dir_t *)_d; + cached_dir_decref(d); +} + +/** Replace the v3 consensus networkstatus of type <b>flavor_name</b> that + * we're serving with <b>networkstatus</b>, published at <b>published</b>. No + * validation is performed. */ +void +dirserv_set_cached_consensus_networkstatus(const char *networkstatus, + const char *flavor_name, + const common_digests_t *digests, + const uint8_t *sha3_as_signed, + time_t published) +{ + cached_dir_t *new_networkstatus; + cached_dir_t *old_networkstatus; + if (!cached_consensuses) + cached_consensuses = strmap_new(); + + new_networkstatus = new_cached_dir(tor_strdup(networkstatus), published); + memcpy(&new_networkstatus->digests, digests, sizeof(common_digests_t)); + memcpy(&new_networkstatus->digest_sha3_as_signed, sha3_as_signed, + DIGEST256_LEN); + old_networkstatus = strmap_set(cached_consensuses, flavor_name, + new_networkstatus); + if (old_networkstatus) + cached_dir_decref(old_networkstatus); +} + +/** Return the latest downloaded consensus networkstatus in encoded, signed, + * optionally compressed format, suitable for sending to clients. */ +cached_dir_t * +dirserv_get_consensus(const char *flavor_name) +{ + if (!cached_consensuses) + return NULL; + return strmap_get(cached_consensuses, flavor_name); +} + +/** If a router's uptime is at least this value, then it is always + * considered stable, regardless of the rest of the network. This + * way we resist attacks where an attacker doubles the size of the + * network using allegedly high-uptime nodes, displacing all the + * current guards. */ +#define UPTIME_TO_GUARANTEE_STABLE (3600*24*30) +/** If a router's MTBF is at least this value, then it is always stable. + * See above. (Corresponds to about 7 days for current decay rates.) */ +#define MTBF_TO_GUARANTEE_STABLE (60*60*24*5) +/** Similarly, every node with at least this much weighted time known can be + * considered familiar enough to be a guard. Corresponds to about 20 days for + * current decay rates. + */ +#define TIME_KNOWN_TO_GUARANTEE_FAMILIAR (8*24*60*60) +/** Similarly, every node with sufficient WFU is around enough to be a guard. + */ +#define WFU_TO_GUARANTEE_GUARD (0.98) + +/* Thresholds for server performance: set by + * dirserv_compute_performance_thresholds, and used by + * generate_v2_networkstatus */ + +/** Any router with an uptime of at least this value is stable. */ +static uint32_t stable_uptime = 0; /* start at a safe value */ +/** Any router with an mtbf of at least this value is stable. */ +static double stable_mtbf = 0.0; +/** If true, we have measured enough mtbf info to look at stable_mtbf rather + * than stable_uptime. */ +static int enough_mtbf_info = 0; +/** Any router with a weighted fractional uptime of at least this much might + * be good as a guard. */ +static double guard_wfu = 0.0; +/** Don't call a router a guard unless we've known about it for at least this + * many seconds. */ +static long guard_tk = 0; +/** Any router with a bandwidth at least this high is "Fast" */ +static uint32_t fast_bandwidth_kb = 0; +/** If exits can be guards, then all guards must have a bandwidth this + * high. */ +static uint32_t guard_bandwidth_including_exits_kb = 0; +/** If exits can't be guards, then all guards must have a bandwidth this + * high. */ +static uint32_t guard_bandwidth_excluding_exits_kb = 0; + +/** Helper: estimate the uptime of a router given its stated uptime and the + * amount of time since it last stated its stated uptime. */ +static inline long +real_uptime(const routerinfo_t *router, time_t now) +{ + if (now < router->cache_info.published_on) + return router->uptime; + else + return router->uptime + (now - router->cache_info.published_on); +} + +/** Return 1 if <b>router</b> is not suitable for these parameters, else 0. + * If <b>need_uptime</b> is non-zero, we require a minimum uptime. + * If <b>need_capacity</b> is non-zero, we require a minimum advertised + * bandwidth. + */ +static int +dirserv_thinks_router_is_unreliable(time_t now, + routerinfo_t *router, + int need_uptime, int need_capacity) +{ + if (need_uptime) { + if (!enough_mtbf_info) { + /* XXXX We should change the rule from + * "use uptime if we don't have mtbf data" to "don't advertise Stable on + * v3 if we don't have enough mtbf data." Or maybe not, since if we ever + * hit a point where we need to reset a lot of authorities at once, + * none of them would be in a position to declare Stable. + */ + long uptime = real_uptime(router, now); + if ((unsigned)uptime < stable_uptime && + (unsigned)uptime < UPTIME_TO_GUARANTEE_STABLE) + return 1; + } else { + double mtbf = + rep_hist_get_stability(router->cache_info.identity_digest, now); + if (mtbf < stable_mtbf && + mtbf < MTBF_TO_GUARANTEE_STABLE) + return 1; + } + } + if (need_capacity) { + uint32_t bw_kb = dirserv_get_credible_bandwidth_kb(router); + if (bw_kb < fast_bandwidth_kb) + return 1; + } + return 0; +} + +/** Return true iff <b>router</b> should be assigned the "HSDir" flag. + * + * Right now this means it advertises support for it, it has a high uptime, + * it's a directory cache, it has the Stable and Fast flags, and it's currently + * considered Running. + * + * This function needs to be called after router-\>is_running has + * been set. + */ +static int +dirserv_thinks_router_is_hs_dir(const routerinfo_t *router, + const node_t *node, time_t now) +{ + + long uptime; + + /* If we haven't been running for at least + * get_options()->MinUptimeHidServDirectoryV2 seconds, we can't + * have accurate data telling us a relay has been up for at least + * that long. We also want to allow a bit of slack: Reachability + * tests aren't instant. If we haven't been running long enough, + * trust the relay. */ + + if (get_uptime() > + get_options()->MinUptimeHidServDirectoryV2 * 1.1) + uptime = MIN(rep_hist_get_uptime(router->cache_info.identity_digest, now), + real_uptime(router, now)); + else + uptime = real_uptime(router, now); + + return (router->wants_to_be_hs_dir && + router->supports_tunnelled_dir_requests && + node->is_stable && node->is_fast && + uptime >= get_options()->MinUptimeHidServDirectoryV2 && + router_is_active(router, node, now)); +} + +/** Don't consider routers with less bandwidth than this when computing + * thresholds. */ +#define ABSOLUTE_MIN_BW_VALUE_TO_CONSIDER_KB 4 + +/** Helper for dirserv_compute_performance_thresholds(): Decide whether to + * include a router in our calculations, and return true iff we should; the + * require_mbw parameter is passed in by + * dirserv_compute_performance_thresholds() and controls whether we ever + * count routers with only advertised bandwidths */ +static int +router_counts_toward_thresholds(const node_t *node, time_t now, + const digestmap_t *omit_as_sybil, + int require_mbw) +{ + /* Have measured bw? */ + int have_mbw = + dirserv_has_measured_bw(node->identity); + uint64_t min_bw_kb = ABSOLUTE_MIN_BW_VALUE_TO_CONSIDER_KB; + const or_options_t *options = get_options(); + + if (options->TestingTorNetwork) { + min_bw_kb = (int64_t)options->TestingMinExitFlagThreshold / 1000; + } + + return node->ri && router_is_active(node->ri, node, now) && + !digestmap_get(omit_as_sybil, node->identity) && + (dirserv_get_credible_bandwidth_kb(node->ri) >= min_bw_kb) && + (have_mbw || !require_mbw); +} + +/** Look through the routerlist, and using the measured bandwidth cache count + * how many measured bandwidths we know. This is used to decide whether we + * ever trust advertised bandwidths for purposes of assigning flags. */ +void +dirserv_count_measured_bws(const smartlist_t *routers) +{ + /* Initialize this first */ + routers_with_measured_bw = 0; + + /* Iterate over the routerlist and count measured bandwidths */ + SMARTLIST_FOREACH_BEGIN(routers, const routerinfo_t *, ri) { + /* Check if we know a measured bandwidth for this one */ + if (dirserv_has_measured_bw(ri->cache_info.identity_digest)) { + ++routers_with_measured_bw; + } + } SMARTLIST_FOREACH_END(ri); +} + +/** Look through the routerlist, the Mean Time Between Failure history, and + * the Weighted Fractional Uptime history, and use them to set thresholds for + * the Stable, Fast, and Guard flags. Update the fields stable_uptime, + * stable_mtbf, enough_mtbf_info, guard_wfu, guard_tk, fast_bandwidth, + * guard_bandwidth_including_exits, and guard_bandwidth_excluding_exits. + * + * Also, set the is_exit flag of each router appropriately. */ +void +dirserv_compute_performance_thresholds(digestmap_t *omit_as_sybil) +{ + int n_active, n_active_nonexit, n_familiar; + uint32_t *uptimes, *bandwidths_kb, *bandwidths_excluding_exits_kb; + long *tks; + double *mtbfs, *wfus; + smartlist_t *nodelist; + time_t now = time(NULL); + const or_options_t *options = get_options(); + + /* Require mbw? */ + int require_mbw = + (routers_with_measured_bw > + options->MinMeasuredBWsForAuthToIgnoreAdvertised) ? 1 : 0; + + /* initialize these all here, in case there are no routers */ + stable_uptime = 0; + stable_mtbf = 0; + fast_bandwidth_kb = 0; + guard_bandwidth_including_exits_kb = 0; + guard_bandwidth_excluding_exits_kb = 0; + guard_tk = 0; + guard_wfu = 0; + + nodelist_assert_ok(); + nodelist = nodelist_get_list(); + + /* Initialize arrays that will hold values for each router. We'll + * sort them and use that to compute thresholds. */ + n_active = n_active_nonexit = 0; + /* Uptime for every active router. */ + uptimes = tor_calloc(smartlist_len(nodelist), sizeof(uint32_t)); + /* Bandwidth for every active router. */ + bandwidths_kb = tor_calloc(smartlist_len(nodelist), sizeof(uint32_t)); + /* Bandwidth for every active non-exit router. */ + bandwidths_excluding_exits_kb = + tor_calloc(smartlist_len(nodelist), sizeof(uint32_t)); + /* Weighted mean time between failure for each active router. */ + mtbfs = tor_calloc(smartlist_len(nodelist), sizeof(double)); + /* Time-known for each active router. */ + tks = tor_calloc(smartlist_len(nodelist), sizeof(long)); + /* Weighted fractional uptime for each active router. */ + wfus = tor_calloc(smartlist_len(nodelist), sizeof(double)); + + /* Now, fill in the arrays. */ + SMARTLIST_FOREACH_BEGIN(nodelist, node_t *, node) { + if (options->BridgeAuthoritativeDir && + node->ri && + node->ri->purpose != ROUTER_PURPOSE_BRIDGE) + continue; + + routerinfo_t *ri = node->ri; + if (ri) { + node->is_exit = (!router_exit_policy_rejects_all(ri) && + exit_policy_is_general_exit(ri->exit_policy)); + } + + if (router_counts_toward_thresholds(node, now, omit_as_sybil, + require_mbw)) { + const char *id = node->identity; + uint32_t bw_kb; + + /* resolve spurious clang shallow analysis null pointer errors */ + tor_assert(ri); + + uptimes[n_active] = (uint32_t)real_uptime(ri, now); + mtbfs[n_active] = rep_hist_get_stability(id, now); + tks [n_active] = rep_hist_get_weighted_time_known(id, now); + bandwidths_kb[n_active] = bw_kb = dirserv_get_credible_bandwidth_kb(ri); + if (!node->is_exit || node->is_bad_exit) { + bandwidths_excluding_exits_kb[n_active_nonexit] = bw_kb; + ++n_active_nonexit; + } + ++n_active; + } + } SMARTLIST_FOREACH_END(node); + + /* Now, compute thresholds. */ + if (n_active) { + /* The median uptime is stable. */ + stable_uptime = median_uint32(uptimes, n_active); + /* The median mtbf is stable, if we have enough mtbf info */ + stable_mtbf = median_double(mtbfs, n_active); + /* The 12.5th percentile bandwidth is fast. */ + fast_bandwidth_kb = find_nth_uint32(bandwidths_kb, n_active, n_active/8); + /* (Now bandwidths is sorted.) */ + if (fast_bandwidth_kb < RELAY_REQUIRED_MIN_BANDWIDTH/(2 * 1000)) + fast_bandwidth_kb = bandwidths_kb[n_active/4]; + guard_bandwidth_including_exits_kb = + third_quartile_uint32(bandwidths_kb, n_active); + guard_tk = find_nth_long(tks, n_active, n_active/8); + } + + if (guard_tk > TIME_KNOWN_TO_GUARANTEE_FAMILIAR) + guard_tk = TIME_KNOWN_TO_GUARANTEE_FAMILIAR; + + { + /* We can vote on a parameter for the minimum and maximum. */ +#define ABSOLUTE_MIN_VALUE_FOR_FAST_FLAG 4 + int32_t min_fast_kb, max_fast_kb, min_fast, max_fast; + min_fast = networkstatus_get_param(NULL, "FastFlagMinThreshold", + ABSOLUTE_MIN_VALUE_FOR_FAST_FLAG, + ABSOLUTE_MIN_VALUE_FOR_FAST_FLAG, + INT32_MAX); + if (options->TestingTorNetwork) { + min_fast = (int32_t)options->TestingMinFastFlagThreshold; + } + max_fast = networkstatus_get_param(NULL, "FastFlagMaxThreshold", + INT32_MAX, min_fast, INT32_MAX); + min_fast_kb = min_fast / 1000; + max_fast_kb = max_fast / 1000; + + if (fast_bandwidth_kb < (uint32_t)min_fast_kb) + fast_bandwidth_kb = min_fast_kb; + if (fast_bandwidth_kb > (uint32_t)max_fast_kb) + fast_bandwidth_kb = max_fast_kb; + } + /* Protect sufficiently fast nodes from being pushed out of the set + * of Fast nodes. */ + if (options->AuthDirFastGuarantee && + fast_bandwidth_kb > options->AuthDirFastGuarantee/1000) + fast_bandwidth_kb = (uint32_t)options->AuthDirFastGuarantee/1000; + + /* Now that we have a time-known that 7/8 routers are known longer than, + * fill wfus with the wfu of every such "familiar" router. */ + n_familiar = 0; + + SMARTLIST_FOREACH_BEGIN(nodelist, node_t *, node) { + if (router_counts_toward_thresholds(node, now, + omit_as_sybil, require_mbw)) { + routerinfo_t *ri = node->ri; + const char *id = ri->cache_info.identity_digest; + long tk = rep_hist_get_weighted_time_known(id, now); + if (tk < guard_tk) + continue; + wfus[n_familiar++] = rep_hist_get_weighted_fractional_uptime(id, now); + } + } SMARTLIST_FOREACH_END(node); + if (n_familiar) + guard_wfu = median_double(wfus, n_familiar); + if (guard_wfu > WFU_TO_GUARANTEE_GUARD) + guard_wfu = WFU_TO_GUARANTEE_GUARD; + + enough_mtbf_info = rep_hist_have_measured_enough_stability(); + + if (n_active_nonexit) { + guard_bandwidth_excluding_exits_kb = + find_nth_uint32(bandwidths_excluding_exits_kb, + n_active_nonexit, n_active_nonexit*3/4); + } + + log_info(LD_DIRSERV, + "Cutoffs: For Stable, %lu sec uptime, %lu sec MTBF. " + "For Fast: %lu kilobytes/sec. " + "For Guard: WFU %.03f%%, time-known %lu sec, " + "and bandwidth %lu or %lu kilobytes/sec. " + "We%s have enough stability data.", + (unsigned long)stable_uptime, + (unsigned long)stable_mtbf, + (unsigned long)fast_bandwidth_kb, + guard_wfu*100, + (unsigned long)guard_tk, + (unsigned long)guard_bandwidth_including_exits_kb, + (unsigned long)guard_bandwidth_excluding_exits_kb, + enough_mtbf_info ? "" : " don't"); + + tor_free(uptimes); + tor_free(mtbfs); + tor_free(bandwidths_kb); + tor_free(bandwidths_excluding_exits_kb); + tor_free(tks); + tor_free(wfus); +} + +/* Use dirserv_compute_performance_thresholds() to compute the thresholds + * for the status flags, specifically for bridges. + * + * This is only called by a Bridge Authority from + * networkstatus_getinfo_by_purpose(). + */ +void +dirserv_compute_bridge_flag_thresholds(void) +{ + digestmap_t *omit_as_sybil = digestmap_new(); + dirserv_compute_performance_thresholds(omit_as_sybil); + digestmap_free(omit_as_sybil, NULL); +} + +/** Measured bandwidth cache entry */ +typedef struct mbw_cache_entry_s { + long mbw_kb; + time_t as_of; +} mbw_cache_entry_t; + +/** Measured bandwidth cache - keys are identity_digests, values are + * mbw_cache_entry_t *. */ +static digestmap_t *mbw_cache = NULL; + +/** Store a measured bandwidth cache entry when reading the measured + * bandwidths file. */ +STATIC void +dirserv_cache_measured_bw(const measured_bw_line_t *parsed_line, + time_t as_of) +{ + mbw_cache_entry_t *e = NULL; + + tor_assert(parsed_line); + + /* Allocate a cache if we need */ + if (!mbw_cache) mbw_cache = digestmap_new(); + + /* Check if we have an existing entry */ + e = digestmap_get(mbw_cache, parsed_line->node_id); + /* If we do, we can re-use it */ + if (e) { + /* Check that we really are newer, and update */ + if (as_of > e->as_of) { + e->mbw_kb = parsed_line->bw_kb; + e->as_of = as_of; + } + } else { + /* We'll have to insert a new entry */ + e = tor_malloc(sizeof(*e)); + e->mbw_kb = parsed_line->bw_kb; + e->as_of = as_of; + digestmap_set(mbw_cache, parsed_line->node_id, e); + } +} + +/** Clear and free the measured bandwidth cache */ +void +dirserv_clear_measured_bw_cache(void) +{ + if (mbw_cache) { + /* Free the map and all entries */ + digestmap_free(mbw_cache, tor_free_); + mbw_cache = NULL; + } +} + +/** Scan the measured bandwidth cache and remove expired entries */ +STATIC void +dirserv_expire_measured_bw_cache(time_t now) +{ + + if (mbw_cache) { + /* Iterate through the cache and check each entry */ + DIGESTMAP_FOREACH_MODIFY(mbw_cache, k, mbw_cache_entry_t *, e) { + if (now > e->as_of + MAX_MEASUREMENT_AGE) { + tor_free(e); + MAP_DEL_CURRENT(k); + } + } DIGESTMAP_FOREACH_END; + + /* Check if we cleared the whole thing and free if so */ + if (digestmap_size(mbw_cache) == 0) { + digestmap_free(mbw_cache, tor_free_); + mbw_cache = 0; + } + } +} + +/** Query the cache by identity digest, return value indicates whether + * we found it. The bw_out and as_of_out pointers receive the cached + * bandwidth value and the time it was cached if not NULL. */ +int +dirserv_query_measured_bw_cache_kb(const char *node_id, long *bw_kb_out, + time_t *as_of_out) +{ + mbw_cache_entry_t *v = NULL; + int rv = 0; + + if (mbw_cache && node_id) { + v = digestmap_get(mbw_cache, node_id); + if (v) { + /* Found something */ + rv = 1; + if (bw_kb_out) *bw_kb_out = v->mbw_kb; + if (as_of_out) *as_of_out = v->as_of; + } + } + + return rv; +} + +/** Predicate wrapper for dirserv_query_measured_bw_cache() */ +int +dirserv_has_measured_bw(const char *node_id) +{ + return dirserv_query_measured_bw_cache_kb(node_id, NULL, NULL); +} + +/** Get the current size of the measured bandwidth cache */ +int +dirserv_get_measured_bw_cache_size(void) +{ + if (mbw_cache) return digestmap_size(mbw_cache); + else return 0; +} + +/** Return the bandwidth we believe for assigning flags; prefer measured + * over advertised, and if we have above a threshold quantity of measured + * bandwidths, we don't want to ever give flags to unmeasured routers, so + * return 0. */ +static uint32_t +dirserv_get_credible_bandwidth_kb(const routerinfo_t *ri) +{ + int threshold; + uint32_t bw_kb = 0; + long mbw_kb; + + tor_assert(ri); + /* Check if we have a measured bandwidth, and check the threshold if not */ + if (!(dirserv_query_measured_bw_cache_kb(ri->cache_info.identity_digest, + &mbw_kb, NULL))) { + threshold = get_options()->MinMeasuredBWsForAuthToIgnoreAdvertised; + if (routers_with_measured_bw > threshold) { + /* Return zero for unmeasured bandwidth if we are above threshold */ + bw_kb = 0; + } else { + /* Return an advertised bandwidth otherwise */ + bw_kb = router_get_advertised_bandwidth_capped(ri) / 1000; + } + } else { + /* We have the measured bandwidth in mbw */ + bw_kb = (uint32_t)mbw_kb; + } + + return bw_kb; +} + +/** Give a statement of our current performance thresholds for inclusion + * in a vote document. */ +char * +dirserv_get_flag_thresholds_line(void) +{ + char *result=NULL; + const int measured_threshold = + get_options()->MinMeasuredBWsForAuthToIgnoreAdvertised; + const int enough_measured_bw = routers_with_measured_bw > measured_threshold; + + tor_asprintf(&result, + "stable-uptime=%lu stable-mtbf=%lu " + "fast-speed=%lu " + "guard-wfu=%.03f%% guard-tk=%lu " + "guard-bw-inc-exits=%lu guard-bw-exc-exits=%lu " + "enough-mtbf=%d ignoring-advertised-bws=%d", + (unsigned long)stable_uptime, + (unsigned long)stable_mtbf, + (unsigned long)fast_bandwidth_kb*1000, + guard_wfu*100, + (unsigned long)guard_tk, + (unsigned long)guard_bandwidth_including_exits_kb*1000, + (unsigned long)guard_bandwidth_excluding_exits_kb*1000, + enough_mtbf_info ? 1 : 0, + enough_measured_bw ? 1 : 0); + + return result; +} + +/** Helper: write the router-status information in <b>rs</b> into a newly + * allocated character buffer. Use the same format as in network-status + * documents. If <b>version</b> is non-NULL, add a "v" line for the platform. + * + * consensus_method is the current consensus method when format is + * NS_V3_CONSENSUS or NS_V3_CONSENSUS_MICRODESC. It is ignored for other + * formats: pass ROUTERSTATUS_FORMAT_NO_CONSENSUS_METHOD. + * + * Return 0 on success, -1 on failure. + * + * The format argument has one of the following values: + * NS_V2 - Output an entry suitable for a V2 NS opinion document + * NS_V3_CONSENSUS - Output the first portion of a V3 NS consensus entry + * for consensus_method. + * NS_V3_CONSENSUS_MICRODESC - Output the first portion of a V3 microdesc + * consensus entry for consensus_method. + * NS_V3_VOTE - Output a complete V3 NS vote. If <b>vrs</b> is present, + * it contains additional information for the vote. + * NS_CONTROL_PORT - Output a NS document for the control port. + */ +char * +routerstatus_format_entry(const routerstatus_t *rs, const char *version, + const char *protocols, + routerstatus_format_type_t format, + int consensus_method, + const vote_routerstatus_t *vrs) +{ + char *summary; + char *result = NULL; + + char published[ISO_TIME_LEN+1]; + char identity64[BASE64_DIGEST_LEN+1]; + char digest64[BASE64_DIGEST_LEN+1]; + smartlist_t *chunks = smartlist_new(); + + format_iso_time(published, rs->published_on); + digest_to_base64(identity64, rs->identity_digest); + digest_to_base64(digest64, rs->descriptor_digest); + + smartlist_add_asprintf(chunks, + "r %s %s %s%s%s %s %d %d\n", + rs->nickname, + identity64, + (format==NS_V3_CONSENSUS_MICRODESC)?"":digest64, + (format==NS_V3_CONSENSUS_MICRODESC)?"":" ", + published, + fmt_addr32(rs->addr), + (int)rs->or_port, + (int)rs->dir_port); + + /* TODO: Maybe we want to pass in what we need to build the rest of + * this here, instead of in the caller. Then we could use the + * networkstatus_type_t values, with an additional control port value + * added -MP */ + + /* V3 microdesc consensuses only have "a" lines in later consensus methods + */ + if (format == NS_V3_CONSENSUS_MICRODESC && + consensus_method < MIN_METHOD_FOR_A_LINES_IN_MICRODESC_CONSENSUS) + goto done; + + /* Possible "a" line. At most one for now. */ + if (!tor_addr_is_null(&rs->ipv6_addr)) { + smartlist_add_asprintf(chunks, "a %s\n", + fmt_addrport(&rs->ipv6_addr, rs->ipv6_orport)); + } + + if (format == NS_V3_CONSENSUS || format == NS_V3_CONSENSUS_MICRODESC) + goto done; + + smartlist_add_asprintf(chunks, + "s%s%s%s%s%s%s%s%s%s%s\n", + /* These must stay in alphabetical order. */ + rs->is_authority?" Authority":"", + rs->is_bad_exit?" BadExit":"", + rs->is_exit?" Exit":"", + rs->is_fast?" Fast":"", + rs->is_possible_guard?" Guard":"", + rs->is_hs_dir?" HSDir":"", + rs->is_flagged_running?" Running":"", + rs->is_stable?" Stable":"", + rs->is_v2_dir?" V2Dir":"", + rs->is_valid?" Valid":""); + + /* length of "opt v \n" */ +#define V_LINE_OVERHEAD 7 + if (version && strlen(version) < MAX_V_LINE_LEN - V_LINE_OVERHEAD) { + smartlist_add_asprintf(chunks, "v %s\n", version); + } + if (protocols) { + smartlist_add_asprintf(chunks, "pr %s\n", protocols); + } + + if (format != NS_V2) { + const routerinfo_t* desc = router_get_by_id_digest(rs->identity_digest); + uint32_t bw_kb; + + if (format != NS_CONTROL_PORT) { + /* Blow up more or less nicely if we didn't get anything or not the + * thing we expected. + */ + if (!desc) { + char id[HEX_DIGEST_LEN+1]; + char dd[HEX_DIGEST_LEN+1]; + + base16_encode(id, sizeof(id), rs->identity_digest, DIGEST_LEN); + base16_encode(dd, sizeof(dd), rs->descriptor_digest, DIGEST_LEN); + log_warn(LD_BUG, "Cannot get any descriptor for %s " + "(wanted descriptor %s).", + id, dd); + goto err; + } + + /* This assert could fire for the control port, because + * it can request NS documents before all descriptors + * have been fetched. Therefore, we only do this test when + * format != NS_CONTROL_PORT. */ + if (tor_memneq(desc->cache_info.signed_descriptor_digest, + rs->descriptor_digest, + DIGEST_LEN)) { + char rl_d[HEX_DIGEST_LEN+1]; + char rs_d[HEX_DIGEST_LEN+1]; + char id[HEX_DIGEST_LEN+1]; + + base16_encode(rl_d, sizeof(rl_d), + desc->cache_info.signed_descriptor_digest, DIGEST_LEN); + base16_encode(rs_d, sizeof(rs_d), rs->descriptor_digest, DIGEST_LEN); + base16_encode(id, sizeof(id), rs->identity_digest, DIGEST_LEN); + log_err(LD_BUG, "descriptor digest in routerlist does not match " + "the one in routerstatus: %s vs %s " + "(router %s)\n", + rl_d, rs_d, id); + + tor_assert(tor_memeq(desc->cache_info.signed_descriptor_digest, + rs->descriptor_digest, + DIGEST_LEN)); + } + } + + if (format == NS_CONTROL_PORT && rs->has_bandwidth) { + bw_kb = rs->bandwidth_kb; + } else { + tor_assert(desc); + bw_kb = router_get_advertised_bandwidth_capped(desc) / 1000; + } + smartlist_add_asprintf(chunks, + "w Bandwidth=%d", bw_kb); + + if (format == NS_V3_VOTE && vrs && vrs->has_measured_bw) { + smartlist_add_asprintf(chunks, + " Measured=%d", vrs->measured_bw_kb); + } + /* Write down guardfraction information if we have it. */ + if (format == NS_V3_VOTE && vrs && vrs->status.has_guardfraction) { + smartlist_add_asprintf(chunks, + " GuardFraction=%d", + vrs->status.guardfraction_percentage); + } + + smartlist_add_strdup(chunks, "\n"); + + if (desc) { + summary = policy_summarize(desc->exit_policy, AF_INET); + smartlist_add_asprintf(chunks, "p %s\n", summary); + tor_free(summary); + } + + if (format == NS_V3_VOTE && vrs) { + if (tor_mem_is_zero((char*)vrs->ed25519_id, ED25519_PUBKEY_LEN)) { + smartlist_add_strdup(chunks, "id ed25519 none\n"); + } else { + char ed_b64[BASE64_DIGEST256_LEN+1]; + digest256_to_base64(ed_b64, (const char*)vrs->ed25519_id); + smartlist_add_asprintf(chunks, "id ed25519 %s\n", ed_b64); + } + } + } + + done: + result = smartlist_join_strings(chunks, "", 0, NULL); + + err: + SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp)); + smartlist_free(chunks); + + return result; +} + +/** Extract status information from <b>ri</b> and from other authority + * functions and store it in <b>rs</b>. <b>rs</b> is zeroed out before it is + * set. + * + * We assume that ri-\>is_running has already been set, e.g. by + * dirserv_set_router_is_running(ri, now); + */ +void +set_routerstatus_from_routerinfo(routerstatus_t *rs, + node_t *node, + routerinfo_t *ri, + time_t now, + int listbadexits) +{ + const or_options_t *options = get_options(); + uint32_t routerbw_kb = dirserv_get_credible_bandwidth_kb(ri); + + memset(rs, 0, sizeof(routerstatus_t)); + + rs->is_authority = + router_digest_is_trusted_dir(ri->cache_info.identity_digest); + + /* Already set by compute_performance_thresholds. */ + rs->is_exit = node->is_exit; + rs->is_stable = node->is_stable = + !dirserv_thinks_router_is_unreliable(now, ri, 1, 0); + rs->is_fast = node->is_fast = + !dirserv_thinks_router_is_unreliable(now, ri, 0, 1); + rs->is_flagged_running = node->is_running; /* computed above */ + + rs->is_valid = node->is_valid; + + if (node->is_fast && node->is_stable && + ri->supports_tunnelled_dir_requests && + ((options->AuthDirGuardBWGuarantee && + routerbw_kb >= options->AuthDirGuardBWGuarantee/1000) || + routerbw_kb >= MIN(guard_bandwidth_including_exits_kb, + guard_bandwidth_excluding_exits_kb))) { + long tk = rep_hist_get_weighted_time_known( + node->identity, now); + double wfu = rep_hist_get_weighted_fractional_uptime( + node->identity, now); + rs->is_possible_guard = (wfu >= guard_wfu && tk >= guard_tk) ? 1 : 0; + } else { + rs->is_possible_guard = 0; + } + + rs->is_bad_exit = listbadexits && node->is_bad_exit; + rs->is_hs_dir = node->is_hs_dir = + dirserv_thinks_router_is_hs_dir(ri, node, now); + + rs->is_named = rs->is_unnamed = 0; + + rs->published_on = ri->cache_info.published_on; + memcpy(rs->identity_digest, node->identity, DIGEST_LEN); + memcpy(rs->descriptor_digest, ri->cache_info.signed_descriptor_digest, + DIGEST_LEN); + rs->addr = ri->addr; + strlcpy(rs->nickname, ri->nickname, sizeof(rs->nickname)); + rs->or_port = ri->or_port; + rs->dir_port = ri->dir_port; + rs->is_v2_dir = ri->supports_tunnelled_dir_requests; + if (options->AuthDirHasIPv6Connectivity == 1 && + !tor_addr_is_null(&ri->ipv6_addr) && + node->last_reachable6 >= now - REACHABLE_TIMEOUT) { + /* We're configured as having IPv6 connectivity. There's an IPv6 + OR port and it's reachable so copy it to the routerstatus. */ + tor_addr_copy(&rs->ipv6_addr, &ri->ipv6_addr); + rs->ipv6_orport = ri->ipv6_orport; + } else { + tor_addr_make_null(&rs->ipv6_addr, AF_INET6); + rs->ipv6_orport = 0; + } + + if (options->TestingTorNetwork) { + dirserv_set_routerstatus_testing(rs); + } +} + +/** Use TestingDirAuthVoteExit, TestingDirAuthVoteGuard, and + * TestingDirAuthVoteHSDir to give out the Exit, Guard, and HSDir flags, + * respectively. But don't set the corresponding node flags. + * Should only be called if TestingTorNetwork is set. */ +STATIC void +dirserv_set_routerstatus_testing(routerstatus_t *rs) +{ + const or_options_t *options = get_options(); + + tor_assert(options->TestingTorNetwork); + + if (routerset_contains_routerstatus(options->TestingDirAuthVoteExit, + rs, 0)) { + rs->is_exit = 1; + } else if (options->TestingDirAuthVoteExitIsStrict) { + rs->is_exit = 0; + } + + if (routerset_contains_routerstatus(options->TestingDirAuthVoteGuard, + rs, 0)) { + rs->is_possible_guard = 1; + } else if (options->TestingDirAuthVoteGuardIsStrict) { + rs->is_possible_guard = 0; + } + + if (routerset_contains_routerstatus(options->TestingDirAuthVoteHSDir, + rs, 0)) { + rs->is_hs_dir = 1; + } else if (options->TestingDirAuthVoteHSDirIsStrict) { + rs->is_hs_dir = 0; + } +} + +/** The guardfraction of the guard with identity fingerprint <b>guard_id</b> + * is <b>guardfraction_percentage</b>. See if we have a vote routerstatus for + * this guard in <b>vote_routerstatuses</b>, and if we do, register the + * information to it. + * + * Return 1 if we applied the information and 0 if we couldn't find a + * matching guard. + * + * Requires that <b>vote_routerstatuses</b> be sorted. + */ +static int +guardfraction_line_apply(const char *guard_id, + uint32_t guardfraction_percentage, + smartlist_t *vote_routerstatuses) +{ + vote_routerstatus_t *vrs = NULL; + + tor_assert(vote_routerstatuses); + + vrs = smartlist_bsearch(vote_routerstatuses, guard_id, + compare_digest_to_vote_routerstatus_entry); + + if (!vrs) { + return 0; + } + + vrs->status.has_guardfraction = 1; + vrs->status.guardfraction_percentage = guardfraction_percentage; + + return 1; +} + +/* Given a guard line from a guardfraction file, parse it and register + * its information to <b>vote_routerstatuses</b>. + * + * Return: + * * 1 if the line was proper and its information got registered. + * * 0 if the line was proper but no currently active guard was found + * to register the guardfraction information to. + * * -1 if the line could not be parsed and set <b>err_msg</b> to a + newly allocated string containing the error message. + */ +static int +guardfraction_file_parse_guard_line(const char *guard_line, + smartlist_t *vote_routerstatuses, + char **err_msg) +{ + char guard_id[DIGEST_LEN]; + uint32_t guardfraction; + char *inputs_tmp = NULL; + int num_ok = 1; + + smartlist_t *sl = smartlist_new(); + int retval = -1; + + tor_assert(err_msg); + + /* guard_line should contain something like this: + <hex digest> <guardfraction> <appearances> */ + smartlist_split_string(sl, guard_line, " ", + SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 3); + if (smartlist_len(sl) < 3) { + tor_asprintf(err_msg, "bad line '%s'", guard_line); + goto done; + } + + inputs_tmp = smartlist_get(sl, 0); + if (strlen(inputs_tmp) != HEX_DIGEST_LEN || + base16_decode(guard_id, DIGEST_LEN, + inputs_tmp, HEX_DIGEST_LEN) != DIGEST_LEN) { + tor_asprintf(err_msg, "bad digest '%s'", inputs_tmp); + goto done; + } + + inputs_tmp = smartlist_get(sl, 1); + /* Guardfraction is an integer in [0, 100]. */ + guardfraction = + (uint32_t) tor_parse_long(inputs_tmp, 10, 0, 100, &num_ok, NULL); + if (!num_ok) { + tor_asprintf(err_msg, "wrong percentage '%s'", inputs_tmp); + goto done; + } + + /* If routerstatuses were provided, apply this info to actual routers. */ + if (vote_routerstatuses) { + retval = guardfraction_line_apply(guard_id, guardfraction, + vote_routerstatuses); + } else { + retval = 0; /* If we got this far, line was correctly formatted. */ + } + + done: + + SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp)); + smartlist_free(sl); + + return retval; +} + +/** Given an inputs line from a guardfraction file, parse it and + * register its information to <b>total_consensuses</b> and + * <b>total_days</b>. + * + * Return 0 if it parsed well. Return -1 if there was an error, and + * set <b>err_msg</b> to a newly allocated string containing the + * error message. + */ +static int +guardfraction_file_parse_inputs_line(const char *inputs_line, + int *total_consensuses, + int *total_days, + char **err_msg) +{ + int retval = -1; + char *inputs_tmp = NULL; + int num_ok = 1; + smartlist_t *sl = smartlist_new(); + + tor_assert(err_msg); + + /* Second line is inputs information: + * n-inputs <total_consensuses> <total_days>. */ + smartlist_split_string(sl, inputs_line, " ", + SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 3); + if (smartlist_len(sl) < 2) { + tor_asprintf(err_msg, "incomplete line '%s'", inputs_line); + goto done; + } + + inputs_tmp = smartlist_get(sl, 0); + *total_consensuses = + (int) tor_parse_long(inputs_tmp, 10, 0, INT_MAX, &num_ok, NULL); + if (!num_ok) { + tor_asprintf(err_msg, "unparseable consensus '%s'", inputs_tmp); + goto done; + } + + inputs_tmp = smartlist_get(sl, 1); + *total_days = + (int) tor_parse_long(inputs_tmp, 10, 0, INT_MAX, &num_ok, NULL); + if (!num_ok) { + tor_asprintf(err_msg, "unparseable days '%s'", inputs_tmp); + goto done; + } + + retval = 0; + + done: + SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp)); + smartlist_free(sl); + + return retval; +} + +/* Maximum age of a guardfraction file that we are willing to accept. */ +#define MAX_GUARDFRACTION_FILE_AGE (7*24*60*60) /* approx a week */ + +/** Static strings of guardfraction files. */ +#define GUARDFRACTION_DATE_STR "written-at" +#define GUARDFRACTION_INPUTS "n-inputs" +#define GUARDFRACTION_GUARD "guard-seen" +#define GUARDFRACTION_VERSION "guardfraction-file-version" + +/** Given a guardfraction file in a string, parse it and register the + * guardfraction information to the provided vote routerstatuses. + * + * This is the rough format of the guardfraction file: + * + * guardfraction-file-version 1 + * written-at <date and time> + * n-inputs <number of consesuses parsed> <number of days considered> + * + * guard-seen <fpr 1> <guardfraction percentage> <consensus appearances> + * guard-seen <fpr 2> <guardfraction percentage> <consensus appearances> + * guard-seen <fpr 3> <guardfraction percentage> <consensus appearances> + * guard-seen <fpr 4> <guardfraction percentage> <consensus appearances> + * guard-seen <fpr 5> <guardfraction percentage> <consensus appearances> + * ... + * + * Return -1 if the parsing failed and 0 if it went smoothly. Parsing + * should tolerate errors in all lines but the written-at header. + */ +STATIC int +dirserv_read_guardfraction_file_from_str(const char *guardfraction_file_str, + smartlist_t *vote_routerstatuses) +{ + config_line_t *front=NULL, *line; + int ret_tmp; + int retval = -1; + int current_line_n = 0; /* line counter for better log messages */ + + /* Guardfraction info to be parsed */ + int total_consensuses = 0; + int total_days = 0; + + /* Stats */ + int guards_read_n = 0; + int guards_applied_n = 0; + + /* Parse file and split it in lines */ + ret_tmp = config_get_lines(guardfraction_file_str, &front, 0); + if (ret_tmp < 0) { + log_warn(LD_CONFIG, "Error reading from guardfraction file"); + goto done; + } + + /* Sort routerstatuses (needed later when applying guardfraction info) */ + if (vote_routerstatuses) + smartlist_sort(vote_routerstatuses, compare_vote_routerstatus_entries); + + for (line = front; line; line=line->next) { + current_line_n++; + + if (!strcmp(line->key, GUARDFRACTION_VERSION)) { + int num_ok = 1; + unsigned int version; + + version = + (unsigned int) tor_parse_long(line->value, + 10, 0, INT_MAX, &num_ok, NULL); + + if (!num_ok || version != 1) { + log_warn(LD_GENERAL, "Got unknown guardfraction version %d.", version); + goto done; + } + } else if (!strcmp(line->key, GUARDFRACTION_DATE_STR)) { + time_t file_written_at; + time_t now = time(NULL); + + /* First line is 'written-at <date>' */ + if (parse_iso_time(line->value, &file_written_at) < 0) { + log_warn(LD_CONFIG, "Guardfraction:%d: Bad date '%s'. Ignoring", + current_line_n, line->value); + goto done; /* don't tolerate failure here. */ + } + if (file_written_at < now - MAX_GUARDFRACTION_FILE_AGE) { + log_warn(LD_CONFIG, "Guardfraction:%d: was written very long ago '%s'", + current_line_n, line->value); + goto done; /* don't tolerate failure here. */ + } + } else if (!strcmp(line->key, GUARDFRACTION_INPUTS)) { + char *err_msg = NULL; + + if (guardfraction_file_parse_inputs_line(line->value, + &total_consensuses, + &total_days, + &err_msg) < 0) { + log_warn(LD_CONFIG, "Guardfraction:%d: %s", + current_line_n, err_msg); + tor_free(err_msg); + continue; + } + + } else if (!strcmp(line->key, GUARDFRACTION_GUARD)) { + char *err_msg = NULL; + + ret_tmp = guardfraction_file_parse_guard_line(line->value, + vote_routerstatuses, + &err_msg); + if (ret_tmp < 0) { /* failed while parsing the guard line */ + log_warn(LD_CONFIG, "Guardfraction:%d: %s", + current_line_n, err_msg); + tor_free(err_msg); + continue; + } + + /* Successfully parsed guard line. Check if it was applied properly. */ + guards_read_n++; + if (ret_tmp > 0) { + guards_applied_n++; + } + } else { + log_warn(LD_CONFIG, "Unknown guardfraction line %d (%s %s)", + current_line_n, line->key, line->value); + } + } + + retval = 0; + + log_info(LD_CONFIG, + "Successfully parsed guardfraction file with %d consensuses over " + "%d days. Parsed %d nodes and applied %d of them%s.", + total_consensuses, total_days, guards_read_n, guards_applied_n, + vote_routerstatuses ? "" : " (no routerstatus provided)" ); + + done: + config_free_lines(front); + + if (retval < 0) { + return retval; + } else { + return guards_read_n; + } +} + +/** Read a guardfraction file at <b>fname</b> and load all its + * information to <b>vote_routerstatuses</b>. */ +int +dirserv_read_guardfraction_file(const char *fname, + smartlist_t *vote_routerstatuses) +{ + char *guardfraction_file_str; + + /* Read file to a string */ + guardfraction_file_str = read_file_to_str(fname, RFTS_IGNORE_MISSING, NULL); + if (!guardfraction_file_str) { + log_warn(LD_FS, "Cannot open guardfraction file '%s'. Failing.", fname); + return -1; + } + + return dirserv_read_guardfraction_file_from_str(guardfraction_file_str, + vote_routerstatuses); +} + +/** + * Helper function to parse out a line in the measured bandwidth file + * into a measured_bw_line_t output structure. + * + * If <b>line_is_after_headers</b> is true, then if we encounter an incomplete + * bw line, return -1 and warn, since we are after the headers and we should + * only parse bw lines. Return 0 otherwise. + * + * If <b>line_is_after_headers</b> is false then it means that we are not past + * the header block yet. If we encounter an incomplete bw line, return -1 but + * don't warn since there could be additional header lines coming. If we + * encounter a proper bw line, return 0 (and we got past the headers). + */ +STATIC int +measured_bw_line_parse(measured_bw_line_t *out, const char *orig_line, + int line_is_after_headers) +{ + char *line = tor_strdup(orig_line); + char *cp = line; + int got_bw = 0; + int got_node_id = 0; + char *strtok_state; /* lame sauce d'jour */ + + if (strlen(line) == 0) { + log_warn(LD_DIRSERV, "Empty line in bandwidth file"); + tor_free(line); + return -1; + } + + /* Remove end of line character, so that is not part of the token */ + if (line[strlen(line) - 1] == '\n') { + line[strlen(line) - 1] = '\0'; + } + + cp = tor_strtok_r(cp, " \t", &strtok_state); + + if (!cp) { + log_warn(LD_DIRSERV, "Invalid line in bandwidth file: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + + if (orig_line[strlen(orig_line)-1] != '\n') { + log_warn(LD_DIRSERV, "Incomplete line in bandwidth file: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + + do { + if (strcmpstart(cp, "bw=") == 0) { + int parse_ok = 0; + char *endptr; + if (got_bw) { + log_warn(LD_DIRSERV, "Double bw= in bandwidth file line: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + cp+=strlen("bw="); + + out->bw_kb = tor_parse_long(cp, 10, 0, LONG_MAX, &parse_ok, &endptr); + if (!parse_ok || (*endptr && !TOR_ISSPACE(*endptr))) { + log_warn(LD_DIRSERV, "Invalid bandwidth in bandwidth file line: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + got_bw=1; + } else if (strcmpstart(cp, "node_id=$") == 0) { + if (got_node_id) { + log_warn(LD_DIRSERV, "Double node_id= in bandwidth file line: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + cp+=strlen("node_id=$"); + + if (strlen(cp) != HEX_DIGEST_LEN || + base16_decode(out->node_id, DIGEST_LEN, + cp, HEX_DIGEST_LEN) != DIGEST_LEN) { + log_warn(LD_DIRSERV, "Invalid node_id in bandwidth file line: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + strlcpy(out->node_hex, cp, sizeof(out->node_hex)); + got_node_id=1; + } + } while ((cp = tor_strtok_r(NULL, " \t", &strtok_state))); + + if (got_bw && got_node_id) { + tor_free(line); + return 0; + } else if (line_is_after_headers == 0) { + /* There could be additional header lines, therefore do not give warnings + * but returns -1 since it's not a complete bw line. */ + log_debug(LD_DIRSERV, "Missing bw or node_id in bandwidth file line: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } else { + log_warn(LD_DIRSERV, "Incomplete line in bandwidth file: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } +} + +/** + * Helper function to apply a parsed measurement line to a list + * of bandwidth statuses. Returns true if a line is found, + * false otherwise. + */ +STATIC int +measured_bw_line_apply(measured_bw_line_t *parsed_line, + smartlist_t *routerstatuses) +{ + vote_routerstatus_t *rs = NULL; + if (!routerstatuses) + return 0; + + rs = smartlist_bsearch(routerstatuses, parsed_line->node_id, + compare_digest_to_vote_routerstatus_entry); + + if (rs) { + rs->has_measured_bw = 1; + rs->measured_bw_kb = (uint32_t)parsed_line->bw_kb; + } else { + log_info(LD_DIRSERV, "Node ID %s not found in routerstatus list", + parsed_line->node_hex); + } + + return rs != NULL; +} + +/** + * Read the measured bandwidth file and apply it to the list of + * vote_routerstatus_t. Returns -1 on error, 0 otherwise. + */ +int +dirserv_read_measured_bandwidths(const char *from_file, + smartlist_t *routerstatuses) +{ + char line[512]; + FILE *fp = tor_fopen_cloexec(from_file, "r"); + int applied_lines = 0; + time_t file_time, now; + int ok; + /* This flag will be 1 only when the first successful bw measurement line + * has been encountered, so that measured_bw_line_parse don't give warnings + * if there are additional header lines, as introduced in Bandwidth List spec + * version 1.1.0 */ + int line_is_after_headers = 0; + + /* Initialise line, so that we can't possibly run off the end. */ + memset(line, 0, sizeof(line)); + + if (fp == NULL) { + log_warn(LD_CONFIG, "Can't open bandwidth file at configured location: %s", + from_file); + return -1; + } + + /* If fgets fails, line is either unmodified, or indeterminate. */ + if (!fgets(line, sizeof(line), fp)) { + log_warn(LD_DIRSERV, "Empty bandwidth file"); + fclose(fp); + return -1; + } + + if (!strlen(line) || line[strlen(line)-1] != '\n') { + log_warn(LD_DIRSERV, "Long or truncated time in bandwidth file: %s", + escaped(line)); + fclose(fp); + return -1; + } + + line[strlen(line)-1] = '\0'; + file_time = (time_t)tor_parse_ulong(line, 10, 0, ULONG_MAX, &ok, NULL); + if (!ok) { + log_warn(LD_DIRSERV, "Non-integer time in bandwidth file: %s", + escaped(line)); + fclose(fp); + return -1; + } + + now = time(NULL); + if ((now - file_time) > MAX_MEASUREMENT_AGE) { + log_warn(LD_DIRSERV, "Bandwidth measurement file stale. Age: %u", + (unsigned)(time(NULL) - file_time)); + fclose(fp); + return -1; + } + + if (routerstatuses) + smartlist_sort(routerstatuses, compare_vote_routerstatus_entries); + + while (!feof(fp)) { + measured_bw_line_t parsed_line; + if (fgets(line, sizeof(line), fp) && strlen(line)) { + if (measured_bw_line_parse(&parsed_line, line, + line_is_after_headers) != -1) { + /* This condition will be true when the first complete valid bw line + * has been encountered, which means the end of the header lines. */ + line_is_after_headers = 1; + /* Also cache the line for dirserv_get_bandwidth_for_router() */ + dirserv_cache_measured_bw(&parsed_line, file_time); + if (measured_bw_line_apply(&parsed_line, routerstatuses) > 0) + applied_lines++; + } + } + } + + /* Now would be a nice time to clean the cache, too */ + dirserv_expire_measured_bw_cache(now); + + fclose(fp); + log_info(LD_DIRSERV, + "Bandwidth measurement file successfully read. " + "Applied %d measurements.", applied_lines); + return 0; +} + +/** As dirserv_get_routerdescs(), but instead of getting signed_descriptor_t + * pointers, adds copies of digests to fps_out, and doesn't use the + * /tor/server/ prefix. For a /d/ request, adds descriptor digests; for other + * requests, adds identity digests. + */ +int +dirserv_get_routerdesc_spool(smartlist_t *spool_out, + const char *key, + dir_spool_source_t source, + int conn_is_encrypted, + const char **msg_out) +{ + *msg_out = NULL; + + if (!strcmp(key, "all")) { + const routerlist_t *rl = router_get_routerlist(); + SMARTLIST_FOREACH_BEGIN(rl->routers, const routerinfo_t *, r) { + spooled_resource_t *spooled; + spooled = spooled_resource_new(source, + (const uint8_t *)r->cache_info.identity_digest, + DIGEST_LEN); + /* Treat "all" requests as if they were unencrypted */ + conn_is_encrypted = 0; + smartlist_add(spool_out, spooled); + } SMARTLIST_FOREACH_END(r); + } else if (!strcmp(key, "authority")) { + const routerinfo_t *ri = router_get_my_routerinfo(); + if (ri) + smartlist_add(spool_out, + spooled_resource_new(source, + (const uint8_t *)ri->cache_info.identity_digest, + DIGEST_LEN)); + } else if (!strcmpstart(key, "d/")) { + key += strlen("d/"); + dir_split_resource_into_spoolable(key, source, spool_out, NULL, + DSR_HEX|DSR_SORT_UNIQ); + } else if (!strcmpstart(key, "fp/")) { + key += strlen("fp/"); + dir_split_resource_into_spoolable(key, source, spool_out, NULL, + DSR_HEX|DSR_SORT_UNIQ); + } else { + *msg_out = "Not found"; + return -1; + } + + if (! conn_is_encrypted) { + /* Remove anything that insists it not be sent unencrypted. */ + SMARTLIST_FOREACH_BEGIN(spool_out, spooled_resource_t *, spooled) { + const uint8_t *body = NULL; + size_t bodylen = 0; + int r = spooled_resource_lookup_body(spooled, conn_is_encrypted, + &body, &bodylen, NULL); + if (r < 0 || body == NULL || bodylen == 0) { + SMARTLIST_DEL_CURRENT(spool_out, spooled); + spooled_resource_free(spooled); + } + } SMARTLIST_FOREACH_END(spooled); + } + + if (!smartlist_len(spool_out)) { + *msg_out = "Servers unavailable"; + return -1; + } + return 0; +} + +/** Add a signed_descriptor_t to <b>descs_out</b> for each router matching + * <b>key</b>. The key should be either + * - "/tor/server/authority" for our own routerinfo; + * - "/tor/server/all" for all the routerinfos we have, concatenated; + * - "/tor/server/fp/FP" where FP is a plus-separated sequence of + * hex identity digests; or + * - "/tor/server/d/D" where D is a plus-separated sequence + * of server descriptor digests, in hex. + * + * Return 0 if we found some matching descriptors, or -1 if we do not + * have any descriptors, no matching descriptors, or if we did not + * recognize the key (URL). + * If -1 is returned *<b>msg</b> will be set to an appropriate error + * message. + * + * XXXX rename this function. It's only called from the controller. + * XXXX in fact, refactor this function, merging as much as possible. + */ +int +dirserv_get_routerdescs(smartlist_t *descs_out, const char *key, + const char **msg) +{ + *msg = NULL; + + if (!strcmp(key, "/tor/server/all")) { + routerlist_t *rl = router_get_routerlist(); + SMARTLIST_FOREACH(rl->routers, routerinfo_t *, r, + smartlist_add(descs_out, &(r->cache_info))); + } else if (!strcmp(key, "/tor/server/authority")) { + const routerinfo_t *ri = router_get_my_routerinfo(); + if (ri) + smartlist_add(descs_out, (void*) &(ri->cache_info)); + } else if (!strcmpstart(key, "/tor/server/d/")) { + smartlist_t *digests = smartlist_new(); + key += strlen("/tor/server/d/"); + dir_split_resource_into_fingerprints(key, digests, NULL, + DSR_HEX|DSR_SORT_UNIQ); + SMARTLIST_FOREACH(digests, const char *, d, + { + signed_descriptor_t *sd = router_get_by_descriptor_digest(d); + if (sd) + smartlist_add(descs_out,sd); + }); + SMARTLIST_FOREACH(digests, char *, d, tor_free(d)); + smartlist_free(digests); + } else if (!strcmpstart(key, "/tor/server/fp/")) { + smartlist_t *digests = smartlist_new(); + time_t cutoff = time(NULL) - ROUTER_MAX_AGE_TO_PUBLISH; + key += strlen("/tor/server/fp/"); + dir_split_resource_into_fingerprints(key, digests, NULL, + DSR_HEX|DSR_SORT_UNIQ); + SMARTLIST_FOREACH_BEGIN(digests, const char *, d) { + if (router_digest_is_me(d)) { + /* calling router_get_my_routerinfo() to make sure it exists */ + const routerinfo_t *ri = router_get_my_routerinfo(); + if (ri) + smartlist_add(descs_out, (void*) &(ri->cache_info)); + } else { + const routerinfo_t *ri = router_get_by_id_digest(d); + /* Don't actually serve a descriptor that everyone will think is + * expired. This is an (ugly) workaround to keep buggy 0.1.1.10 + * Tors from downloading descriptors that they will throw away. + */ + if (ri && ri->cache_info.published_on > cutoff) + smartlist_add(descs_out, (void*) &(ri->cache_info)); + } + } SMARTLIST_FOREACH_END(d); + SMARTLIST_FOREACH(digests, char *, d, tor_free(d)); + smartlist_free(digests); + } else { + *msg = "Key not recognized"; + return -1; + } + + if (!smartlist_len(descs_out)) { + *msg = "Servers unavailable"; + return -1; + } + return 0; +} + +/** Called when a TLS handshake has completed successfully with a + * router listening at <b>address</b>:<b>or_port</b>, and has yielded + * a certificate with digest <b>digest_rcvd</b>. + * + * Inform the reachability checker that we could get to this relay. + */ +void +dirserv_orconn_tls_done(const tor_addr_t *addr, + uint16_t or_port, + const char *digest_rcvd, + const ed25519_public_key_t *ed_id_rcvd) +{ + node_t *node = NULL; + tor_addr_port_t orport; + routerinfo_t *ri = NULL; + time_t now = time(NULL); + tor_assert(addr); + tor_assert(digest_rcvd); + + node = node_get_mutable_by_id(digest_rcvd); + if (node == NULL || node->ri == NULL) + return; + + ri = node->ri; + + if (get_options()->AuthDirTestEd25519LinkKeys && + node_supports_ed25519_link_authentication(node, 1) && + ri->cache_info.signing_key_cert) { + /* We allow the node to have an ed25519 key if we haven't been told one in + * the routerinfo, but if we *HAVE* been told one in the routerinfo, it + * needs to match. */ + const ed25519_public_key_t *expected_id = + &ri->cache_info.signing_key_cert->signing_key; + tor_assert(!ed25519_public_key_is_zero(expected_id)); + if (! ed_id_rcvd || ! ed25519_pubkey_eq(ed_id_rcvd, expected_id)) { + log_info(LD_DIRSERV, "Router at %s:%d with RSA ID %s " + "did not present expected Ed25519 ID.", + fmt_addr(addr), or_port, hex_str(digest_rcvd, DIGEST_LEN)); + return; /* Don't mark it as reachable. */ + } + } + + tor_addr_copy(&orport.addr, addr); + orport.port = or_port; + if (router_has_orport(ri, &orport)) { + /* Found the right router. */ + if (!authdir_mode_bridge(get_options()) || + ri->purpose == ROUTER_PURPOSE_BRIDGE) { + char addrstr[TOR_ADDR_BUF_LEN]; + /* This is a bridge or we're not a bridge authority -- + mark it as reachable. */ + log_info(LD_DIRSERV, "Found router %s to be reachable at %s:%d. Yay.", + router_describe(ri), + tor_addr_to_str(addrstr, addr, sizeof(addrstr), 1), + ri->or_port); + if (tor_addr_family(addr) == AF_INET) { + rep_hist_note_router_reachable(digest_rcvd, addr, or_port, now); + node->last_reachable = now; + } else if (tor_addr_family(addr) == AF_INET6) { + /* No rephist for IPv6. */ + node->last_reachable6 = now; + } + } + } +} + +/** Called when we, as an authority, receive a new router descriptor either as + * an upload or a download. Used to decide whether to relaunch reachability + * testing for the server. */ +int +dirserv_should_launch_reachability_test(const routerinfo_t *ri, + const routerinfo_t *ri_old) +{ + if (!authdir_mode_handles_descs(get_options(), ri->purpose)) + return 0; + if (!ri_old) { + /* New router: Launch an immediate reachability test, so we will have an + * opinion soon in case we're generating a consensus soon */ + return 1; + } + if (ri_old->is_hibernating && !ri->is_hibernating) { + /* It just came out of hibernation; launch a reachability test */ + return 1; + } + if (! routers_have_same_or_addrs(ri, ri_old)) { + /* Address or port changed; launch a reachability test */ + return 1; + } + return 0; +} + +/** Helper function for dirserv_test_reachability(). Start a TLS + * connection to <b>router</b>, and annotate it with when we started + * the test. */ +void +dirserv_single_reachability_test(time_t now, routerinfo_t *router) +{ + const or_options_t *options = get_options(); + channel_t *chan = NULL; + const node_t *node = NULL; + tor_addr_t router_addr; + const ed25519_public_key_t *ed_id_key; + (void) now; + + tor_assert(router); + node = node_get_by_id(router->cache_info.identity_digest); + tor_assert(node); + + if (options->AuthDirTestEd25519LinkKeys && + node_supports_ed25519_link_authentication(node, 1) && + router->cache_info.signing_key_cert) { + ed_id_key = &router->cache_info.signing_key_cert->signing_key; + } else { + ed_id_key = NULL; + } + + /* IPv4. */ + log_debug(LD_OR,"Testing reachability of %s at %s:%u.", + router->nickname, fmt_addr32(router->addr), router->or_port); + tor_addr_from_ipv4h(&router_addr, router->addr); + chan = channel_tls_connect(&router_addr, router->or_port, + router->cache_info.identity_digest, + ed_id_key); + if (chan) command_setup_channel(chan); + + /* Possible IPv6. */ + if (get_options()->AuthDirHasIPv6Connectivity == 1 && + !tor_addr_is_null(&router->ipv6_addr)) { + char addrstr[TOR_ADDR_BUF_LEN]; + log_debug(LD_OR, "Testing reachability of %s at %s:%u.", + router->nickname, + tor_addr_to_str(addrstr, &router->ipv6_addr, sizeof(addrstr), 1), + router->ipv6_orport); + chan = channel_tls_connect(&router->ipv6_addr, router->ipv6_orport, + router->cache_info.identity_digest, + ed_id_key); + if (chan) command_setup_channel(chan); + } +} + +/** Auth dir server only: load balance such that we only + * try a few connections per call. + * + * The load balancing is such that if we get called once every ten + * seconds, we will cycle through all the tests in + * REACHABILITY_TEST_CYCLE_PERIOD seconds (a bit over 20 minutes). + */ +void +dirserv_test_reachability(time_t now) +{ + /* XXX decide what to do here; see or-talk thread "purging old router + * information, revocation." -NM + * We can't afford to mess with this in 0.1.2.x. The reason is that + * if we stop doing reachability tests on some of routerlist, then + * we'll for-sure think they're down, which may have unexpected + * effects in other parts of the code. It doesn't hurt much to do + * the testing, and directory authorities are easy to upgrade. Let's + * wait til 0.2.0. -RD */ +// time_t cutoff = now - ROUTER_MAX_AGE_TO_PUBLISH; + routerlist_t *rl = router_get_routerlist(); + static char ctr = 0; + int bridge_auth = authdir_mode_bridge(get_options()); + + SMARTLIST_FOREACH_BEGIN(rl->routers, routerinfo_t *, router) { + const char *id_digest = router->cache_info.identity_digest; + if (router_is_me(router)) + continue; + if (bridge_auth && router->purpose != ROUTER_PURPOSE_BRIDGE) + continue; /* bridge authorities only test reachability on bridges */ +// if (router->cache_info.published_on > cutoff) +// continue; + if ((((uint8_t)id_digest[0]) % REACHABILITY_MODULO_PER_TEST) == ctr) { + dirserv_single_reachability_test(now, router); + } + } SMARTLIST_FOREACH_END(router); + ctr = (ctr + 1) % REACHABILITY_MODULO_PER_TEST; /* increment ctr */ +} + +/* ========== + * Spooling code. + * ========== */ + +spooled_resource_t * +spooled_resource_new(dir_spool_source_t source, + const uint8_t *digest, size_t digestlen) +{ + spooled_resource_t *spooled = tor_malloc_zero(sizeof(spooled_resource_t)); + spooled->spool_source = source; + switch (source) { + case DIR_SPOOL_NETWORKSTATUS: + spooled->spool_eagerly = 0; + break; + case DIR_SPOOL_SERVER_BY_DIGEST: + case DIR_SPOOL_SERVER_BY_FP: + case DIR_SPOOL_EXTRA_BY_DIGEST: + case DIR_SPOOL_EXTRA_BY_FP: + case DIR_SPOOL_MICRODESC: + default: + spooled->spool_eagerly = 1; + break; + case DIR_SPOOL_CONSENSUS_CACHE_ENTRY: + tor_assert_unreached(); + break; + } + tor_assert(digestlen <= sizeof(spooled->digest)); + if (digest) + memcpy(spooled->digest, digest, digestlen); + return spooled; +} + +/** + * Create a new spooled_resource_t to spool the contents of <b>entry</b> to + * the user. Return the spooled object on success, or NULL on failure (which + * is probably caused by a failure to map the body of the item from disk). + * + * Adds a reference to entry's reference counter. + */ +spooled_resource_t * +spooled_resource_new_from_cache_entry(consensus_cache_entry_t *entry) +{ + spooled_resource_t *spooled = tor_malloc_zero(sizeof(spooled_resource_t)); + spooled->spool_source = DIR_SPOOL_CONSENSUS_CACHE_ENTRY; + spooled->spool_eagerly = 0; + consensus_cache_entry_incref(entry); + spooled->consensus_cache_entry = entry; + + int r = consensus_cache_entry_get_body(entry, + &spooled->cce_body, + &spooled->cce_len); + if (r == 0) { + return spooled; + } else { + spooled_resource_free(spooled); + return NULL; + } +} + +/** Release all storage held by <b>spooled</b>. */ +void +spooled_resource_free_(spooled_resource_t *spooled) +{ + if (spooled == NULL) + return; + + if (spooled->cached_dir_ref) { + cached_dir_decref(spooled->cached_dir_ref); + } + + if (spooled->consensus_cache_entry) { + consensus_cache_entry_decref(spooled->consensus_cache_entry); + } + + tor_free(spooled); +} + +/** When spooling data from a cached_dir_t object, we always add + * at least this much. */ +#define DIRSERV_CACHED_DIR_CHUNK_SIZE 8192 + +/** Return an compression ratio for compressing objects from <b>source</b>. + */ +static double +estimate_compression_ratio(dir_spool_source_t source) +{ + /* We should put in better estimates here, depending on the number of + objects and their type */ + (void) source; + return 0.5; +} + +/** Return an estimated number of bytes needed for transmitting the + * resource in <b>spooled</b> on <b>conn</b> + * + * As a convenient side-effect, set *<b>published_out</b> to the resource's + * publication time. + */ +static size_t +spooled_resource_estimate_size(const spooled_resource_t *spooled, + dir_connection_t *conn, + int compressed, + time_t *published_out) +{ + if (spooled->spool_eagerly) { + const uint8_t *body = NULL; + size_t bodylen = 0; + int r = spooled_resource_lookup_body(spooled, + connection_dir_is_encrypted(conn), + &body, &bodylen, + published_out); + if (r == -1 || body == NULL || bodylen == 0) + return 0; + if (compressed) { + double ratio = estimate_compression_ratio(spooled->spool_source); + bodylen = (size_t)(bodylen * ratio); + } + return bodylen; + } else { + cached_dir_t *cached; + if (spooled->consensus_cache_entry) { + if (published_out) { + consensus_cache_entry_get_valid_after( + spooled->consensus_cache_entry, published_out); + } + + return spooled->cce_len; + } + if (spooled->cached_dir_ref) { + cached = spooled->cached_dir_ref; + } else { + cached = spooled_resource_lookup_cached_dir(spooled, + published_out); + } + if (cached == NULL) { + return 0; + } + size_t result = compressed ? cached->dir_compressed_len : cached->dir_len; + return result; + } +} + +/** Return code for spooled_resource_flush_some */ +typedef enum { + SRFS_ERR = -1, + SRFS_MORE = 0, + SRFS_DONE +} spooled_resource_flush_status_t; + +/** Flush some or all of the bytes from <b>spooled</b> onto <b>conn</b>. + * Return SRFS_ERR on error, SRFS_MORE if there are more bytes to flush from + * this spooled resource, or SRFS_DONE if we are done flushing this spooled + * resource. + */ +static spooled_resource_flush_status_t +spooled_resource_flush_some(spooled_resource_t *spooled, + dir_connection_t *conn) +{ + if (spooled->spool_eagerly) { + /* Spool_eagerly resources are sent all-at-once. */ + const uint8_t *body = NULL; + size_t bodylen = 0; + int r = spooled_resource_lookup_body(spooled, + connection_dir_is_encrypted(conn), + &body, &bodylen, NULL); + if (r == -1 || body == NULL || bodylen == 0) { + /* Absent objects count as "done". */ + return SRFS_DONE; + } + if (conn->compress_state) { + connection_buf_add_compress((const char*)body, bodylen, conn, 0); + } else { + connection_buf_add((const char*)body, bodylen, TO_CONN(conn)); + } + return SRFS_DONE; + } else { + cached_dir_t *cached = spooled->cached_dir_ref; + consensus_cache_entry_t *cce = spooled->consensus_cache_entry; + if (cached == NULL && cce == NULL) { + /* The cached_dir_t hasn't been materialized yet. So let's look it up. */ + cached = spooled->cached_dir_ref = + spooled_resource_lookup_cached_dir(spooled, NULL); + if (!cached) { + /* Absent objects count as done. */ + return SRFS_DONE; + } + ++cached->refcnt; + tor_assert_nonfatal(spooled->cached_dir_offset == 0); + } + + if (BUG(!cached && !cce)) + return SRFS_DONE; + + int64_t total_len; + const char *ptr; + if (cached) { + total_len = cached->dir_compressed_len; + ptr = cached->dir_compressed; + } else { + total_len = spooled->cce_len; + ptr = (const char *)spooled->cce_body; + } + /* How many bytes left to flush? */ + int64_t remaining; + remaining = total_len - spooled->cached_dir_offset; + if (BUG(remaining < 0)) + return SRFS_ERR; + ssize_t bytes = (ssize_t) MIN(DIRSERV_CACHED_DIR_CHUNK_SIZE, remaining); + if (conn->compress_state) { + connection_buf_add_compress( + ptr + spooled->cached_dir_offset, + bytes, conn, 0); + } else { + connection_buf_add(ptr + spooled->cached_dir_offset, + bytes, TO_CONN(conn)); + } + spooled->cached_dir_offset += bytes; + if (spooled->cached_dir_offset >= (off_t)total_len) { + return SRFS_DONE; + } else { + return SRFS_MORE; + } + } +} + +/** Helper: find the cached_dir_t for a spooled_resource_t, for + * sending it to <b>conn</b>. Set *<b>published_out</b>, if provided, + * to the published time of the cached_dir_t. + * + * DOES NOT increase the reference count on the result. Callers must do that + * themselves if they mean to hang on to it. + */ +static cached_dir_t * +spooled_resource_lookup_cached_dir(const spooled_resource_t *spooled, + time_t *published_out) +{ + tor_assert(spooled->spool_eagerly == 0); + cached_dir_t *d = lookup_cached_dir_by_fp(spooled->digest); + if (d != NULL) { + if (published_out) + *published_out = d->published; + } + return d; +} + +/** Helper: Look up the body for an eagerly-served spooled_resource. If + * <b>conn_is_encrypted</b> is false, don't look up any resource that + * shouldn't be sent over an unencrypted connection. On success, set + * <b>body_out</b>, <b>size_out</b>, and <b>published_out</b> to refer + * to the resource's body, size, and publication date, and return 0. + * On failure return -1. */ +static int +spooled_resource_lookup_body(const spooled_resource_t *spooled, + int conn_is_encrypted, + const uint8_t **body_out, + size_t *size_out, + time_t *published_out) +{ + tor_assert(spooled->spool_eagerly == 1); + + const signed_descriptor_t *sd = NULL; + + switch (spooled->spool_source) { + case DIR_SPOOL_EXTRA_BY_FP: { + sd = get_signed_descriptor_by_fp(spooled->digest, 1); + break; + } + case DIR_SPOOL_SERVER_BY_FP: { + sd = get_signed_descriptor_by_fp(spooled->digest, 0); + break; + } + case DIR_SPOOL_SERVER_BY_DIGEST: { + sd = router_get_by_descriptor_digest((const char *)spooled->digest); + break; + } + case DIR_SPOOL_EXTRA_BY_DIGEST: { + sd = extrainfo_get_by_descriptor_digest((const char *)spooled->digest); + break; + } + case DIR_SPOOL_MICRODESC: { + microdesc_t *md = microdesc_cache_lookup_by_digest256( + get_microdesc_cache(), + (const char *)spooled->digest); + if (! md || ! md->body) { + return -1; + } + *body_out = (const uint8_t *)md->body; + *size_out = md->bodylen; + if (published_out) + *published_out = TIME_MAX; + return 0; + } + case DIR_SPOOL_NETWORKSTATUS: + case DIR_SPOOL_CONSENSUS_CACHE_ENTRY: + default: + /* LCOV_EXCL_START */ + tor_assert_nonfatal_unreached(); + return -1; + /* LCOV_EXCL_STOP */ + } + + /* If we get here, then we tried to set "sd" to a signed_descriptor_t. */ + + if (sd == NULL) { + return -1; + } + if (sd->send_unencrypted == 0 && ! conn_is_encrypted) { + /* we did this check once before (so we could have an accurate size + * estimate and maybe send a 404 if somebody asked for only bridges on + * a connection), but we need to do it again in case a previously + * unknown bridge descriptor has shown up between then and now. */ + return -1; + } + *body_out = (const uint8_t *) signed_descriptor_get_body(sd); + *size_out = sd->signed_descriptor_len; + if (published_out) + *published_out = sd->published_on; + return 0; +} + +/** Given a fingerprint <b>fp</b> which is either set if we're looking for a + * v2 status, or zeroes if we're looking for a v3 status, or a NUL-padded + * flavor name if we want a flavored v3 status, return a pointer to the + * appropriate cached dir object, or NULL if there isn't one available. */ +static cached_dir_t * +lookup_cached_dir_by_fp(const uint8_t *fp) +{ + cached_dir_t *d = NULL; + if (tor_digest_is_zero((const char *)fp) && cached_consensuses) { + d = strmap_get(cached_consensuses, "ns"); + } else if (memchr(fp, '\0', DIGEST_LEN) && cached_consensuses) { + /* this here interface is a nasty hack: we're shoving a flavor into + * a digest field. */ + d = strmap_get(cached_consensuses, (const char *)fp); + } + return d; +} + +/** Try to guess the number of bytes that will be needed to send the + * spooled objects for <b>conn</b>'s outgoing spool. In the process, + * remove every element of the spool that refers to an absent object, or + * which was published earlier than <b>cutoff</b>. Set *<b>size_out</b> + * to the number of bytes, and *<b>n_expired_out</b> to the number of + * objects removed for being too old. */ +void +dirserv_spool_remove_missing_and_guess_size(dir_connection_t *conn, + time_t cutoff, + int compression, + size_t *size_out, + int *n_expired_out) +{ + if (BUG(!conn)) + return; + + smartlist_t *spool = conn->spool; + if (!spool) { + if (size_out) + *size_out = 0; + if (n_expired_out) + *n_expired_out = 0; + return; + } + int n_expired = 0; + uint64_t total = 0; + SMARTLIST_FOREACH_BEGIN(spool, spooled_resource_t *, spooled) { + time_t published = TIME_MAX; + size_t sz = spooled_resource_estimate_size(spooled, conn, + compression, &published); + if (published < cutoff) { + ++n_expired; + SMARTLIST_DEL_CURRENT(spool, spooled); + spooled_resource_free(spooled); + } else if (sz == 0) { + SMARTLIST_DEL_CURRENT(spool, spooled); + spooled_resource_free(spooled); + } else { + total += sz; + } + } SMARTLIST_FOREACH_END(spooled); + + if (size_out) { + *size_out = (total > SIZE_MAX) ? SIZE_MAX : (size_t)total; + } + if (n_expired_out) + *n_expired_out = n_expired; +} + +/** Helper: used to sort a connection's spool. */ +static int +dirserv_spool_sort_comparison_(const void **a_, const void **b_) +{ + const spooled_resource_t *a = *a_; + const spooled_resource_t *b = *b_; + return fast_memcmp(a->digest, b->digest, sizeof(a->digest)); +} + +/** Sort all the entries in <b>conn</b> by digest. */ +void +dirserv_spool_sort(dir_connection_t *conn) +{ + if (conn->spool == NULL) + return; + smartlist_sort(conn->spool, dirserv_spool_sort_comparison_); +} + +/** Return the cache-info for identity fingerprint <b>fp</b>, or + * its extra-info document if <b>extrainfo</b> is true. Return + * NULL if not found or if the descriptor is older than + * <b>publish_cutoff</b>. */ +static const signed_descriptor_t * +get_signed_descriptor_by_fp(const uint8_t *fp, int extrainfo) +{ + if (router_digest_is_me((const char *)fp)) { + if (extrainfo) + return &(router_get_my_extrainfo()->cache_info); + else + return &(router_get_my_routerinfo()->cache_info); + } else { + const routerinfo_t *ri = router_get_by_id_digest((const char *)fp); + if (ri) { + if (extrainfo) + return extrainfo_get_by_descriptor_digest( + ri->cache_info.extra_info_digest); + else + return &ri->cache_info; + } + } + return NULL; +} + +/** When we're spooling data onto our outbuf, add more whenever we dip + * below this threshold. */ +#define DIRSERV_BUFFER_MIN 16384 + +/** + * Called whenever we have flushed some directory data in state + * SERVER_WRITING, or whenever we want to fill the buffer with initial + * directory data (so that subsequent writes will occur, and trigger this + * function again.) + * + * Return 0 on success, and -1 on failure. + */ +int +connection_dirserv_flushed_some(dir_connection_t *conn) +{ + tor_assert(conn->base_.state == DIR_CONN_STATE_SERVER_WRITING); + if (conn->spool == NULL) + return 0; + + while (connection_get_outbuf_len(TO_CONN(conn)) < DIRSERV_BUFFER_MIN && + smartlist_len(conn->spool)) { + spooled_resource_t *spooled = + smartlist_get(conn->spool, smartlist_len(conn->spool)-1); + spooled_resource_flush_status_t status; + status = spooled_resource_flush_some(spooled, conn); + if (status == SRFS_ERR) { + return -1; + } else if (status == SRFS_MORE) { + return 0; + } + tor_assert(status == SRFS_DONE); + + /* If we're here, we're done flushing this resource. */ + tor_assert(smartlist_pop_last(conn->spool) == spooled); + spooled_resource_free(spooled); + } + + if (smartlist_len(conn->spool) > 0) { + /* We're still spooling something. */ + return 0; + } + + /* If we get here, we're done. */ + smartlist_free(conn->spool); + conn->spool = NULL; + if (conn->compress_state) { + /* Flush the compression state: there could be more bytes pending in there, + * and we don't want to omit bytes. */ + connection_buf_add_compress("", 0, conn, 1); + tor_compress_free(conn->compress_state); + conn->compress_state = NULL; + } + return 0; +} + +/** Remove every element from <b>conn</b>'s outgoing spool, and delete + * the spool. */ +void +dir_conn_clear_spool(dir_connection_t *conn) +{ + if (!conn || ! conn->spool) + return; + SMARTLIST_FOREACH(conn->spool, spooled_resource_t *, s, + spooled_resource_free(s)); + smartlist_free(conn->spool); + conn->spool = NULL; +} + +/** Return true iff <b>line</b> is a valid RecommendedPackages line. + */ +/* + The grammar is: + + "package" SP PACKAGENAME SP VERSION SP URL SP DIGESTS NL + + PACKAGENAME = NONSPACE + VERSION = NONSPACE + URL = NONSPACE + DIGESTS = DIGEST | DIGESTS SP DIGEST + DIGEST = DIGESTTYPE "=" DIGESTVAL + + NONSPACE = one or more non-space printing characters + + DIGESTVAL = DIGESTTYPE = one or more non-=, non-" " characters. + + SP = " " + NL = a newline + + */ +int +validate_recommended_package_line(const char *line) +{ + const char *cp = line; + +#define WORD() \ + do { \ + if (*cp == ' ') \ + return 0; \ + cp = strchr(cp, ' '); \ + if (!cp) \ + return 0; \ + } while (0) + + WORD(); /* skip packagename */ + ++cp; + WORD(); /* skip version */ + ++cp; + WORD(); /* Skip URL */ + ++cp; + + /* Skip digesttype=digestval + */ + int n_entries = 0; + while (1) { + const char *start_of_word = cp; + const char *end_of_word = strchr(cp, ' '); + if (! end_of_word) + end_of_word = cp + strlen(cp); + + if (start_of_word == end_of_word) + return 0; + + const char *eq = memchr(start_of_word, '=', end_of_word - start_of_word); + + if (!eq) + return 0; + if (eq == start_of_word) + return 0; + if (eq == end_of_word - 1) + return 0; + if (memchr(eq+1, '=', end_of_word - (eq+1))) + return 0; + + ++n_entries; + if (0 == *end_of_word) + break; + + cp = end_of_word + 1; + } + + /* If we reach this point, we have at least 1 entry. */ + tor_assert(n_entries > 0); + return 1; +} + +/** Release all storage used by the directory server. */ +void +dirserv_free_all(void) +{ + dirserv_free_fingerprint_list(); + + strmap_free(cached_consensuses, free_cached_dir_); + cached_consensuses = NULL; + + dirserv_clear_measured_bw_cache(); +} diff --git a/src/feature/dircache/dirserv.h b/src/feature/dircache/dirserv.h new file mode 100644 index 0000000000..3b4a646094 --- /dev/null +++ b/src/feature/dircache/dirserv.h @@ -0,0 +1,239 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file dirserv.h + * \brief Header file for dirserv.c. + **/ + +#ifndef TOR_DIRSERV_H +#define TOR_DIRSERV_H + +struct ed25519_public_key_t; + +#include "lib/testsupport/testsupport.h" + +/** An enum to describe what format we're generating a routerstatus line in. + */ +typedef enum { + /** For use in a v2 opinion */ + NS_V2, + /** For use in a consensus networkstatus document (ns flavor) */ + NS_V3_CONSENSUS, + /** For use in a vote networkstatus document */ + NS_V3_VOTE, + /** For passing to the controlport in response to a GETINFO request */ + NS_CONTROL_PORT, + /** For use in a consensus networkstatus document (microdesc flavor) */ + NS_V3_CONSENSUS_MICRODESC +} routerstatus_format_type_t; + +/** What fraction (1 over this number) of the relay ID space do we + * (as a directory authority) launch connections to at each reachability + * test? */ +#define REACHABILITY_MODULO_PER_TEST 128 + +/** How often (in seconds) do we launch reachability tests? */ +#define REACHABILITY_TEST_INTERVAL 10 + +/** How many seconds apart are the reachability tests for a given relay? */ +#define REACHABILITY_TEST_CYCLE_PERIOD \ + (REACHABILITY_TEST_INTERVAL*REACHABILITY_MODULO_PER_TEST) + +/** Maximum length of an exit policy summary. */ +#define MAX_EXITPOLICY_SUMMARY_LEN 1000 + +/** Maximum allowable length of a version line in a networkstatus. */ +#define MAX_V_LINE_LEN 128 + +/** Ways to convert a spoolable_resource_t to a bunch of bytes. */ +typedef enum dir_spool_source_t { + DIR_SPOOL_SERVER_BY_DIGEST=1, DIR_SPOOL_SERVER_BY_FP, + DIR_SPOOL_EXTRA_BY_DIGEST, DIR_SPOOL_EXTRA_BY_FP, + DIR_SPOOL_MICRODESC, + DIR_SPOOL_NETWORKSTATUS, + DIR_SPOOL_CONSENSUS_CACHE_ENTRY, +} dir_spool_source_t; +#define dir_spool_source_bitfield_t ENUM_BF(dir_spool_source_t) + +/** Object to remember the identity of an object that we are spooling, + * or about to spool, in response to a directory request. + * + * (Why do we spool? Because some directory responses are very large, + * and we don't want to just shove the complete answer into the output + * buffer: that would take a ridiculous amount of RAM.) + * + * If the spooled resource is relatively small (like microdescriptors, + * descriptors, etc), we look them up by ID as needed, and add the whole + * thing onto the output buffer at once. If the spooled reseource is + * big (like networkstatus documents), we reference-count it, and add it + * a few K at a time. + */ +typedef struct spooled_resource_t { + /** + * If true, we add the entire object to the outbuf. If false, + * we spool the object a few K at a time. + */ + unsigned spool_eagerly : 1; + /** + * Tells us what kind of object to get, and how to look it up. + */ + dir_spool_source_bitfield_t spool_source : 7; + /** + * Tells us the specific object to spool. + */ + uint8_t digest[DIGEST256_LEN]; + /** + * A large object that we're spooling. Holds a reference count. Only + * used when spool_eagerly is false. + */ + struct cached_dir_t *cached_dir_ref; + /** + * A different kind of large object that we might be spooling. Also + * reference-counted. Also only used when spool_eagerly is false. + */ + struct consensus_cache_entry_t *consensus_cache_entry; + const uint8_t *cce_body; + size_t cce_len; + /** + * The current offset into cached_dir or cce_body. Only used when + * spool_eagerly is false */ + off_t cached_dir_offset; +} spooled_resource_t; + +#ifdef DIRSERV_PRIVATE +typedef struct measured_bw_line_t { + char node_id[DIGEST_LEN]; + char node_hex[MAX_HEX_NICKNAME_LEN+1]; + long int bw_kb; +} measured_bw_line_t; +#endif /* defined(DIRSERV_PRIVATE) */ + +int connection_dirserv_flushed_some(dir_connection_t *conn); + +int dirserv_add_own_fingerprint(crypto_pk_t *pk); +int dirserv_load_fingerprint_file(void); +void dirserv_free_fingerprint_list(void); +enum was_router_added_t dirserv_add_multiple_descriptors( + const char *desc, uint8_t purpose, + const char *source, + const char **msg); +enum was_router_added_t dirserv_add_descriptor(routerinfo_t *ri, + const char **msg, + const char *source); +void dirserv_set_router_is_running(routerinfo_t *router, time_t now); +int list_server_status_v1(smartlist_t *routers, char **router_status_out, + int for_controller); +char *dirserv_get_flag_thresholds_line(void); +void dirserv_compute_bridge_flag_thresholds(void); + +int directory_fetches_from_authorities(const or_options_t *options); +int directory_fetches_dir_info_early(const or_options_t *options); +int directory_fetches_dir_info_later(const or_options_t *options); +int directory_caches_unknown_auth_certs(const or_options_t *options); +int directory_caches_dir_info(const or_options_t *options); +int directory_permits_begindir_requests(const or_options_t *options); +int directory_too_idle_to_fetch_descriptors(const or_options_t *options, + time_t now); + +cached_dir_t *dirserv_get_consensus(const char *flavor_name); +void dirserv_set_cached_consensus_networkstatus(const char *consensus, + const char *flavor_name, + const common_digests_t *digests, + const uint8_t *sha3_as_signed, + time_t published); +void dirserv_clear_old_networkstatuses(time_t cutoff); +int dirserv_get_routerdesc_spool(smartlist_t *spools_out, const char *key, + dir_spool_source_t source, + int conn_is_encrypted, + const char **msg_out); +int dirserv_get_routerdescs(smartlist_t *descs_out, const char *key, + const char **msg); +void dirserv_orconn_tls_done(const tor_addr_t *addr, + uint16_t or_port, + const char *digest_rcvd, + const struct ed25519_public_key_t *ed_id_rcvd); +int dirserv_should_launch_reachability_test(const routerinfo_t *ri, + const routerinfo_t *ri_old); +void dirserv_single_reachability_test(time_t now, routerinfo_t *router); +void dirserv_test_reachability(time_t now); +int authdir_wants_to_reject_router(routerinfo_t *ri, const char **msg, + int complain, + int *valid_out); +uint32_t dirserv_router_get_status(const routerinfo_t *router, + const char **msg, + int severity); +void dirserv_set_node_flags_from_authoritative_status(node_t *node, + uint32_t authstatus); + +int dirserv_would_reject_router(const routerstatus_t *rs); +char *routerstatus_format_entry( + const routerstatus_t *rs, + const char *version, + const char *protocols, + routerstatus_format_type_t format, + int consensus_method, + const vote_routerstatus_t *vrs); +void dirserv_free_all(void); +void cached_dir_decref(cached_dir_t *d); +cached_dir_t *new_cached_dir(char *s, time_t published); + +int validate_recommended_package_line(const char *line); +int dirserv_query_measured_bw_cache_kb(const char *node_id, + long *bw_out, + time_t *as_of_out); +void dirserv_clear_measured_bw_cache(void); +int dirserv_has_measured_bw(const char *node_id); +int dirserv_get_measured_bw_cache_size(void); +void dirserv_count_measured_bws(const smartlist_t *routers); +int running_long_enough_to_decide_unreachable(void); +void dirserv_compute_performance_thresholds(digestmap_t *omit_as_sybil); + +#ifdef DIRSERV_PRIVATE + +STATIC void dirserv_set_routerstatus_testing(routerstatus_t *rs); + +/* Put the MAX_MEASUREMENT_AGE #define here so unit tests can see it */ +#define MAX_MEASUREMENT_AGE (3*24*60*60) /* 3 days */ + +STATIC int measured_bw_line_parse(measured_bw_line_t *out, const char *line, + int line_is_after_headers); + +STATIC int measured_bw_line_apply(measured_bw_line_t *parsed_line, + smartlist_t *routerstatuses); + +STATIC void dirserv_cache_measured_bw(const measured_bw_line_t *parsed_line, + time_t as_of); +STATIC void dirserv_expire_measured_bw_cache(time_t now); + +STATIC int +dirserv_read_guardfraction_file_from_str(const char *guardfraction_file_str, + smartlist_t *vote_routerstatuses); +#endif /* defined(DIRSERV_PRIVATE) */ + +int dirserv_read_measured_bandwidths(const char *from_file, + smartlist_t *routerstatuses); + +int dirserv_read_guardfraction_file(const char *fname, + smartlist_t *vote_routerstatuses); + +spooled_resource_t *spooled_resource_new(dir_spool_source_t source, + const uint8_t *digest, + size_t digestlen); +spooled_resource_t *spooled_resource_new_from_cache_entry( + struct consensus_cache_entry_t *entry); +void spooled_resource_free_(spooled_resource_t *spooled); +#define spooled_resource_free(sp) \ + FREE_AND_NULL(spooled_resource_t, spooled_resource_free_, (sp)) +void dirserv_spool_remove_missing_and_guess_size(dir_connection_t *conn, + time_t cutoff, + int compression, + size_t *size_out, + int *n_expired_out); +void dirserv_spool_sort(dir_connection_t *conn); +void dir_conn_clear_spool(dir_connection_t *conn); + +#endif /* !defined(TOR_DIRSERV_H) */ |