diff options
Diffstat (limited to 'src/feature/dircache')
-rw-r--r-- | src/feature/dircache/cached_dir_st.h | 25 | ||||
-rw-r--r-- | src/feature/dircache/conscache.c | 627 | ||||
-rw-r--r-- | src/feature/dircache/conscache.h | 66 | ||||
-rw-r--r-- | src/feature/dircache/consdiffmgr.c | 1945 | ||||
-rw-r--r-- | src/feature/dircache/consdiffmgr.h | 75 | ||||
-rw-r--r-- | src/feature/dircache/directory.c | 5966 | ||||
-rw-r--r-- | src/feature/dircache/directory.h | 347 | ||||
-rw-r--r-- | src/feature/dircache/dirserv.c | 3598 | ||||
-rw-r--r-- | src/feature/dircache/dirserv.h | 239 |
9 files changed, 12888 insertions, 0 deletions
diff --git a/src/feature/dircache/cached_dir_st.h b/src/feature/dircache/cached_dir_st.h new file mode 100644 index 0000000000..38ae86d975 --- /dev/null +++ b/src/feature/dircache/cached_dir_st.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef CACHED_DIR_ST_H +#define CACHED_DIR_ST_H + +/** A cached_dir_t represents a cacheable directory object, along with its + * compressed form. */ +struct cached_dir_t { + char *dir; /**< Contents of this object, NUL-terminated. */ + char *dir_compressed; /**< Compressed contents of this object. */ + size_t dir_len; /**< Length of <b>dir</b> (not counting its NUL). */ + size_t dir_compressed_len; /**< Length of <b>dir_compressed</b>. */ + time_t published; /**< When was this object published. */ + common_digests_t digests; /**< Digests of this object (networkstatus only) */ + /** Sha3 digest (also ns only) */ + uint8_t digest_sha3_as_signed[DIGEST256_LEN]; + int refcnt; /**< Reference count for this cached_dir_t. */ +}; + +#endif + diff --git a/src/feature/dircache/conscache.c b/src/feature/dircache/conscache.c new file mode 100644 index 0000000000..e9bf58a180 --- /dev/null +++ b/src/feature/dircache/conscache.c @@ -0,0 +1,627 @@ +/* Copyright (c) 2017-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "core/or/or.h" + +#include "app/config/config.h" +#include "feature/dircache/conscache.h" +#include "lib/crypt_ops/crypto_util.h" +#include "lib/fs/storagedir.h" +#include "lib/encoding/confline.h" + +#define CCE_MAGIC 0x17162253 + +#ifdef _WIN32 +/* On Windows, unlink won't work on a file if the file is actively mmap()ed. + * That forces us to be less aggressive about unlinking files, and causes other + * changes throughout our logic. + */ +#define MUST_UNMAP_TO_UNLINK +#endif /* defined(_WIN32) */ + +/** + * A consensus_cache_entry_t is a reference-counted handle to an + * item in a consensus_cache_t. It can be mmapped into RAM, or not, + * depending whether it's currently in use. + */ +struct consensus_cache_entry_t { + uint32_t magic; /**< Must be set to CCE_MAGIC */ + HANDLE_ENTRY(consensus_cache_entry, consensus_cache_entry_t); + int32_t refcnt; /**< Reference count. */ + unsigned can_remove : 1; /**< If true, we want to delete this file. */ + /** If true, we intend to unmap this file as soon as we're done with it. */ + unsigned release_aggressively : 1; + + /** Filename for this object within the storage_dir_t */ + char *fname; + /** Labels associated with this object. Immutable once the object + * is created. */ + config_line_t *labels; + /** Pointer to the cache that includes this entry (if any). */ + consensus_cache_t *in_cache; + + /** Since what time has this object been mapped into RAM, but with the cache + * being the only having a reference to it? */ + time_t unused_since; + /** mmaped contents of the underlying file. May be NULL */ + tor_mmap_t *map; + /** Length of the body within <b>map</b>. */ + size_t bodylen; + /** Pointer to the body within <b>map</b>. */ + const uint8_t *body; +}; + +/** + * A consensus_cache_t holds a directory full of labeled items. + */ +struct consensus_cache_t { + /** Underling storage_dir_t to handle persistence */ + storage_dir_t *dir; + /** List of all the entries in the directory. */ + smartlist_t *entries; + + /** The maximum number of entries that we'd like to allow in this cache. + * This is the same as the storagedir limit when MUST_UNMAP_TO_UNLINK is + * not defined. */ + unsigned max_entries; +}; + +static void consensus_cache_clear(consensus_cache_t *cache); +static void consensus_cache_rescan(consensus_cache_t *); +static void consensus_cache_entry_map(consensus_cache_t *, + consensus_cache_entry_t *); +static void consensus_cache_entry_unmap(consensus_cache_entry_t *ent); + +/** + * Helper: Open a consensus cache in subdirectory <b>subdir</b> of the + * data directory, to hold up to <b>max_entries</b> of data. + */ +consensus_cache_t * +consensus_cache_open(const char *subdir, int max_entries) +{ + int storagedir_max_entries; + consensus_cache_t *cache = tor_malloc_zero(sizeof(consensus_cache_t)); + char *directory = get_cachedir_fname(subdir); + cache->max_entries = max_entries; + +#ifdef MUST_UNMAP_TO_UNLINK + /* If we can't unlink the files that we're still using, then we need to + * tell the storagedir backend to allow far more files than this consensus + * cache actually wants, so that it can hold files which, from this cache's + * perspective, have become useless. + */ +#define VERY_LARGE_STORAGEDIR_LIMIT (1000*1000) + storagedir_max_entries = VERY_LARGE_STORAGEDIR_LIMIT; +#else /* !(defined(MUST_UNMAP_TO_UNLINK)) */ + /* Otherwise, we can just tell the storagedir to use the same limits + * as this cache. */ + storagedir_max_entries = max_entries; +#endif /* defined(MUST_UNMAP_TO_UNLINK) */ + + cache->dir = storage_dir_new(directory, storagedir_max_entries); + tor_free(directory); + if (!cache->dir) { + tor_free(cache); + return NULL; + } + + consensus_cache_rescan(cache); + return cache; +} + +/** Return true if it's okay to put more entries in this cache than + * its official file limit. + * + * (We need this method on Windows, where we can't unlink files that are still + * in use, and therefore might need to temporarily exceed the file limit until + * the no-longer-wanted files are deletable.) + */ +int +consensus_cache_may_overallocate(consensus_cache_t *cache) +{ + (void) cache; +#ifdef MUST_UNMAP_TO_UNLINK + return 1; +#else + return 0; +#endif +} + +/** + * Tell the sandbox (if any) configured by <b>cfg</b> to allow the + * operations that <b>cache</b> will need. + */ +int +consensus_cache_register_with_sandbox(consensus_cache_t *cache, + struct sandbox_cfg_elem **cfg) +{ +#ifdef MUST_UNMAP_TO_UNLINK + /* Our Linux sandbox doesn't support huge file lists like the one that would + * be generated by using VERY_LARGE_STORAGEDIR_LIMIT above in + * consensus_cache_open(). Since the Linux sandbox is the only one we have + * right now, we just assert that we never reach this point when we've had + * to use VERY_LARGE_STORAGEDIR_LIMIT. + * + * If at some point in the future we have a different sandbox mechanism that + * can handle huge file lists, we can remove this assertion or make it + * conditional. + */ + tor_assert_nonfatal_unreached(); +#endif /* defined(MUST_UNMAP_TO_UNLINK) */ + return storage_dir_register_with_sandbox(cache->dir, cfg); +} + +/** + * Helper: clear all entries from <b>cache</b> (but do not delete + * any that aren't marked for removal + */ +static void +consensus_cache_clear(consensus_cache_t *cache) +{ + consensus_cache_delete_pending(cache, 0); + + SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { + ent->in_cache = NULL; + consensus_cache_entry_decref(ent); + } SMARTLIST_FOREACH_END(ent); + smartlist_free(cache->entries); + cache->entries = NULL; +} + +/** + * Drop all storage held by <b>cache</b>. + */ +void +consensus_cache_free_(consensus_cache_t *cache) +{ + if (! cache) + return; + + if (cache->entries) { + consensus_cache_clear(cache); + } + storage_dir_free(cache->dir); + tor_free(cache); +} + +/** + * Write <b>datalen</b> bytes of data at <b>data</b> into the <b>cache</b>, + * labeling that data with <b>labels</b>. On failure, return NULL. On + * success, return a newly created consensus_cache_entry_t. + * + * The returned value will be owned by the cache, and you will have a + * reference to it. Call consensus_cache_entry_decref() when you are + * done with it. + * + * The provided <b>labels</b> MUST have distinct keys: if they don't, + * this API does not specify which values (if any) for the duplicate keys + * will be considered. + */ +consensus_cache_entry_t * +consensus_cache_add(consensus_cache_t *cache, + const config_line_t *labels, + const uint8_t *data, + size_t datalen) +{ + char *fname = NULL; + int r = storage_dir_save_labeled_to_file(cache->dir, + labels, data, datalen, &fname); + if (r < 0 || fname == NULL) { + return NULL; + } + consensus_cache_entry_t *ent = + tor_malloc_zero(sizeof(consensus_cache_entry_t)); + ent->magic = CCE_MAGIC; + ent->fname = fname; + ent->labels = config_lines_dup(labels); + ent->in_cache = cache; + ent->unused_since = TIME_MAX; + smartlist_add(cache->entries, ent); + /* Start the reference count at 2: the caller owns one copy, and the + * cache owns another. + */ + ent->refcnt = 2; + + return ent; +} + +/** + * Given a <b>cache</b>, return some entry for which <b>key</b>=<b>value</b>. + * Return NULL if no such entry exists. + * + * Does not adjust reference counts. + */ +consensus_cache_entry_t * +consensus_cache_find_first(consensus_cache_t *cache, + const char *key, + const char *value) +{ + smartlist_t *tmp = smartlist_new(); + consensus_cache_find_all(tmp, cache, key, value); + consensus_cache_entry_t *ent = NULL; + if (smartlist_len(tmp)) + ent = smartlist_get(tmp, 0); + smartlist_free(tmp); + return ent; +} + +/** + * Given a <b>cache</b>, add every entry to <b>out<b> for which + * <b>key</b>=<b>value</b>. If <b>key</b> is NULL, add every entry. + * + * Do not add any entry that has been marked for removal. + * + * Does not adjust reference counts. + */ +void +consensus_cache_find_all(smartlist_t *out, + consensus_cache_t *cache, + const char *key, + const char *value) +{ + SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { + if (ent->can_remove == 1) { + /* We want to delete this; pretend it isn't there. */ + continue; + } + if (! key) { + smartlist_add(out, ent); + continue; + } + const char *found_val = consensus_cache_entry_get_value(ent, key); + if (found_val && !strcmp(value, found_val)) { + smartlist_add(out, ent); + } + } SMARTLIST_FOREACH_END(ent); +} + +/** + * Given a list of consensus_cache_entry_t, remove all those entries + * that do not have <b>key</b>=<b>value</b> in their labels. + * + * Does not adjust reference counts. + */ +void +consensus_cache_filter_list(smartlist_t *lst, + const char *key, + const char *value) +{ + if (BUG(lst == NULL)) + return; // LCOV_EXCL_LINE + if (key == NULL) + return; + SMARTLIST_FOREACH_BEGIN(lst, consensus_cache_entry_t *, ent) { + const char *found_val = consensus_cache_entry_get_value(ent, key); + if (! found_val || strcmp(value, found_val)) { + SMARTLIST_DEL_CURRENT(lst, ent); + } + } SMARTLIST_FOREACH_END(ent); +} + +/** + * If <b>ent</b> has a label with the given <b>key</b>, return its + * value. Otherwise return NULL. + * + * The return value is only guaranteed to be valid for as long as you + * hold a reference to <b>ent</b>. + */ +const char * +consensus_cache_entry_get_value(const consensus_cache_entry_t *ent, + const char *key) +{ + const config_line_t *match = config_line_find(ent->labels, key); + if (match) + return match->value; + else + return NULL; +} + +/** + * Return a pointer to the labels in <b>ent</b>. + * + * This pointer is only guaranteed to be valid for as long as you + * hold a reference to <b>ent</b>. + */ +const config_line_t * +consensus_cache_entry_get_labels(const consensus_cache_entry_t *ent) +{ + return ent->labels; +} + +/** + * Increase the reference count of <b>ent</b>. + */ +void +consensus_cache_entry_incref(consensus_cache_entry_t *ent) +{ + if (BUG(ent->magic != CCE_MAGIC)) + return; // LCOV_EXCL_LINE + ++ent->refcnt; + ent->unused_since = TIME_MAX; +} + +/** + * Release a reference held to <b>ent</b>. + * + * If it was the last reference, ent will be freed. Therefore, you must not + * use <b>ent</b> after calling this function. + */ +void +consensus_cache_entry_decref(consensus_cache_entry_t *ent) +{ + if (! ent) + return; + if (BUG(ent->refcnt <= 0)) + return; // LCOV_EXCL_LINE + if (BUG(ent->magic != CCE_MAGIC)) + return; // LCOV_EXCL_LINE + + --ent->refcnt; + + if (ent->refcnt == 1 && ent->in_cache) { + /* Only the cache has a reference: we don't need to keep the file + * mapped */ + if (ent->map) { + if (ent->release_aggressively) { + consensus_cache_entry_unmap(ent); + } else { + ent->unused_since = approx_time(); + } + } + return; + } + + if (ent->refcnt > 0) + return; + + /* Refcount is zero; we can free it. */ + if (ent->map) { + consensus_cache_entry_unmap(ent); + } + tor_free(ent->fname); + config_free_lines(ent->labels); + consensus_cache_entry_handles_clear(ent); + memwipe(ent, 0, sizeof(consensus_cache_entry_t)); + tor_free(ent); +} + +/** + * Mark <b>ent</b> for deletion from the cache. Deletion will not occur + * until the cache is the only place that holds a reference to <b>ent</b>. + */ +void +consensus_cache_entry_mark_for_removal(consensus_cache_entry_t *ent) +{ + ent->can_remove = 1; +} + +/** + * Mark <b>ent</b> as the kind of entry that we don't need to keep mmap'd for + * any longer than we're actually using it. + */ +void +consensus_cache_entry_mark_for_aggressive_release(consensus_cache_entry_t *ent) +{ + ent->release_aggressively = 1; +} + +/** + * Try to read the body of <b>ent</b> into memory if it isn't already + * loaded. On success, set *<b>body_out</b> to the body, *<b>sz_out</b> + * to its size, and return 0. On failure return -1. + * + * The resulting body pointer will only be valid for as long as you + * hold a reference to <b>ent</b>. + */ +int +consensus_cache_entry_get_body(const consensus_cache_entry_t *ent, + const uint8_t **body_out, + size_t *sz_out) +{ + if (BUG(ent->magic != CCE_MAGIC)) + return -1; // LCOV_EXCL_LINE + + if (! ent->map) { + if (! ent->in_cache) + return -1; + + consensus_cache_entry_map((consensus_cache_t *)ent->in_cache, + (consensus_cache_entry_t *)ent); + if (! ent->map) { + return -1; + } + } + + *body_out = ent->body; + *sz_out = ent->bodylen; + return 0; +} + +/** + * Unmap every mmap'd element of <b>cache</b> that has been unused + * since <b>cutoff</b>. + */ +void +consensus_cache_unmap_lazy(consensus_cache_t *cache, time_t cutoff) +{ + SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { + tor_assert_nonfatal(ent->in_cache == cache); + if (ent->refcnt > 1 || BUG(ent->in_cache == NULL)) { + /* Somebody is using this entry right now */ + continue; + } + if (ent->unused_since > cutoff) { + /* Has been unused only for a little while */ + continue; + } + if (ent->map == NULL) { + /* Not actually mapped. */ + continue; + } + consensus_cache_entry_unmap(ent); + } SMARTLIST_FOREACH_END(ent); +} + +/** + * Return the number of currently unused filenames available in this cache. + */ +int +consensus_cache_get_n_filenames_available(consensus_cache_t *cache) +{ + tor_assert(cache); + int max = cache->max_entries; + int used = smartlist_len(storage_dir_list(cache->dir)); +#ifdef MUST_UNMAP_TO_UNLINK + if (used > max) + return 0; +#else + tor_assert_nonfatal(max >= used); +#endif /* defined(MUST_UNMAP_TO_UNLINK) */ + return max - used; +} + +/** + * Delete every element of <b>cache</b> has been marked with + * consensus_cache_entry_mark_for_removal. If <b>force</b> is false, + * retain those entries which are in use by something other than the cache. + */ +void +consensus_cache_delete_pending(consensus_cache_t *cache, int force) +{ + SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { + tor_assert_nonfatal(ent->in_cache == cache); + int force_ent = force; +#ifdef MUST_UNMAP_TO_UNLINK + /* We cannot delete anything with an active mmap on win32, so no + * force-deletion. */ + if (ent->map) { + force_ent = 0; + } +#endif /* defined(MUST_UNMAP_TO_UNLINK) */ + if (! force_ent) { + if (ent->refcnt > 1 || BUG(ent->in_cache == NULL)) { + /* Somebody is using this entry right now */ + continue; + } + } + if (ent->can_remove == 0) { + /* Don't want to delete this. */ + continue; + } + if (BUG(ent->refcnt <= 0)) { + continue; // LCOV_EXCL_LINE + } + + SMARTLIST_DEL_CURRENT(cache->entries, ent); + ent->in_cache = NULL; + char *fname = tor_strdup(ent->fname); /* save a copy */ + consensus_cache_entry_decref(ent); + storage_dir_remove_file(cache->dir, fname); + tor_free(fname); + } SMARTLIST_FOREACH_END(ent); +} + +/** + * Internal helper: rescan <b>cache</b> and rebuild its list of entries. + */ +static void +consensus_cache_rescan(consensus_cache_t *cache) +{ + if (cache->entries) { + consensus_cache_clear(cache); + } + + cache->entries = smartlist_new(); + const smartlist_t *fnames = storage_dir_list(cache->dir); + SMARTLIST_FOREACH_BEGIN(fnames, const char *, fname) { + tor_mmap_t *map = NULL; + config_line_t *labels = NULL; + const uint8_t *body; + size_t bodylen; + map = storage_dir_map_labeled(cache->dir, fname, + &labels, &body, &bodylen); + if (! map) { + /* The ERANGE error might come from tor_mmap_file() -- it means the file + * was empty. EINVAL might come from ..map_labeled() -- it means the + * file was misformatted. In both cases, we should just delete it. + */ + if (errno == ERANGE || errno == EINVAL) { + log_warn(LD_FS, "Found %s file %s in consensus cache; removing it.", + errno == ERANGE ? "empty" : "misformatted", + escaped(fname)); + storage_dir_remove_file(cache->dir, fname); + } else { + /* Can't load this; continue */ + log_warn(LD_FS, "Unable to map file %s from consensus cache: %s", + escaped(fname), strerror(errno)); + } + continue; + } + consensus_cache_entry_t *ent = + tor_malloc_zero(sizeof(consensus_cache_entry_t)); + ent->magic = CCE_MAGIC; + ent->fname = tor_strdup(fname); + ent->labels = labels; + ent->refcnt = 1; + ent->in_cache = cache; + ent->unused_since = TIME_MAX; + smartlist_add(cache->entries, ent); + tor_munmap_file(map); /* don't actually need to keep this around */ + } SMARTLIST_FOREACH_END(fname); +} + +/** + * Make sure that <b>ent</b> is mapped into RAM. + */ +static void +consensus_cache_entry_map(consensus_cache_t *cache, + consensus_cache_entry_t *ent) +{ + if (ent->map) + return; + + ent->map = storage_dir_map_labeled(cache->dir, ent->fname, + NULL, &ent->body, &ent->bodylen); + ent->unused_since = TIME_MAX; +} + +/** + * Unmap <b>ent</b> from RAM. + * + * Do not call this if something other than the cache is holding a reference + * to <b>ent</b> + */ +static void +consensus_cache_entry_unmap(consensus_cache_entry_t *ent) +{ + ent->unused_since = TIME_MAX; + if (!ent->map) + return; + + tor_munmap_file(ent->map); + ent->map = NULL; + ent->body = NULL; + ent->bodylen = 0; + ent->unused_since = TIME_MAX; +} + +HANDLE_IMPL(consensus_cache_entry, consensus_cache_entry_t, ) + +#ifdef TOR_UNIT_TESTS +/** + * Testing only: Return true iff <b>ent</b> is mapped into memory. + * + * (In normal operation, this information is not exposed.) + */ +int +consensus_cache_entry_is_mapped(consensus_cache_entry_t *ent) +{ + if (ent->map) { + tor_assert(ent->body); + return 1; + } else { + tor_assert(!ent->body); + return 0; + } +} +#endif /* defined(TOR_UNIT_TESTS) */ diff --git a/src/feature/dircache/conscache.h b/src/feature/dircache/conscache.h new file mode 100644 index 0000000000..c274a60393 --- /dev/null +++ b/src/feature/dircache/conscache.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2017-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_CONSCACHE_H +#define TOR_CONSCACHE_H + +#include "lib/container/handles.h" + +typedef struct consensus_cache_entry_t consensus_cache_entry_t; +typedef struct consensus_cache_t consensus_cache_t; + +HANDLE_DECL(consensus_cache_entry, consensus_cache_entry_t, ) +#define consensus_cache_entry_handle_free(h) \ + FREE_AND_NULL(consensus_cache_entry_handle_t, \ + consensus_cache_entry_handle_free_, (h)) + +consensus_cache_t *consensus_cache_open(const char *subdir, int max_entries); +void consensus_cache_free_(consensus_cache_t *cache); +#define consensus_cache_free(cache) \ + FREE_AND_NULL(consensus_cache_t, consensus_cache_free_, (cache)) +struct sandbox_cfg_elem; +int consensus_cache_may_overallocate(consensus_cache_t *cache); +int consensus_cache_register_with_sandbox(consensus_cache_t *cache, + struct sandbox_cfg_elem **cfg); +void consensus_cache_unmap_lazy(consensus_cache_t *cache, time_t cutoff); +void consensus_cache_delete_pending(consensus_cache_t *cache, + int force); +int consensus_cache_get_n_filenames_available(consensus_cache_t *cache); +consensus_cache_entry_t *consensus_cache_add(consensus_cache_t *cache, + const struct config_line_t *labels, + const uint8_t *data, + size_t datalen); + +consensus_cache_entry_t *consensus_cache_find_first( + consensus_cache_t *cache, + const char *key, + const char *value); + +void consensus_cache_find_all(smartlist_t *out, + consensus_cache_t *cache, + const char *key, + const char *value); +void consensus_cache_filter_list(smartlist_t *lst, + const char *key, + const char *value); + +const char *consensus_cache_entry_get_value(const consensus_cache_entry_t *ent, + const char *key); +const struct config_line_t *consensus_cache_entry_get_labels( + const consensus_cache_entry_t *ent); + +void consensus_cache_entry_incref(consensus_cache_entry_t *ent); +void consensus_cache_entry_decref(consensus_cache_entry_t *ent); + +void consensus_cache_entry_mark_for_removal(consensus_cache_entry_t *ent); +void consensus_cache_entry_mark_for_aggressive_release( + consensus_cache_entry_t *ent); +int consensus_cache_entry_get_body(const consensus_cache_entry_t *ent, + const uint8_t **body_out, + size_t *sz_out); + +#ifdef TOR_UNIT_TESTS +int consensus_cache_entry_is_mapped(consensus_cache_entry_t *ent); +#endif + +#endif /* !defined(TOR_CONSCACHE_H) */ diff --git a/src/feature/dircache/consdiffmgr.c b/src/feature/dircache/consdiffmgr.c new file mode 100644 index 0000000000..304b64f3b6 --- /dev/null +++ b/src/feature/dircache/consdiffmgr.c @@ -0,0 +1,1945 @@ +/* Copyright (c) 2017-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file consdiffmsr.c + * + * \brief consensus diff manager functions + * + * This module is run by directory authorities and caches in order + * to remember a number of past consensus documents, and to generate + * and serve the diffs from those documents to the latest consensus. + */ + +#define CONSDIFFMGR_PRIVATE + +#include "core/or/or.h" +#include "app/config/config.h" +#include "feature/dircache/conscache.h" +#include "feature/dircommon/consdiff.h" +#include "feature/dircache/consdiffmgr.h" +#include "core/mainloop/cpuworker.h" +#include "feature/nodelist/networkstatus.h" +#include "feature/nodelist/routerparse.h" +#include "lib/evloop/compat_libevent.h" +#include "lib/evloop/workqueue.h" +#include "lib/compress/compress.h" +#include "lib/encoding/confline.h" + +#include "feature/nodelist/networkstatus_st.h" +#include "feature/nodelist/networkstatus_voter_info_st.h" + +/** + * Labels to apply to items in the conscache object. + * + * @{ + */ +/* One of DOCTYPE_CONSENSUS or DOCTYPE_CONSENSUS_DIFF */ +#define LABEL_DOCTYPE "document-type" +/* The valid-after time for a consensus (or for the target consensus of a + * diff), encoded as ISO UTC. */ +#define LABEL_VALID_AFTER "consensus-valid-after" +/* The fresh-until time for a consensus (or for the target consensus of a + * diff), encoded as ISO UTC. */ +#define LABEL_FRESH_UNTIL "consensus-fresh-until" +/* The valid-until time for a consensus (or for the target consensus of a + * diff), encoded as ISO UTC. */ +#define LABEL_VALID_UNTIL "consensus-valid-until" +/* Comma-separated list of hex-encoded identity digests for the voting + * authorities. */ +#define LABEL_SIGNATORIES "consensus-signatories" +/* A hex encoded SHA3 digest of the object, as compressed (if any) */ +#define LABEL_SHA3_DIGEST "sha3-digest" +/* A hex encoded SHA3 digest of the object before compression. */ +#define LABEL_SHA3_DIGEST_UNCOMPRESSED "sha3-digest-uncompressed" +/* A hex encoded SHA3 digest-as-signed of a consensus */ +#define LABEL_SHA3_DIGEST_AS_SIGNED "sha3-digest-as-signed" +/* The flavor of the consensus or consensuses diff */ +#define LABEL_FLAVOR "consensus-flavor" +/* Diff only: the SHA3 digest-as-signed of the source consensus. */ +#define LABEL_FROM_SHA3_DIGEST "from-sha3-digest" +/* Diff only: the SHA3 digest-in-full of the target consensus. */ +#define LABEL_TARGET_SHA3_DIGEST "target-sha3-digest" +/* Diff only: the valid-after date of the source consensus. */ +#define LABEL_FROM_VALID_AFTER "from-valid-after" +/* What kind of compression was used? */ +#define LABEL_COMPRESSION_TYPE "compression" +/** @} */ + +#define DOCTYPE_CONSENSUS "consensus" +#define DOCTYPE_CONSENSUS_DIFF "consensus-diff" + +/** + * Underlying directory that stores consensuses and consensus diffs. Don't + * use this directly: use cdm_cache_get() instead. + */ +static consensus_cache_t *cons_diff_cache = NULL; +/** + * If true, we have learned at least one new consensus since the + * consensus cache was last up-to-date. + */ +static int cdm_cache_dirty = 0; +/** + * If true, we have scanned the cache to update our hashtable of diffs. + */ +static int cdm_cache_loaded = 0; + +/** + * Possible status values for cdm_diff_t.cdm_diff_status + **/ +typedef enum cdm_diff_status_t { + CDM_DIFF_PRESENT=1, + CDM_DIFF_IN_PROGRESS=2, + CDM_DIFF_ERROR=3, +} cdm_diff_status_t; + +/** Which methods do we use for precompressing diffs? */ +static const compress_method_t compress_diffs_with[] = { + NO_METHOD, + GZIP_METHOD, +#ifdef HAVE_LZMA + LZMA_METHOD, +#endif +#ifdef HAVE_ZSTD + ZSTD_METHOD, +#endif +}; + +/** + * Event for rescanning the cache. + */ +static mainloop_event_t *consdiffmgr_rescan_ev = NULL; + +static void consdiffmgr_rescan_cb(mainloop_event_t *ev, void *arg); +static void mark_cdm_cache_dirty(void); + +/** How many different methods will we try to use for diff compression? */ +STATIC unsigned +n_diff_compression_methods(void) +{ + return ARRAY_LENGTH(compress_diffs_with); +} + +/** Which methods do we use for precompressing consensuses? */ +static const compress_method_t compress_consensus_with[] = { + ZLIB_METHOD, +#ifdef HAVE_LZMA + LZMA_METHOD, +#endif +#ifdef HAVE_ZSTD + ZSTD_METHOD, +#endif +}; + +/** How many different methods will we try to use for diff compression? */ +STATIC unsigned +n_consensus_compression_methods(void) +{ + return ARRAY_LENGTH(compress_consensus_with); +} + +/** For which compression method do we retain old consensuses? There's no + * need to keep all of them, since we won't be serving them. We'll + * go with ZLIB_METHOD because it's pretty fast and everyone has it. + */ +#define RETAIN_CONSENSUS_COMPRESSED_WITH_METHOD ZLIB_METHOD + +/** Handles pointing to the latest consensus entries as compressed and + * stored. */ +static consensus_cache_entry_handle_t * + latest_consensus[N_CONSENSUS_FLAVORS] + [ARRAY_LENGTH(compress_consensus_with)]; + +/** Hashtable node used to remember the current status of the diff + * from a given sha3 digest to the current consensus. */ +typedef struct cdm_diff_t { + HT_ENTRY(cdm_diff_t) node; + + /** Consensus flavor for this diff (part of ht key) */ + consensus_flavor_t flavor; + /** SHA3-256 digest of the consensus that this diff is _from_. (part of the + * ht key) */ + uint8_t from_sha3[DIGEST256_LEN]; + /** Method by which the diff is compressed. (part of the ht key */ + compress_method_t compress_method; + + /** One of the CDM_DIFF_* values, depending on whether this diff + * is available, in progress, or impossible to compute. */ + cdm_diff_status_t cdm_diff_status; + /** SHA3-256 digest of the consensus that this diff is _to. */ + uint8_t target_sha3[DIGEST256_LEN]; + + /** Handle to the cache entry for this diff, if any. We use a handle here + * to avoid thinking too hard about cache entry lifetime issues. */ + consensus_cache_entry_handle_t *entry; +} cdm_diff_t; + +/** Hashtable mapping flavor and source consensus digest to status. */ +static HT_HEAD(cdm_diff_ht, cdm_diff_t) cdm_diff_ht = HT_INITIALIZER(); + +/** + * Configuration for this module + */ +static consdiff_cfg_t consdiff_cfg = { + // XXXX I'd like to make this number bigger, but it interferes with the + // XXXX seccomp2 syscall filter, which tops out at BPF_MAXINS (4096) + // XXXX rules. + /* .cache_max_num = */ 128 +}; + +static int consdiffmgr_ensure_space_for_files(int n); +static int consensus_queue_compression_work(const char *consensus, + const networkstatus_t *as_parsed); +static int consensus_diff_queue_diff_work(consensus_cache_entry_t *diff_from, + consensus_cache_entry_t *diff_to); +static void consdiffmgr_set_cache_flags(void); + +/* ===== + * Hashtable setup + * ===== */ + +/** Helper: hash the key of a cdm_diff_t. */ +static unsigned +cdm_diff_hash(const cdm_diff_t *diff) +{ + uint8_t tmp[DIGEST256_LEN + 2]; + memcpy(tmp, diff->from_sha3, DIGEST256_LEN); + tmp[DIGEST256_LEN] = (uint8_t) diff->flavor; + tmp[DIGEST256_LEN+1] = (uint8_t) diff->compress_method; + return (unsigned) siphash24g(tmp, sizeof(tmp)); +} +/** Helper: compare two cdm_diff_t objects for key equality */ +static int +cdm_diff_eq(const cdm_diff_t *diff1, const cdm_diff_t *diff2) +{ + return fast_memeq(diff1->from_sha3, diff2->from_sha3, DIGEST256_LEN) && + diff1->flavor == diff2->flavor && + diff1->compress_method == diff2->compress_method; +} + +HT_PROTOTYPE(cdm_diff_ht, cdm_diff_t, node, cdm_diff_hash, cdm_diff_eq) +HT_GENERATE2(cdm_diff_ht, cdm_diff_t, node, cdm_diff_hash, cdm_diff_eq, + 0.6, tor_reallocarray, tor_free_) + +#define cdm_diff_free(diff) \ + FREE_AND_NULL(cdm_diff_t, cdm_diff_free_, (diff)) + +/** Release all storage held in <b>diff</b>. */ +static void +cdm_diff_free_(cdm_diff_t *diff) +{ + if (!diff) + return; + consensus_cache_entry_handle_free(diff->entry); + tor_free(diff); +} + +/** Create and return a new cdm_diff_t with the given values. Does not + * add it to the hashtable. */ +static cdm_diff_t * +cdm_diff_new(consensus_flavor_t flav, + const uint8_t *from_sha3, + const uint8_t *target_sha3, + compress_method_t method) +{ + cdm_diff_t *ent; + ent = tor_malloc_zero(sizeof(cdm_diff_t)); + ent->flavor = flav; + memcpy(ent->from_sha3, from_sha3, DIGEST256_LEN); + memcpy(ent->target_sha3, target_sha3, DIGEST256_LEN); + ent->compress_method = method; + return ent; +} + +/** + * Examine the diff hashtable to see whether we know anything about computing + * a diff of type <b>flav</b> between consensuses with the two provided + * SHA3-256 digests. If a computation is in progress, or if the computation + * has already been tried and failed, return 1. Otherwise, note the + * computation as "in progress" so that we don't reattempt it later, and + * return 0. + */ +static int +cdm_diff_ht_check_and_note_pending(consensus_flavor_t flav, + const uint8_t *from_sha3, + const uint8_t *target_sha3) +{ + struct cdm_diff_t search, *ent; + unsigned u; + int result = 0; + for (u = 0; u < n_diff_compression_methods(); ++u) { + compress_method_t method = compress_diffs_with[u]; + memset(&search, 0, sizeof(cdm_diff_t)); + search.flavor = flav; + search.compress_method = method; + memcpy(search.from_sha3, from_sha3, DIGEST256_LEN); + ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); + if (ent) { + tor_assert_nonfatal(ent->cdm_diff_status != CDM_DIFF_PRESENT); + result = 1; + continue; + } + ent = cdm_diff_new(flav, from_sha3, target_sha3, method); + ent->cdm_diff_status = CDM_DIFF_IN_PROGRESS; + HT_INSERT(cdm_diff_ht, &cdm_diff_ht, ent); + } + return result; +} + +/** + * Update the status of the diff of type <b>flav</b> between consensuses with + * the two provided SHA3-256 digests, so that its status becomes + * <b>status</b>, and its value becomes the <b>handle</b>. If <b>handle</b> + * is NULL, then the old handle (if any) is freed, and replaced with NULL. + */ +static void +cdm_diff_ht_set_status(consensus_flavor_t flav, + const uint8_t *from_sha3, + const uint8_t *to_sha3, + compress_method_t method, + int status, + consensus_cache_entry_handle_t *handle) +{ + if (handle == NULL) { + tor_assert_nonfatal(status != CDM_DIFF_PRESENT); + } + + struct cdm_diff_t search, *ent; + memset(&search, 0, sizeof(cdm_diff_t)); + search.flavor = flav; + search.compress_method = method, + memcpy(search.from_sha3, from_sha3, DIGEST256_LEN); + ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); + if (!ent) { + ent = cdm_diff_new(flav, from_sha3, to_sha3, method); + ent->cdm_diff_status = CDM_DIFF_IN_PROGRESS; + HT_INSERT(cdm_diff_ht, &cdm_diff_ht, ent); + } else if (fast_memneq(ent->target_sha3, to_sha3, DIGEST256_LEN)) { + // This can happen under certain really pathological conditions + // if we decide we don't care about a diff before it is actually + // done computing. + return; + } + + tor_assert_nonfatal(ent->cdm_diff_status == CDM_DIFF_IN_PROGRESS); + + ent->cdm_diff_status = status; + consensus_cache_entry_handle_free(ent->entry); + ent->entry = handle; +} + +/** + * Helper: Remove from the hash table every present (actually computed) diff + * of type <b>flav</b> whose target digest does not match + * <b>unless_target_sha3_matches</b>. + * + * This function is used for the hash table to throw away references to diffs + * that do not lead to the most given consensus of a given flavor. + */ +static void +cdm_diff_ht_purge(consensus_flavor_t flav, + const uint8_t *unless_target_sha3_matches) +{ + cdm_diff_t **diff, **next; + for (diff = HT_START(cdm_diff_ht, &cdm_diff_ht); diff; diff = next) { + cdm_diff_t *this = *diff; + + if ((*diff)->cdm_diff_status == CDM_DIFF_PRESENT && + flav == (*diff)->flavor) { + + if (BUG((*diff)->entry == NULL) || + consensus_cache_entry_handle_get((*diff)->entry) == NULL) { + /* the underlying entry has gone away; drop this. */ + next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff); + cdm_diff_free(this); + continue; + } + + if (unless_target_sha3_matches && + fast_memneq(unless_target_sha3_matches, (*diff)->target_sha3, + DIGEST256_LEN)) { + /* target hash doesn't match; drop this. */ + next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff); + cdm_diff_free(this); + continue; + } + } + next = HT_NEXT(cdm_diff_ht, &cdm_diff_ht, diff); + } +} + +/** + * Helper: initialize <b>cons_diff_cache</b>. + */ +static void +cdm_cache_init(void) +{ + unsigned n_entries = consdiff_cfg.cache_max_num * 2; + + tor_assert(cons_diff_cache == NULL); + cons_diff_cache = consensus_cache_open("diff-cache", n_entries); + if (cons_diff_cache == NULL) { + // LCOV_EXCL_START + log_err(LD_FS, "Error: Couldn't open storage for consensus diffs."); + tor_assert_unreached(); + // LCOV_EXCL_STOP + } else { + consdiffmgr_set_cache_flags(); + } + consdiffmgr_rescan_ev = + mainloop_event_postloop_new(consdiffmgr_rescan_cb, NULL); + mark_cdm_cache_dirty(); + cdm_cache_loaded = 0; +} + +/** + * Helper: return the consensus_cache_t * that backs this manager, + * initializing it if needed. + */ +STATIC consensus_cache_t * +cdm_cache_get(void) +{ + if (PREDICT_UNLIKELY(cons_diff_cache == NULL)) { + cdm_cache_init(); + } + return cons_diff_cache; +} + +/** + * Helper: given a list of labels, prepend the hex-encoded SHA3 digest + * of the <b>bodylen</b>-byte object at <b>body</b> to those labels, + * with <b>label</b> as its label. + */ +static void +cdm_labels_prepend_sha3(config_line_t **labels, + const char *label, + const uint8_t *body, + size_t bodylen) +{ + uint8_t sha3_digest[DIGEST256_LEN]; + char hexdigest[HEX_DIGEST256_LEN+1]; + crypto_digest256((char *)sha3_digest, + (const char *)body, bodylen, DIGEST_SHA3_256); + base16_encode(hexdigest, sizeof(hexdigest), + (const char *)sha3_digest, sizeof(sha3_digest)); + + config_line_prepend(labels, label, hexdigest); +} + +/** Helper: if there is a sha3-256 hex-encoded digest in <b>ent</b> with the + * given label, set <b>digest_out</b> to that value (decoded), and return 0. + * + * Return -1 if there is no such label, and -2 if it is badly formatted. */ +STATIC int +cdm_entry_get_sha3_value(uint8_t *digest_out, + consensus_cache_entry_t *ent, + const char *label) +{ + if (ent == NULL) + return -1; + + const char *hex = consensus_cache_entry_get_value(ent, label); + if (hex == NULL) + return -1; + + int n = base16_decode((char*)digest_out, DIGEST256_LEN, hex, strlen(hex)); + if (n != DIGEST256_LEN) + return -2; + else + return 0; +} + +/** + * Helper: look for a consensus with the given <b>flavor</b> and + * <b>valid_after</b> time in the cache. Return that consensus if it's + * present, or NULL if it's missing. + */ +STATIC consensus_cache_entry_t * +cdm_cache_lookup_consensus(consensus_flavor_t flavor, time_t valid_after) +{ + char formatted_time[ISO_TIME_LEN+1]; + format_iso_time_nospace(formatted_time, valid_after); + const char *flavname = networkstatus_get_flavor_name(flavor); + + /* We'll filter by valid-after time first, since that should + * match the fewest documents. */ + /* We could add an extra hashtable here, but since we only do this scan + * when adding a new consensus, it probably doesn't matter much. */ + smartlist_t *matches = smartlist_new(); + consensus_cache_find_all(matches, cdm_cache_get(), + LABEL_VALID_AFTER, formatted_time); + consensus_cache_filter_list(matches, LABEL_FLAVOR, flavname); + consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + + consensus_cache_entry_t *result = NULL; + if (smartlist_len(matches)) { + result = smartlist_get(matches, 0); + } + smartlist_free(matches); + + return result; +} + +/** Return the maximum age (in seconds) of consensuses that we should consider + * storing. The available space in the directory may impose additional limits + * on how much we store. */ +static int32_t +get_max_age_to_cache(void) +{ + const int32_t DEFAULT_MAX_AGE_TO_CACHE = 8192; + const int32_t MIN_MAX_AGE_TO_CACHE = 0; + const int32_t MAX_MAX_AGE_TO_CACHE = 8192; + const char MAX_AGE_TO_CACHE_NAME[] = "max-consensus-age-to-cache-for-diff"; + + const or_options_t *options = get_options(); + + if (options->MaxConsensusAgeForDiffs) { + const int v = options->MaxConsensusAgeForDiffs; + if (v >= MAX_MAX_AGE_TO_CACHE * 3600) + return MAX_MAX_AGE_TO_CACHE; + else + return v; + } + + /* The parameter is in hours, so we multiply */ + return 3600 * networkstatus_get_param(NULL, + MAX_AGE_TO_CACHE_NAME, + DEFAULT_MAX_AGE_TO_CACHE, + MIN_MAX_AGE_TO_CACHE, + MAX_MAX_AGE_TO_CACHE); +} + +/** + * Given a string containing a networkstatus consensus, and the results of + * having parsed that consensus, add that consensus to the cache if it is not + * already present and not too old. Create new consensus diffs from or to + * that consensus as appropriate. + * + * Return 0 on success and -1 on failure. + */ +int +consdiffmgr_add_consensus(const char *consensus, + const networkstatus_t *as_parsed) +{ + if (BUG(consensus == NULL) || BUG(as_parsed == NULL)) + return -1; // LCOV_EXCL_LINE + if (BUG(as_parsed->type != NS_TYPE_CONSENSUS)) + return -1; // LCOV_EXCL_LINE + + const consensus_flavor_t flavor = as_parsed->flavor; + const time_t valid_after = as_parsed->valid_after; + + if (valid_after < approx_time() - get_max_age_to_cache()) { + log_info(LD_DIRSERV, "We don't care about this consensus document; it's " + "too old."); + return -1; + } + + /* Do we already have this one? */ + consensus_cache_entry_t *entry = + cdm_cache_lookup_consensus(flavor, valid_after); + if (entry) { + log_info(LD_DIRSERV, "We already have a copy of that consensus"); + return -1; + } + + /* We don't have it. Add it to the cache. */ + return consensus_queue_compression_work(consensus, as_parsed); +} + +/** + * Helper: used to sort two smartlists of consensus_cache_entry_t by their + * LABEL_VALID_AFTER labels. + */ +static int +compare_by_valid_after_(const void **a, const void **b) +{ + const consensus_cache_entry_t *e1 = *a; + const consensus_cache_entry_t *e2 = *b; + /* We're in luck here: sorting UTC iso-encoded values lexically will work + * fine (until 9999). */ + return strcmp_opt(consensus_cache_entry_get_value(e1, LABEL_VALID_AFTER), + consensus_cache_entry_get_value(e2, LABEL_VALID_AFTER)); +} + +/** + * Helper: Sort <b>lst</b> by LABEL_VALID_AFTER and return the most recent + * entry. + */ +static consensus_cache_entry_t * +sort_and_find_most_recent(smartlist_t *lst) +{ + smartlist_sort(lst, compare_by_valid_after_); + if (smartlist_len(lst)) { + return smartlist_get(lst, smartlist_len(lst) - 1); + } else { + return NULL; + } +} + +/** Return i such that compress_consensus_with[i] == method. Return + * -1 if no such i exists. */ +static int +consensus_compression_method_pos(compress_method_t method) +{ + unsigned i; + for (i = 0; i < n_consensus_compression_methods(); ++i) { + if (compress_consensus_with[i] == method) { + return i; + } + } + return -1; +} + +/** + * If we know a consensus with the flavor <b>flavor</b> compressed with + * <b>method</b>, set *<b>entry_out</b> to that value. Return values are as + * for consdiffmgr_find_diff_from(). + */ +consdiff_status_t +consdiffmgr_find_consensus(struct consensus_cache_entry_t **entry_out, + consensus_flavor_t flavor, + compress_method_t method) +{ + tor_assert(entry_out); + tor_assert((int)flavor < N_CONSENSUS_FLAVORS); + + int pos = consensus_compression_method_pos(method); + if (pos < 0) { + // We don't compress consensuses with this method. + return CONSDIFF_NOT_FOUND; + } + consensus_cache_entry_handle_t *handle = latest_consensus[flavor][pos]; + if (!handle) + return CONSDIFF_NOT_FOUND; + *entry_out = consensus_cache_entry_handle_get(handle); + if (*entry_out) + return CONSDIFF_AVAILABLE; + else + return CONSDIFF_NOT_FOUND; +} + +/** + * Look up consensus_cache_entry_t for the consensus of type <b>flavor</b>, + * from the source consensus with the specified digest (which must be SHA3). + * + * If the diff is present, store it into *<b>entry_out</b> and return + * CONSDIFF_AVAILABLE. Otherwise return CONSDIFF_NOT_FOUND or + * CONSDIFF_IN_PROGRESS. + */ +consdiff_status_t +consdiffmgr_find_diff_from(consensus_cache_entry_t **entry_out, + consensus_flavor_t flavor, + int digest_type, + const uint8_t *digest, + size_t digestlen, + compress_method_t method) +{ + if (BUG(digest_type != DIGEST_SHA3_256) || + BUG(digestlen != DIGEST256_LEN)) { + return CONSDIFF_NOT_FOUND; // LCOV_EXCL_LINE + } + + // Try to look up the entry in the hashtable. + cdm_diff_t search, *ent; + memset(&search, 0, sizeof(search)); + search.flavor = flavor; + search.compress_method = method; + memcpy(search.from_sha3, digest, DIGEST256_LEN); + ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); + + if (ent == NULL || + ent->cdm_diff_status == CDM_DIFF_ERROR) { + return CONSDIFF_NOT_FOUND; + } else if (ent->cdm_diff_status == CDM_DIFF_IN_PROGRESS) { + return CONSDIFF_IN_PROGRESS; + } else if (BUG(ent->cdm_diff_status != CDM_DIFF_PRESENT)) { + return CONSDIFF_IN_PROGRESS; + } + + if (BUG(ent->entry == NULL)) { + return CONSDIFF_NOT_FOUND; + } + *entry_out = consensus_cache_entry_handle_get(ent->entry); + return (*entry_out) ? CONSDIFF_AVAILABLE : CONSDIFF_NOT_FOUND; + +#if 0 + // XXXX Remove this. I'm keeping it around for now in case we need to + // XXXX debug issues in the hashtable. + char hex[HEX_DIGEST256_LEN+1]; + base16_encode(hex, sizeof(hex), (const char *)digest, digestlen); + const char *flavname = networkstatus_get_flavor_name(flavor); + + smartlist_t *matches = smartlist_new(); + consensus_cache_find_all(matches, cdm_cache_get(), + LABEL_FROM_SHA3_DIGEST, hex); + consensus_cache_filter_list(matches, LABEL_FLAVOR, flavname); + consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF); + + *entry_out = sort_and_find_most_recent(matches); + consdiff_status_t result = + (*entry_out) ? CONSDIFF_AVAILABLE : CONSDIFF_NOT_FOUND; + smartlist_free(matches); + + return result; +#endif /* 0 */ +} + +/** + * Perform periodic cleanup tasks on the consensus diff cache. Return + * the number of objects marked for deletion. + */ +int +consdiffmgr_cleanup(void) +{ + smartlist_t *objects = smartlist_new(); + smartlist_t *consensuses = smartlist_new(); + smartlist_t *diffs = smartlist_new(); + int n_to_delete = 0; + + log_debug(LD_DIRSERV, "Looking for consdiffmgr entries to remove"); + + // 1. Delete any consensus or diff or anything whose valid_after is too old. + const time_t valid_after_cutoff = approx_time() - get_max_age_to_cache(); + + consensus_cache_find_all(objects, cdm_cache_get(), + NULL, NULL); + SMARTLIST_FOREACH_BEGIN(objects, consensus_cache_entry_t *, ent) { + const char *lv_valid_after = + consensus_cache_entry_get_value(ent, LABEL_VALID_AFTER); + if (! lv_valid_after) { + log_debug(LD_DIRSERV, "Ignoring entry because it had no %s label", + LABEL_VALID_AFTER); + continue; + } + time_t valid_after = 0; + if (parse_iso_time_nospace(lv_valid_after, &valid_after) < 0) { + log_debug(LD_DIRSERV, "Ignoring entry because its %s value (%s) was " + "unparseable", LABEL_VALID_AFTER, escaped(lv_valid_after)); + continue; + } + if (valid_after < valid_after_cutoff) { + log_debug(LD_DIRSERV, "Deleting entry because its %s value (%s) was " + "too old", LABEL_VALID_AFTER, lv_valid_after); + consensus_cache_entry_mark_for_removal(ent); + ++n_to_delete; + } + } SMARTLIST_FOREACH_END(ent); + + // 2. Delete all diffs that lead to a consensus whose valid-after is not the + // latest. + for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) { + const char *flavname = networkstatus_get_flavor_name(flav); + /* Determine the most recent consensus of this flavor */ + consensus_cache_find_all(consensuses, cdm_cache_get(), + LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + consensus_cache_filter_list(consensuses, LABEL_FLAVOR, flavname); + consensus_cache_entry_t *most_recent = + sort_and_find_most_recent(consensuses); + if (most_recent == NULL) + continue; + const char *most_recent_sha3 = + consensus_cache_entry_get_value(most_recent, + LABEL_SHA3_DIGEST_UNCOMPRESSED); + if (BUG(most_recent_sha3 == NULL)) + continue; // LCOV_EXCL_LINE + + /* consider all such-flavored diffs, and look to see if they match. */ + consensus_cache_find_all(diffs, cdm_cache_get(), + LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF); + consensus_cache_filter_list(diffs, LABEL_FLAVOR, flavname); + SMARTLIST_FOREACH_BEGIN(diffs, consensus_cache_entry_t *, diff) { + const char *this_diff_target_sha3 = + consensus_cache_entry_get_value(diff, LABEL_TARGET_SHA3_DIGEST); + if (!this_diff_target_sha3) + continue; + if (strcmp(this_diff_target_sha3, most_recent_sha3)) { + consensus_cache_entry_mark_for_removal(diff); + ++n_to_delete; + } + } SMARTLIST_FOREACH_END(diff); + smartlist_clear(consensuses); + smartlist_clear(diffs); + } + + // 3. Delete all consensuses except the most recent that are compressed with + // an un-preferred method. + for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) { + const char *flavname = networkstatus_get_flavor_name(flav); + /* Determine the most recent consensus of this flavor */ + consensus_cache_find_all(consensuses, cdm_cache_get(), + LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + consensus_cache_filter_list(consensuses, LABEL_FLAVOR, flavname); + consensus_cache_entry_t *most_recent = + sort_and_find_most_recent(consensuses); + if (most_recent == NULL) + continue; + const char *most_recent_sha3_uncompressed = + consensus_cache_entry_get_value(most_recent, + LABEL_SHA3_DIGEST_UNCOMPRESSED); + const char *retain_methodname = compression_method_get_name( + RETAIN_CONSENSUS_COMPRESSED_WITH_METHOD); + + if (BUG(most_recent_sha3_uncompressed == NULL)) + continue; + SMARTLIST_FOREACH_BEGIN(consensuses, consensus_cache_entry_t *, ent) { + const char *lv_sha3_uncompressed = + consensus_cache_entry_get_value(ent, LABEL_SHA3_DIGEST_UNCOMPRESSED); + if (BUG(! lv_sha3_uncompressed)) + continue; + if (!strcmp(lv_sha3_uncompressed, most_recent_sha3_uncompressed)) + continue; // This _is_ the most recent. + const char *lv_methodname = + consensus_cache_entry_get_value(ent, LABEL_COMPRESSION_TYPE); + if (! lv_methodname || strcmp(lv_methodname, retain_methodname)) { + consensus_cache_entry_mark_for_removal(ent); + ++n_to_delete; + } + } SMARTLIST_FOREACH_END(ent); + } + + smartlist_free(objects); + smartlist_free(consensuses); + smartlist_free(diffs); + + // Actually remove files, if they're not used. + consensus_cache_delete_pending(cdm_cache_get(), 0); + return n_to_delete; +} + +/** + * Initialize the consensus diff manager and its cache, and configure + * its parameters based on the latest torrc and networkstatus parameters. + */ +void +consdiffmgr_configure(const consdiff_cfg_t *cfg) +{ + if (cfg) + memcpy(&consdiff_cfg, cfg, sizeof(consdiff_cfg)); + + (void) cdm_cache_get(); +} + +/** + * Tell the sandbox (if any) configured by <b>cfg</b> to allow the + * operations that the consensus diff manager will need. + */ +int +consdiffmgr_register_with_sandbox(struct sandbox_cfg_elem **cfg) +{ + return consensus_cache_register_with_sandbox(cdm_cache_get(), cfg); +} + +/** + * Scan the consensus diff manager's cache for any grossly malformed entries, + * and mark them as deletable. Return 0 if no problems were found; 1 + * if problems were found and fixed. + */ +int +consdiffmgr_validate(void) +{ + /* Right now, we only check for entries that have bad sha3 values */ + int problems = 0; + + smartlist_t *objects = smartlist_new(); + consensus_cache_find_all(objects, cdm_cache_get(), + NULL, NULL); + SMARTLIST_FOREACH_BEGIN(objects, consensus_cache_entry_t *, obj) { + uint8_t sha3_expected[DIGEST256_LEN]; + uint8_t sha3_received[DIGEST256_LEN]; + int r = cdm_entry_get_sha3_value(sha3_expected, obj, LABEL_SHA3_DIGEST); + if (r == -1) { + /* digest isn't there; that's allowed */ + continue; + } else if (r == -2) { + /* digest is malformed; that's not allowed */ + problems = 1; + consensus_cache_entry_mark_for_removal(obj); + continue; + } + const uint8_t *body; + size_t bodylen; + consensus_cache_entry_incref(obj); + r = consensus_cache_entry_get_body(obj, &body, &bodylen); + if (r == 0) { + crypto_digest256((char *)sha3_received, (const char *)body, bodylen, + DIGEST_SHA3_256); + } + consensus_cache_entry_decref(obj); + if (r < 0) + continue; + + // Deconfuse coverity about the possibility of sha3_received being + // uninitialized + tor_assert(r <= 0); + + if (fast_memneq(sha3_received, sha3_expected, DIGEST256_LEN)) { + problems = 1; + consensus_cache_entry_mark_for_removal(obj); + continue; + } + + } SMARTLIST_FOREACH_END(obj); + smartlist_free(objects); + return problems; +} + +/** + * Helper: build new diffs of <b>flavor</b> as needed + */ +static void +consdiffmgr_rescan_flavor_(consensus_flavor_t flavor) +{ + smartlist_t *matches = NULL; + smartlist_t *diffs = NULL; + smartlist_t *compute_diffs_from = NULL; + strmap_t *have_diff_from = NULL; + + // look for the most recent consensus, and for all previous in-range + // consensuses. Do they all have diffs to it? + const char *flavname = networkstatus_get_flavor_name(flavor); + + // 1. find the most recent consensus, and the ones that we might want + // to diff to it. + const char *methodname = compression_method_get_name( + RETAIN_CONSENSUS_COMPRESSED_WITH_METHOD); + + matches = smartlist_new(); + consensus_cache_find_all(matches, cdm_cache_get(), + LABEL_FLAVOR, flavname); + consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + consensus_cache_filter_list(matches, LABEL_COMPRESSION_TYPE, methodname); + consensus_cache_entry_t *most_recent = sort_and_find_most_recent(matches); + if (!most_recent) { + log_info(LD_DIRSERV, "No 'most recent' %s consensus found; " + "not making diffs", flavname); + goto done; + } + tor_assert(smartlist_len(matches)); + smartlist_del(matches, smartlist_len(matches) - 1); + + const char *most_recent_valid_after = + consensus_cache_entry_get_value(most_recent, LABEL_VALID_AFTER); + if (BUG(most_recent_valid_after == NULL)) + goto done; //LCOV_EXCL_LINE + uint8_t most_recent_sha3[DIGEST256_LEN]; + if (BUG(cdm_entry_get_sha3_value(most_recent_sha3, most_recent, + LABEL_SHA3_DIGEST_UNCOMPRESSED) < 0)) + goto done; //LCOV_EXCL_LINE + + // 2. Find all the relevant diffs _to_ this consensus. These are ones + // that we don't need to compute. + diffs = smartlist_new(); + consensus_cache_find_all(diffs, cdm_cache_get(), + LABEL_VALID_AFTER, most_recent_valid_after); + consensus_cache_filter_list(diffs, LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF); + consensus_cache_filter_list(diffs, LABEL_FLAVOR, flavname); + have_diff_from = strmap_new(); + SMARTLIST_FOREACH_BEGIN(diffs, consensus_cache_entry_t *, diff) { + const char *va = consensus_cache_entry_get_value(diff, + LABEL_FROM_VALID_AFTER); + if (BUG(va == NULL)) + continue; // LCOV_EXCL_LINE + strmap_set(have_diff_from, va, diff); + } SMARTLIST_FOREACH_END(diff); + + // 3. See which consensuses in 'matches' don't have diffs yet. + smartlist_reverse(matches); // from newest to oldest. + compute_diffs_from = smartlist_new(); + SMARTLIST_FOREACH_BEGIN(matches, consensus_cache_entry_t *, ent) { + const char *va = consensus_cache_entry_get_value(ent, LABEL_VALID_AFTER); + if (BUG(va == NULL)) + continue; // LCOV_EXCL_LINE + if (strmap_get(have_diff_from, va) != NULL) + continue; /* we already have this one. */ + smartlist_add(compute_diffs_from, ent); + /* Since we are not going to serve this as the most recent consensus + * any more, we should stop keeping it mmap'd when it's not in use. + */ + consensus_cache_entry_mark_for_aggressive_release(ent); + } SMARTLIST_FOREACH_END(ent); + + log_info(LD_DIRSERV, + "The most recent %s consensus is valid-after %s. We have diffs to " + "this consensus for %d/%d older %s consensuses. Generating diffs " + "for the other %d.", + flavname, + most_recent_valid_after, + smartlist_len(matches) - smartlist_len(compute_diffs_from), + smartlist_len(matches), + flavname, + smartlist_len(compute_diffs_from)); + + // 4. Update the hashtable; remove entries in this flavor to other + // target consensuses. + cdm_diff_ht_purge(flavor, most_recent_sha3); + + // 5. Actually launch the requests. + SMARTLIST_FOREACH_BEGIN(compute_diffs_from, consensus_cache_entry_t *, c) { + if (BUG(c == most_recent)) + continue; // LCOV_EXCL_LINE + + uint8_t this_sha3[DIGEST256_LEN]; + if (cdm_entry_get_sha3_value(this_sha3, c, + LABEL_SHA3_DIGEST_AS_SIGNED)<0) { + // Not actually a bug, since we might be running with a directory + // with stale files from before the #22143 fixes. + continue; + } + if (cdm_diff_ht_check_and_note_pending(flavor, + this_sha3, most_recent_sha3)) { + // This is already pending, or we encountered an error. + continue; + } + consensus_diff_queue_diff_work(c, most_recent); + } SMARTLIST_FOREACH_END(c); + + done: + smartlist_free(matches); + smartlist_free(diffs); + smartlist_free(compute_diffs_from); + strmap_free(have_diff_from, NULL); +} + +/** + * Scan the cache for the latest consensuses and add their handles to + * latest_consensus + */ +static void +consdiffmgr_consensus_load(void) +{ + smartlist_t *matches = smartlist_new(); + for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) { + const char *flavname = networkstatus_get_flavor_name(flav); + smartlist_clear(matches); + consensus_cache_find_all(matches, cdm_cache_get(), + LABEL_FLAVOR, flavname); + consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + consensus_cache_entry_t *most_recent = sort_and_find_most_recent(matches); + if (! most_recent) + continue; // no consensuses. + const char *most_recent_sha3 = + consensus_cache_entry_get_value(most_recent, + LABEL_SHA3_DIGEST_UNCOMPRESSED); + if (BUG(most_recent_sha3 == NULL)) + continue; // LCOV_EXCL_LINE + consensus_cache_filter_list(matches, LABEL_SHA3_DIGEST_UNCOMPRESSED, + most_recent_sha3); + + // Everything that remains matches the most recent consensus of this + // flavor. + SMARTLIST_FOREACH_BEGIN(matches, consensus_cache_entry_t *, ent) { + const char *lv_compression = + consensus_cache_entry_get_value(ent, LABEL_COMPRESSION_TYPE); + compress_method_t method = + compression_method_get_by_name(lv_compression); + int pos = consensus_compression_method_pos(method); + if (pos < 0) + continue; + consensus_cache_entry_handle_free(latest_consensus[flav][pos]); + latest_consensus[flav][pos] = consensus_cache_entry_handle_new(ent); + } SMARTLIST_FOREACH_END(ent); + } + smartlist_free(matches); +} + +/** + * Scan the cache for diffs, and add them to the hashtable. + */ +static void +consdiffmgr_diffs_load(void) +{ + smartlist_t *diffs = smartlist_new(); + consensus_cache_find_all(diffs, cdm_cache_get(), + LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF); + SMARTLIST_FOREACH_BEGIN(diffs, consensus_cache_entry_t *, diff) { + const char *lv_flavor = + consensus_cache_entry_get_value(diff, LABEL_FLAVOR); + if (!lv_flavor) + continue; + int flavor = networkstatus_parse_flavor_name(lv_flavor); + if (flavor < 0) + continue; + const char *lv_compression = + consensus_cache_entry_get_value(diff, LABEL_COMPRESSION_TYPE); + compress_method_t method = NO_METHOD; + if (lv_compression) { + method = compression_method_get_by_name(lv_compression); + if (method == UNKNOWN_METHOD) { + continue; + } + } + + uint8_t from_sha3[DIGEST256_LEN]; + uint8_t to_sha3[DIGEST256_LEN]; + if (cdm_entry_get_sha3_value(from_sha3, diff, LABEL_FROM_SHA3_DIGEST)<0) + continue; + if (cdm_entry_get_sha3_value(to_sha3, diff, LABEL_TARGET_SHA3_DIGEST)<0) + continue; + + cdm_diff_ht_set_status(flavor, from_sha3, to_sha3, + method, + CDM_DIFF_PRESENT, + consensus_cache_entry_handle_new(diff)); + } SMARTLIST_FOREACH_END(diff); + smartlist_free(diffs); +} + +/** + * Build new diffs as needed. + */ +void +consdiffmgr_rescan(void) +{ + if (cdm_cache_dirty == 0) + return; + + // Clean up here to make room for new diffs, and to ensure that older + // consensuses do not have any entries. + consdiffmgr_cleanup(); + + if (cdm_cache_loaded == 0) { + consdiffmgr_diffs_load(); + consdiffmgr_consensus_load(); + cdm_cache_loaded = 1; + } + + for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) { + consdiffmgr_rescan_flavor_((consensus_flavor_t) flav); + } + + cdm_cache_dirty = 0; +} + +/** Callback wrapper for consdiffmgr_rescan */ +static void +consdiffmgr_rescan_cb(mainloop_event_t *ev, void *arg) +{ + (void)ev; + (void)arg; + consdiffmgr_rescan(); +} + +/** Mark the cache as dirty, and schedule a rescan event. */ +static void +mark_cdm_cache_dirty(void) +{ + cdm_cache_dirty = 1; + tor_assert(consdiffmgr_rescan_ev); + mainloop_event_activate(consdiffmgr_rescan_ev); +} + +/** + * Helper: compare two files by their from-valid-after and valid-after labels, + * trying to sort in ascending order by from-valid-after (when present) and + * valid-after (when not). Place everything that has neither label first in + * the list. + */ +static int +compare_by_staleness_(const void **a, const void **b) +{ + const consensus_cache_entry_t *e1 = *a; + const consensus_cache_entry_t *e2 = *b; + const char *va1, *fva1, *va2, *fva2; + va1 = consensus_cache_entry_get_value(e1, LABEL_VALID_AFTER); + va2 = consensus_cache_entry_get_value(e2, LABEL_VALID_AFTER); + fva1 = consensus_cache_entry_get_value(e1, LABEL_FROM_VALID_AFTER); + fva2 = consensus_cache_entry_get_value(e2, LABEL_FROM_VALID_AFTER); + + if (fva1) + va1 = fva1; + if (fva2) + va2 = fva2; + + /* See note about iso-encoded values in compare_by_valid_after_. Also note + * that missing dates will get placed first. */ + return strcmp_opt(va1, va2); +} + +/** If there are not enough unused filenames to store <b>n</b> files, then + * delete old consensuses until there are. (We have to keep track of the + * number of filenames because of the way that the seccomp2 cache works.) + * + * Return 0 on success, -1 on failure. + **/ +static int +consdiffmgr_ensure_space_for_files(int n) +{ + consensus_cache_t *cache = cdm_cache_get(); + if (consensus_cache_get_n_filenames_available(cache) >= n) { + // there are already enough unused filenames. + return 0; + } + // Try a cheap deletion of stuff that's waiting to get deleted. + consensus_cache_delete_pending(cache, 0); + if (consensus_cache_get_n_filenames_available(cache) >= n) { + // okay, _that_ made enough filenames available. + return 0; + } + // Let's get more assertive: clean out unused stuff, and force-remove + // the files that we can. + consdiffmgr_cleanup(); + consensus_cache_delete_pending(cache, 1); + const int n_to_remove = n - consensus_cache_get_n_filenames_available(cache); + if (n_to_remove <= 0) { + // okay, finally! + return 0; + } + + // At this point, we're going to have to throw out objects that will be + // missed. Too bad! + smartlist_t *objects = smartlist_new(); + consensus_cache_find_all(objects, cache, NULL, NULL); + smartlist_sort(objects, compare_by_staleness_); + int n_marked = 0; + SMARTLIST_FOREACH_BEGIN(objects, consensus_cache_entry_t *, ent) { + consensus_cache_entry_mark_for_removal(ent); + if (++n_marked >= n_to_remove) + break; + } SMARTLIST_FOREACH_END(ent); + smartlist_free(objects); + + consensus_cache_delete_pending(cache, 1); + + if (consensus_cache_may_overallocate(cache)) { + /* If we're allowed to throw extra files into the cache, let's do so + * rather getting upset. + */ + return 0; + } + + if (BUG(n_marked < n_to_remove)) + return -1; + else + return 0; +} + +/** + * Set consensus cache flags on the objects in this consdiffmgr. + */ +static void +consdiffmgr_set_cache_flags(void) +{ + /* Right now, we just mark the consensus objects for aggressive release, + * so that they get mmapped for as little time as possible. */ + smartlist_t *objects = smartlist_new(); + consensus_cache_find_all(objects, cdm_cache_get(), LABEL_DOCTYPE, + DOCTYPE_CONSENSUS); + SMARTLIST_FOREACH_BEGIN(objects, consensus_cache_entry_t *, ent) { + consensus_cache_entry_mark_for_aggressive_release(ent); + } SMARTLIST_FOREACH_END(ent); + smartlist_free(objects); +} + +/** + * Called before shutdown: drop all storage held by the consdiffmgr.c module. + */ +void +consdiffmgr_free_all(void) +{ + cdm_diff_t **diff, **next; + for (diff = HT_START(cdm_diff_ht, &cdm_diff_ht); diff; diff = next) { + cdm_diff_t *this = *diff; + next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff); + cdm_diff_free(this); + } + int i; + unsigned j; + for (i = 0; i < N_CONSENSUS_FLAVORS; ++i) { + for (j = 0; j < n_consensus_compression_methods(); ++j) { + consensus_cache_entry_handle_free(latest_consensus[i][j]); + } + } + memset(latest_consensus, 0, sizeof(latest_consensus)); + consensus_cache_free(cons_diff_cache); + cons_diff_cache = NULL; + mainloop_event_free(consdiffmgr_rescan_ev); +} + +/* ===== + Thread workers + =====*/ + +typedef struct compressed_result_t { + config_line_t *labels; + /** + * Output: Body of the diff, as compressed. + */ + uint8_t *body; + /** + * Output: length of body_out + */ + size_t bodylen; +} compressed_result_t; + +/** + * Compress the bytestring <b>input</b> of length <b>len</b> using the + * <n>n_methods</b> compression methods listed in the array <b>methods</b>. + * + * For each successful compression, set the fields in the <b>results_out</b> + * array in the position corresponding to the compression method. Use + * <b>labels_in</b> as a basis for the labels of the result. + * + * Return 0 if all compression succeeded; -1 if any failed. + */ +static int +compress_multiple(compressed_result_t *results_out, int n_methods, + const compress_method_t *methods, + const uint8_t *input, size_t len, + const config_line_t *labels_in) +{ + int rv = 0; + int i; + for (i = 0; i < n_methods; ++i) { + compress_method_t method = methods[i]; + const char *methodname = compression_method_get_name(method); + char *result; + size_t sz; + if (0 == tor_compress(&result, &sz, (const char*)input, len, method)) { + results_out[i].body = (uint8_t*)result; + results_out[i].bodylen = sz; + results_out[i].labels = config_lines_dup(labels_in); + cdm_labels_prepend_sha3(&results_out[i].labels, LABEL_SHA3_DIGEST, + results_out[i].body, + results_out[i].bodylen); + config_line_prepend(&results_out[i].labels, + LABEL_COMPRESSION_TYPE, + methodname); + } else { + rv = -1; + } + } + return rv; +} + +/** + * Given an array of <b>n</b> compressed_result_t in <b>results</b>, + * as produced by compress_multiple, store them all into the + * consdiffmgr, and store handles to them in the <b>handles_out</b> + * array. + * + * Return CDM_DIFF_PRESENT if any was stored, and CDM_DIFF_ERROR if none + * was stored. + */ +static cdm_diff_status_t +store_multiple(consensus_cache_entry_handle_t **handles_out, + int n, + const compress_method_t *methods, + const compressed_result_t *results, + const char *description) +{ + cdm_diff_status_t status = CDM_DIFF_ERROR; + consdiffmgr_ensure_space_for_files(n); + + int i; + for (i = 0; i < n; ++i) { + compress_method_t method = methods[i]; + uint8_t *body_out = results[i].body; + size_t bodylen_out = results[i].bodylen; + config_line_t *labels = results[i].labels; + const char *methodname = compression_method_get_name(method); + if (body_out && bodylen_out && labels) { + /* Success! Store the results */ + log_info(LD_DIRSERV, "Adding %s, compressed with %s", + description, methodname); + + consensus_cache_entry_t *ent = + consensus_cache_add(cdm_cache_get(), + labels, + body_out, + bodylen_out); + if (ent == NULL) { + static ratelim_t cant_store_ratelim = RATELIM_INIT(5*60); + log_fn_ratelim(&cant_store_ratelim, LOG_WARN, LD_FS, + "Unable to store object %s compressed with %s.", + description, methodname); + continue; + } + + status = CDM_DIFF_PRESENT; + handles_out[i] = consensus_cache_entry_handle_new(ent); + consensus_cache_entry_decref(ent); + } + } + return status; +} + +/** + * An object passed to a worker thread that will try to produce a consensus + * diff. + */ +typedef struct consensus_diff_worker_job_t { + /** + * Input: The consensus to compute the diff from. Holds a reference to the + * cache entry, which must not be released until the job is passed back to + * the main thread. The body must be mapped into memory in the main thread. + */ + consensus_cache_entry_t *diff_from; + /** + * Input: The consensus to compute the diff to. Holds a reference to the + * cache entry, which must not be released until the job is passed back to + * the main thread. The body must be mapped into memory in the main thread. + */ + consensus_cache_entry_t *diff_to; + + /** Output: labels and bodies */ + compressed_result_t out[ARRAY_LENGTH(compress_diffs_with)]; +} consensus_diff_worker_job_t; + +/** Given a consensus_cache_entry_t, check whether it has a label claiming + * that it was compressed. If so, uncompress its contents into <b>out</b> and + * set <b>outlen</b> to hold their size. If not, just copy the body into + * <b>out</b> and set <b>outlen</b> to its length. Return 0 on success, + * -1 on failure. + * + * In all cases, the output is nul-terminated. */ +STATIC int +uncompress_or_copy(char **out, size_t *outlen, + consensus_cache_entry_t *ent) +{ + const uint8_t *body; + size_t bodylen; + + if (consensus_cache_entry_get_body(ent, &body, &bodylen) < 0) + return -1; + + const char *lv_compression = + consensus_cache_entry_get_value(ent, LABEL_COMPRESSION_TYPE); + compress_method_t method = NO_METHOD; + + if (lv_compression) + method = compression_method_get_by_name(lv_compression); + + return tor_uncompress(out, outlen, (const char *)body, bodylen, + method, 1, LOG_WARN); +} + +/** + * Worker function. This function runs inside a worker thread and receives + * a consensus_diff_worker_job_t as its input. + */ +static workqueue_reply_t +consensus_diff_worker_threadfn(void *state_, void *work_) +{ + (void)state_; + consensus_diff_worker_job_t *job = work_; + const uint8_t *diff_from, *diff_to; + size_t len_from, len_to; + int r; + /* We need to have the body already mapped into RAM here. + */ + r = consensus_cache_entry_get_body(job->diff_from, &diff_from, &len_from); + if (BUG(r < 0)) + return WQ_RPL_REPLY; // LCOV_EXCL_LINE + r = consensus_cache_entry_get_body(job->diff_to, &diff_to, &len_to); + if (BUG(r < 0)) + return WQ_RPL_REPLY; // LCOV_EXCL_LINE + + const char *lv_to_valid_after = + consensus_cache_entry_get_value(job->diff_to, LABEL_VALID_AFTER); + const char *lv_to_fresh_until = + consensus_cache_entry_get_value(job->diff_to, LABEL_FRESH_UNTIL); + const char *lv_to_valid_until = + consensus_cache_entry_get_value(job->diff_to, LABEL_VALID_UNTIL); + const char *lv_to_signatories = + consensus_cache_entry_get_value(job->diff_to, LABEL_SIGNATORIES); + const char *lv_from_valid_after = + consensus_cache_entry_get_value(job->diff_from, LABEL_VALID_AFTER); + const char *lv_from_digest = + consensus_cache_entry_get_value(job->diff_from, + LABEL_SHA3_DIGEST_AS_SIGNED); + const char *lv_from_flavor = + consensus_cache_entry_get_value(job->diff_from, LABEL_FLAVOR); + const char *lv_to_flavor = + consensus_cache_entry_get_value(job->diff_to, LABEL_FLAVOR); + const char *lv_to_digest = + consensus_cache_entry_get_value(job->diff_to, + LABEL_SHA3_DIGEST_UNCOMPRESSED); + + if (! lv_from_digest) { + /* This isn't a bug right now, since it can happen if you're migrating + * from an older version of master to a newer one. The older ones didn't + * annotate their stored consensus objects with sha3-digest-as-signed. + */ + return WQ_RPL_REPLY; // LCOV_EXCL_LINE + } + + /* All these values are mandatory on the input */ + if (BUG(!lv_to_valid_after) || + BUG(!lv_from_valid_after) || + BUG(!lv_from_flavor) || + BUG(!lv_to_flavor)) { + return WQ_RPL_REPLY; // LCOV_EXCL_LINE + } + /* The flavors need to match */ + if (BUG(strcmp(lv_from_flavor, lv_to_flavor))) { + return WQ_RPL_REPLY; // LCOV_EXCL_LINE + } + + char *consensus_diff; + { + char *diff_from_nt = NULL, *diff_to_nt = NULL; + size_t diff_from_nt_len, diff_to_nt_len; + + if (uncompress_or_copy(&diff_from_nt, &diff_from_nt_len, + job->diff_from) < 0) { + return WQ_RPL_REPLY; + } + if (uncompress_or_copy(&diff_to_nt, &diff_to_nt_len, + job->diff_to) < 0) { + tor_free(diff_from_nt); + return WQ_RPL_REPLY; + } + tor_assert(diff_from_nt); + tor_assert(diff_to_nt); + + // XXXX ugh; this is going to calculate the SHA3 of both its + // XXXX inputs again, even though we already have that. Maybe it's time + // XXXX to change the API here? + consensus_diff = consensus_diff_generate(diff_from_nt, diff_to_nt); + tor_free(diff_from_nt); + tor_free(diff_to_nt); + } + if (!consensus_diff) { + /* Couldn't generate consensus; we'll leave the reply blank. */ + return WQ_RPL_REPLY; + } + + /* Compress the results and send the reply */ + tor_assert(compress_diffs_with[0] == NO_METHOD); + size_t difflen = strlen(consensus_diff); + job->out[0].body = (uint8_t *) consensus_diff; + job->out[0].bodylen = difflen; + + config_line_t *common_labels = NULL; + if (lv_to_valid_until) + config_line_prepend(&common_labels, LABEL_VALID_UNTIL, lv_to_valid_until); + if (lv_to_fresh_until) + config_line_prepend(&common_labels, LABEL_FRESH_UNTIL, lv_to_fresh_until); + if (lv_to_signatories) + config_line_prepend(&common_labels, LABEL_SIGNATORIES, lv_to_signatories); + cdm_labels_prepend_sha3(&common_labels, + LABEL_SHA3_DIGEST_UNCOMPRESSED, + job->out[0].body, + job->out[0].bodylen); + config_line_prepend(&common_labels, LABEL_FROM_VALID_AFTER, + lv_from_valid_after); + config_line_prepend(&common_labels, LABEL_VALID_AFTER, + lv_to_valid_after); + config_line_prepend(&common_labels, LABEL_FLAVOR, lv_from_flavor); + config_line_prepend(&common_labels, LABEL_FROM_SHA3_DIGEST, + lv_from_digest); + config_line_prepend(&common_labels, LABEL_TARGET_SHA3_DIGEST, + lv_to_digest); + config_line_prepend(&common_labels, LABEL_DOCTYPE, + DOCTYPE_CONSENSUS_DIFF); + + job->out[0].labels = config_lines_dup(common_labels); + cdm_labels_prepend_sha3(&job->out[0].labels, + LABEL_SHA3_DIGEST, + job->out[0].body, + job->out[0].bodylen); + + compress_multiple(job->out+1, + n_diff_compression_methods()-1, + compress_diffs_with+1, + (const uint8_t*)consensus_diff, difflen, common_labels); + + config_free_lines(common_labels); + return WQ_RPL_REPLY; +} + +#define consensus_diff_worker_job_free(job) \ + FREE_AND_NULL(consensus_diff_worker_job_t, \ + consensus_diff_worker_job_free_, (job)) + +/** + * Helper: release all storage held in <b>job</b>. + */ +static void +consensus_diff_worker_job_free_(consensus_diff_worker_job_t *job) +{ + if (!job) + return; + unsigned u; + for (u = 0; u < n_diff_compression_methods(); ++u) { + config_free_lines(job->out[u].labels); + tor_free(job->out[u].body); + } + consensus_cache_entry_decref(job->diff_from); + consensus_cache_entry_decref(job->diff_to); + tor_free(job); +} + +/** + * Worker function: This function runs in the main thread, and receives + * a consensus_diff_worker_job_t that the worker thread has already + * processed. + */ +static void +consensus_diff_worker_replyfn(void *work_) +{ + tor_assert(in_main_thread()); + tor_assert(work_); + + consensus_diff_worker_job_t *job = work_; + + const char *lv_from_digest = + consensus_cache_entry_get_value(job->diff_from, + LABEL_SHA3_DIGEST_AS_SIGNED); + const char *lv_to_digest = + consensus_cache_entry_get_value(job->diff_to, + LABEL_SHA3_DIGEST_UNCOMPRESSED); + const char *lv_flavor = + consensus_cache_entry_get_value(job->diff_to, LABEL_FLAVOR); + if (BUG(lv_from_digest == NULL)) + lv_from_digest = "???"; // LCOV_EXCL_LINE + if (BUG(lv_to_digest == NULL)) + lv_to_digest = "???"; // LCOV_EXCL_LINE + + uint8_t from_sha3[DIGEST256_LEN]; + uint8_t to_sha3[DIGEST256_LEN]; + int flav = -1; + int cache = 1; + if (BUG(cdm_entry_get_sha3_value(from_sha3, job->diff_from, + LABEL_SHA3_DIGEST_AS_SIGNED) < 0)) + cache = 0; + if (BUG(cdm_entry_get_sha3_value(to_sha3, job->diff_to, + LABEL_SHA3_DIGEST_UNCOMPRESSED) < 0)) + cache = 0; + if (BUG(lv_flavor == NULL)) { + cache = 0; + } else if ((flav = networkstatus_parse_flavor_name(lv_flavor)) < 0) { + cache = 0; + } + + consensus_cache_entry_handle_t *handles[ARRAY_LENGTH(compress_diffs_with)]; + memset(handles, 0, sizeof(handles)); + + char description[128]; + tor_snprintf(description, sizeof(description), + "consensus diff from %s to %s", + lv_from_digest, lv_to_digest); + + int status = store_multiple(handles, + n_diff_compression_methods(), + compress_diffs_with, + job->out, + description); + + if (status != CDM_DIFF_PRESENT) { + /* Failure! Nothing to do but complain */ + log_warn(LD_DIRSERV, + "Worker was unable to compute consensus diff " + "from %s to %s", lv_from_digest, lv_to_digest); + /* Cache this error so we don't try to compute this one again. */ + status = CDM_DIFF_ERROR; + } + + unsigned u; + for (u = 0; u < ARRAY_LENGTH(handles); ++u) { + compress_method_t method = compress_diffs_with[u]; + if (cache) { + consensus_cache_entry_handle_t *h = handles[u]; + int this_status = status; + if (h == NULL) { + this_status = CDM_DIFF_ERROR; + } + tor_assert_nonfatal(h != NULL || this_status == CDM_DIFF_ERROR); + cdm_diff_ht_set_status(flav, from_sha3, to_sha3, method, this_status, h); + } else { + consensus_cache_entry_handle_free(handles[u]); + } + } + + consensus_diff_worker_job_free(job); +} + +/** + * Queue the job of computing the diff from <b>diff_from</b> to <b>diff_to</b> + * in a worker thread. + */ +static int +consensus_diff_queue_diff_work(consensus_cache_entry_t *diff_from, + consensus_cache_entry_t *diff_to) +{ + tor_assert(in_main_thread()); + + consensus_cache_entry_incref(diff_from); + consensus_cache_entry_incref(diff_to); + + consensus_diff_worker_job_t *job = tor_malloc_zero(sizeof(*job)); + job->diff_from = diff_from; + job->diff_to = diff_to; + + /* Make sure body is mapped. */ + const uint8_t *body; + size_t bodylen; + int r1 = consensus_cache_entry_get_body(diff_from, &body, &bodylen); + int r2 = consensus_cache_entry_get_body(diff_to, &body, &bodylen); + if (r1 < 0 || r2 < 0) + goto err; + + workqueue_entry_t *work; + work = cpuworker_queue_work(WQ_PRI_LOW, + consensus_diff_worker_threadfn, + consensus_diff_worker_replyfn, + job); + if (!work) + goto err; + + return 0; + err: + consensus_diff_worker_job_free(job); // includes decrefs. + return -1; +} + +/** + * Holds requests and replies for consensus_compress_workers. + */ +typedef struct consensus_compress_worker_job_t { + char *consensus; + size_t consensus_len; + consensus_flavor_t flavor; + config_line_t *labels_in; + compressed_result_t out[ARRAY_LENGTH(compress_consensus_with)]; +} consensus_compress_worker_job_t; + +#define consensus_compress_worker_job_free(job) \ + FREE_AND_NULL(consensus_compress_worker_job_t, \ + consensus_compress_worker_job_free_, (job)) + +/** + * Free all resources held in <b>job</b> + */ +static void +consensus_compress_worker_job_free_(consensus_compress_worker_job_t *job) +{ + if (!job) + return; + tor_free(job->consensus); + config_free_lines(job->labels_in); + unsigned u; + for (u = 0; u < n_consensus_compression_methods(); ++u) { + config_free_lines(job->out[u].labels); + tor_free(job->out[u].body); + } + tor_free(job); +} +/** + * Worker function. This function runs inside a worker thread and receives + * a consensus_compress_worker_job_t as its input. + */ +static workqueue_reply_t +consensus_compress_worker_threadfn(void *state_, void *work_) +{ + (void)state_; + consensus_compress_worker_job_t *job = work_; + consensus_flavor_t flavor = job->flavor; + const char *consensus = job->consensus; + size_t bodylen = job->consensus_len; + + config_line_t *labels = config_lines_dup(job->labels_in); + const char *flavname = networkstatus_get_flavor_name(flavor); + + cdm_labels_prepend_sha3(&labels, LABEL_SHA3_DIGEST_UNCOMPRESSED, + (const uint8_t *)consensus, bodylen); + { + const char *start, *end; + if (router_get_networkstatus_v3_signed_boundaries(consensus, + &start, &end) < 0) { + start = consensus; + end = consensus+bodylen; + } + cdm_labels_prepend_sha3(&labels, LABEL_SHA3_DIGEST_AS_SIGNED, + (const uint8_t *)start, + end - start); + } + config_line_prepend(&labels, LABEL_FLAVOR, flavname); + config_line_prepend(&labels, LABEL_DOCTYPE, DOCTYPE_CONSENSUS); + + compress_multiple(job->out, + n_consensus_compression_methods(), + compress_consensus_with, + (const uint8_t*)consensus, bodylen, labels); + config_free_lines(labels); + return WQ_RPL_REPLY; +} + +/** + * Worker function: This function runs in the main thread, and receives + * a consensus_diff_compress_job_t that the worker thread has already + * processed. + */ +static void +consensus_compress_worker_replyfn(void *work_) +{ + consensus_compress_worker_job_t *job = work_; + + consensus_cache_entry_handle_t *handles[ + ARRAY_LENGTH(compress_consensus_with)]; + memset(handles, 0, sizeof(handles)); + + store_multiple(handles, + n_consensus_compression_methods(), + compress_consensus_with, + job->out, + "consensus"); + mark_cdm_cache_dirty(); + + unsigned u; + consensus_flavor_t f = job->flavor; + tor_assert((int)f < N_CONSENSUS_FLAVORS); + for (u = 0; u < ARRAY_LENGTH(handles); ++u) { + if (handles[u] == NULL) + continue; + consensus_cache_entry_handle_free(latest_consensus[f][u]); + latest_consensus[f][u] = handles[u]; + } + + consensus_compress_worker_job_free(job); +} + +/** + * If true, we compress in worker threads. + */ +static int background_compression = 0; + +/** + * Queue a job to compress <b>consensus</b> and store its compressed + * text in the cache. + */ +static int +consensus_queue_compression_work(const char *consensus, + const networkstatus_t *as_parsed) +{ + tor_assert(consensus); + tor_assert(as_parsed); + + consensus_compress_worker_job_t *job = tor_malloc_zero(sizeof(*job)); + job->consensus = tor_strdup(consensus); + job->consensus_len = strlen(consensus); + job->flavor = as_parsed->flavor; + + char va_str[ISO_TIME_LEN+1]; + char vu_str[ISO_TIME_LEN+1]; + char fu_str[ISO_TIME_LEN+1]; + format_iso_time_nospace(va_str, as_parsed->valid_after); + format_iso_time_nospace(fu_str, as_parsed->fresh_until); + format_iso_time_nospace(vu_str, as_parsed->valid_until); + config_line_append(&job->labels_in, LABEL_VALID_AFTER, va_str); + config_line_append(&job->labels_in, LABEL_FRESH_UNTIL, fu_str); + config_line_append(&job->labels_in, LABEL_VALID_UNTIL, vu_str); + if (as_parsed->voters) { + smartlist_t *hexvoters = smartlist_new(); + SMARTLIST_FOREACH_BEGIN(as_parsed->voters, + networkstatus_voter_info_t *, vi) { + if (smartlist_len(vi->sigs) == 0) + continue; // didn't sign. + char d[HEX_DIGEST_LEN+1]; + base16_encode(d, sizeof(d), vi->identity_digest, DIGEST_LEN); + smartlist_add_strdup(hexvoters, d); + } SMARTLIST_FOREACH_END(vi); + char *signers = smartlist_join_strings(hexvoters, ",", 0, NULL); + config_line_prepend(&job->labels_in, LABEL_SIGNATORIES, signers); + tor_free(signers); + SMARTLIST_FOREACH(hexvoters, char *, cp, tor_free(cp)); + smartlist_free(hexvoters); + } + + if (background_compression) { + workqueue_entry_t *work; + work = cpuworker_queue_work(WQ_PRI_LOW, + consensus_compress_worker_threadfn, + consensus_compress_worker_replyfn, + job); + if (!work) { + consensus_compress_worker_job_free(job); + return -1; + } + + return 0; + } else { + consensus_compress_worker_threadfn(NULL, job); + consensus_compress_worker_replyfn(job); + return 0; + } +} + +/** + * Tell the consdiffmgr backend to compress consensuses in worker threads. + */ +void +consdiffmgr_enable_background_compression(void) +{ + // This isn't the default behavior because it would break unit tests. + background_compression = 1; +} + +/** Read the set of voters from the cached object <b>ent</b> into + * <b>out</b>, as a list of hex-encoded digests. Return 0 on success, + * -1 if no signatories were recorded. */ +int +consensus_cache_entry_get_voter_id_digests(const consensus_cache_entry_t *ent, + smartlist_t *out) +{ + tor_assert(ent); + tor_assert(out); + const char *s; + s = consensus_cache_entry_get_value(ent, LABEL_SIGNATORIES); + if (s == NULL) + return -1; + smartlist_split_string(out, s, ",", SPLIT_SKIP_SPACE|SPLIT_STRIP_SPACE, 0); + return 0; +} + +/** Read the fresh-until time of cached object <b>ent</b> into *<b>out</b> + * and return 0, or return -1 if no such time was recorded. */ +int +consensus_cache_entry_get_fresh_until(const consensus_cache_entry_t *ent, + time_t *out) +{ + tor_assert(ent); + tor_assert(out); + const char *s; + s = consensus_cache_entry_get_value(ent, LABEL_FRESH_UNTIL); + if (s == NULL || parse_iso_time_nospace(s, out) < 0) + return -1; + else + return 0; +} + +/** Read the valid until timestamp from the cached object <b>ent</b> into + * *<b>out</b> and return 0, or return -1 if no such time was recorded. */ +int +consensus_cache_entry_get_valid_until(const consensus_cache_entry_t *ent, + time_t *out) +{ + tor_assert(ent); + tor_assert(out); + + const char *s; + s = consensus_cache_entry_get_value(ent, LABEL_VALID_UNTIL); + if (s == NULL || parse_iso_time_nospace(s, out) < 0) + return -1; + else + return 0; +} + +/** Read the valid after timestamp from the cached object <b>ent</b> into + * *<b>out</b> and return 0, or return -1 if no such time was recorded. */ +int +consensus_cache_entry_get_valid_after(const consensus_cache_entry_t *ent, + time_t *out) +{ + tor_assert(ent); + tor_assert(out); + + const char *s; + s = consensus_cache_entry_get_value(ent, LABEL_VALID_AFTER); + + if (s == NULL || parse_iso_time_nospace(s, out) < 0) + return -1; + else + return 0; +} diff --git a/src/feature/dircache/consdiffmgr.h b/src/feature/dircache/consdiffmgr.h new file mode 100644 index 0000000000..66c3d65002 --- /dev/null +++ b/src/feature/dircache/consdiffmgr.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2017-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_CONSDIFFMGR_H +#define TOR_CONSDIFFMGR_H + +enum compress_method_t; + +/** + * Possible outcomes from trying to look up a given consensus diff. + */ +typedef enum consdiff_status_t { + CONSDIFF_AVAILABLE, + CONSDIFF_NOT_FOUND, + CONSDIFF_IN_PROGRESS, +} consdiff_status_t; + +typedef struct consdiff_cfg_t { + int32_t cache_max_num; +} consdiff_cfg_t; + +struct consensus_cache_entry_t; // from conscache.h + +int consdiffmgr_add_consensus(const char *consensus, + const networkstatus_t *as_parsed); + +consdiff_status_t consdiffmgr_find_consensus( + struct consensus_cache_entry_t **entry_out, + consensus_flavor_t flavor, + enum compress_method_t method); + +consdiff_status_t consdiffmgr_find_diff_from( + struct consensus_cache_entry_t **entry_out, + consensus_flavor_t flavor, + int digest_type, + const uint8_t *digest, + size_t digestlen, + enum compress_method_t method); + +int consensus_cache_entry_get_voter_id_digests( + const struct consensus_cache_entry_t *ent, + smartlist_t *out); +int consensus_cache_entry_get_fresh_until( + const struct consensus_cache_entry_t *ent, + time_t *out); +int consensus_cache_entry_get_valid_until( + const struct consensus_cache_entry_t *ent, + time_t *out); +int consensus_cache_entry_get_valid_after( + const struct consensus_cache_entry_t *ent, + time_t *out); + +void consdiffmgr_rescan(void); +int consdiffmgr_cleanup(void); +void consdiffmgr_enable_background_compression(void); +void consdiffmgr_configure(const consdiff_cfg_t *cfg); +struct sandbox_cfg_elem; +int consdiffmgr_register_with_sandbox(struct sandbox_cfg_elem **cfg); +void consdiffmgr_free_all(void); +int consdiffmgr_validate(void); + +#ifdef CONSDIFFMGR_PRIVATE +STATIC unsigned n_diff_compression_methods(void); +STATIC unsigned n_consensus_compression_methods(void); +STATIC consensus_cache_t *cdm_cache_get(void); +STATIC consensus_cache_entry_t *cdm_cache_lookup_consensus( + consensus_flavor_t flavor, time_t valid_after); +STATIC int cdm_entry_get_sha3_value(uint8_t *digest_out, + consensus_cache_entry_t *ent, + const char *label); +STATIC int uncompress_or_copy(char **out, size_t *outlen, + consensus_cache_entry_t *ent); +#endif /* defined(CONSDIFFMGR_PRIVATE) */ + +#endif /* !defined(TOR_CONSDIFFMGR_H) */ diff --git a/src/feature/dircache/directory.c b/src/feature/dircache/directory.c new file mode 100644 index 0000000000..a723176185 --- /dev/null +++ b/src/feature/dircache/directory.c @@ -0,0 +1,5966 @@ +/* Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#define DIRECTORY_PRIVATE + +#include "core/or/or.h" +#include "lib/err/backtrace.h" +#include "feature/client/bridges.h" +#include "lib/container/buffers.h" +#include "core/or/circuitbuild.h" +#include "app/config/config.h" +#include "core/mainloop/connection.h" +#include "core/or/connection_edge.h" +#include "feature/dircache/conscache.h" +#include "feature/dircommon/consdiff.h" +#include "feature/dircache/consdiffmgr.h" +#include "feature/control/control.h" +#include "lib/compress/compress.h" +#include "lib/crypt_ops/crypto_rand.h" +#include "lib/crypt_ops/crypto_util.h" +#include "feature/dircache/directory.h" +#include "feature/dircache/dirserv.h" +#include "feature/client/entrynodes.h" +#include "feature/dircommon/fp_pair.h" +#include "feature/stats/geoip.h" +#include "feature/hs/hs_cache.h" +#include "feature/hs/hs_common.h" +#include "feature/hs/hs_control.h" +#include "feature/hs/hs_client.h" +#include "core/mainloop/main.h" +#include "feature/nodelist/microdesc.h" +#include "feature/nodelist/networkstatus.h" +#include "feature/nodelist/nodelist.h" +#include "core/or/policies.h" +#include "core/or/relay.h" +#include "feature/rend/rendclient.h" +#include "feature/rend/rendcommon.h" +#include "feature/rend/rendservice.h" +#include "feature/stats/rephist.h" +#include "feature/relay/router.h" +#include "feature/nodelist/routerlist.h" +#include "feature/nodelist/routerparse.h" +#include "feature/nodelist/routerset.h" +#include "lib/encoding/confline.h" +#include "lib/crypt_ops/crypto_format.h" + +#if defined(EXPORTMALLINFO) && defined(HAVE_MALLOC_H) && defined(HAVE_MALLINFO) +#if !defined(OpenBSD) +#include <malloc.h> +#endif +#endif + +#include "feature/dirauth/dirvote.h" +#include "feature/dirauth/mode.h" +#include "feature/dirauth/shared_random.h" + +#include "feature/nodelist/authority_cert_st.h" +#include "feature/dircache/cached_dir_st.h" +#include "feature/dircommon/dir_connection_st.h" +#include "feature/dirclient/dir_server_st.h" +#include "core/or/entry_connection_st.h" +#include "feature/nodelist/networkstatus_st.h" +#include "feature/nodelist/node_st.h" +#include "feature/rend/rend_service_descriptor_st.h" +#include "feature/nodelist/routerinfo_st.h" + +/** + * \file directory.c + * \brief Code to send and fetch information from directory authorities and + * caches via HTTP. + * + * Directory caches and authorities use dirserv.c to generate the results of a + * query and stream them to the connection; clients use routerparse.c to parse + * them. + * + * Every directory request has a dir_connection_t on the client side and on + * the server side. In most cases, the dir_connection_t object is a linked + * connection, tunneled through an edge_connection_t so that it can be a + * stream on the Tor network. The only non-tunneled connections are those + * that are used to upload material (descriptors and votes) to authorities. + * Among tunneled connections, some use one-hop circuits, and others use + * multi-hop circuits for anonymity. + * + * Directory requests are launched by calling + * directory_initiate_request(). This + * launch the connection, will construct an HTTP request with + * directory_send_command(), send the and wait for a response. The client + * later handles the response with connection_dir_client_reached_eof(), + * which passes the information received to another part of Tor. + * + * On the server side, requests are read in directory_handle_command(), + * which dispatches first on the request type (GET or POST), and then on + * the URL requested. GET requests are processed with a table-based + * dispatcher in url_table[]. The process of handling larger GET requests + * is complicated because we need to avoid allocating a copy of all the + * data to be sent to the client in one huge buffer. Instead, we spool the + * data into the buffer using logic in connection_dirserv_flushed_some() in + * dirserv.c. (TODO: If we extended buf.c to have a zero-copy + * reference-based buffer type, we could remove most of that code, at the + * cost of a bit more reference counting.) + **/ + +/* In-points to directory.c: + * + * - directory_post_to_dirservers(), called from + * router_upload_dir_desc_to_dirservers() in router.c + * upload_service_descriptor() in rendservice.c + * - directory_get_from_dirserver(), called from + * rend_client_refetch_renddesc() in rendclient.c + * run_scheduled_events() in main.c + * do_hup() in main.c + * - connection_dir_process_inbuf(), called from + * connection_process_inbuf() in connection.c + * - connection_dir_finished_flushing(), called from + * connection_finished_flushing() in connection.c + * - connection_dir_finished_connecting(), called from + * connection_finished_connecting() in connection.c + */ +static void directory_send_command(dir_connection_t *conn, + int direct, + const directory_request_t *request); +static int body_is_plausible(const char *body, size_t body_len, int purpose); +static void http_set_address_origin(const char *headers, connection_t *conn); +static void connection_dir_download_routerdesc_failed(dir_connection_t *conn); +static void connection_dir_bridge_routerdesc_failed(dir_connection_t *conn); +static void connection_dir_download_cert_failed( + dir_connection_t *conn, int status_code); +static void connection_dir_retry_bridges(smartlist_t *descs); +static void dir_routerdesc_download_failed(smartlist_t *failed, + int status_code, + int router_purpose, + int was_extrainfo, + int was_descriptor_digests); +static void dir_microdesc_download_failed(smartlist_t *failed, + int status_code, + const char *dir_id); +static int client_likes_consensus(const struct consensus_cache_entry_t *ent, + const char *want_url); + +static void connection_dir_close_consensus_fetches( + dir_connection_t *except_this_one, const char *resource); + +/********* START VARIABLES **********/ + +/** Maximum size, in bytes, for resized buffers. */ +#define MAX_BUF_SIZE ((1<<24)-1) /* 16MB-1 */ +/** Maximum size, in bytes, for any directory object that we've downloaded. */ +#define MAX_DIR_DL_SIZE MAX_BUF_SIZE + +/** Maximum size, in bytes, for any directory object that we're accepting + * as an upload. */ +#define MAX_DIR_UL_SIZE MAX_BUF_SIZE + +/** How far in the future do we allow a directory server to tell us it is + * before deciding that one of us has the wrong time? */ +#define ALLOW_DIRECTORY_TIME_SKEW (30*60) + +#define X_ADDRESS_HEADER "X-Your-Address-Is: " +#define X_OR_DIFF_FROM_CONSENSUS_HEADER "X-Or-Diff-From-Consensus: " + +/** HTTP cache control: how long do we tell proxies they can cache each + * kind of document we serve? */ +#define FULL_DIR_CACHE_LIFETIME (60*60) +#define RUNNINGROUTERS_CACHE_LIFETIME (20*60) +#define DIRPORTFRONTPAGE_CACHE_LIFETIME (20*60) +#define NETWORKSTATUS_CACHE_LIFETIME (5*60) +#define ROUTERDESC_CACHE_LIFETIME (30*60) +#define ROUTERDESC_BY_DIGEST_CACHE_LIFETIME (48*60*60) +#define ROBOTS_CACHE_LIFETIME (24*60*60) +#define MICRODESC_CACHE_LIFETIME (48*60*60) + +/********* END VARIABLES ************/ + +/** Convert a connection_t* to a dir_connection_t*; assert if the cast is + * invalid. */ +dir_connection_t * +TO_DIR_CONN(connection_t *c) +{ + tor_assert(c->magic == DIR_CONNECTION_MAGIC); + return DOWNCAST(dir_connection_t, c); +} + +/** Return false if the directory purpose <b>dir_purpose</b> + * does not require an anonymous (three-hop) connection. + * + * Return true 1) by default, 2) if all directory actions have + * specifically been configured to be over an anonymous connection, + * or 3) if the router is a bridge */ +int +purpose_needs_anonymity(uint8_t dir_purpose, uint8_t router_purpose, + const char *resource) +{ + if (get_options()->AllDirActionsPrivate) + return 1; + + if (router_purpose == ROUTER_PURPOSE_BRIDGE) { + if (dir_purpose == DIR_PURPOSE_FETCH_SERVERDESC + && resource && !strcmp(resource, "authority.z")) { + /* We are asking a bridge for its own descriptor. That doesn't need + anonymity. */ + return 0; + } + /* Assume all other bridge stuff needs anonymity. */ + return 1; /* if no circuits yet, this might break bootstrapping, but it's + * needed to be safe. */ + } + + switch (dir_purpose) + { + case DIR_PURPOSE_UPLOAD_DIR: + case DIR_PURPOSE_UPLOAD_VOTE: + case DIR_PURPOSE_UPLOAD_SIGNATURES: + case DIR_PURPOSE_FETCH_STATUS_VOTE: + case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: + case DIR_PURPOSE_FETCH_CONSENSUS: + case DIR_PURPOSE_FETCH_CERTIFICATE: + case DIR_PURPOSE_FETCH_SERVERDESC: + case DIR_PURPOSE_FETCH_EXTRAINFO: + case DIR_PURPOSE_FETCH_MICRODESC: + return 0; + case DIR_PURPOSE_HAS_FETCHED_HSDESC: + case DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2: + case DIR_PURPOSE_UPLOAD_RENDDESC_V2: + case DIR_PURPOSE_FETCH_RENDDESC_V2: + case DIR_PURPOSE_FETCH_HSDESC: + case DIR_PURPOSE_UPLOAD_HSDESC: + return 1; + case DIR_PURPOSE_SERVER: + default: + log_warn(LD_BUG, "Called with dir_purpose=%d, router_purpose=%d", + dir_purpose, router_purpose); + tor_assert_nonfatal_unreached(); + return 1; /* Assume it needs anonymity; better safe than sorry. */ + } +} + +/** Return a newly allocated string describing <b>auth</b>. Only describes + * authority features. */ +STATIC char * +authdir_type_to_string(dirinfo_type_t auth) +{ + char *result; + smartlist_t *lst = smartlist_new(); + if (auth & V3_DIRINFO) + smartlist_add(lst, (void*)"V3"); + if (auth & BRIDGE_DIRINFO) + smartlist_add(lst, (void*)"Bridge"); + if (smartlist_len(lst)) { + result = smartlist_join_strings(lst, ", ", 0, NULL); + } else { + result = tor_strdup("[Not an authority]"); + } + smartlist_free(lst); + return result; +} + +/** Return a string describing a given directory connection purpose. */ +STATIC const char * +dir_conn_purpose_to_string(int purpose) +{ + switch (purpose) + { + case DIR_PURPOSE_UPLOAD_DIR: + return "server descriptor upload"; + case DIR_PURPOSE_UPLOAD_VOTE: + return "server vote upload"; + case DIR_PURPOSE_UPLOAD_SIGNATURES: + return "consensus signature upload"; + case DIR_PURPOSE_FETCH_SERVERDESC: + return "server descriptor fetch"; + case DIR_PURPOSE_FETCH_EXTRAINFO: + return "extra-info fetch"; + case DIR_PURPOSE_FETCH_CONSENSUS: + return "consensus network-status fetch"; + case DIR_PURPOSE_FETCH_CERTIFICATE: + return "authority cert fetch"; + case DIR_PURPOSE_FETCH_STATUS_VOTE: + return "status vote fetch"; + case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: + return "consensus signature fetch"; + case DIR_PURPOSE_FETCH_RENDDESC_V2: + return "hidden-service v2 descriptor fetch"; + case DIR_PURPOSE_UPLOAD_RENDDESC_V2: + return "hidden-service v2 descriptor upload"; + case DIR_PURPOSE_FETCH_HSDESC: + return "hidden-service descriptor fetch"; + case DIR_PURPOSE_UPLOAD_HSDESC: + return "hidden-service descriptor upload"; + case DIR_PURPOSE_FETCH_MICRODESC: + return "microdescriptor fetch"; + } + + log_warn(LD_BUG, "Called with unknown purpose %d", purpose); + return "(unknown)"; +} + +/** Return the requisite directory information types. */ +STATIC dirinfo_type_t +dir_fetch_type(int dir_purpose, int router_purpose, const char *resource) +{ + dirinfo_type_t type; + switch (dir_purpose) { + case DIR_PURPOSE_FETCH_EXTRAINFO: + type = EXTRAINFO_DIRINFO; + if (router_purpose == ROUTER_PURPOSE_BRIDGE) + type |= BRIDGE_DIRINFO; + else + type |= V3_DIRINFO; + break; + case DIR_PURPOSE_FETCH_SERVERDESC: + if (router_purpose == ROUTER_PURPOSE_BRIDGE) + type = BRIDGE_DIRINFO; + else + type = V3_DIRINFO; + break; + case DIR_PURPOSE_FETCH_STATUS_VOTE: + case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: + case DIR_PURPOSE_FETCH_CERTIFICATE: + type = V3_DIRINFO; + break; + case DIR_PURPOSE_FETCH_CONSENSUS: + type = V3_DIRINFO; + if (resource && !strcmp(resource, "microdesc")) + type |= MICRODESC_DIRINFO; + break; + case DIR_PURPOSE_FETCH_MICRODESC: + type = MICRODESC_DIRINFO; + break; + default: + log_warn(LD_BUG, "Unexpected purpose %d", (int)dir_purpose); + type = NO_DIRINFO; + break; + } + return type; +} + +/** Return true iff <b>identity_digest</b> is the digest of a router which + * says that it caches extrainfos. (If <b>is_authority</b> we always + * believe that to be true.) */ +int +router_supports_extrainfo(const char *identity_digest, int is_authority) +{ + const node_t *node = node_get_by_id(identity_digest); + + if (node && node->ri) { + if (node->ri->caches_extra_info) + return 1; + } + if (is_authority) { + return 1; + } + return 0; +} + +/** Return true iff any trusted directory authority has accepted our + * server descriptor. + * + * We consider any authority sufficient because waiting for all of + * them means it never happens while any authority is down; we don't + * go for something more complex in the middle (like \>1/3 or \>1/2 or + * \>=1/2) because that doesn't seem necessary yet. + */ +int +directories_have_accepted_server_descriptor(void) +{ + const smartlist_t *servers = router_get_trusted_dir_servers(); + const or_options_t *options = get_options(); + SMARTLIST_FOREACH(servers, dir_server_t *, d, { + if ((d->type & options->PublishServerDescriptor_) && + d->has_accepted_serverdesc) { + return 1; + } + }); + return 0; +} + +/** Start a connection to every suitable directory authority, using + * connection purpose <b>dir_purpose</b> and uploading <b>payload</b> + * (of length <b>payload_len</b>). The dir_purpose should be one of + * 'DIR_PURPOSE_UPLOAD_{DIR|VOTE|SIGNATURES}'. + * + * <b>router_purpose</b> describes the type of descriptor we're + * publishing, if we're publishing a descriptor -- e.g. general or bridge. + * + * <b>type</b> specifies what sort of dir authorities (V3, + * BRIDGE, etc) we should upload to. + * + * If <b>extrainfo_len</b> is nonzero, the first <b>payload_len</b> bytes of + * <b>payload</b> hold a router descriptor, and the next <b>extrainfo_len</b> + * bytes of <b>payload</b> hold an extra-info document. Upload the descriptor + * to all authorities, and the extra-info document to all authorities that + * support it. + */ +void +directory_post_to_dirservers(uint8_t dir_purpose, uint8_t router_purpose, + dirinfo_type_t type, + const char *payload, + size_t payload_len, size_t extrainfo_len) +{ + const or_options_t *options = get_options(); + dir_indirection_t indirection; + const smartlist_t *dirservers = router_get_trusted_dir_servers(); + int found = 0; + const int exclude_self = (dir_purpose == DIR_PURPOSE_UPLOAD_VOTE || + dir_purpose == DIR_PURPOSE_UPLOAD_SIGNATURES); + tor_assert(dirservers); + /* This tries dirservers which we believe to be down, but ultimately, that's + * harmless, and we may as well err on the side of getting things uploaded. + */ + SMARTLIST_FOREACH_BEGIN(dirservers, dir_server_t *, ds) { + routerstatus_t *rs = &(ds->fake_status); + size_t upload_len = payload_len; + + if ((type & ds->type) == 0) + continue; + + if (exclude_self && router_digest_is_me(ds->digest)) { + /* we don't upload to ourselves, but at least there's now at least + * one authority of this type that has what we wanted to upload. */ + found = 1; + continue; + } + + if (options->StrictNodes && + routerset_contains_routerstatus(options->ExcludeNodes, rs, -1)) { + log_warn(LD_DIR, "Wanted to contact authority '%s' for %s, but " + "it's in our ExcludedNodes list and StrictNodes is set. " + "Skipping.", + ds->nickname, + dir_conn_purpose_to_string(dir_purpose)); + continue; + } + + found = 1; /* at least one authority of this type was listed */ + if (dir_purpose == DIR_PURPOSE_UPLOAD_DIR) + ds->has_accepted_serverdesc = 0; + + if (extrainfo_len && router_supports_extrainfo(ds->digest, 1)) { + upload_len += extrainfo_len; + log_info(LD_DIR, "Uploading an extrainfo too (length %d)", + (int) extrainfo_len); + } + if (purpose_needs_anonymity(dir_purpose, router_purpose, NULL)) { + indirection = DIRIND_ANONYMOUS; + } else if (!fascist_firewall_allows_dir_server(ds, + FIREWALL_DIR_CONNECTION, + 0)) { + if (fascist_firewall_allows_dir_server(ds, FIREWALL_OR_CONNECTION, 0)) + indirection = DIRIND_ONEHOP; + else + indirection = DIRIND_ANONYMOUS; + } else { + indirection = DIRIND_DIRECT_CONN; + } + + directory_request_t *req = directory_request_new(dir_purpose); + directory_request_set_routerstatus(req, rs); + directory_request_set_router_purpose(req, router_purpose); + directory_request_set_indirection(req, indirection); + directory_request_set_payload(req, payload, upload_len); + directory_initiate_request(req); + directory_request_free(req); + } SMARTLIST_FOREACH_END(ds); + if (!found) { + char *s = authdir_type_to_string(type); + log_warn(LD_DIR, "Publishing server descriptor to directory authorities " + "of type '%s', but no authorities of that type listed!", s); + tor_free(s); + } +} + +/** Return true iff, according to the values in <b>options</b>, we should be + * using directory guards for direct downloads of directory information. */ +STATIC int +should_use_directory_guards(const or_options_t *options) +{ + /* Public (non-bridge) servers never use directory guards. */ + if (public_server_mode(options)) + return 0; + /* If guards are disabled, we can't use directory guards. + */ + if (!options->UseEntryGuards) + return 0; + /* If we're configured to fetch directory info aggressively or of a + * nonstandard type, don't use directory guards. */ + if (options->DownloadExtraInfo || options->FetchDirInfoEarly || + options->FetchDirInfoExtraEarly || options->FetchUselessDescriptors) + return 0; + return 1; +} + +/** Pick an unconstrained directory server from among our guards, the latest + * networkstatus, or the fallback dirservers, for use in downloading + * information of type <b>type</b>, and return its routerstatus. */ +static const routerstatus_t * +directory_pick_generic_dirserver(dirinfo_type_t type, int pds_flags, + uint8_t dir_purpose, + circuit_guard_state_t **guard_state_out) +{ + const routerstatus_t *rs = NULL; + const or_options_t *options = get_options(); + + if (options->UseBridges) + log_warn(LD_BUG, "Called when we have UseBridges set."); + + if (should_use_directory_guards(options)) { + const node_t *node = guards_choose_dirguard(dir_purpose, guard_state_out); + if (node) + rs = node->rs; + } else { + /* anybody with a non-zero dirport will do */ + rs = router_pick_directory_server(type, pds_flags); + } + if (!rs) { + log_info(LD_DIR, "No router found for %s; falling back to " + "dirserver list.", dir_conn_purpose_to_string(dir_purpose)); + rs = router_pick_fallback_dirserver(type, pds_flags); + } + + return rs; +} + +/** + * Set the extra fields in <b>req</b> that are used when requesting a + * consensus of type <b>resource</b>. + * + * Right now, these fields are if-modified-since and x-or-diff-from-consensus. + */ +static void +dir_consensus_request_set_additional_headers(directory_request_t *req, + const char *resource) +{ + time_t if_modified_since = 0; + uint8_t or_diff_from[DIGEST256_LEN]; + int or_diff_from_is_set = 0; + + /* DEFAULT_IF_MODIFIED_SINCE_DELAY is 1/20 of the default consensus + * period of 1 hour. + */ + const int DEFAULT_IF_MODIFIED_SINCE_DELAY = 180; + const int32_t DEFAULT_TRY_DIFF_FOR_CONSENSUS_NEWER = 72; + const int32_t MIN_TRY_DIFF_FOR_CONSENSUS_NEWER = 0; + const int32_t MAX_TRY_DIFF_FOR_CONSENSUS_NEWER = 8192; + const char TRY_DIFF_FOR_CONSENSUS_NEWER_NAME[] = + "try-diff-for-consensus-newer-than"; + + int flav = FLAV_NS; + if (resource) + flav = networkstatus_parse_flavor_name(resource); + + int32_t max_age_for_diff = 3600 * + networkstatus_get_param(NULL, + TRY_DIFF_FOR_CONSENSUS_NEWER_NAME, + DEFAULT_TRY_DIFF_FOR_CONSENSUS_NEWER, + MIN_TRY_DIFF_FOR_CONSENSUS_NEWER, + MAX_TRY_DIFF_FOR_CONSENSUS_NEWER); + + if (flav != -1) { + /* IF we have a parsed consensus of this type, we can do an + * if-modified-time based on it. */ + networkstatus_t *v; + v = networkstatus_get_latest_consensus_by_flavor(flav); + if (v) { + /* In networks with particularly short V3AuthVotingIntervals, + * ask for the consensus if it's been modified since half the + * V3AuthVotingInterval of the most recent consensus. */ + time_t ims_delay = DEFAULT_IF_MODIFIED_SINCE_DELAY; + if (v->fresh_until > v->valid_after + && ims_delay > (v->fresh_until - v->valid_after)/2) { + ims_delay = (v->fresh_until - v->valid_after)/2; + } + if_modified_since = v->valid_after + ims_delay; + if (v->valid_after >= approx_time() - max_age_for_diff) { + memcpy(or_diff_from, v->digest_sha3_as_signed, DIGEST256_LEN); + or_diff_from_is_set = 1; + } + } + } else { + /* Otherwise it might be a consensus we don't parse, but which we + * do cache. Look at the cached copy, perhaps. */ + cached_dir_t *cd = dirserv_get_consensus(resource); + /* We have no method of determining the voting interval from an + * unparsed consensus, so we use the default. */ + if (cd) { + if_modified_since = cd->published + DEFAULT_IF_MODIFIED_SINCE_DELAY; + if (cd->published >= approx_time() - max_age_for_diff) { + memcpy(or_diff_from, cd->digest_sha3_as_signed, DIGEST256_LEN); + or_diff_from_is_set = 1; + } + } + } + + if (if_modified_since > 0) + directory_request_set_if_modified_since(req, if_modified_since); + if (or_diff_from_is_set) { + char hex[HEX_DIGEST256_LEN + 1]; + base16_encode(hex, sizeof(hex), + (const char*)or_diff_from, sizeof(or_diff_from)); + directory_request_add_header(req, X_OR_DIFF_FROM_CONSENSUS_HEADER, hex); + } +} + +/** Start a connection to a random running directory server, using + * connection purpose <b>dir_purpose</b>, intending to fetch descriptors + * of purpose <b>router_purpose</b>, and requesting <b>resource</b>. + * Use <b>pds_flags</b> as arguments to router_pick_directory_server() + * or router_pick_trusteddirserver(). + */ +MOCK_IMPL(void, +directory_get_from_dirserver,( + uint8_t dir_purpose, + uint8_t router_purpose, + const char *resource, + int pds_flags, + download_want_authority_t want_authority)) +{ + const routerstatus_t *rs = NULL; + const or_options_t *options = get_options(); + int prefer_authority = (directory_fetches_from_authorities(options) + || want_authority == DL_WANT_AUTHORITY); + int require_authority = 0; + int get_via_tor = purpose_needs_anonymity(dir_purpose, router_purpose, + resource); + dirinfo_type_t type = dir_fetch_type(dir_purpose, router_purpose, resource); + + if (type == NO_DIRINFO) + return; + + if (!options->FetchServerDescriptors) + return; + + circuit_guard_state_t *guard_state = NULL; + if (!get_via_tor) { + if (options->UseBridges && !(type & BRIDGE_DIRINFO)) { + /* We want to ask a running bridge for which we have a descriptor. + * + * When we ask choose_random_entry() for a bridge, we specify what + * sort of dir fetch we'll be doing, so it won't return a bridge + * that can't answer our question. + */ + const node_t *node = guards_choose_dirguard(dir_purpose, &guard_state); + if (node && node->ri) { + /* every bridge has a routerinfo. */ + routerinfo_t *ri = node->ri; + /* clients always make OR connections to bridges */ + tor_addr_port_t or_ap; + directory_request_t *req = directory_request_new(dir_purpose); + /* we are willing to use a non-preferred address if we need to */ + fascist_firewall_choose_address_node(node, FIREWALL_OR_CONNECTION, 0, + &or_ap); + directory_request_set_or_addr_port(req, &or_ap); + directory_request_set_directory_id_digest(req, + ri->cache_info.identity_digest); + directory_request_set_router_purpose(req, router_purpose); + directory_request_set_resource(req, resource); + if (dir_purpose == DIR_PURPOSE_FETCH_CONSENSUS) + dir_consensus_request_set_additional_headers(req, resource); + directory_request_set_guard_state(req, guard_state); + directory_initiate_request(req); + directory_request_free(req); + } else { + if (guard_state) { + entry_guard_cancel(&guard_state); + } + log_notice(LD_DIR, "Ignoring directory request, since no bridge " + "nodes are available yet."); + } + + return; + } else { + if (prefer_authority || (type & BRIDGE_DIRINFO)) { + /* only ask authdirservers, and don't ask myself */ + rs = router_pick_trusteddirserver(type, pds_flags); + if (rs == NULL && (pds_flags & (PDS_NO_EXISTING_SERVERDESC_FETCH| + PDS_NO_EXISTING_MICRODESC_FETCH))) { + /* We don't want to fetch from any authorities that we're currently + * fetching server descriptors from, and we got no match. Did we + * get no match because all the authorities have connections + * fetching server descriptors (in which case we should just + * return,) or because all the authorities are down or on fire or + * unreachable or something (in which case we should go on with + * our fallback code)? */ + pds_flags &= ~(PDS_NO_EXISTING_SERVERDESC_FETCH| + PDS_NO_EXISTING_MICRODESC_FETCH); + rs = router_pick_trusteddirserver(type, pds_flags); + if (rs) { + log_debug(LD_DIR, "Deferring serverdesc fetch: all authorities " + "are in use."); + return; + } + } + if (rs == NULL && require_authority) { + log_info(LD_DIR, "No authorities were available for %s: will try " + "later.", dir_conn_purpose_to_string(dir_purpose)); + return; + } + } + if (!rs && !(type & BRIDGE_DIRINFO)) { + rs = directory_pick_generic_dirserver(type, pds_flags, + dir_purpose, + &guard_state); + if (!rs) + get_via_tor = 1; /* last resort: try routing it via Tor */ + } + } + } + + if (get_via_tor) { + /* Never use fascistfirewall; we're going via Tor. */ + pds_flags |= PDS_IGNORE_FASCISTFIREWALL; + rs = router_pick_directory_server(type, pds_flags); + } + + /* If we have any hope of building an indirect conn, we know some router + * descriptors. If (rs==NULL), we can't build circuits anyway, so + * there's no point in falling back to the authorities in this case. */ + if (rs) { + const dir_indirection_t indirection = + get_via_tor ? DIRIND_ANONYMOUS : DIRIND_ONEHOP; + directory_request_t *req = directory_request_new(dir_purpose); + directory_request_set_routerstatus(req, rs); + directory_request_set_router_purpose(req, router_purpose); + directory_request_set_indirection(req, indirection); + directory_request_set_resource(req, resource); + if (dir_purpose == DIR_PURPOSE_FETCH_CONSENSUS) + dir_consensus_request_set_additional_headers(req, resource); + if (guard_state) + directory_request_set_guard_state(req, guard_state); + directory_initiate_request(req); + directory_request_free(req); + } else { + log_notice(LD_DIR, + "While fetching directory info, " + "no running dirservers known. Will try again later. " + "(purpose %d)", dir_purpose); + if (!purpose_needs_anonymity(dir_purpose, router_purpose, resource)) { + /* remember we tried them all and failed. */ + directory_all_unreachable(time(NULL)); + } + } +} + +/** As directory_get_from_dirserver, but initiates a request to <i>every</i> + * directory authority other than ourself. Only for use by authorities when + * searching for missing information while voting. */ +void +directory_get_from_all_authorities(uint8_t dir_purpose, + uint8_t router_purpose, + const char *resource) +{ + tor_assert(dir_purpose == DIR_PURPOSE_FETCH_STATUS_VOTE || + dir_purpose == DIR_PURPOSE_FETCH_DETACHED_SIGNATURES); + + SMARTLIST_FOREACH_BEGIN(router_get_trusted_dir_servers(), + dir_server_t *, ds) { + if (router_digest_is_me(ds->digest)) + continue; + if (!(ds->type & V3_DIRINFO)) + continue; + const routerstatus_t *rs = &ds->fake_status; + directory_request_t *req = directory_request_new(dir_purpose); + directory_request_set_routerstatus(req, rs); + directory_request_set_router_purpose(req, router_purpose); + directory_request_set_resource(req, resource); + directory_initiate_request(req); + directory_request_free(req); + } SMARTLIST_FOREACH_END(ds); +} + +/** Return true iff <b>ind</b> requires a multihop circuit. */ +static int +dirind_is_anon(dir_indirection_t ind) +{ + return ind == DIRIND_ANON_DIRPORT || ind == DIRIND_ANONYMOUS; +} + +/* Choose reachable OR and Dir addresses and ports from status, copying them + * into use_or_ap and use_dir_ap. If indirection is anonymous, then we're + * connecting via another relay, so choose the primary IPv4 address and ports. + * + * status should have at least one reachable address, if we can't choose a + * reachable address, warn and return -1. Otherwise, return 0. + */ +static int +directory_choose_address_routerstatus(const routerstatus_t *status, + dir_indirection_t indirection, + tor_addr_port_t *use_or_ap, + tor_addr_port_t *use_dir_ap) +{ + tor_assert(status != NULL); + tor_assert(use_or_ap != NULL); + tor_assert(use_dir_ap != NULL); + + const or_options_t *options = get_options(); + int have_or = 0, have_dir = 0; + + /* We expect status to have at least one reachable address if we're + * connecting to it directly. + * + * Therefore, we can simply use the other address if the one we want isn't + * allowed by the firewall. + * + * (When Tor uploads and downloads a hidden service descriptor, it uses + * DIRIND_ANONYMOUS, except for Tor2Web, which uses DIRIND_ONEHOP. + * So this code will only modify the address for Tor2Web's HS descriptor + * fetches. Even Single Onion Servers (NYI) use DIRIND_ANONYMOUS, to avoid + * HSDirs denying service by rejecting descriptors.) + */ + + /* Initialise the OR / Dir addresses */ + tor_addr_make_null(&use_or_ap->addr, AF_UNSPEC); + use_or_ap->port = 0; + tor_addr_make_null(&use_dir_ap->addr, AF_UNSPEC); + use_dir_ap->port = 0; + + /* ORPort connections */ + if (indirection == DIRIND_ANONYMOUS) { + if (status->addr) { + /* Since we're going to build a 3-hop circuit and ask the 2nd relay + * to extend to this address, always use the primary (IPv4) OR address */ + tor_addr_from_ipv4h(&use_or_ap->addr, status->addr); + use_or_ap->port = status->or_port; + have_or = 1; + } + } else if (indirection == DIRIND_ONEHOP) { + /* We use an IPv6 address if we have one and we prefer it. + * Use the preferred address and port if they are reachable, otherwise, + * use the alternate address and port (if any). + */ + fascist_firewall_choose_address_rs(status, FIREWALL_OR_CONNECTION, 0, + use_or_ap); + have_or = tor_addr_port_is_valid_ap(use_or_ap, 0); + } + + /* DirPort connections + * DIRIND_ONEHOP uses ORPort, but may fall back to the DirPort on relays */ + if (indirection == DIRIND_DIRECT_CONN || + indirection == DIRIND_ANON_DIRPORT || + (indirection == DIRIND_ONEHOP + && !directory_must_use_begindir(options))) { + fascist_firewall_choose_address_rs(status, FIREWALL_DIR_CONNECTION, 0, + use_dir_ap); + have_dir = tor_addr_port_is_valid_ap(use_dir_ap, 0); + } + + /* We rejected all addresses in the relay's status. This means we can't + * connect to it. */ + if (!have_or && !have_dir) { + static int logged_backtrace = 0; + log_info(LD_BUG, "Rejected all OR and Dir addresses from %s when " + "launching an outgoing directory connection to: IPv4 %s OR %d " + "Dir %d IPv6 %s OR %d Dir %d", routerstatus_describe(status), + fmt_addr32(status->addr), status->or_port, + status->dir_port, fmt_addr(&status->ipv6_addr), + status->ipv6_orport, status->dir_port); + if (!logged_backtrace) { + log_backtrace(LOG_INFO, LD_BUG, "Addresses came from"); + logged_backtrace = 1; + } + return -1; + } + + return 0; +} + +/** Return true iff <b>conn</b> is the client side of a directory connection + * we launched to ourself in order to determine the reachability of our + * dir_port. */ +static int +directory_conn_is_self_reachability_test(dir_connection_t *conn) +{ + if (conn->requested_resource && + !strcmpstart(conn->requested_resource,"authority")) { + const routerinfo_t *me = router_get_my_routerinfo(); + if (me && + router_digest_is_me(conn->identity_digest) && + tor_addr_eq_ipv4h(&conn->base_.addr, me->addr) && /*XXXX prop 118*/ + me->dir_port == conn->base_.port) + return 1; + } + return 0; +} + +/** Called when we are unable to complete the client's request to a directory + * server due to a network error: Mark the router as down and try again if + * possible. + */ +static void +connection_dir_request_failed(dir_connection_t *conn) +{ + if (conn->guard_state) { + /* We haven't seen a success on this guard state, so consider it to have + * failed. */ + entry_guard_failed(&conn->guard_state); + } + if (directory_conn_is_self_reachability_test(conn)) { + return; /* this was a test fetch. don't retry. */ + } + if (!entry_list_is_constrained(get_options())) + router_set_status(conn->identity_digest, 0); /* don't try this one again */ + if (conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC || + conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO) { + log_info(LD_DIR, "Giving up on serverdesc/extrainfo fetch from " + "directory server at '%s'; retrying", + conn->base_.address); + if (conn->router_purpose == ROUTER_PURPOSE_BRIDGE) + connection_dir_bridge_routerdesc_failed(conn); + connection_dir_download_routerdesc_failed(conn); + } else if (conn->base_.purpose == DIR_PURPOSE_FETCH_CONSENSUS) { + if (conn->requested_resource) + networkstatus_consensus_download_failed(0, conn->requested_resource); + } else if (conn->base_.purpose == DIR_PURPOSE_FETCH_CERTIFICATE) { + log_info(LD_DIR, "Giving up on certificate fetch from directory server " + "at '%s'; retrying", + conn->base_.address); + connection_dir_download_cert_failed(conn, 0); + } else if (conn->base_.purpose == DIR_PURPOSE_FETCH_DETACHED_SIGNATURES) { + log_info(LD_DIR, "Giving up downloading detached signatures from '%s'", + conn->base_.address); + } else if (conn->base_.purpose == DIR_PURPOSE_FETCH_STATUS_VOTE) { + log_info(LD_DIR, "Giving up downloading votes from '%s'", + conn->base_.address); + } else if (conn->base_.purpose == DIR_PURPOSE_FETCH_MICRODESC) { + log_info(LD_DIR, "Giving up on downloading microdescriptors from " + "directory server at '%s'; will retry", conn->base_.address); + connection_dir_download_routerdesc_failed(conn); + } +} + +/** Helper: Attempt to fetch directly the descriptors of each bridge + * listed in <b>failed</b>. + */ +static void +connection_dir_retry_bridges(smartlist_t *descs) +{ + char digest[DIGEST_LEN]; + SMARTLIST_FOREACH(descs, const char *, cp, + { + if (base16_decode(digest, DIGEST_LEN, cp, strlen(cp)) != DIGEST_LEN) { + log_warn(LD_BUG, "Malformed fingerprint in list: %s", + escaped(cp)); + continue; + } + retry_bridge_descriptor_fetch_directly(digest); + }); +} + +/** Called when an attempt to download one or more router descriptors + * or extra-info documents on connection <b>conn</b> failed. + */ +static void +connection_dir_download_routerdesc_failed(dir_connection_t *conn) +{ + /* No need to increment the failure count for routerdescs, since + * it's not their fault. */ + + /* No need to relaunch descriptor downloads here: we already do it + * every 10 or 60 seconds (FOO_DESCRIPTOR_RETRY_INTERVAL) in main.c. */ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC || + conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO || + conn->base_.purpose == DIR_PURPOSE_FETCH_MICRODESC); + + (void) conn; +} + +/** Called when an attempt to download a bridge's routerdesc from + * one of the authorities failed due to a network error. If + * possible attempt to download descriptors from the bridge directly. + */ +static void +connection_dir_bridge_routerdesc_failed(dir_connection_t *conn) +{ + smartlist_t *which = NULL; + + /* Requests for bridge descriptors are in the form 'fp/', so ignore + anything else. */ + if (!conn->requested_resource || strcmpstart(conn->requested_resource,"fp/")) + return; + + which = smartlist_new(); + dir_split_resource_into_fingerprints(conn->requested_resource + + strlen("fp/"), + which, NULL, 0); + + tor_assert(conn->base_.purpose != DIR_PURPOSE_FETCH_EXTRAINFO); + if (smartlist_len(which)) { + connection_dir_retry_bridges(which); + SMARTLIST_FOREACH(which, char *, cp, tor_free(cp)); + } + smartlist_free(which); +} + +/** Called when an attempt to fetch a certificate fails. */ +static void +connection_dir_download_cert_failed(dir_connection_t *conn, int status) +{ + const char *fp_pfx = "fp/"; + const char *fpsk_pfx = "fp-sk/"; + smartlist_t *failed; + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_CERTIFICATE); + + if (!conn->requested_resource) + return; + failed = smartlist_new(); + /* + * We have two cases download by fingerprint (resource starts + * with "fp/") or download by fingerprint/signing key pair + * (resource starts with "fp-sk/"). + */ + if (!strcmpstart(conn->requested_resource, fp_pfx)) { + /* Download by fingerprint case */ + dir_split_resource_into_fingerprints(conn->requested_resource + + strlen(fp_pfx), + failed, NULL, DSR_HEX); + SMARTLIST_FOREACH_BEGIN(failed, char *, cp) { + /* Null signing key digest indicates download by fp only */ + authority_cert_dl_failed(cp, NULL, status); + tor_free(cp); + } SMARTLIST_FOREACH_END(cp); + } else if (!strcmpstart(conn->requested_resource, fpsk_pfx)) { + /* Download by (fp,sk) pairs */ + dir_split_resource_into_fingerprint_pairs(conn->requested_resource + + strlen(fpsk_pfx), failed); + SMARTLIST_FOREACH_BEGIN(failed, fp_pair_t *, cp) { + authority_cert_dl_failed(cp->first, cp->second, status); + tor_free(cp); + } SMARTLIST_FOREACH_END(cp); + } else { + log_warn(LD_DIR, + "Don't know what to do with failure for cert fetch %s", + conn->requested_resource); + } + + smartlist_free(failed); + + update_certificate_downloads(time(NULL)); +} + +/* Should this tor instance only use begindir for all its directory requests? + */ +int +directory_must_use_begindir(const or_options_t *options) +{ + /* Clients, onion services, and bridges must use begindir, + * relays and authorities do not have to */ + return !public_server_mode(options); +} + +/** Evaluate the situation and decide if we should use an encrypted + * "begindir-style" connection for this directory request. + * 0) If there is no DirPort, yes. + * 1) If or_port is 0, or it's a direct conn and or_port is firewalled + * or we're a dir mirror, no. + * 2) If we prefer to avoid begindir conns, and we're not fetching or + * publishing a bridge relay descriptor, no. + * 3) Else yes. + * If returning 0, return in *reason why we can't use begindir. + * reason must not be NULL. + */ +static int +directory_command_should_use_begindir(const or_options_t *options, + const directory_request_t *req, + const char **reason) +{ + const tor_addr_t *or_addr = &req->or_addr_port.addr; + //const tor_addr_t *dir_addr = &req->dir_addr_port.addr; + const int or_port = req->or_addr_port.port; + const int dir_port = req->dir_addr_port.port; + + const dir_indirection_t indirection = req->indirection; + + tor_assert(reason); + *reason = NULL; + + /* Reasons why we must use begindir */ + if (!dir_port) { + *reason = "(using begindir - directory with no DirPort)"; + return 1; /* We don't know a DirPort -- must begindir. */ + } + /* Reasons why we can't possibly use begindir */ + if (!or_port) { + *reason = "directory with unknown ORPort"; + return 0; /* We don't know an ORPort -- no chance. */ + } + if (indirection == DIRIND_DIRECT_CONN || + indirection == DIRIND_ANON_DIRPORT) { + *reason = "DirPort connection"; + return 0; + } + if (indirection == DIRIND_ONEHOP) { + /* We're firewalled and want a direct OR connection */ + if (!fascist_firewall_allows_address_addr(or_addr, or_port, + FIREWALL_OR_CONNECTION, 0, 0)) { + *reason = "ORPort not reachable"; + return 0; + } + } + /* Reasons why we want to avoid using begindir */ + if (indirection == DIRIND_ONEHOP) { + if (!directory_must_use_begindir(options)) { + *reason = "in relay mode"; + return 0; + } + } + /* DIRIND_ONEHOP on a client, or DIRIND_ANONYMOUS + */ + *reason = "(using begindir)"; + return 1; +} + +/** + * Create and return a new directory_request_t with purpose + * <b>dir_purpose</b>. + */ +directory_request_t * +directory_request_new(uint8_t dir_purpose) +{ + tor_assert(dir_purpose >= DIR_PURPOSE_MIN_); + tor_assert(dir_purpose <= DIR_PURPOSE_MAX_); + tor_assert(dir_purpose != DIR_PURPOSE_SERVER); + tor_assert(dir_purpose != DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2); + tor_assert(dir_purpose != DIR_PURPOSE_HAS_FETCHED_HSDESC); + + directory_request_t *result = tor_malloc_zero(sizeof(*result)); + tor_addr_make_null(&result->or_addr_port.addr, AF_INET); + result->or_addr_port.port = 0; + tor_addr_make_null(&result->dir_addr_port.addr, AF_INET); + result->dir_addr_port.port = 0; + result->dir_purpose = dir_purpose; + result->router_purpose = ROUTER_PURPOSE_GENERAL; + result->indirection = DIRIND_ONEHOP; + return result; +} +/** + * Release all resources held by <b>req</b>. + */ +void +directory_request_free_(directory_request_t *req) +{ + if (req == NULL) + return; + config_free_lines(req->additional_headers); + tor_free(req); +} +/** + * Set the address and OR port to use for this directory request. If there is + * no OR port, we'll have to connect over the dirport. (If there are both, + * the indirection setting determines which to use.) + */ +void +directory_request_set_or_addr_port(directory_request_t *req, + const tor_addr_port_t *p) +{ + memcpy(&req->or_addr_port, p, sizeof(*p)); +} +/** + * Set the address and dirport to use for this directory request. If there + * is no dirport, we'll have to connect over the OR port. (If there are both, + * the indirection setting determines which to use.) + */ +void +directory_request_set_dir_addr_port(directory_request_t *req, + const tor_addr_port_t *p) +{ + memcpy(&req->dir_addr_port, p, sizeof(*p)); +} +/** + * Set the RSA identity digest of the directory to use for this directory + * request. + */ +void +directory_request_set_directory_id_digest(directory_request_t *req, + const char *digest) +{ + memcpy(req->digest, digest, DIGEST_LEN); +} +/** + * Set the router purpose associated with uploaded and downloaded router + * descriptors and extrainfo documents in this directory request. The purpose + * must be one of ROUTER_PURPOSE_GENERAL (the default) or + * ROUTER_PURPOSE_BRIDGE. + */ +void +directory_request_set_router_purpose(directory_request_t *req, + uint8_t router_purpose) +{ + tor_assert(router_purpose == ROUTER_PURPOSE_GENERAL || + router_purpose == ROUTER_PURPOSE_BRIDGE); + // assert that it actually makes sense to set this purpose, given + // the dir_purpose. + req->router_purpose = router_purpose; +} +/** + * Set the indirection to be used for the directory request. The indirection + * parameter configures whether to connect to a DirPort or ORPort, and whether + * to anonymize the connection. DIRIND_ONEHOP (use ORPort, don't anonymize) + * is the default. See dir_indirection_t for more information. + */ +void +directory_request_set_indirection(directory_request_t *req, + dir_indirection_t indirection) +{ + req->indirection = indirection; +} + +/** + * Set a pointer to the resource to request from a directory. Different + * request types use resources to indicate different components of their URL. + * Note that only an alias to <b>resource</b> is stored, so the + * <b>resource</b> must outlive the request. + */ +void +directory_request_set_resource(directory_request_t *req, + const char *resource) +{ + req->resource = resource; +} +/** + * Set a pointer to the payload to include with this directory request, along + * with its length. Note that only an alias to <b>payload</b> is stored, so + * the <b>payload</b> must outlive the request. + */ +void +directory_request_set_payload(directory_request_t *req, + const char *payload, + size_t payload_len) +{ + tor_assert(DIR_PURPOSE_IS_UPLOAD(req->dir_purpose)); + + req->payload = payload; + req->payload_len = payload_len; +} +/** + * Set an if-modified-since date to send along with the request. The + * default is 0 (meaning, send no if-modified-since header). + */ +void +directory_request_set_if_modified_since(directory_request_t *req, + time_t if_modified_since) +{ + req->if_modified_since = if_modified_since; +} + +/** Include a header of name <b>key</b> with content <b>val</b> in the + * request. Neither may include newlines or other odd characters. Their + * ordering is not currently guaranteed. + * + * Note that, as elsewhere in this module, header keys include a trailing + * colon and space. + */ +void +directory_request_add_header(directory_request_t *req, + const char *key, + const char *val) +{ + config_line_prepend(&req->additional_headers, key, val); +} +/** + * Set an object containing HS data to be associated with this request. Note + * that only an alias to <b>query</b> is stored, so the <b>query</b> object + * must outlive the request. + */ +void +directory_request_set_rend_query(directory_request_t *req, + const rend_data_t *query) +{ + if (query) { + tor_assert(req->dir_purpose == DIR_PURPOSE_FETCH_RENDDESC_V2 || + req->dir_purpose == DIR_PURPOSE_UPLOAD_RENDDESC_V2); + } + req->rend_query = query; +} +/** + * Set an object containing HS connection identifier to be associated with + * this request. Note that only an alias to <b>ident</b> is stored, so the + * <b>ident</b> object must outlive the request. + */ +void +directory_request_upload_set_hs_ident(directory_request_t *req, + const hs_ident_dir_conn_t *ident) +{ + if (ident) { + tor_assert(req->dir_purpose == DIR_PURPOSE_UPLOAD_HSDESC); + } + req->hs_ident = ident; +} +/** + * Set an object containing HS connection identifier to be associated with + * this fetch request. Note that only an alias to <b>ident</b> is stored, so + * the <b>ident</b> object must outlive the request. + */ +void +directory_request_fetch_set_hs_ident(directory_request_t *req, + const hs_ident_dir_conn_t *ident) +{ + if (ident) { + tor_assert(req->dir_purpose == DIR_PURPOSE_FETCH_HSDESC); + } + req->hs_ident = ident; +} +/** Set a static circuit_guard_state_t object to affliate with the request in + * <b>req</b>. This object will receive notification when the attempt to + * connect to the guard either succeeds or fails. */ +void +directory_request_set_guard_state(directory_request_t *req, + circuit_guard_state_t *state) +{ + req->guard_state = state; +} + +/** + * Internal: Return true if any information for contacting the directory in + * <b>req</b> has been set, other than by the routerstatus. */ +static int +directory_request_dir_contact_info_specified(const directory_request_t *req) +{ + /* We only check for ports here, since we don't use an addr unless the port + * is set */ + return (req->or_addr_port.port || + req->dir_addr_port.port || + ! tor_digest_is_zero(req->digest)); +} + +/** + * Set the routerstatus to use for the directory associated with this + * request. If this option is set, then no other function to set the + * directory's address or identity should be called. + */ +void +directory_request_set_routerstatus(directory_request_t *req, + const routerstatus_t *status) +{ + req->routerstatus = status; +} +/** + * Helper: update the addresses, ports, and identities in <b>req</b> + * from the routerstatus object in <b>req</b>. Return 0 on success. + * On failure, warn and return -1. + */ +static int +directory_request_set_dir_from_routerstatus(directory_request_t *req) + +{ + const routerstatus_t *status = req->routerstatus; + if (BUG(status == NULL)) + return -1; + const or_options_t *options = get_options(); + const node_t *node; + tor_addr_port_t use_or_ap, use_dir_ap; + const int anonymized_connection = dirind_is_anon(req->indirection); + + tor_assert(status != NULL); + + node = node_get_by_id(status->identity_digest); + + /* XXX The below check is wrong: !node means it's not in the consensus, + * but we haven't checked if we have a descriptor for it -- and also, + * we only care about the descriptor if it's a begindir-style anonymized + * connection. */ + if (!node && anonymized_connection) { + log_info(LD_DIR, "Not sending anonymized request to directory '%s'; we " + "don't have its router descriptor.", + routerstatus_describe(status)); + return -1; + } + + if (options->ExcludeNodes && options->StrictNodes && + routerset_contains_routerstatus(options->ExcludeNodes, status, -1)) { + log_warn(LD_DIR, "Wanted to contact directory mirror %s for %s, but " + "it's in our ExcludedNodes list and StrictNodes is set. " + "Skipping. This choice might make your Tor not work.", + routerstatus_describe(status), + dir_conn_purpose_to_string(req->dir_purpose)); + return -1; + } + + /* At this point, if we are a client making a direct connection to a + * directory server, we have selected a server that has at least one address + * allowed by ClientUseIPv4/6 and Reachable{"",OR,Dir}Addresses. This + * selection uses the preference in ClientPreferIPv6{OR,Dir}Port, if + * possible. (If UseBridges is set, clients always use IPv6, and prefer it + * by default.) + * + * Now choose an address that we can use to connect to the directory server. + */ + if (directory_choose_address_routerstatus(status, + req->indirection, &use_or_ap, + &use_dir_ap) < 0) { + return -1; + } + + directory_request_set_or_addr_port(req, &use_or_ap); + directory_request_set_dir_addr_port(req, &use_dir_ap); + directory_request_set_directory_id_digest(req, status->identity_digest); + return 0; +} + +/** + * Launch the provided directory request, configured in <b>request</b>. + * After this function is called, you can free <b>request</b>. + */ +MOCK_IMPL(void, +directory_initiate_request,(directory_request_t *request)) +{ + tor_assert(request); + if (request->routerstatus) { + tor_assert_nonfatal( + ! directory_request_dir_contact_info_specified(request)); + if (directory_request_set_dir_from_routerstatus(request) < 0) { + return; + } + } + + const tor_addr_port_t *or_addr_port = &request->or_addr_port; + const tor_addr_port_t *dir_addr_port = &request->dir_addr_port; + const char *digest = request->digest; + const uint8_t dir_purpose = request->dir_purpose; + const uint8_t router_purpose = request->router_purpose; + const dir_indirection_t indirection = request->indirection; + const char *resource = request->resource; + const rend_data_t *rend_query = request->rend_query; + const hs_ident_dir_conn_t *hs_ident = request->hs_ident; + circuit_guard_state_t *guard_state = request->guard_state; + + tor_assert(or_addr_port->port || dir_addr_port->port); + tor_assert(digest); + + dir_connection_t *conn; + const or_options_t *options = get_options(); + int socket_error = 0; + const char *begindir_reason = NULL; + /* Should the connection be to a relay's OR port (and inside that we will + * send our directory request)? */ + const int use_begindir = + directory_command_should_use_begindir(options, request, &begindir_reason); + + /* Will the connection go via a three-hop Tor circuit? Note that this + * is separate from whether it will use_begindir. */ + const int anonymized_connection = dirind_is_anon(indirection); + + /* What is the address we want to make the directory request to? If + * we're making a begindir request this is the ORPort of the relay + * we're contacting; if not a begindir request, this is its DirPort. + * Note that if anonymized_connection is true, we won't be initiating + * a connection directly to this address. */ + tor_addr_t addr; + tor_addr_copy(&addr, &(use_begindir ? or_addr_port : dir_addr_port)->addr); + uint16_t port = (use_begindir ? or_addr_port : dir_addr_port)->port; + + log_debug(LD_DIR, "anonymized %d, use_begindir %d.", + anonymized_connection, use_begindir); + + log_debug(LD_DIR, "Initiating %s", dir_conn_purpose_to_string(dir_purpose)); + + if (purpose_needs_anonymity(dir_purpose, router_purpose, resource)) { + tor_assert(anonymized_connection || + rend_non_anonymous_mode_enabled(options)); + } + + /* use encrypted begindir connections for everything except relays + * this provides better protection for directory fetches */ + if (!use_begindir && directory_must_use_begindir(options)) { + log_warn(LD_BUG, "Client could not use begindir connection: %s", + begindir_reason ? begindir_reason : "(NULL)"); + return; + } + + /* ensure that we don't make direct connections when a SOCKS server is + * configured. */ + if (!anonymized_connection && !use_begindir && !options->HTTPProxy && + (options->Socks4Proxy || options->Socks5Proxy)) { + log_warn(LD_DIR, "Cannot connect to a directory server through a " + "SOCKS proxy!"); + return; + } + + /* Make sure that the destination addr and port we picked is viable. */ + if (!port || tor_addr_is_null(&addr)) { + static int logged_backtrace = 0; + log_warn(LD_DIR, + "Cannot make an outgoing %sconnection without a remote %sPort.", + use_begindir ? "begindir " : "", + use_begindir ? "OR" : "Dir"); + if (!logged_backtrace) { + log_backtrace(LOG_INFO, LD_BUG, "Address came from"); + logged_backtrace = 1; + } + return; + } + + conn = dir_connection_new(tor_addr_family(&addr)); + + /* set up conn so it's got all the data we need to remember */ + tor_addr_copy(&conn->base_.addr, &addr); + conn->base_.port = port; + conn->base_.address = tor_addr_to_str_dup(&addr); + memcpy(conn->identity_digest, digest, DIGEST_LEN); + + conn->base_.purpose = dir_purpose; + conn->router_purpose = router_purpose; + + /* give it an initial state */ + conn->base_.state = DIR_CONN_STATE_CONNECTING; + + /* decide whether we can learn our IP address from this conn */ + /* XXXX This is a bad name for this field now. */ + conn->dirconn_direct = !anonymized_connection; + + /* copy rendezvous data, if any */ + if (rend_query) { + /* We can't have both v2 and v3+ identifier. */ + tor_assert_nonfatal(!hs_ident); + conn->rend_data = rend_data_dup(rend_query); + } + if (hs_ident) { + /* We can't have both v2 and v3+ identifier. */ + tor_assert_nonfatal(!rend_query); + conn->hs_ident = hs_ident_dir_conn_dup(hs_ident); + } + + if (!anonymized_connection && !use_begindir) { + /* then we want to connect to dirport directly */ + + if (options->HTTPProxy) { + tor_addr_copy(&addr, &options->HTTPProxyAddr); + port = options->HTTPProxyPort; + } + + // In this case we should not have picked a directory guard. + if (BUG(guard_state)) { + entry_guard_cancel(&guard_state); + } + + switch (connection_connect(TO_CONN(conn), conn->base_.address, &addr, + port, &socket_error)) { + case -1: + connection_mark_for_close(TO_CONN(conn)); + return; + case 1: + /* start flushing conn */ + conn->base_.state = DIR_CONN_STATE_CLIENT_SENDING; + /* fall through */ + case 0: + /* queue the command on the outbuf */ + directory_send_command(conn, 1, request); + connection_watch_events(TO_CONN(conn), READ_EVENT | WRITE_EVENT); + /* writable indicates finish, readable indicates broken link, + error indicates broken link in windowsland. */ + } + } else { + /* We will use a Tor circuit (maybe 1-hop, maybe 3-hop, maybe with + * begindir, maybe not with begindir) */ + + entry_connection_t *linked_conn; + + /* Anonymized tunneled connections can never share a circuit. + * One-hop directory connections can share circuits with each other + * but nothing else. */ + int iso_flags = anonymized_connection ? ISO_STREAM : ISO_SESSIONGRP; + + /* If it's an anonymized connection, remember the fact that we + * wanted it for later: maybe we'll want it again soon. */ + if (anonymized_connection && use_begindir) + rep_hist_note_used_internal(time(NULL), 0, 1); + else if (anonymized_connection && !use_begindir) + rep_hist_note_used_port(time(NULL), conn->base_.port); + + // In this case we should not have a directory guard; we'll + // get a regular guard later when we build the circuit. + if (BUG(anonymized_connection && guard_state)) { + entry_guard_cancel(&guard_state); + } + + conn->guard_state = guard_state; + + /* make an AP connection + * populate it and add it at the right state + * hook up both sides + */ + linked_conn = + connection_ap_make_link(TO_CONN(conn), + conn->base_.address, conn->base_.port, + digest, + SESSION_GROUP_DIRCONN, iso_flags, + use_begindir, !anonymized_connection); + if (!linked_conn) { + log_warn(LD_NET,"Making tunnel to dirserver failed."); + connection_mark_for_close(TO_CONN(conn)); + return; + } + + if (connection_add(TO_CONN(conn)) < 0) { + log_warn(LD_NET,"Unable to add connection for link to dirserver."); + connection_mark_for_close(TO_CONN(conn)); + return; + } + conn->base_.state = DIR_CONN_STATE_CLIENT_SENDING; + /* queue the command on the outbuf */ + directory_send_command(conn, 0, request); + + connection_watch_events(TO_CONN(conn), READ_EVENT|WRITE_EVENT); + connection_start_reading(ENTRY_TO_CONN(linked_conn)); + } +} + +/** Return true iff anything we say on <b>conn</b> is being encrypted before + * we send it to the client/server. */ +int +connection_dir_is_encrypted(const dir_connection_t *conn) +{ + /* Right now it's sufficient to see if conn is or has been linked, since + * the only thing it could be linked to is an edge connection on a + * circuit, and the only way it could have been unlinked is at the edge + * connection getting closed. + */ + return TO_CONN(conn)->linked; +} + +/** Helper for sorting + * + * sort strings alphabetically + */ +static int +compare_strs_(const void **a, const void **b) +{ + const char *s1 = *a, *s2 = *b; + return strcmp(s1, s2); +} + +#define CONDITIONAL_CONSENSUS_FPR_LEN 3 +#if (CONDITIONAL_CONSENSUS_FPR_LEN > DIGEST_LEN) +#error "conditional consensus fingerprint length is larger than digest length" +#endif + +/** Return the URL we should use for a consensus download. + * + * Use the "conditional consensus downloading" feature described in + * dir-spec.txt, i.e. + * GET .../consensus/<b>fpr</b>+<b>fpr</b>+<b>fpr</b> + * + * If 'resource' is provided, it is the name of a consensus flavor to request. + */ +static char * +directory_get_consensus_url(const char *resource) +{ + char *url = NULL; + const char *hyphen, *flavor; + if (resource==NULL || strcmp(resource, "ns")==0) { + flavor = ""; /* Request ns consensuses as "", so older servers will work*/ + hyphen = ""; + } else { + flavor = resource; + hyphen = "-"; + } + + { + char *authority_id_list; + smartlist_t *authority_digests = smartlist_new(); + + SMARTLIST_FOREACH_BEGIN(router_get_trusted_dir_servers(), + dir_server_t *, ds) { + char *hex; + if (!(ds->type & V3_DIRINFO)) + continue; + + hex = tor_malloc(2*CONDITIONAL_CONSENSUS_FPR_LEN+1); + base16_encode(hex, 2*CONDITIONAL_CONSENSUS_FPR_LEN+1, + ds->v3_identity_digest, CONDITIONAL_CONSENSUS_FPR_LEN); + smartlist_add(authority_digests, hex); + } SMARTLIST_FOREACH_END(ds); + smartlist_sort(authority_digests, compare_strs_); + authority_id_list = smartlist_join_strings(authority_digests, + "+", 0, NULL); + + tor_asprintf(&url, "/tor/status-vote/current/consensus%s%s/%s.z", + hyphen, flavor, authority_id_list); + + SMARTLIST_FOREACH(authority_digests, char *, cp, tor_free(cp)); + smartlist_free(authority_digests); + tor_free(authority_id_list); + } + return url; +} + +/** + * Copies the ipv6 from source to destination, subject to buffer size limit + * size. If decorate is true, makes sure the copied address is decorated. + */ +static void +copy_ipv6_address(char* destination, const char* source, size_t len, + int decorate) { + tor_assert(destination); + tor_assert(source); + + if (decorate && source[0] != '[') { + tor_snprintf(destination, len, "[%s]", source); + } else { + strlcpy(destination, source, len); + } +} + +/** Queue an appropriate HTTP command for <b>request</b> on + * <b>conn</b>-\>outbuf. If <b>direct</b> is true, we're making a + * non-anonymized connection to the dirport. + */ +static void +directory_send_command(dir_connection_t *conn, + const int direct, + const directory_request_t *req) +{ + tor_assert(req); + const int purpose = req->dir_purpose; + const char *resource = req->resource; + const char *payload = req->payload; + const size_t payload_len = req->payload_len; + const time_t if_modified_since = req->if_modified_since; + const int anonymized_connection = dirind_is_anon(req->indirection); + + char proxystring[256]; + char hoststring[128]; + /* NEEDS to be the same size hoststring. + Will be decorated with brackets around it if it is ipv6. */ + char decorated_address[128]; + smartlist_t *headers = smartlist_new(); + char *url; + char *accept_encoding; + size_t url_len; + char request[8192]; + size_t request_len, total_request_len = 0; + const char *httpcommand = NULL; + + tor_assert(conn); + tor_assert(conn->base_.type == CONN_TYPE_DIR); + + tor_free(conn->requested_resource); + if (resource) + conn->requested_resource = tor_strdup(resource); + + /* decorate the ip address if it is ipv6 */ + if (strchr(conn->base_.address, ':')) { + copy_ipv6_address(decorated_address, conn->base_.address, + sizeof(decorated_address), 1); + } else { + strlcpy(decorated_address, conn->base_.address, sizeof(decorated_address)); + } + + /* come up with a string for which Host: we want */ + if (conn->base_.port == 80) { + strlcpy(hoststring, decorated_address, sizeof(hoststring)); + } else { + tor_snprintf(hoststring, sizeof(hoststring), "%s:%d", + decorated_address, conn->base_.port); + } + + /* Format if-modified-since */ + if (if_modified_since) { + char b[RFC1123_TIME_LEN+1]; + format_rfc1123_time(b, if_modified_since); + smartlist_add_asprintf(headers, "If-Modified-Since: %s\r\n", b); + } + + /* come up with some proxy lines, if we're using one. */ + if (direct && get_options()->HTTPProxy) { + char *base64_authenticator=NULL; + const char *authenticator = get_options()->HTTPProxyAuthenticator; + + tor_snprintf(proxystring, sizeof(proxystring),"http://%s", hoststring); + if (authenticator) { + base64_authenticator = alloc_http_authenticator(authenticator); + if (!base64_authenticator) + log_warn(LD_BUG, "Encoding http authenticator failed"); + } + if (base64_authenticator) { + smartlist_add_asprintf(headers, + "Proxy-Authorization: Basic %s\r\n", + base64_authenticator); + tor_free(base64_authenticator); + } + } else { + proxystring[0] = 0; + } + + if (! anonymized_connection) { + /* Add Accept-Encoding. */ + accept_encoding = accept_encoding_header(); + smartlist_add_asprintf(headers, "Accept-Encoding: %s\r\n", + accept_encoding); + tor_free(accept_encoding); + } + + /* Add additional headers, if any */ + { + config_line_t *h; + for (h = req->additional_headers; h; h = h->next) { + smartlist_add_asprintf(headers, "%s%s\r\n", h->key, h->value); + } + } + + switch (purpose) { + case DIR_PURPOSE_FETCH_CONSENSUS: + /* resource is optional. If present, it's a flavor name */ + tor_assert(!payload); + httpcommand = "GET"; + url = directory_get_consensus_url(resource); + log_info(LD_DIR, "Downloading consensus from %s using %s", + hoststring, url); + break; + case DIR_PURPOSE_FETCH_CERTIFICATE: + tor_assert(resource); + tor_assert(!payload); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/keys/%s", resource); + break; + case DIR_PURPOSE_FETCH_STATUS_VOTE: + tor_assert(resource); + tor_assert(!payload); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/status-vote/next/%s.z", resource); + break; + case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: + tor_assert(!resource); + tor_assert(!payload); + httpcommand = "GET"; + url = tor_strdup("/tor/status-vote/next/consensus-signatures.z"); + break; + case DIR_PURPOSE_FETCH_SERVERDESC: + tor_assert(resource); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/server/%s", resource); + break; + case DIR_PURPOSE_FETCH_EXTRAINFO: + tor_assert(resource); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/extra/%s", resource); + break; + case DIR_PURPOSE_FETCH_MICRODESC: + tor_assert(resource); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/micro/%s", resource); + break; + case DIR_PURPOSE_UPLOAD_DIR: { + const char *why = router_get_descriptor_gen_reason(); + tor_assert(!resource); + tor_assert(payload); + httpcommand = "POST"; + url = tor_strdup("/tor/"); + if (!why) { + why = "for no reason at all"; + } + smartlist_add_asprintf(headers, "X-Desc-Gen-Reason: %s\r\n", why); + break; + } + case DIR_PURPOSE_UPLOAD_VOTE: + tor_assert(!resource); + tor_assert(payload); + httpcommand = "POST"; + url = tor_strdup("/tor/post/vote"); + break; + case DIR_PURPOSE_UPLOAD_SIGNATURES: + tor_assert(!resource); + tor_assert(payload); + httpcommand = "POST"; + url = tor_strdup("/tor/post/consensus-signature"); + break; + case DIR_PURPOSE_FETCH_RENDDESC_V2: + tor_assert(resource); + tor_assert(strlen(resource) <= REND_DESC_ID_V2_LEN_BASE32); + tor_assert(!payload); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/rendezvous2/%s", resource); + break; + case DIR_PURPOSE_FETCH_HSDESC: + tor_assert(resource); + tor_assert(strlen(resource) <= ED25519_BASE64_LEN); + tor_assert(!payload); + httpcommand = "GET"; + tor_asprintf(&url, "/tor/hs/3/%s", resource); + break; + case DIR_PURPOSE_UPLOAD_RENDDESC_V2: + tor_assert(!resource); + tor_assert(payload); + httpcommand = "POST"; + url = tor_strdup("/tor/rendezvous2/publish"); + break; + case DIR_PURPOSE_UPLOAD_HSDESC: + tor_assert(resource); + tor_assert(payload); + httpcommand = "POST"; + tor_asprintf(&url, "/tor/hs/%s/publish", resource); + break; + default: + tor_assert(0); + return; + } + + /* warn in the non-tunneled case */ + if (direct && (strlen(proxystring) + strlen(url) >= 4096)) { + log_warn(LD_BUG, + "Squid does not like URLs longer than 4095 bytes, and this " + "one is %d bytes long: %s%s", + (int)(strlen(proxystring) + strlen(url)), proxystring, url); + } + + tor_snprintf(request, sizeof(request), "%s %s", httpcommand, proxystring); + + request_len = strlen(request); + total_request_len += request_len; + connection_buf_add(request, request_len, TO_CONN(conn)); + + url_len = strlen(url); + total_request_len += url_len; + connection_buf_add(url, url_len, TO_CONN(conn)); + tor_free(url); + + if (!strcmp(httpcommand, "POST") || payload) { + smartlist_add_asprintf(headers, "Content-Length: %lu\r\n", + payload ? (unsigned long)payload_len : 0); + } + + { + char *header = smartlist_join_strings(headers, "", 0, NULL); + tor_snprintf(request, sizeof(request), " HTTP/1.0\r\nHost: %s\r\n%s\r\n", + hoststring, header); + tor_free(header); + } + + request_len = strlen(request); + total_request_len += request_len; + connection_buf_add(request, request_len, TO_CONN(conn)); + + if (payload) { + /* then send the payload afterwards too */ + connection_buf_add(payload, payload_len, TO_CONN(conn)); + total_request_len += payload_len; + } + + SMARTLIST_FOREACH(headers, char *, h, tor_free(h)); + smartlist_free(headers); + + log_debug(LD_DIR, + "Sent request to directory server '%s:%d': " + "(purpose: %d, request size: %"TOR_PRIuSZ", " + "payload size: %"TOR_PRIuSZ")", + conn->base_.address, conn->base_.port, + conn->base_.purpose, + (total_request_len), + (payload ? payload_len : 0)); +} + +/** Parse an HTTP request string <b>headers</b> of the form + * \verbatim + * "\%s [http[s]://]\%s HTTP/1..." + * \endverbatim + * If it's well-formed, strdup the second \%s into *<b>url</b>, and + * nul-terminate it. If the url doesn't start with "/tor/", rewrite it + * so it does. Return 0. + * Otherwise, return -1. + */ +STATIC int +parse_http_url(const char *headers, char **url) +{ + char *command = NULL; + if (parse_http_command(headers, &command, url) < 0) { + return -1; + } + if (strcmpstart(*url, "/tor/")) { + char *new_url = NULL; + tor_asprintf(&new_url, "/tor%s%s", + *url[0] == '/' ? "" : "/", + *url); + tor_free(*url); + *url = new_url; + } + tor_free(command); + return 0; +} + +/** Parse an HTTP request line at the start of a headers string. On failure, + * return -1. On success, set *<b>command_out</b> to a copy of the HTTP + * command ("get", "post", etc), set *<b>url_out</b> to a copy of the URL, and + * return 0. */ +int +parse_http_command(const char *headers, char **command_out, char **url_out) +{ + const char *command, *end_of_command; + char *s, *start, *tmp; + + s = (char *)eat_whitespace_no_nl(headers); + if (!*s) return -1; + command = s; + s = (char *)find_whitespace(s); /* get past GET/POST */ + if (!*s) return -1; + end_of_command = s; + s = (char *)eat_whitespace_no_nl(s); + if (!*s) return -1; + start = s; /* this is the URL, assuming it's valid */ + s = (char *)find_whitespace(start); + if (!*s) return -1; + + /* tolerate the http[s] proxy style of putting the hostname in the url */ + if (s-start >= 4 && !strcmpstart(start,"http")) { + tmp = start + 4; + if (*tmp == 's') + tmp++; + if (s-tmp >= 3 && !strcmpstart(tmp,"://")) { + tmp = strchr(tmp+3, '/'); + if (tmp && tmp < s) { + log_debug(LD_DIR,"Skipping over 'http[s]://hostname/' string"); + start = tmp; + } + } + } + + /* Check if the header is well formed (next sequence + * should be HTTP/1.X\r\n). Assumes we're supporting 1.0? */ + { + unsigned minor_ver; + char ch; + char *e = (char *)eat_whitespace_no_nl(s); + if (2 != tor_sscanf(e, "HTTP/1.%u%c", &minor_ver, &ch)) { + return -1; + } + if (ch != '\r') + return -1; + } + + *url_out = tor_memdup_nulterm(start, s-start); + *command_out = tor_memdup_nulterm(command, end_of_command - command); + return 0; +} + +/** Return a copy of the first HTTP header in <b>headers</b> whose key is + * <b>which</b>. The key should be given with a terminating colon and space; + * this function copies everything after, up to but not including the + * following \\r\\n. */ +char * +http_get_header(const char *headers, const char *which) +{ + const char *cp = headers; + while (cp) { + if (!strcasecmpstart(cp, which)) { + char *eos; + cp += strlen(which); + if ((eos = strchr(cp,'\r'))) + return tor_strndup(cp, eos-cp); + else + return tor_strdup(cp); + } + cp = strchr(cp, '\n'); + if (cp) + ++cp; + } + return NULL; +} + +/** If <b>headers</b> indicates that a proxy was involved, then rewrite + * <b>conn</b>-\>address to describe our best guess of the address that + * originated this HTTP request. */ +static void +http_set_address_origin(const char *headers, connection_t *conn) +{ + char *fwd; + + fwd = http_get_header(headers, "Forwarded-For: "); + if (!fwd) + fwd = http_get_header(headers, "X-Forwarded-For: "); + if (fwd) { + tor_addr_t toraddr; + if (tor_addr_parse(&toraddr,fwd) == -1 || + tor_addr_is_internal(&toraddr,0)) { + log_debug(LD_DIR, "Ignoring local/internal IP %s", escaped(fwd)); + tor_free(fwd); + return; + } + + tor_free(conn->address); + conn->address = tor_strdup(fwd); + tor_free(fwd); + } +} + +/** Parse an HTTP response string <b>headers</b> of the form + * \verbatim + * "HTTP/1.\%d \%d\%s\r\n...". + * \endverbatim + * + * If it's well-formed, assign the status code to *<b>code</b> and + * return 0. Otherwise, return -1. + * + * On success: If <b>date</b> is provided, set *date to the Date + * header in the http headers, or 0 if no such header is found. If + * <b>compression</b> is provided, set *<b>compression</b> to the + * compression method given in the Content-Encoding header, or 0 if no + * such header is found, or -1 if the value of the header is not + * recognized. If <b>reason</b> is provided, strdup the reason string + * into it. + */ +int +parse_http_response(const char *headers, int *code, time_t *date, + compress_method_t *compression, char **reason) +{ + unsigned n1, n2; + char datestr[RFC1123_TIME_LEN+1]; + smartlist_t *parsed_headers; + tor_assert(headers); + tor_assert(code); + + while (TOR_ISSPACE(*headers)) headers++; /* tolerate leading whitespace */ + + if (tor_sscanf(headers, "HTTP/1.%u %u", &n1, &n2) < 2 || + (n1 != 0 && n1 != 1) || + (n2 < 100 || n2 >= 600)) { + log_warn(LD_HTTP,"Failed to parse header %s",escaped(headers)); + return -1; + } + *code = n2; + + parsed_headers = smartlist_new(); + smartlist_split_string(parsed_headers, headers, "\n", + SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, -1); + if (reason) { + smartlist_t *status_line_elements = smartlist_new(); + tor_assert(smartlist_len(parsed_headers)); + smartlist_split_string(status_line_elements, + smartlist_get(parsed_headers, 0), + " ", SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 3); + tor_assert(smartlist_len(status_line_elements) <= 3); + if (smartlist_len(status_line_elements) == 3) { + *reason = smartlist_get(status_line_elements, 2); + smartlist_set(status_line_elements, 2, NULL); /* Prevent free */ + } + SMARTLIST_FOREACH(status_line_elements, char *, cp, tor_free(cp)); + smartlist_free(status_line_elements); + } + if (date) { + *date = 0; + SMARTLIST_FOREACH(parsed_headers, const char *, s, + if (!strcmpstart(s, "Date: ")) { + strlcpy(datestr, s+6, sizeof(datestr)); + /* This will do nothing on failure, so we don't need to check + the result. We shouldn't warn, since there are many other valid + date formats besides the one we use. */ + parse_rfc1123_time(datestr, date); + break; + }); + } + if (compression) { + const char *enc = NULL; + SMARTLIST_FOREACH(parsed_headers, const char *, s, + if (!strcmpstart(s, "Content-Encoding: ")) { + enc = s+18; break; + }); + + if (enc == NULL) + *compression = NO_METHOD; + else { + *compression = compression_method_get_by_name(enc); + + if (*compression == UNKNOWN_METHOD) + log_info(LD_HTTP, "Unrecognized content encoding: %s. Trying to deal.", + escaped(enc)); + } + } + SMARTLIST_FOREACH(parsed_headers, char *, s, tor_free(s)); + smartlist_free(parsed_headers); + + return 0; +} + +/** Return true iff <b>body</b> doesn't start with a plausible router or + * network-status or microdescriptor opening. This is a sign of possible + * compression. */ +static int +body_is_plausible(const char *body, size_t len, int purpose) +{ + int i; + if (len == 0) + return 1; /* empty bodies don't need decompression */ + if (len < 32) + return 0; + if (purpose == DIR_PURPOSE_FETCH_MICRODESC) { + return (!strcmpstart(body,"onion-key")); + } + + if (!strcmpstart(body,"router") || + !strcmpstart(body,"network-status")) + return 1; + for (i=0;i<32;++i) { + if (!TOR_ISPRINT(body[i]) && !TOR_ISSPACE(body[i])) + return 0; + } + + return 1; +} + +/** Called when we've just fetched a bunch of router descriptors in + * <b>body</b>. The list <b>which</b>, if present, holds digests for + * descriptors we requested: descriptor digests if <b>descriptor_digests</b> + * is true, or identity digests otherwise. Parse the descriptors, validate + * them, and annotate them as having purpose <b>purpose</b> and as having been + * downloaded from <b>source</b>. + * + * Return the number of routers actually added. */ +static int +load_downloaded_routers(const char *body, smartlist_t *which, + int descriptor_digests, + int router_purpose, + const char *source) +{ + char buf[256]; + char time_buf[ISO_TIME_LEN+1]; + int added = 0; + int general = router_purpose == ROUTER_PURPOSE_GENERAL; + format_iso_time(time_buf, time(NULL)); + tor_assert(source); + + if (tor_snprintf(buf, sizeof(buf), + "@downloaded-at %s\n" + "@source %s\n" + "%s%s%s", time_buf, escaped(source), + !general ? "@purpose " : "", + !general ? router_purpose_to_string(router_purpose) : "", + !general ? "\n" : "")<0) + return added; + + added = router_load_routers_from_string(body, NULL, SAVED_NOWHERE, which, + descriptor_digests, buf); + if (added && general) + control_event_bootstrap(BOOTSTRAP_STATUS_LOADING_DESCRIPTORS, + count_loading_descriptors_progress()); + return added; +} + +static int handle_response_fetch_certificate(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_fetch_status_vote(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_fetch_detached_signatures(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_fetch_desc(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_upload_dir(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_upload_vote(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_upload_signatures(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_fetch_renddesc_v2(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_upload_renddesc_v2(dir_connection_t *, + const response_handler_args_t *); +static int handle_response_upload_hsdesc(dir_connection_t *, + const response_handler_args_t *); + +static int +dir_client_decompress_response_body(char **bodyp, size_t *bodylenp, + dir_connection_t *conn, + compress_method_t compression, + int anonymized_connection) +{ + int rv = 0; + const char *body = *bodyp; + size_t body_len = *bodylenp; + int allow_partial = (conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC || + conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO || + conn->base_.purpose == DIR_PURPOSE_FETCH_MICRODESC); + + int plausible = body_is_plausible(body, body_len, conn->base_.purpose); + + if (plausible && compression == NO_METHOD) { + return 0; + } + + int severity = LOG_DEBUG; + char *new_body = NULL; + size_t new_len = 0; + const char *description1, *description2; + int want_to_try_both = 0; + int tried_both = 0; + compress_method_t guessed = detect_compression_method(body, body_len); + + description1 = compression_method_get_human_name(compression); + + if (BUG(description1 == NULL)) + description1 = compression_method_get_human_name(UNKNOWN_METHOD); + + if (guessed == UNKNOWN_METHOD && !plausible) + description2 = "confusing binary junk"; + else + description2 = compression_method_get_human_name(guessed); + + /* Tell the user if we don't believe what we're told about compression.*/ + want_to_try_both = (compression == UNKNOWN_METHOD || + guessed != compression); + if (want_to_try_both) { + severity = LOG_PROTOCOL_WARN; + } + + tor_log(severity, LD_HTTP, + "HTTP body from server '%s:%d' was labeled as %s, " + "%s it seems to be %s.%s", + conn->base_.address, conn->base_.port, description1, + guessed != compression?"but":"and", + description2, + (compression>0 && guessed>0 && want_to_try_both)? + " Trying both.":""); + + /* Try declared compression first if we can. + * tor_compress_supports_method() also returns true for NO_METHOD. + * Ensure that the server is not sending us data compressed using a + * compression method that is not allowed for anonymous connections. */ + if (anonymized_connection && + ! allowed_anonymous_connection_compression_method(compression)) { + warn_disallowed_anonymous_compression_method(compression); + rv = -1; + goto done; + } + + if (tor_compress_supports_method(compression)) { + tor_uncompress(&new_body, &new_len, body, body_len, compression, + !allow_partial, LOG_PROTOCOL_WARN); + if (new_body) { + /* We succeeded with the declared compression method. Great! */ + rv = 0; + goto done; + } + } + + /* Okay, if that didn't work, and we think that it was compressed + * differently, try that. */ + if (anonymized_connection && + ! allowed_anonymous_connection_compression_method(guessed)) { + warn_disallowed_anonymous_compression_method(guessed); + rv = -1; + goto done; + } + + if (tor_compress_supports_method(guessed) && + compression != guessed) { + tor_uncompress(&new_body, &new_len, body, body_len, guessed, + !allow_partial, LOG_INFO); + tried_both = 1; + } + /* If we're pretty sure that we have a compressed directory, and + * we didn't manage to uncompress it, then warn and bail. */ + if (!plausible && !new_body) { + log_fn(LOG_PROTOCOL_WARN, LD_HTTP, + "Unable to decompress HTTP body (tried %s%s%s, server '%s:%d').", + description1, + tried_both?" and ":"", + tried_both?description2:"", + conn->base_.address, conn->base_.port); + rv = -1; + goto done; + } + + done: + if (new_body) { + if (rv == 0) { + /* success! */ + tor_free(*bodyp); + *bodyp = new_body; + *bodylenp = new_len; + } else { + tor_free(new_body); + } + } + + return rv; +} + +/** We are a client, and we've finished reading the server's + * response. Parse it and act appropriately. + * + * If we're still happy with using this directory server in the future, return + * 0. Otherwise return -1; and the caller should consider trying the request + * again. + * + * The caller will take care of marking the connection for close. + */ +static int +connection_dir_client_reached_eof(dir_connection_t *conn) +{ + char *body = NULL; + char *headers = NULL; + char *reason = NULL; + size_t body_len = 0; + int status_code; + time_t date_header = 0; + long apparent_skew; + compress_method_t compression; + int skewed = 0; + int rv; + int allow_partial = (conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC || + conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO || + conn->base_.purpose == DIR_PURPOSE_FETCH_MICRODESC); + size_t received_bytes; + const int anonymized_connection = + purpose_needs_anonymity(conn->base_.purpose, + conn->router_purpose, + conn->requested_resource); + + received_bytes = connection_get_inbuf_len(TO_CONN(conn)); + + switch (connection_fetch_from_buf_http(TO_CONN(conn), + &headers, MAX_HEADERS_SIZE, + &body, &body_len, MAX_DIR_DL_SIZE, + allow_partial)) { + case -1: /* overflow */ + log_warn(LD_PROTOCOL, + "'fetch' response too large (server '%s:%d'). Closing.", + conn->base_.address, conn->base_.port); + return -1; + case 0: + log_info(LD_HTTP, + "'fetch' response not all here, but we're at eof. Closing."); + return -1; + /* case 1, fall through */ + } + + if (parse_http_response(headers, &status_code, &date_header, + &compression, &reason) < 0) { + log_warn(LD_HTTP,"Unparseable headers (server '%s:%d'). Closing.", + conn->base_.address, conn->base_.port); + + rv = -1; + goto done; + } + if (!reason) reason = tor_strdup("[no reason given]"); + + tor_log(LOG_DEBUG, LD_DIR, + "Received response from directory server '%s:%d': %d %s " + "(purpose: %d, response size: %"TOR_PRIuSZ +#ifdef MEASUREMENTS_21206 + ", data cells received: %d, data cells sent: %d" +#endif + ", compression: %d)", + conn->base_.address, conn->base_.port, status_code, + escaped(reason), conn->base_.purpose, + (received_bytes), +#ifdef MEASUREMENTS_21206 + conn->data_cells_received, conn->data_cells_sent, +#endif + compression); + + if (conn->guard_state) { + /* we count the connection as successful once we can read from it. We do + * not, however, delay use of the circuit here, since it's just for a + * one-hop directory request. */ + /* XXXXprop271 note that this will not do the right thing for other + * waiting circuits that would be triggered by this circuit becoming + * complete/usable. But that's ok, I think. + */ + entry_guard_succeeded(&conn->guard_state); + circuit_guard_state_free(conn->guard_state); + conn->guard_state = NULL; + } + + /* now check if it's got any hints for us about our IP address. */ + if (conn->dirconn_direct) { + char *guess = http_get_header(headers, X_ADDRESS_HEADER); + if (guess) { + router_new_address_suggestion(guess, conn); + tor_free(guess); + } + } + + if (date_header > 0) { + /* The date header was written very soon after we sent our request, + * so compute the skew as the difference between sending the request + * and the date header. (We used to check now-date_header, but that's + * inaccurate if we spend a lot of time downloading.) + */ + apparent_skew = conn->base_.timestamp_last_write_allowed - date_header; + if (labs(apparent_skew)>ALLOW_DIRECTORY_TIME_SKEW) { + int trusted = router_digest_is_trusted_dir(conn->identity_digest); + clock_skew_warning(TO_CONN(conn), apparent_skew, trusted, LD_HTTP, + "directory", "DIRSERV"); + skewed = 1; /* don't check the recommended-versions line */ + } else { + log_debug(LD_HTTP, "Time on received directory is within tolerance; " + "we are %ld seconds skewed. (That's okay.)", apparent_skew); + } + } + (void) skewed; /* skewed isn't used yet. */ + + if (status_code == 503) { + routerstatus_t *rs; + dir_server_t *ds; + const char *id_digest = conn->identity_digest; + log_info(LD_DIR,"Received http status code %d (%s) from server " + "'%s:%d'. I'll try again soon.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + time_t now = approx_time(); + if ((rs = router_get_mutable_consensus_status_by_id(id_digest))) + rs->last_dir_503_at = now; + if ((ds = router_get_fallback_dirserver_by_digest(id_digest))) + ds->fake_status.last_dir_503_at = now; + + rv = -1; + goto done; + } + + if (dir_client_decompress_response_body(&body, &body_len, + conn, compression, anonymized_connection) < 0) { + rv = -1; + goto done; + } + + response_handler_args_t args; + memset(&args, 0, sizeof(args)); + args.status_code = status_code; + args.reason = reason; + args.body = body; + args.body_len = body_len; + args.headers = headers; + + switch (conn->base_.purpose) { + case DIR_PURPOSE_FETCH_CONSENSUS: + rv = handle_response_fetch_consensus(conn, &args); + break; + case DIR_PURPOSE_FETCH_CERTIFICATE: + rv = handle_response_fetch_certificate(conn, &args); + break; + case DIR_PURPOSE_FETCH_STATUS_VOTE: + rv = handle_response_fetch_status_vote(conn, &args); + break; + case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: + rv = handle_response_fetch_detached_signatures(conn, &args); + break; + case DIR_PURPOSE_FETCH_SERVERDESC: + case DIR_PURPOSE_FETCH_EXTRAINFO: + rv = handle_response_fetch_desc(conn, &args); + break; + case DIR_PURPOSE_FETCH_MICRODESC: + rv = handle_response_fetch_microdesc(conn, &args); + break; + case DIR_PURPOSE_FETCH_RENDDESC_V2: + rv = handle_response_fetch_renddesc_v2(conn, &args); + break; + case DIR_PURPOSE_UPLOAD_DIR: + rv = handle_response_upload_dir(conn, &args); + break; + case DIR_PURPOSE_UPLOAD_SIGNATURES: + rv = handle_response_upload_signatures(conn, &args); + break; + case DIR_PURPOSE_UPLOAD_VOTE: + rv = handle_response_upload_vote(conn, &args); + break; + case DIR_PURPOSE_UPLOAD_RENDDESC_V2: + rv = handle_response_upload_renddesc_v2(conn, &args); + break; + case DIR_PURPOSE_UPLOAD_HSDESC: + rv = handle_response_upload_hsdesc(conn, &args); + break; + case DIR_PURPOSE_FETCH_HSDESC: + rv = handle_response_fetch_hsdesc_v3(conn, &args); + break; + default: + tor_assert_nonfatal_unreached(); + rv = -1; + break; + } + + done: + tor_free(body); + tor_free(headers); + tor_free(reason); + return rv; +} + +/** + * Handler function: processes a response to a request for a networkstatus + * consensus document by checking the consensus, storing it, and marking + * router requests as reachable. + **/ +STATIC int +handle_response_fetch_consensus(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_CONSENSUS); + const int status_code = args->status_code; + const char *body = args->body; + const size_t body_len = args->body_len; + const char *reason = args->reason; + const time_t now = approx_time(); + + const char *consensus; + char *new_consensus = NULL; + const char *sourcename; + + int r; + const char *flavname = conn->requested_resource; + if (status_code != 200) { + int severity = (status_code == 304) ? LOG_INFO : LOG_WARN; + tor_log(severity, LD_DIR, + "Received http status code %d (%s) from server " + "'%s:%d' while fetching consensus directory.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + networkstatus_consensus_download_failed(status_code, flavname); + return -1; + } + + if (looks_like_a_consensus_diff(body, body_len)) { + /* First find our previous consensus. Maybe it's in ram, maybe not. */ + cached_dir_t *cd = dirserv_get_consensus(flavname); + const char *consensus_body; + char *owned_consensus = NULL; + if (cd) { + consensus_body = cd->dir; + } else { + owned_consensus = networkstatus_read_cached_consensus(flavname); + consensus_body = owned_consensus; + } + if (!consensus_body) { + log_warn(LD_DIR, "Received a consensus diff, but we can't find " + "any %s-flavored consensus in our current cache.",flavname); + networkstatus_consensus_download_failed(0, flavname); + // XXXX if this happens too much, see below + return -1; + } + + new_consensus = consensus_diff_apply(consensus_body, body); + tor_free(owned_consensus); + if (new_consensus == NULL) { + log_warn(LD_DIR, "Could not apply consensus diff received from server " + "'%s:%d'", conn->base_.address, conn->base_.port); + // XXXX If this happens too many times, we should maybe not use + // XXXX this directory for diffs any more? + networkstatus_consensus_download_failed(0, flavname); + return -1; + } + log_info(LD_DIR, "Applied consensus diff (size %d) from server " + "'%s:%d', resulting in a new consensus document (size %d).", + (int)body_len, conn->base_.address, conn->base_.port, + (int)strlen(new_consensus)); + consensus = new_consensus; + sourcename = "generated based on a diff"; + } else { + log_info(LD_DIR,"Received consensus directory (body size %d) from server " + "'%s:%d'", (int)body_len, conn->base_.address, conn->base_.port); + consensus = body; + sourcename = "downloaded"; + } + + if ((r=networkstatus_set_current_consensus(consensus, flavname, 0, + conn->identity_digest))<0) { + log_fn(r<-1?LOG_WARN:LOG_INFO, LD_DIR, + "Unable to load %s consensus directory %s from " + "server '%s:%d'. I'll try again soon.", + flavname, sourcename, conn->base_.address, conn->base_.port); + networkstatus_consensus_download_failed(0, flavname); + tor_free(new_consensus); + return -1; + } + + /* If we launched other fetches for this consensus, cancel them. */ + connection_dir_close_consensus_fetches(conn, flavname); + + /* update the list of routers and directory guards */ + routers_update_all_from_networkstatus(now, 3); + update_microdescs_from_networkstatus(now); + directory_info_has_arrived(now, 0, 0); + + if (authdir_mode_v3(get_options())) { + sr_act_post_consensus( + networkstatus_get_latest_consensus_by_flavor(FLAV_NS)); + } + log_info(LD_DIR, "Successfully loaded consensus."); + + tor_free(new_consensus); + return 0; +} + +/** + * Handler function: processes a response to a request for one or more + * authority certificates + **/ +static int +handle_response_fetch_certificate(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_CERTIFICATE); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + if (status_code != 200) { + log_warn(LD_DIR, + "Received http status code %d (%s) from server " + "'%s:%d' while fetching \"/tor/keys/%s\".", + status_code, escaped(reason), conn->base_.address, + conn->base_.port, conn->requested_resource); + connection_dir_download_cert_failed(conn, status_code); + return -1; + } + log_info(LD_DIR,"Received authority certificates (body size %d) from " + "server '%s:%d'", + (int)body_len, conn->base_.address, conn->base_.port); + + /* + * Tell trusted_dirs_load_certs_from_string() whether it was by fp + * or fp-sk pair. + */ + int src_code = -1; + if (!strcmpstart(conn->requested_resource, "fp/")) { + src_code = TRUSTED_DIRS_CERTS_SRC_DL_BY_ID_DIGEST; + } else if (!strcmpstart(conn->requested_resource, "fp-sk/")) { + src_code = TRUSTED_DIRS_CERTS_SRC_DL_BY_ID_SK_DIGEST; + } + + if (src_code != -1) { + if (trusted_dirs_load_certs_from_string(body, src_code, 1, + conn->identity_digest)<0) { + log_warn(LD_DIR, "Unable to parse fetched certificates"); + /* if we fetched more than one and only some failed, the successful + * ones got flushed to disk so it's safe to call this on them */ + connection_dir_download_cert_failed(conn, status_code); + } else { + time_t now = approx_time(); + directory_info_has_arrived(now, 0, 0); + log_info(LD_DIR, "Successfully loaded certificates from fetch."); + } + } else { + log_warn(LD_DIR, + "Couldn't figure out what to do with fetched certificates for " + "unknown resource %s", + conn->requested_resource); + connection_dir_download_cert_failed(conn, status_code); + } + return 0; +} + +/** + * Handler function: processes a response to a request for an authority's + * current networkstatus vote. + **/ +static int +handle_response_fetch_status_vote(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_STATUS_VOTE); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + const char *msg; + int st; + log_info(LD_DIR,"Got votes (body size %d) from server %s:%d", + (int)body_len, conn->base_.address, conn->base_.port); + if (status_code != 200) { + log_warn(LD_DIR, + "Received http status code %d (%s) from server " + "'%s:%d' while fetching \"/tor/status-vote/next/%s.z\".", + status_code, escaped(reason), conn->base_.address, + conn->base_.port, conn->requested_resource); + return -1; + } + dirvote_add_vote(body, &msg, &st); + if (st > 299) { + log_warn(LD_DIR, "Error adding retrieved vote: %s", msg); + } else { + log_info(LD_DIR, "Added vote(s) successfully [msg: %s]", msg); + } + + return 0; +} + +/** + * Handler function: processes a response to a request for the signatures + * that an authority knows about on a given consensus. + **/ +static int +handle_response_fetch_detached_signatures(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_DETACHED_SIGNATURES); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + const char *msg = NULL; + log_info(LD_DIR,"Got detached signatures (body size %d) from server %s:%d", + (int)body_len, conn->base_.address, conn->base_.port); + if (status_code != 200) { + log_warn(LD_DIR, + "Received http status code %d (%s) from server '%s:%d' while fetching " + "\"/tor/status-vote/next/consensus-signatures.z\".", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + return -1; + } + if (dirvote_add_signatures(body, conn->base_.address, &msg)<0) { + log_warn(LD_DIR, "Problem adding detached signatures from %s:%d: %s", + conn->base_.address, conn->base_.port, msg?msg:"???"); + } + + return 0; +} + +/** + * Handler function: processes a response to a request for a group of server + * descriptors or an extrainfo documents. + **/ +static int +handle_response_fetch_desc(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_SERVERDESC || + conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + int was_ei = conn->base_.purpose == DIR_PURPOSE_FETCH_EXTRAINFO; + smartlist_t *which = NULL; + int n_asked_for = 0; + int descriptor_digests = conn->requested_resource && + !strcmpstart(conn->requested_resource,"d/"); + log_info(LD_DIR,"Received %s (body size %d) from server '%s:%d'", + was_ei ? "extra server info" : "server info", + (int)body_len, conn->base_.address, conn->base_.port); + if (conn->requested_resource && + (!strcmpstart(conn->requested_resource,"d/") || + !strcmpstart(conn->requested_resource,"fp/"))) { + which = smartlist_new(); + dir_split_resource_into_fingerprints(conn->requested_resource + + (descriptor_digests ? 2 : 3), + which, NULL, 0); + n_asked_for = smartlist_len(which); + } + if (status_code != 200) { + int dir_okay = status_code == 404 || + (status_code == 400 && !strcmp(reason, "Servers unavailable.")); + /* 404 means that it didn't have them; no big deal. + * Older (pre-0.1.1.8) servers said 400 Servers unavailable instead. */ + log_fn(dir_okay ? LOG_INFO : LOG_WARN, LD_DIR, + "Received http status code %d (%s) from server '%s:%d' " + "while fetching \"/tor/server/%s\". I'll try again soon.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port, conn->requested_resource); + if (!which) { + connection_dir_download_routerdesc_failed(conn); + } else { + dir_routerdesc_download_failed(which, status_code, + conn->router_purpose, + was_ei, descriptor_digests); + SMARTLIST_FOREACH(which, char *, cp, tor_free(cp)); + smartlist_free(which); + } + return dir_okay ? 0 : -1; + } + /* Learn the routers, assuming we requested by fingerprint or "all" + * or "authority". + * + * We use "authority" to fetch our own descriptor for + * testing, and to fetch bridge descriptors for bootstrapping. Ignore + * the output of "authority" requests unless we are using bridges, + * since otherwise they'll be the response from reachability tests, + * and we don't really want to add that to our routerlist. */ + if (which || (conn->requested_resource && + (!strcmpstart(conn->requested_resource, "all") || + (!strcmpstart(conn->requested_resource, "authority") && + get_options()->UseBridges)))) { + /* as we learn from them, we remove them from 'which' */ + if (was_ei) { + router_load_extrainfo_from_string(body, NULL, SAVED_NOWHERE, which, + descriptor_digests); + } else { + //router_load_routers_from_string(body, NULL, SAVED_NOWHERE, which, + // descriptor_digests, conn->router_purpose); + if (load_downloaded_routers(body, which, descriptor_digests, + conn->router_purpose, + conn->base_.address)) { + time_t now = approx_time(); + directory_info_has_arrived(now, 0, 1); + } + } + } + if (which) { /* mark remaining ones as failed */ + log_info(LD_DIR, "Received %d/%d %s requested from %s:%d", + n_asked_for-smartlist_len(which), n_asked_for, + was_ei ? "extra-info documents" : "router descriptors", + conn->base_.address, (int)conn->base_.port); + if (smartlist_len(which)) { + dir_routerdesc_download_failed(which, status_code, + conn->router_purpose, + was_ei, descriptor_digests); + } + SMARTLIST_FOREACH(which, char *, cp, tor_free(cp)); + smartlist_free(which); + } + if (directory_conn_is_self_reachability_test(conn)) + router_dirport_found_reachable(); + + return 0; +} + +/** + * Handler function: processes a response to a request for a group of + * microdescriptors + **/ +STATIC int +handle_response_fetch_microdesc(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_MICRODESC); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + smartlist_t *which = NULL; + log_info(LD_DIR,"Received answer to microdescriptor request (status %d, " + "body size %d) from server '%s:%d'", + status_code, (int)body_len, conn->base_.address, + conn->base_.port); + tor_assert(conn->requested_resource && + !strcmpstart(conn->requested_resource, "d/")); + tor_assert_nonfatal(!tor_mem_is_zero(conn->identity_digest, DIGEST_LEN)); + which = smartlist_new(); + dir_split_resource_into_fingerprints(conn->requested_resource+2, + which, NULL, + DSR_DIGEST256|DSR_BASE64); + if (status_code != 200) { + log_info(LD_DIR, "Received status code %d (%s) from server " + "'%s:%d' while fetching \"/tor/micro/%s\". I'll try again " + "soon.", + status_code, escaped(reason), conn->base_.address, + (int)conn->base_.port, conn->requested_resource); + dir_microdesc_download_failed(which, status_code, conn->identity_digest); + SMARTLIST_FOREACH(which, char *, cp, tor_free(cp)); + smartlist_free(which); + return 0; + } else { + smartlist_t *mds; + time_t now = approx_time(); + mds = microdescs_add_to_cache(get_microdesc_cache(), + body, body+body_len, SAVED_NOWHERE, 0, + now, which); + if (smartlist_len(which)) { + /* Mark remaining ones as failed. */ + dir_microdesc_download_failed(which, status_code, conn->identity_digest); + } + if (mds && smartlist_len(mds)) { + control_event_bootstrap(BOOTSTRAP_STATUS_LOADING_DESCRIPTORS, + count_loading_descriptors_progress()); + directory_info_has_arrived(now, 0, 1); + } + SMARTLIST_FOREACH(which, char *, cp, tor_free(cp)); + smartlist_free(which); + smartlist_free(mds); + } + + return 0; +} + +/** + * Handler function: processes a response to a POST request to upload our + * router descriptor. + **/ +static int +handle_response_upload_dir(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_UPLOAD_DIR); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *headers = args->headers; + + switch (status_code) { + case 200: { + dir_server_t *ds = + router_get_trusteddirserver_by_digest(conn->identity_digest); + char *rejected_hdr = http_get_header(headers, + "X-Descriptor-Not-New: "); + if (rejected_hdr) { + if (!strcmp(rejected_hdr, "Yes")) { + log_info(LD_GENERAL, + "Authority '%s' declined our descriptor (not new)", + ds->nickname); + /* XXXX use this information; be sure to upload next one + * sooner. -NM */ + /* XXXX++ On further thought, the task above implies that we're + * basing our regenerate-descriptor time on when we uploaded the + * last descriptor, not on the published time of the last + * descriptor. If those are different, that's a bad thing to + * do. -NM */ + } + tor_free(rejected_hdr); + } + log_info(LD_GENERAL,"eof (status 200) after uploading server " + "descriptor: finished."); + control_event_server_status( + LOG_NOTICE, "ACCEPTED_SERVER_DESCRIPTOR DIRAUTH=%s:%d", + conn->base_.address, conn->base_.port); + + ds->has_accepted_serverdesc = 1; + if (directories_have_accepted_server_descriptor()) + control_event_server_status(LOG_NOTICE, "GOOD_SERVER_DESCRIPTOR"); + } + break; + case 400: + log_warn(LD_GENERAL,"http status 400 (%s) response from " + "dirserver '%s:%d'. Please correct.", + escaped(reason), conn->base_.address, conn->base_.port); + control_event_server_status(LOG_WARN, + "BAD_SERVER_DESCRIPTOR DIRAUTH=%s:%d REASON=\"%s\"", + conn->base_.address, conn->base_.port, escaped(reason)); + break; + default: + log_warn(LD_GENERAL, + "HTTP status %d (%s) was unexpected while uploading " + "descriptor to server '%s:%d'. Possibly the server is " + "misconfigured?", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + break; + } + /* return 0 in all cases, since we don't want to mark any + * dirservers down just because they don't like us. */ + + return 0; +} + +/** + * Handler function: processes a response to POST request to upload our + * own networkstatus vote. + **/ +static int +handle_response_upload_vote(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_UPLOAD_VOTE); + const int status_code = args->status_code; + const char *reason = args->reason; + + switch (status_code) { + case 200: { + log_notice(LD_DIR,"Uploaded a vote to dirserver %s:%d", + conn->base_.address, conn->base_.port); + } + break; + case 400: + log_warn(LD_DIR,"http status 400 (%s) response after uploading " + "vote to dirserver '%s:%d'. Please correct.", + escaped(reason), conn->base_.address, conn->base_.port); + break; + default: + log_warn(LD_GENERAL, + "HTTP status %d (%s) was unexpected while uploading " + "vote to server '%s:%d'.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + break; + } + /* return 0 in all cases, since we don't want to mark any + * dirservers down just because they don't like us. */ + return 0; +} + +/** + * Handler function: processes a response to POST request to upload our + * view of the signatures on the current consensus. + **/ +static int +handle_response_upload_signatures(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_UPLOAD_SIGNATURES); + const int status_code = args->status_code; + const char *reason = args->reason; + + switch (status_code) { + case 200: { + log_notice(LD_DIR,"Uploaded signature(s) to dirserver %s:%d", + conn->base_.address, conn->base_.port); + } + break; + case 400: + log_warn(LD_DIR,"http status 400 (%s) response after uploading " + "signatures to dirserver '%s:%d'. Please correct.", + escaped(reason), conn->base_.address, conn->base_.port); + break; + default: + log_warn(LD_GENERAL, + "HTTP status %d (%s) was unexpected while uploading " + "signatures to server '%s:%d'.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + break; + } + /* return 0 in all cases, since we don't want to mark any + * dirservers down just because they don't like us. */ + + return 0; +} + +/** + * Handler function: processes a response to a request for a v3 hidden service + * descriptor. + **/ +STATIC int +handle_response_fetch_hsdesc_v3(dir_connection_t *conn, + const response_handler_args_t *args) +{ + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + + tor_assert(conn->hs_ident); + + log_info(LD_REND,"Received v3 hsdesc (body size %d, status %d (%s))", + (int)body_len, status_code, escaped(reason)); + + switch (status_code) { + case 200: + /* We got something: Try storing it in the cache. */ + if (hs_cache_store_as_client(body, &conn->hs_ident->identity_pk) < 0) { + log_warn(LD_REND, "Failed to store hidden service descriptor"); + /* Fire control port FAILED event. */ + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "BAD_DESC"); + hs_control_desc_event_content(conn->hs_ident, conn->identity_digest, + NULL); + } else { + log_info(LD_REND, "Stored hidden service descriptor successfully."); + TO_CONN(conn)->purpose = DIR_PURPOSE_HAS_FETCHED_HSDESC; + hs_client_desc_has_arrived(conn->hs_ident); + /* Fire control port RECEIVED event. */ + hs_control_desc_event_received(conn->hs_ident, conn->identity_digest); + hs_control_desc_event_content(conn->hs_ident, conn->identity_digest, + body); + } + break; + case 404: + /* Not there. We'll retry when connection_about_to_close_connection() + * tries to clean this conn up. */ + log_info(LD_REND, "Fetching hidden service v3 descriptor not found: " + "Retrying at another directory."); + /* Fire control port FAILED event. */ + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "NOT_FOUND"); + hs_control_desc_event_content(conn->hs_ident, conn->identity_digest, + NULL); + break; + case 400: + log_warn(LD_REND, "Fetching v3 hidden service descriptor failed: " + "http status 400 (%s). Dirserver didn't like our " + "query? Retrying at another directory.", + escaped(reason)); + /* Fire control port FAILED event. */ + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "QUERY_REJECTED"); + hs_control_desc_event_content(conn->hs_ident, conn->identity_digest, + NULL); + break; + default: + log_warn(LD_REND, "Fetching v3 hidden service descriptor failed: " + "http status %d (%s) response unexpected from HSDir server " + "'%s:%d'. Retrying at another directory.", + status_code, escaped(reason), TO_CONN(conn)->address, + TO_CONN(conn)->port); + /* Fire control port FAILED event. */ + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "UNEXPECTED"); + hs_control_desc_event_content(conn->hs_ident, conn->identity_digest, + NULL); + break; + } + + return 0; +} + +/** + * Handler function: processes a response to a request for a v2 hidden service + * descriptor. + **/ +static int +handle_response_fetch_renddesc_v2(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_FETCH_RENDDESC_V2); + const int status_code = args->status_code; + const char *reason = args->reason; + const char *body = args->body; + const size_t body_len = args->body_len; + +#define SEND_HS_DESC_FAILED_EVENT(reason) \ + (control_event_hsv2_descriptor_failed(conn->rend_data, \ + conn->identity_digest, \ + reason)) +#define SEND_HS_DESC_FAILED_CONTENT() \ + (control_event_hs_descriptor_content( \ + rend_data_get_address(conn->rend_data), \ + conn->requested_resource, \ + conn->identity_digest, \ + NULL)) + + tor_assert(conn->rend_data); + log_info(LD_REND,"Received rendezvous descriptor (body size %d, status %d " + "(%s))", + (int)body_len, status_code, escaped(reason)); + switch (status_code) { + case 200: + { + rend_cache_entry_t *entry = NULL; + + if (rend_cache_store_v2_desc_as_client(body, + conn->requested_resource, + conn->rend_data, &entry) < 0) { + log_warn(LD_REND,"Fetching v2 rendezvous descriptor failed. " + "Retrying at another directory."); + /* We'll retry when connection_about_to_close_connection() + * cleans this dir conn up. */ + SEND_HS_DESC_FAILED_EVENT("BAD_DESC"); + SEND_HS_DESC_FAILED_CONTENT(); + } else { + char service_id[REND_SERVICE_ID_LEN_BASE32 + 1]; + /* Should never be NULL here if we found the descriptor. */ + tor_assert(entry); + rend_get_service_id(entry->parsed->pk, service_id); + + /* success. notify pending connections about this. */ + log_info(LD_REND, "Successfully fetched v2 rendezvous " + "descriptor."); + control_event_hsv2_descriptor_received(service_id, + conn->rend_data, + conn->identity_digest); + control_event_hs_descriptor_content(service_id, + conn->requested_resource, + conn->identity_digest, + body); + conn->base_.purpose = DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2; + rend_client_desc_trynow(service_id); + memwipe(service_id, 0, sizeof(service_id)); + } + break; + } + case 404: + /* Not there. We'll retry when + * connection_about_to_close_connection() cleans this conn up. */ + log_info(LD_REND,"Fetching v2 rendezvous descriptor failed: " + "Retrying at another directory."); + SEND_HS_DESC_FAILED_EVENT("NOT_FOUND"); + SEND_HS_DESC_FAILED_CONTENT(); + break; + case 400: + log_warn(LD_REND, "Fetching v2 rendezvous descriptor failed: " + "http status 400 (%s). Dirserver didn't like our " + "v2 rendezvous query? Retrying at another directory.", + escaped(reason)); + SEND_HS_DESC_FAILED_EVENT("QUERY_REJECTED"); + SEND_HS_DESC_FAILED_CONTENT(); + break; + default: + log_warn(LD_REND, "Fetching v2 rendezvous descriptor failed: " + "http status %d (%s) response unexpected while " + "fetching v2 hidden service descriptor (server '%s:%d'). " + "Retrying at another directory.", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + SEND_HS_DESC_FAILED_EVENT("UNEXPECTED"); + SEND_HS_DESC_FAILED_CONTENT(); + break; + } + + return 0; +} + +/** + * Handler function: processes a response to a POST request to upload a v2 + * hidden service descriptor. + **/ +static int +handle_response_upload_renddesc_v2(dir_connection_t *conn, + const response_handler_args_t *args) +{ + tor_assert(conn->base_.purpose == DIR_PURPOSE_UPLOAD_RENDDESC_V2); + const int status_code = args->status_code; + const char *reason = args->reason; + +#define SEND_HS_DESC_UPLOAD_FAILED_EVENT(reason) \ + (control_event_hs_descriptor_upload_failed( \ + conn->identity_digest, \ + rend_data_get_address(conn->rend_data), \ + reason)) + + log_info(LD_REND,"Uploaded rendezvous descriptor (status %d " + "(%s))", + status_code, escaped(reason)); + /* Without the rend data, we'll have a problem identifying what has been + * uploaded for which service. */ + tor_assert(conn->rend_data); + switch (status_code) { + case 200: + log_info(LD_REND, + "Uploading rendezvous descriptor: finished with status " + "200 (%s)", escaped(reason)); + control_event_hs_descriptor_uploaded(conn->identity_digest, + rend_data_get_address(conn->rend_data)); + rend_service_desc_has_uploaded(conn->rend_data); + break; + case 400: + log_warn(LD_REND,"http status 400 (%s) response from dirserver " + "'%s:%d'. Malformed rendezvous descriptor?", + escaped(reason), conn->base_.address, conn->base_.port); + SEND_HS_DESC_UPLOAD_FAILED_EVENT("UPLOAD_REJECTED"); + break; + default: + log_warn(LD_REND,"http status %d (%s) response unexpected (server " + "'%s:%d').", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + SEND_HS_DESC_UPLOAD_FAILED_EVENT("UNEXPECTED"); + break; + } + + return 0; +} + +/** + * Handler function: processes a response to a POST request to upload an + * hidden service descriptor. + **/ +static int +handle_response_upload_hsdesc(dir_connection_t *conn, + const response_handler_args_t *args) +{ + const int status_code = args->status_code; + const char *reason = args->reason; + + tor_assert(conn); + tor_assert(conn->base_.purpose == DIR_PURPOSE_UPLOAD_HSDESC); + + log_info(LD_REND, "Uploaded hidden service descriptor (status %d " + "(%s))", + status_code, escaped(reason)); + /* For this directory response, it MUST have an hidden service identifier on + * this connection. */ + tor_assert(conn->hs_ident); + switch (status_code) { + case 200: + log_info(LD_REND, "Uploading hidden service descriptor: " + "finished with status 200 (%s)", escaped(reason)); + hs_control_desc_event_uploaded(conn->hs_ident, conn->identity_digest); + break; + case 400: + log_fn(LOG_PROTOCOL_WARN, LD_REND, + "Uploading hidden service descriptor: http " + "status 400 (%s) response from dirserver " + "'%s:%d'. Malformed hidden service descriptor?", + escaped(reason), conn->base_.address, conn->base_.port); + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "UPLOAD_REJECTED"); + break; + default: + log_warn(LD_REND, "Uploading hidden service descriptor: http " + "status %d (%s) response unexpected (server " + "'%s:%d').", + status_code, escaped(reason), conn->base_.address, + conn->base_.port); + hs_control_desc_event_failed(conn->hs_ident, conn->identity_digest, + "UNEXPECTED"); + break; + } + + return 0; +} + +/** Called when a directory connection reaches EOF. */ +int +connection_dir_reached_eof(dir_connection_t *conn) +{ + int retval; + if (conn->base_.state != DIR_CONN_STATE_CLIENT_READING) { + log_info(LD_HTTP,"conn reached eof, not reading. [state=%d] Closing.", + conn->base_.state); + connection_close_immediate(TO_CONN(conn)); /* error: give up on flushing */ + connection_mark_for_close(TO_CONN(conn)); + return -1; + } + + retval = connection_dir_client_reached_eof(conn); + if (retval == 0) /* success */ + conn->base_.state = DIR_CONN_STATE_CLIENT_FINISHED; + connection_mark_for_close(TO_CONN(conn)); + return retval; +} + +/** If any directory object is arriving, and it's over 10MB large, we're + * getting DoS'd. (As of 0.1.2.x, raw directories are about 1MB, and we never + * ask for more than 96 router descriptors at a time.) + */ +#define MAX_DIRECTORY_OBJECT_SIZE (10*(1<<20)) + +#define MAX_VOTE_DL_SIZE (MAX_DIRECTORY_OBJECT_SIZE * 5) + +/** Read handler for directory connections. (That's connections <em>to</em> + * directory servers and connections <em>at</em> directory servers.) + */ +int +connection_dir_process_inbuf(dir_connection_t *conn) +{ + size_t max_size; + tor_assert(conn); + tor_assert(conn->base_.type == CONN_TYPE_DIR); + + /* Directory clients write, then read data until they receive EOF; + * directory servers read data until they get an HTTP command, then + * write their response (when it's finished flushing, they mark for + * close). + */ + + /* If we're on the dirserver side, look for a command. */ + if (conn->base_.state == DIR_CONN_STATE_SERVER_COMMAND_WAIT) { + if (directory_handle_command(conn) < 0) { + connection_mark_for_close(TO_CONN(conn)); + return -1; + } + return 0; + } + + max_size = + (TO_CONN(conn)->purpose == DIR_PURPOSE_FETCH_STATUS_VOTE) ? + MAX_VOTE_DL_SIZE : MAX_DIRECTORY_OBJECT_SIZE; + + if (connection_get_inbuf_len(TO_CONN(conn)) > max_size) { + log_warn(LD_HTTP, + "Too much data received from directory connection (%s): " + "denial of service attempt, or you need to upgrade?", + conn->base_.address); + connection_mark_for_close(TO_CONN(conn)); + return -1; + } + + if (!conn->base_.inbuf_reached_eof) + log_debug(LD_HTTP,"Got data, not eof. Leaving on inbuf."); + return 0; +} + +/** We are closing a dir connection: If <b>dir_conn</b> is a dir connection + * that tried to fetch an HS descriptor, check if it successfully fetched it, + * or if we need to try again. */ +static void +refetch_hsdesc_if_needed(dir_connection_t *dir_conn) +{ + connection_t *conn = TO_CONN(dir_conn); + + /* If we were trying to fetch a v2 rend desc and did not succeed, retry as + * needed. (If a fetch is successful, the connection state is changed to + * DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2 or DIR_PURPOSE_HAS_FETCHED_HSDESC to + * mark that refetching is unnecessary.) */ + if (conn->purpose == DIR_PURPOSE_FETCH_RENDDESC_V2 && + dir_conn->rend_data && + rend_valid_v2_service_id( + rend_data_get_address(dir_conn->rend_data))) { + rend_client_refetch_v2_renddesc(dir_conn->rend_data); + } + + /* Check for v3 rend desc fetch */ + if (conn->purpose == DIR_PURPOSE_FETCH_HSDESC && + dir_conn->hs_ident && + !ed25519_public_key_is_zero(&dir_conn->hs_ident->identity_pk)) { + hs_client_refetch_hsdesc(&dir_conn->hs_ident->identity_pk); + } +} + +/** Called when we're about to finally unlink and free a directory connection: + * perform necessary accounting and cleanup */ +void +connection_dir_about_to_close(dir_connection_t *dir_conn) +{ + connection_t *conn = TO_CONN(dir_conn); + + if (conn->state < DIR_CONN_STATE_CLIENT_FINISHED) { + /* It's a directory connection and connecting or fetching + * failed: forget about this router, and maybe try again. */ + connection_dir_request_failed(dir_conn); + } + + refetch_hsdesc_if_needed(dir_conn); +} + +/** Create an http response for the client <b>conn</b> out of + * <b>status</b> and <b>reason_phrase</b>. Write it to <b>conn</b>. + */ +static void +write_short_http_response(dir_connection_t *conn, int status, + const char *reason_phrase) +{ + char *buf = NULL; + char *datestring = NULL; + + IF_BUG_ONCE(!reason_phrase) { /* bullet-proofing */ + reason_phrase = "unspecified"; + } + + if (server_mode(get_options())) { + /* include the Date: header, but only if we're a relay or bridge */ + char datebuf[RFC1123_TIME_LEN+1]; + format_rfc1123_time(datebuf, time(NULL)); + tor_asprintf(&datestring, "Date: %s\r\n", datebuf); + } + + tor_asprintf(&buf, "HTTP/1.0 %d %s\r\n%s\r\n", + status, reason_phrase, datestring?datestring:""); + + log_debug(LD_DIRSERV,"Wrote status 'HTTP/1.0 %d %s'", status, reason_phrase); + connection_buf_add(buf, strlen(buf), TO_CONN(conn)); + + tor_free(datestring); + tor_free(buf); +} + +/** Write the header for an HTTP/1.0 response onto <b>conn</b>-\>outbuf, + * with <b>type</b> as the Content-Type. + * + * If <b>length</b> is nonnegative, it is the Content-Length. + * If <b>encoding</b> is provided, it is the Content-Encoding. + * If <b>cache_lifetime</b> is greater than 0, the content may be cached for + * up to cache_lifetime seconds. Otherwise, the content may not be cached. */ +static void +write_http_response_header_impl(dir_connection_t *conn, ssize_t length, + const char *type, const char *encoding, + const char *extra_headers, + long cache_lifetime) +{ + char date[RFC1123_TIME_LEN+1]; + time_t now = time(NULL); + buf_t *buf = buf_new_with_capacity(1024); + + tor_assert(conn); + + format_rfc1123_time(date, now); + + buf_add_printf(buf, "HTTP/1.0 200 OK\r\nDate: %s\r\n", date); + if (type) { + buf_add_printf(buf, "Content-Type: %s\r\n", type); + } + if (!is_local_addr(&conn->base_.addr)) { + /* Don't report the source address for a nearby/private connection. + * Otherwise we tend to mis-report in cases where incoming ports are + * being forwarded to a Tor server running behind the firewall. */ + buf_add_printf(buf, X_ADDRESS_HEADER "%s\r\n", conn->base_.address); + } + if (encoding) { + buf_add_printf(buf, "Content-Encoding: %s\r\n", encoding); + } + if (length >= 0) { + buf_add_printf(buf, "Content-Length: %ld\r\n", (long)length); + } + if (cache_lifetime > 0) { + char expbuf[RFC1123_TIME_LEN+1]; + format_rfc1123_time(expbuf, (time_t)(now + cache_lifetime)); + /* We could say 'Cache-control: max-age=%d' here if we start doing + * http/1.1 */ + buf_add_printf(buf, "Expires: %s\r\n", expbuf); + } else if (cache_lifetime == 0) { + /* We could say 'Cache-control: no-cache' here if we start doing + * http/1.1 */ + buf_add_string(buf, "Pragma: no-cache\r\n"); + } + if (extra_headers) { + buf_add_string(buf, extra_headers); + } + buf_add_string(buf, "\r\n"); + + connection_buf_add_buf(TO_CONN(conn), buf); + buf_free(buf); +} + +/** As write_http_response_header_impl, but sets encoding and content-typed + * based on whether the response will be <b>compressed</b> or not. */ +static void +write_http_response_headers(dir_connection_t *conn, ssize_t length, + compress_method_t method, + const char *extra_headers, long cache_lifetime) +{ + const char *methodname = compression_method_get_name(method); + const char *doctype; + if (method == NO_METHOD) + doctype = "text/plain"; + else + doctype = "application/octet-stream"; + write_http_response_header_impl(conn, length, + doctype, + methodname, + extra_headers, + cache_lifetime); +} + +/** As write_http_response_headers, but assumes extra_headers is NULL */ +static void +write_http_response_header(dir_connection_t *conn, ssize_t length, + compress_method_t method, + long cache_lifetime) +{ + write_http_response_headers(conn, length, method, NULL, cache_lifetime); +} + +/** Array of compression methods to use (if supported) for serving + * precompressed data, ordered from best to worst. */ +static compress_method_t srv_meth_pref_precompressed[] = { + LZMA_METHOD, + ZSTD_METHOD, + ZLIB_METHOD, + GZIP_METHOD, + NO_METHOD +}; + +/** Array of compression methods to use (if supported) for serving + * streamed data, ordered from best to worst. */ +static compress_method_t srv_meth_pref_streaming_compression[] = { + ZSTD_METHOD, + ZLIB_METHOD, + GZIP_METHOD, + NO_METHOD +}; + +/** Array of allowed compression methods to use (if supported) when receiving a + * response from a request that was required to be anonymous. */ +static compress_method_t client_meth_allowed_anonymous_compression[] = { + ZLIB_METHOD, + GZIP_METHOD, + NO_METHOD +}; + +/** Parse the compression methods listed in an Accept-Encoding header <b>h</b>, + * and convert them to a bitfield where compression method x is supported if + * and only if 1 << x is set in the bitfield. */ +STATIC unsigned +parse_accept_encoding_header(const char *h) +{ + unsigned result = (1u << NO_METHOD); + smartlist_t *methods = smartlist_new(); + smartlist_split_string(methods, h, ",", + SPLIT_SKIP_SPACE|SPLIT_STRIP_SPACE|SPLIT_IGNORE_BLANK, 0); + + SMARTLIST_FOREACH_BEGIN(methods, const char *, m) { + compress_method_t method = compression_method_get_by_name(m); + if (method != UNKNOWN_METHOD) { + tor_assert(((unsigned)method) < 8*sizeof(unsigned)); + result |= (1u << method); + } + } SMARTLIST_FOREACH_END(m); + SMARTLIST_FOREACH_BEGIN(methods, char *, m) { + tor_free(m); + } SMARTLIST_FOREACH_END(m); + smartlist_free(methods); + return result; +} + +/** Array of compression methods to use (if supported) for requesting + * compressed data, ordered from best to worst. */ +static compress_method_t client_meth_pref[] = { + LZMA_METHOD, + ZSTD_METHOD, + ZLIB_METHOD, + GZIP_METHOD, + NO_METHOD +}; + +/** Return a newly allocated string containing a comma separated list of + * supported encodings. */ +STATIC char * +accept_encoding_header(void) +{ + smartlist_t *methods = smartlist_new(); + char *header = NULL; + compress_method_t method; + unsigned i; + + for (i = 0; i < ARRAY_LENGTH(client_meth_pref); ++i) { + method = client_meth_pref[i]; + if (tor_compress_supports_method(method)) + smartlist_add(methods, (char *)compression_method_get_name(method)); + } + + header = smartlist_join_strings(methods, ", ", 0, NULL); + smartlist_free(methods); + + return header; +} + +/** Decide whether a client would accept the consensus we have. + * + * Clients can say they only want a consensus if it's signed by more + * than half the authorities in a list. They pass this list in + * the url as "...consensus/<b>fpr</b>+<b>fpr</b>+<b>fpr</b>". + * + * <b>fpr</b> may be an abbreviated fingerprint, i.e. only a left substring + * of the full authority identity digest. (Only strings of even length, + * i.e. encodings of full bytes, are handled correctly. In the case + * of an odd number of hex digits the last one is silently ignored.) + * + * Returns 1 if more than half of the requested authorities signed the + * consensus, 0 otherwise. + */ +int +client_likes_consensus(const struct consensus_cache_entry_t *ent, + const char *want_url) +{ + smartlist_t *voters = smartlist_new(); + int need_at_least; + int have = 0; + + if (consensus_cache_entry_get_voter_id_digests(ent, voters) != 0) { + smartlist_free(voters); + return 1; // We don't know the voters; assume the client won't mind. */ + } + + smartlist_t *want_authorities = smartlist_new(); + dir_split_resource_into_fingerprints(want_url, want_authorities, NULL, 0); + need_at_least = smartlist_len(want_authorities)/2+1; + + SMARTLIST_FOREACH_BEGIN(want_authorities, const char *, want_digest) { + + SMARTLIST_FOREACH_BEGIN(voters, const char *, digest) { + if (!strcasecmpstart(digest, want_digest)) { + have++; + break; + }; + } SMARTLIST_FOREACH_END(digest); + + /* early exit, if we already have enough */ + if (have >= need_at_least) + break; + } SMARTLIST_FOREACH_END(want_digest); + + SMARTLIST_FOREACH(want_authorities, char *, d, tor_free(d)); + smartlist_free(want_authorities); + SMARTLIST_FOREACH(voters, char *, cp, tor_free(cp)); + smartlist_free(voters); + return (have >= need_at_least); +} + +/** Return the compression level we should use for sending a compressed + * response of size <b>n_bytes</b>. */ +STATIC compression_level_t +choose_compression_level(ssize_t n_bytes) +{ + if (! have_been_under_memory_pressure()) { + return HIGH_COMPRESSION; /* we have plenty of RAM. */ + } else if (n_bytes < 0) { + return HIGH_COMPRESSION; /* unknown; might be big. */ + } else if (n_bytes < 1024) { + return LOW_COMPRESSION; + } else if (n_bytes < 2048) { + return MEDIUM_COMPRESSION; + } else { + return HIGH_COMPRESSION; + } +} + +/** Information passed to handle a GET request. */ +typedef struct get_handler_args_t { + /** Bitmask of compression methods that the client said (or implied) it + * supported. */ + unsigned compression_supported; + /** If nonzero, the time included an if-modified-since header with this + * value. */ + time_t if_modified_since; + /** String containing the requested URL or resource. */ + const char *url; + /** String containing the HTTP headers */ + const char *headers; +} get_handler_args_t; + +/** Entry for handling an HTTP GET request. + * + * This entry matches a request if "string" is equal to the requested + * resource, or if "is_prefix" is true and "string" is a prefix of the + * requested resource. + * + * The 'handler' function is called to handle the request. It receives + * an arguments structure, and must return 0 on success or -1 if we should + * close the connection. + **/ +typedef struct url_table_ent_s { + const char *string; + int is_prefix; + int (*handler)(dir_connection_t *conn, const get_handler_args_t *args); +} url_table_ent_t; + +static int handle_get_frontpage(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_current_consensus(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_status_vote(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_microdesc(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_descriptor(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_keys(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_hs_descriptor_v2(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_robots(dir_connection_t *conn, + const get_handler_args_t *args); +static int handle_get_networkstatus_bridges(dir_connection_t *conn, + const get_handler_args_t *args); + +/** Table for handling GET requests. */ +static const url_table_ent_t url_table[] = { + { "/tor/", 0, handle_get_frontpage }, + { "/tor/status-vote/current/consensus", 1, handle_get_current_consensus }, + { "/tor/status-vote/current/", 1, handle_get_status_vote }, + { "/tor/status-vote/next/", 1, handle_get_status_vote }, + { "/tor/micro/d/", 1, handle_get_microdesc }, + { "/tor/server/", 1, handle_get_descriptor }, + { "/tor/extra/", 1, handle_get_descriptor }, + { "/tor/keys/", 1, handle_get_keys }, + { "/tor/rendezvous2/", 1, handle_get_hs_descriptor_v2 }, + { "/tor/hs/3/", 1, handle_get_hs_descriptor_v3 }, + { "/tor/robots.txt", 0, handle_get_robots }, + { "/tor/networkstatus-bridges", 0, handle_get_networkstatus_bridges }, + { NULL, 0, NULL }, +}; + +/** Helper function: called when a dirserver gets a complete HTTP GET + * request. Look for a request for a directory or for a rendezvous + * service descriptor. On finding one, write a response into + * conn-\>outbuf. If the request is unrecognized, send a 404. + * Return 0 if we handled this successfully, or -1 if we need to close + * the connection. */ +MOCK_IMPL(STATIC int, +directory_handle_command_get,(dir_connection_t *conn, const char *headers, + const char *req_body, size_t req_body_len)) +{ + char *url, *url_mem, *header; + time_t if_modified_since = 0; + int zlib_compressed_in_url; + unsigned compression_methods_supported; + + /* We ignore the body of a GET request. */ + (void)req_body; + (void)req_body_len; + + log_debug(LD_DIRSERV,"Received GET command."); + + conn->base_.state = DIR_CONN_STATE_SERVER_WRITING; + + if (parse_http_url(headers, &url) < 0) { + write_short_http_response(conn, 400, "Bad request"); + return 0; + } + if ((header = http_get_header(headers, "If-Modified-Since: "))) { + struct tm tm; + if (parse_http_time(header, &tm) == 0) { + if (tor_timegm(&tm, &if_modified_since)<0) { + if_modified_since = 0; + } else { + log_debug(LD_DIRSERV, "If-Modified-Since is '%s'.", escaped(header)); + } + } + /* The correct behavior on a malformed If-Modified-Since header is to + * act as if no If-Modified-Since header had been given. */ + tor_free(header); + } + log_debug(LD_DIRSERV,"rewritten url as '%s'.", escaped(url)); + + url_mem = url; + { + size_t url_len = strlen(url); + + zlib_compressed_in_url = url_len > 2 && !strcmp(url+url_len-2, ".z"); + if (zlib_compressed_in_url) { + url[url_len-2] = '\0'; + } + } + + if ((header = http_get_header(headers, "Accept-Encoding: "))) { + compression_methods_supported = parse_accept_encoding_header(header); + tor_free(header); + } else { + compression_methods_supported = (1u << NO_METHOD); + } + if (zlib_compressed_in_url) { + compression_methods_supported |= (1u << ZLIB_METHOD); + } + + /* Remove all methods that we don't both support. */ + compression_methods_supported &= tor_compress_get_supported_method_bitmask(); + + get_handler_args_t args; + args.url = url; + args.headers = headers; + args.if_modified_since = if_modified_since; + args.compression_supported = compression_methods_supported; + + int i, result = -1; + for (i = 0; url_table[i].string; ++i) { + int match; + if (url_table[i].is_prefix) { + match = !strcmpstart(url, url_table[i].string); + } else { + match = !strcmp(url, url_table[i].string); + } + if (match) { + result = url_table[i].handler(conn, &args); + goto done; + } + } + + /* we didn't recognize the url */ + write_short_http_response(conn, 404, "Not found"); + result = 0; + + done: + tor_free(url_mem); + return result; +} + +/** Helper function for GET / or GET /tor/ + */ +static int +handle_get_frontpage(dir_connection_t *conn, const get_handler_args_t *args) +{ + (void) args; /* unused */ + const char *frontpage = get_dirportfrontpage(); + + if (frontpage) { + size_t dlen; + dlen = strlen(frontpage); + /* Let's return a disclaimer page (users shouldn't use V1 anymore, + and caches don't fetch '/', so this is safe). */ + + /* [We don't check for write_bucket_low here, since we want to serve + * this page no matter what.] */ + write_http_response_header_impl(conn, dlen, "text/html", "identity", + NULL, DIRPORTFRONTPAGE_CACHE_LIFETIME); + connection_buf_add(frontpage, dlen, TO_CONN(conn)); + } else { + write_short_http_response(conn, 404, "Not found"); + } + return 0; +} + +/** Warn that the cached consensus <b>consensus</b> of type + * <b>flavor</b> is too old and will not be served to clients. Rate-limit the + * warning to avoid logging an entry on every request. + */ +static void +warn_consensus_is_too_old(const struct consensus_cache_entry_t *consensus, + const char *flavor, time_t now) +{ +#define TOO_OLD_WARNING_INTERVAL (60*60) + static ratelim_t warned = RATELIM_INIT(TOO_OLD_WARNING_INTERVAL); + char timestamp[ISO_TIME_LEN+1]; + time_t valid_until; + char *dupes; + + if (consensus_cache_entry_get_valid_until(consensus, &valid_until)) + return; + + if ((dupes = rate_limit_log(&warned, now))) { + format_local_iso_time(timestamp, valid_until); + log_warn(LD_DIRSERV, "Our %s%sconsensus is too old, so we will not " + "serve it to clients. It was valid until %s local time and we " + "continued to serve it for up to 24 hours after it expired.%s", + flavor ? flavor : "", flavor ? " " : "", timestamp, dupes); + tor_free(dupes); + } +} + +/** + * Parse a single hex-encoded sha3-256 digest from <b>hex</b> into + * <b>digest</b>. Return 0 on success. On failure, report that the hash came + * from <b>location</b>, report that we are taking <b>action</b> with it, and + * return -1. + */ +static int +parse_one_diff_hash(uint8_t *digest, const char *hex, const char *location, + const char *action) +{ + if (base16_decode((char*)digest, DIGEST256_LEN, hex, strlen(hex)) == + DIGEST256_LEN) { + return 0; + } else { + log_fn(LOG_PROTOCOL_WARN, LD_DIR, + "%s contained bogus digest %s; %s.", + location, escaped(hex), action); + return -1; + } +} + +/** If there is an X-Or-Diff-From-Consensus header included in <b>headers</b>, + * set <b>digest_out<b> to a new smartlist containing every 256-bit + * hex-encoded digest listed in that header and return 0. Otherwise return + * -1. */ +static int +parse_or_diff_from_header(smartlist_t **digests_out, const char *headers) +{ + char *hdr = http_get_header(headers, X_OR_DIFF_FROM_CONSENSUS_HEADER); + if (hdr == NULL) { + return -1; + } + smartlist_t *hex_digests = smartlist_new(); + *digests_out = smartlist_new(); + smartlist_split_string(hex_digests, hdr, " ", + SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, -1); + SMARTLIST_FOREACH_BEGIN(hex_digests, const char *, hex) { + uint8_t digest[DIGEST256_LEN]; + if (!parse_one_diff_hash(digest, hex, "X-Or-Diff-From-Consensus header", + "ignoring")) { + smartlist_add(*digests_out, tor_memdup(digest, sizeof(digest))); + } + } SMARTLIST_FOREACH_END(hex); + SMARTLIST_FOREACH(hex_digests, char *, cp, tor_free(cp)); + smartlist_free(hex_digests); + tor_free(hdr); + return 0; +} + +/** Fallback compression method. The fallback compression method is used in + * case a client requests a non-compressed document. We only store compressed + * documents, so we use this compression method to fetch the document and let + * the spooling system do the streaming decompression. + */ +#define FALLBACK_COMPRESS_METHOD ZLIB_METHOD + +/** + * Try to find the best consensus diff possible in order to serve a client + * request for a diff from one of the consensuses in <b>digests</b> to the + * current consensus of flavor <b>flav</b>. The client supports the + * compression methods listed in the <b>compression_methods</b> bitfield: + * place the method chosen (if any) into <b>compression_used_out</b>. + */ +static struct consensus_cache_entry_t * +find_best_diff(const smartlist_t *digests, int flav, + unsigned compression_methods, + compress_method_t *compression_used_out) +{ + struct consensus_cache_entry_t *result = NULL; + + SMARTLIST_FOREACH_BEGIN(digests, const uint8_t *, diff_from) { + unsigned u; + for (u = 0; u < ARRAY_LENGTH(srv_meth_pref_precompressed); ++u) { + compress_method_t method = srv_meth_pref_precompressed[u]; + if (0 == (compression_methods & (1u<<method))) + continue; // client doesn't like this one, or we don't have it. + if (consdiffmgr_find_diff_from(&result, flav, DIGEST_SHA3_256, + diff_from, DIGEST256_LEN, + method) == CONSDIFF_AVAILABLE) { + tor_assert_nonfatal(result); + *compression_used_out = method; + return result; + } + } + } SMARTLIST_FOREACH_END(diff_from); + + SMARTLIST_FOREACH_BEGIN(digests, const uint8_t *, diff_from) { + if (consdiffmgr_find_diff_from(&result, flav, DIGEST_SHA3_256, diff_from, + DIGEST256_LEN, FALLBACK_COMPRESS_METHOD) == CONSDIFF_AVAILABLE) { + tor_assert_nonfatal(result); + *compression_used_out = FALLBACK_COMPRESS_METHOD; + return result; + } + } SMARTLIST_FOREACH_END(diff_from); + + return NULL; +} + +/** Lookup the cached consensus document by the flavor found in <b>flav</b>. + * The preferred set of compression methods should be listed in the + * <b>compression_methods</b> bitfield. The compression method chosen (if any) + * is stored in <b>compression_used_out</b>. */ +static struct consensus_cache_entry_t * +find_best_consensus(int flav, + unsigned compression_methods, + compress_method_t *compression_used_out) +{ + struct consensus_cache_entry_t *result = NULL; + unsigned u; + + for (u = 0; u < ARRAY_LENGTH(srv_meth_pref_precompressed); ++u) { + compress_method_t method = srv_meth_pref_precompressed[u]; + + if (0 == (compression_methods & (1u<<method))) + continue; + + if (consdiffmgr_find_consensus(&result, flav, + method) == CONSDIFF_AVAILABLE) { + tor_assert_nonfatal(result); + *compression_used_out = method; + return result; + } + } + + if (consdiffmgr_find_consensus(&result, flav, + FALLBACK_COMPRESS_METHOD) == CONSDIFF_AVAILABLE) { + tor_assert_nonfatal(result); + *compression_used_out = FALLBACK_COMPRESS_METHOD; + return result; + } + + return NULL; +} + +/** Try to find the best supported compression method possible from a given + * <b>compression_methods</b>. Return NO_METHOD if no mutually supported + * compression method could be found. */ +static compress_method_t +find_best_compression_method(unsigned compression_methods, int stream) +{ + unsigned u; + compress_method_t *methods; + size_t length; + + if (stream) { + methods = srv_meth_pref_streaming_compression; + length = ARRAY_LENGTH(srv_meth_pref_streaming_compression); + } else { + methods = srv_meth_pref_precompressed; + length = ARRAY_LENGTH(srv_meth_pref_precompressed); + } + + for (u = 0; u < length; ++u) { + compress_method_t method = methods[u]; + if (compression_methods & (1u<<method)) + return method; + } + + return NO_METHOD; +} + +/** Check if any of the digests in <b>digests</b> matches the latest consensus + * flavor (given in <b>flavor</b>) that we have available. */ +static int +digest_list_contains_best_consensus(consensus_flavor_t flavor, + const smartlist_t *digests) +{ + const networkstatus_t *ns = NULL; + + if (digests == NULL) + return 0; + + ns = networkstatus_get_latest_consensus_by_flavor(flavor); + + if (ns == NULL) + return 0; + + SMARTLIST_FOREACH_BEGIN(digests, const uint8_t *, digest) { + if (tor_memeq(ns->digest_sha3_as_signed, digest, DIGEST256_LEN)) + return 1; + } SMARTLIST_FOREACH_END(digest); + + return 0; +} + +/** Check if the given compression method is allowed for a connection that is + * supposed to be anonymous. Returns 1 if the compression method is allowed, + * otherwise 0. */ +STATIC int +allowed_anonymous_connection_compression_method(compress_method_t method) +{ + unsigned u; + + for (u = 0; u < ARRAY_LENGTH(client_meth_allowed_anonymous_compression); + ++u) { + compress_method_t allowed_method = + client_meth_allowed_anonymous_compression[u]; + + if (! tor_compress_supports_method(allowed_method)) + continue; + + if (method == allowed_method) + return 1; + } + + return 0; +} + +/** Log a warning when a remote server has sent us a document using a + * compression method that is not allowed for anonymous directory requests. */ +STATIC void +warn_disallowed_anonymous_compression_method(compress_method_t method) +{ + log_fn(LOG_PROTOCOL_WARN, LD_HTTP, + "Received a %s HTTP response, which is not " + "allowed for anonymous directory requests.", + compression_method_get_human_name(method)); +} + +/** Encodes the results of parsing a consensus request to figure out what + * consensus, and possibly what diffs, the user asked for. */ +typedef struct { + /** name of the flavor to retrieve. */ + char *flavor; + /** flavor to retrive, as enum. */ + consensus_flavor_t flav; + /** plus-separated list of authority fingerprints; see + * client_likes_consensus(). Aliases the URL in the request passed to + * parse_consensus_request(). */ + const char *want_fps; + /** Optionally, a smartlist of sha3 digests-as-signed of the consensuses + * to return a diff from. */ + smartlist_t *diff_from_digests; + /** If true, never send a full consensus. If there is no diff, send + * a 404 instead. */ + int diff_only; +} parsed_consensus_request_t; + +/** Remove all data held in <b>req</b>. Do not free <b>req</b> itself, since + * it is stack-allocated. */ +static void +parsed_consensus_request_clear(parsed_consensus_request_t *req) +{ + if (!req) + return; + tor_free(req->flavor); + if (req->diff_from_digests) { + SMARTLIST_FOREACH(req->diff_from_digests, uint8_t *, d, tor_free(d)); + smartlist_free(req->diff_from_digests); + } + memset(req, 0, sizeof(parsed_consensus_request_t)); +} + +/** + * Parse the URL and relevant headers of <b>args</b> for a current-consensus + * request to learn what flavor of consensus we want, what keys it must be + * signed with, and what diffs we would accept (or demand) instead. Return 0 + * on success and -1 on failure. + */ +static int +parse_consensus_request(parsed_consensus_request_t *out, + const get_handler_args_t *args) +{ + const char *url = args->url; + memset(out, 0, sizeof(parsed_consensus_request_t)); + out->flav = FLAV_NS; + + const char CONSENSUS_URL_PREFIX[] = "/tor/status-vote/current/consensus/"; + const char CONSENSUS_FLAVORED_PREFIX[] = + "/tor/status-vote/current/consensus-"; + + /* figure out the flavor if any, and who we wanted to sign the thing */ + const char *after_flavor = NULL; + + if (!strcmpstart(url, CONSENSUS_FLAVORED_PREFIX)) { + const char *f, *cp; + f = url + strlen(CONSENSUS_FLAVORED_PREFIX); + cp = strchr(f, '/'); + if (cp) { + after_flavor = cp+1; + out->flavor = tor_strndup(f, cp-f); + } else { + out->flavor = tor_strdup(f); + } + int flav = networkstatus_parse_flavor_name(out->flavor); + if (flav < 0) + flav = FLAV_NS; + out->flav = flav; + } else { + if (!strcmpstart(url, CONSENSUS_URL_PREFIX)) + after_flavor = url+strlen(CONSENSUS_URL_PREFIX); + } + + /* see whether we've been asked explicitly for a diff from an older + * consensus. (The user might also have said that a diff would be okay, + * via X-Or-Diff-From-Consensus */ + const char DIFF_COMPONENT[] = "diff/"; + char *diff_hash_in_url = NULL; + if (after_flavor && !strcmpstart(after_flavor, DIFF_COMPONENT)) { + after_flavor += strlen(DIFF_COMPONENT); + const char *cp = strchr(after_flavor, '/'); + if (cp) { + diff_hash_in_url = tor_strndup(after_flavor, cp-after_flavor); + out->want_fps = cp+1; + } else { + diff_hash_in_url = tor_strdup(after_flavor); + out->want_fps = NULL; + } + } else { + out->want_fps = after_flavor; + } + + if (diff_hash_in_url) { + uint8_t diff_from[DIGEST256_LEN]; + out->diff_from_digests = smartlist_new(); + out->diff_only = 1; + int ok = !parse_one_diff_hash(diff_from, diff_hash_in_url, "URL", + "rejecting"); + tor_free(diff_hash_in_url); + if (ok) { + smartlist_add(out->diff_from_digests, + tor_memdup(diff_from, DIGEST256_LEN)); + } else { + return -1; + } + } else { + parse_or_diff_from_header(&out->diff_from_digests, args->headers); + } + + return 0; +} + +/** Helper function for GET /tor/status-vote/current/consensus + */ +static int +handle_get_current_consensus(dir_connection_t *conn, + const get_handler_args_t *args) +{ + const compress_method_t compress_method = + find_best_compression_method(args->compression_supported, 0); + const time_t if_modified_since = args->if_modified_since; + int clear_spool = 0; + + /* v3 network status fetch. */ + long lifetime = NETWORKSTATUS_CACHE_LIFETIME; + + time_t now = time(NULL); + parsed_consensus_request_t req; + + if (parse_consensus_request(&req, args) < 0) { + write_short_http_response(conn, 404, "Couldn't parse request"); + goto done; + } + + if (digest_list_contains_best_consensus(req.flav, + req.diff_from_digests)) { + write_short_http_response(conn, 304, "Not modified"); + geoip_note_ns_response(GEOIP_REJECT_NOT_MODIFIED); + goto done; + } + + struct consensus_cache_entry_t *cached_consensus = NULL; + + compress_method_t compression_used = NO_METHOD; + if (req.diff_from_digests) { + cached_consensus = find_best_diff(req.diff_from_digests, req.flav, + args->compression_supported, + &compression_used); + } + + if (req.diff_only && !cached_consensus) { + write_short_http_response(conn, 404, "No such diff available"); + // XXXX warn_consensus_is_too_old(v, req.flavor, now); + geoip_note_ns_response(GEOIP_REJECT_NOT_FOUND); + goto done; + } + + if (! cached_consensus) { + cached_consensus = find_best_consensus(req.flav, + args->compression_supported, + &compression_used); + } + + time_t fresh_until, valid_until; + int have_fresh_until = 0, have_valid_until = 0; + if (cached_consensus) { + have_fresh_until = + !consensus_cache_entry_get_fresh_until(cached_consensus, &fresh_until); + have_valid_until = + !consensus_cache_entry_get_valid_until(cached_consensus, &valid_until); + } + + if (cached_consensus && have_valid_until && + !networkstatus_valid_until_is_reasonably_live(valid_until, now)) { + write_short_http_response(conn, 404, "Consensus is too old"); + warn_consensus_is_too_old(cached_consensus, req.flavor, now); + geoip_note_ns_response(GEOIP_REJECT_NOT_FOUND); + goto done; + } + + if (cached_consensus && req.want_fps && + !client_likes_consensus(cached_consensus, req.want_fps)) { + write_short_http_response(conn, 404, "Consensus not signed by sufficient " + "number of requested authorities"); + geoip_note_ns_response(GEOIP_REJECT_NOT_ENOUGH_SIGS); + goto done; + } + + conn->spool = smartlist_new(); + clear_spool = 1; + { + spooled_resource_t *spooled; + if (cached_consensus) { + spooled = spooled_resource_new_from_cache_entry(cached_consensus); + smartlist_add(conn->spool, spooled); + } + } + + lifetime = (have_fresh_until && fresh_until > now) ? fresh_until - now : 0; + + size_t size_guess = 0; + int n_expired = 0; + dirserv_spool_remove_missing_and_guess_size(conn, if_modified_since, + compress_method != NO_METHOD, + &size_guess, + &n_expired); + + if (!smartlist_len(conn->spool) && !n_expired) { + write_short_http_response(conn, 404, "Not found"); + geoip_note_ns_response(GEOIP_REJECT_NOT_FOUND); + goto done; + } else if (!smartlist_len(conn->spool)) { + write_short_http_response(conn, 304, "Not modified"); + geoip_note_ns_response(GEOIP_REJECT_NOT_MODIFIED); + goto done; + } + + if (global_write_bucket_low(TO_CONN(conn), size_guess, 2)) { + log_debug(LD_DIRSERV, + "Client asked for network status lists, but we've been " + "writing too many bytes lately. Sending 503 Dir busy."); + write_short_http_response(conn, 503, "Directory busy, try again later"); + geoip_note_ns_response(GEOIP_REJECT_BUSY); + goto done; + } + + tor_addr_t addr; + if (tor_addr_parse(&addr, (TO_CONN(conn))->address) >= 0) { + geoip_note_client_seen(GEOIP_CLIENT_NETWORKSTATUS, + &addr, NULL, + time(NULL)); + geoip_note_ns_response(GEOIP_SUCCESS); + /* Note that a request for a network status has started, so that we + * can measure the download time later on. */ + if (conn->dirreq_id) + geoip_start_dirreq(conn->dirreq_id, size_guess, DIRREQ_TUNNELED); + else + geoip_start_dirreq(TO_CONN(conn)->global_identifier, size_guess, + DIRREQ_DIRECT); + } + + /* Use this header to tell caches that the response depends on the + * X-Or-Diff-From-Consensus header (or lack thereof). */ + const char vary_header[] = "Vary: X-Or-Diff-From-Consensus\r\n"; + + clear_spool = 0; + + // The compress_method might have been NO_METHOD, but we store the data + // compressed. Decompress them using `compression_used`. See fallback code in + // find_best_consensus() and find_best_diff(). + write_http_response_headers(conn, -1, + compress_method == NO_METHOD ? + NO_METHOD : compression_used, + vary_header, + smartlist_len(conn->spool) == 1 ? lifetime : 0); + + if (compress_method == NO_METHOD && smartlist_len(conn->spool)) + conn->compress_state = tor_compress_new(0, compression_used, + HIGH_COMPRESSION); + + /* Prime the connection with some data. */ + const int initial_flush_result = connection_dirserv_flushed_some(conn); + tor_assert_nonfatal(initial_flush_result == 0); + goto done; + + done: + parsed_consensus_request_clear(&req); + if (clear_spool) { + dir_conn_clear_spool(conn); + } + return 0; +} + +/** Helper function for GET /tor/status-vote/{current,next}/... + */ +static int +handle_get_status_vote(dir_connection_t *conn, const get_handler_args_t *args) +{ + const char *url = args->url; + { + ssize_t body_len = 0; + ssize_t estimated_len = 0; + int lifetime = 60; /* XXXX?? should actually use vote intervals. */ + /* This smartlist holds strings that we can compress on the fly. */ + smartlist_t *items = smartlist_new(); + /* This smartlist holds cached_dir_t objects that have a precompressed + * deflated version. */ + smartlist_t *dir_items = smartlist_new(); + dirvote_dirreq_get_status_vote(url, items, dir_items); + if (!smartlist_len(dir_items) && !smartlist_len(items)) { + write_short_http_response(conn, 404, "Not found"); + goto vote_done; + } + + /* We're sending items from at most one kind of source */ + tor_assert_nonfatal(smartlist_len(items) == 0 || + smartlist_len(dir_items) == 0); + + int streaming; + unsigned mask; + if (smartlist_len(items)) { + /* We're taking strings and compressing them on the fly. */ + streaming = 1; + mask = ~0u; + } else { + /* We're taking cached_dir_t objects. We only have them uncompressed + * or deflated. */ + streaming = 0; + mask = (1u<<NO_METHOD) | (1u<<ZLIB_METHOD); + } + const compress_method_t compress_method = find_best_compression_method( + args->compression_supported&mask, streaming); + + SMARTLIST_FOREACH(dir_items, cached_dir_t *, d, + body_len += compress_method != NO_METHOD ? + d->dir_compressed_len : d->dir_len); + estimated_len += body_len; + SMARTLIST_FOREACH(items, const char *, item, { + size_t ln = strlen(item); + if (compress_method != NO_METHOD) { + estimated_len += ln/2; + } else { + body_len += ln; estimated_len += ln; + } + }); + + if (global_write_bucket_low(TO_CONN(conn), estimated_len, 2)) { + write_short_http_response(conn, 503, "Directory busy, try again later"); + goto vote_done; + } + write_http_response_header(conn, body_len ? body_len : -1, + compress_method, + lifetime); + + if (smartlist_len(items)) { + if (compress_method != NO_METHOD) { + conn->compress_state = tor_compress_new(1, compress_method, + choose_compression_level(estimated_len)); + SMARTLIST_FOREACH(items, const char *, c, + connection_buf_add_compress(c, strlen(c), conn, 0)); + connection_buf_add_compress("", 0, conn, 1); + } else { + SMARTLIST_FOREACH(items, const char *, c, + connection_buf_add(c, strlen(c), TO_CONN(conn))); + } + } else { + SMARTLIST_FOREACH(dir_items, cached_dir_t *, d, + connection_buf_add(compress_method != NO_METHOD ? + d->dir_compressed : d->dir, + compress_method != NO_METHOD ? + d->dir_compressed_len : d->dir_len, + TO_CONN(conn))); + } + vote_done: + smartlist_free(items); + smartlist_free(dir_items); + goto done; + } + done: + return 0; +} + +/** Helper function for GET /tor/micro/d/... + */ +static int +handle_get_microdesc(dir_connection_t *conn, const get_handler_args_t *args) +{ + const char *url = args->url; + const compress_method_t compress_method = + find_best_compression_method(args->compression_supported, 1); + int clear_spool = 1; + { + conn->spool = smartlist_new(); + + dir_split_resource_into_spoolable(url+strlen("/tor/micro/d/"), + DIR_SPOOL_MICRODESC, + conn->spool, NULL, + DSR_DIGEST256|DSR_BASE64|DSR_SORT_UNIQ); + + size_t size_guess = 0; + dirserv_spool_remove_missing_and_guess_size(conn, 0, + compress_method != NO_METHOD, + &size_guess, NULL); + if (smartlist_len(conn->spool) == 0) { + write_short_http_response(conn, 404, "Not found"); + goto done; + } + if (global_write_bucket_low(TO_CONN(conn), size_guess, 2)) { + log_info(LD_DIRSERV, + "Client asked for server descriptors, but we've been " + "writing too many bytes lately. Sending 503 Dir busy."); + write_short_http_response(conn, 503, "Directory busy, try again later"); + goto done; + } + + clear_spool = 0; + write_http_response_header(conn, -1, + compress_method, + MICRODESC_CACHE_LIFETIME); + + if (compress_method != NO_METHOD) + conn->compress_state = tor_compress_new(1, compress_method, + choose_compression_level(size_guess)); + + const int initial_flush_result = connection_dirserv_flushed_some(conn); + tor_assert_nonfatal(initial_flush_result == 0); + goto done; + } + + done: + if (clear_spool) { + dir_conn_clear_spool(conn); + } + return 0; +} + +/** Helper function for GET /tor/{server,extra}/... + */ +static int +handle_get_descriptor(dir_connection_t *conn, const get_handler_args_t *args) +{ + const char *url = args->url; + const compress_method_t compress_method = + find_best_compression_method(args->compression_supported, 1); + const or_options_t *options = get_options(); + int clear_spool = 1; + if (!strcmpstart(url,"/tor/server/") || + (!options->BridgeAuthoritativeDir && + !options->BridgeRelay && !strcmpstart(url,"/tor/extra/"))) { + int res; + const char *msg = NULL; + int cache_lifetime = 0; + int is_extra = !strcmpstart(url,"/tor/extra/"); + url += is_extra ? strlen("/tor/extra/") : strlen("/tor/server/"); + dir_spool_source_t source; + time_t publish_cutoff = 0; + if (!strcmpstart(url, "d/")) { + source = + is_extra ? DIR_SPOOL_EXTRA_BY_DIGEST : DIR_SPOOL_SERVER_BY_DIGEST; + } else { + source = + is_extra ? DIR_SPOOL_EXTRA_BY_FP : DIR_SPOOL_SERVER_BY_FP; + /* We only want to apply a publish cutoff when we're requesting + * resources by fingerprint. */ + publish_cutoff = time(NULL) - ROUTER_MAX_AGE_TO_PUBLISH; + } + + conn->spool = smartlist_new(); + res = dirserv_get_routerdesc_spool(conn->spool, url, + source, + connection_dir_is_encrypted(conn), + &msg); + + if (!strcmpstart(url, "all")) { + cache_lifetime = FULL_DIR_CACHE_LIFETIME; + } else if (smartlist_len(conn->spool) == 1) { + cache_lifetime = ROUTERDESC_BY_DIGEST_CACHE_LIFETIME; + } + + size_t size_guess = 0; + int n_expired = 0; + dirserv_spool_remove_missing_and_guess_size(conn, publish_cutoff, + compress_method != NO_METHOD, + &size_guess, &n_expired); + + /* If we are the bridge authority and the descriptor is a bridge + * descriptor, remember that we served this descriptor for desc stats. */ + /* XXXX it's a bit of a kludge to have this here. */ + if (get_options()->BridgeAuthoritativeDir && + source == DIR_SPOOL_SERVER_BY_FP) { + SMARTLIST_FOREACH_BEGIN(conn->spool, spooled_resource_t *, spooled) { + const routerinfo_t *router = + router_get_by_id_digest((const char *)spooled->digest); + /* router can be NULL here when the bridge auth is asked for its own + * descriptor. */ + if (router && router->purpose == ROUTER_PURPOSE_BRIDGE) + rep_hist_note_desc_served(router->cache_info.identity_digest); + } SMARTLIST_FOREACH_END(spooled); + } + + if (res < 0 || size_guess == 0 || smartlist_len(conn->spool) == 0) { + if (msg == NULL) + msg = "Not found"; + write_short_http_response(conn, 404, msg); + } else { + if (global_write_bucket_low(TO_CONN(conn), size_guess, 2)) { + log_info(LD_DIRSERV, + "Client asked for server descriptors, but we've been " + "writing too many bytes lately. Sending 503 Dir busy."); + write_short_http_response(conn, 503, + "Directory busy, try again later"); + dir_conn_clear_spool(conn); + goto done; + } + write_http_response_header(conn, -1, compress_method, cache_lifetime); + if (compress_method != NO_METHOD) + conn->compress_state = tor_compress_new(1, compress_method, + choose_compression_level(size_guess)); + clear_spool = 0; + /* Prime the connection with some data. */ + int initial_flush_result = connection_dirserv_flushed_some(conn); + tor_assert_nonfatal(initial_flush_result == 0); + } + goto done; + } + done: + if (clear_spool) + dir_conn_clear_spool(conn); + return 0; +} + +/** Helper function for GET /tor/keys/... + */ +static int +handle_get_keys(dir_connection_t *conn, const get_handler_args_t *args) +{ + const char *url = args->url; + const compress_method_t compress_method = + find_best_compression_method(args->compression_supported, 1); + const time_t if_modified_since = args->if_modified_since; + { + smartlist_t *certs = smartlist_new(); + ssize_t len = -1; + if (!strcmp(url, "/tor/keys/all")) { + authority_cert_get_all(certs); + } else if (!strcmp(url, "/tor/keys/authority")) { + authority_cert_t *cert = get_my_v3_authority_cert(); + if (cert) + smartlist_add(certs, cert); + } else if (!strcmpstart(url, "/tor/keys/fp/")) { + smartlist_t *fps = smartlist_new(); + dir_split_resource_into_fingerprints(url+strlen("/tor/keys/fp/"), + fps, NULL, + DSR_HEX|DSR_SORT_UNIQ); + SMARTLIST_FOREACH(fps, char *, d, { + authority_cert_t *c = authority_cert_get_newest_by_id(d); + if (c) smartlist_add(certs, c); + tor_free(d); + }); + smartlist_free(fps); + } else if (!strcmpstart(url, "/tor/keys/sk/")) { + smartlist_t *fps = smartlist_new(); + dir_split_resource_into_fingerprints(url+strlen("/tor/keys/sk/"), + fps, NULL, + DSR_HEX|DSR_SORT_UNIQ); + SMARTLIST_FOREACH(fps, char *, d, { + authority_cert_t *c = authority_cert_get_by_sk_digest(d); + if (c) smartlist_add(certs, c); + tor_free(d); + }); + smartlist_free(fps); + } else if (!strcmpstart(url, "/tor/keys/fp-sk/")) { + smartlist_t *fp_sks = smartlist_new(); + dir_split_resource_into_fingerprint_pairs(url+strlen("/tor/keys/fp-sk/"), + fp_sks); + SMARTLIST_FOREACH(fp_sks, fp_pair_t *, pair, { + authority_cert_t *c = authority_cert_get_by_digests(pair->first, + pair->second); + if (c) smartlist_add(certs, c); + tor_free(pair); + }); + smartlist_free(fp_sks); + } else { + write_short_http_response(conn, 400, "Bad request"); + goto keys_done; + } + if (!smartlist_len(certs)) { + write_short_http_response(conn, 404, "Not found"); + goto keys_done; + } + SMARTLIST_FOREACH(certs, authority_cert_t *, c, + if (c->cache_info.published_on < if_modified_since) + SMARTLIST_DEL_CURRENT(certs, c)); + if (!smartlist_len(certs)) { + write_short_http_response(conn, 304, "Not modified"); + goto keys_done; + } + len = 0; + SMARTLIST_FOREACH(certs, authority_cert_t *, c, + len += c->cache_info.signed_descriptor_len); + + if (global_write_bucket_low(TO_CONN(conn), + compress_method != NO_METHOD ? len/2 : len, + 2)) { + write_short_http_response(conn, 503, "Directory busy, try again later"); + goto keys_done; + } + + write_http_response_header(conn, + compress_method != NO_METHOD ? -1 : len, + compress_method, + 60*60); + if (compress_method != NO_METHOD) { + conn->compress_state = tor_compress_new(1, compress_method, + choose_compression_level(len)); + SMARTLIST_FOREACH(certs, authority_cert_t *, c, + connection_buf_add_compress( + c->cache_info.signed_descriptor_body, + c->cache_info.signed_descriptor_len, + conn, 0)); + connection_buf_add_compress("", 0, conn, 1); + } else { + SMARTLIST_FOREACH(certs, authority_cert_t *, c, + connection_buf_add(c->cache_info.signed_descriptor_body, + c->cache_info.signed_descriptor_len, + TO_CONN(conn))); + } + keys_done: + smartlist_free(certs); + goto done; + } + done: + return 0; +} + +/** Helper function for GET /tor/rendezvous2/ + */ +static int +handle_get_hs_descriptor_v2(dir_connection_t *conn, + const get_handler_args_t *args) +{ + const char *url = args->url; + if (connection_dir_is_encrypted(conn)) { + /* Handle v2 rendezvous descriptor fetch request. */ + const char *descp; + const char *query = url + strlen("/tor/rendezvous2/"); + if (rend_valid_descriptor_id(query)) { + log_info(LD_REND, "Got a v2 rendezvous descriptor request for ID '%s'", + safe_str(escaped(query))); + switch (rend_cache_lookup_v2_desc_as_dir(query, &descp)) { + case 1: /* valid */ + write_http_response_header(conn, strlen(descp), NO_METHOD, 0); + connection_buf_add(descp, strlen(descp), TO_CONN(conn)); + break; + case 0: /* well-formed but not present */ + write_short_http_response(conn, 404, "Not found"); + break; + case -1: /* not well-formed */ + write_short_http_response(conn, 400, "Bad request"); + break; + } + } else { /* not well-formed */ + write_short_http_response(conn, 400, "Bad request"); + } + goto done; + } else { + /* Not encrypted! */ + write_short_http_response(conn, 404, "Not found"); + } + done: + return 0; +} + +/** Helper function for GET /tor/hs/3/<z>. Only for version 3. + */ +STATIC int +handle_get_hs_descriptor_v3(dir_connection_t *conn, + const get_handler_args_t *args) +{ + int retval; + const char *desc_str = NULL; + const char *pubkey_str = NULL; + const char *url = args->url; + + /* Reject unencrypted dir connections */ + if (!connection_dir_is_encrypted(conn)) { + write_short_http_response(conn, 404, "Not found"); + goto done; + } + + /* After the path prefix follows the base64 encoded blinded pubkey which we + * use to get the descriptor from the cache. Skip the prefix and get the + * pubkey. */ + tor_assert(!strcmpstart(url, "/tor/hs/3/")); + pubkey_str = url + strlen("/tor/hs/3/"); + retval = hs_cache_lookup_as_dir(HS_VERSION_THREE, + pubkey_str, &desc_str); + if (retval <= 0 || desc_str == NULL) { + write_short_http_response(conn, 404, "Not found"); + goto done; + } + + /* Found requested descriptor! Pass it to this nice client. */ + write_http_response_header(conn, strlen(desc_str), NO_METHOD, 0); + connection_buf_add(desc_str, strlen(desc_str), TO_CONN(conn)); + + done: + return 0; +} + +/** Helper function for GET /tor/networkstatus-bridges + */ +static int +handle_get_networkstatus_bridges(dir_connection_t *conn, + const get_handler_args_t *args) +{ + const char *headers = args->headers; + + const or_options_t *options = get_options(); + if (options->BridgeAuthoritativeDir && + options->BridgePassword_AuthDigest_ && + connection_dir_is_encrypted(conn)) { + char *status; + char digest[DIGEST256_LEN]; + + char *header = http_get_header(headers, "Authorization: Basic "); + if (header) + crypto_digest256(digest, header, strlen(header), DIGEST_SHA256); + + /* now make sure the password is there and right */ + if (!header || + tor_memneq(digest, + options->BridgePassword_AuthDigest_, DIGEST256_LEN)) { + write_short_http_response(conn, 404, "Not found"); + tor_free(header); + goto done; + } + tor_free(header); + + /* all happy now. send an answer. */ + status = networkstatus_getinfo_by_purpose("bridge", time(NULL)); + size_t dlen = strlen(status); + write_http_response_header(conn, dlen, NO_METHOD, 0); + connection_buf_add(status, dlen, TO_CONN(conn)); + tor_free(status); + goto done; + } + done: + return 0; +} + +/** Helper function for GET robots.txt or /tor/robots.txt */ +static int +handle_get_robots(dir_connection_t *conn, const get_handler_args_t *args) +{ + (void)args; + { + const char robots[] = "User-agent: *\r\nDisallow: /\r\n"; + size_t len = strlen(robots); + write_http_response_header(conn, len, NO_METHOD, ROBOTS_CACHE_LIFETIME); + connection_buf_add(robots, len, TO_CONN(conn)); + } + return 0; +} + +/* Given the <b>url</b> from a POST request, try to extract the version number + * using the provided <b>prefix</b>. The version should be after the prefix and + * ending with the separator "/". For instance: + * /tor/hs/3/publish + * + * On success, <b>end_pos</b> points to the position right after the version + * was found. On error, it is set to NULL. + * + * Return version on success else negative value. */ +STATIC int +parse_hs_version_from_post(const char *url, const char *prefix, + const char **end_pos) +{ + int ok; + unsigned long version; + const char *start; + char *end = NULL; + + tor_assert(url); + tor_assert(prefix); + tor_assert(end_pos); + + /* Check if the prefix does start the url. */ + if (strcmpstart(url, prefix)) { + goto err; + } + /* Move pointer to the end of the prefix string. */ + start = url + strlen(prefix); + /* Try this to be the HS version and if we are still at the separator, next + * will be move to the right value. */ + version = tor_parse_long(start, 10, 0, INT_MAX, &ok, &end); + if (!ok) { + goto err; + } + + *end_pos = end; + return (int) version; + err: + *end_pos = NULL; + return -1; +} + +/* Handle the POST request for a hidden service descripror. The request is in + * <b>url</b>, the body of the request is in <b>body</b>. Return 200 on success + * else return 400 indicating a bad request. */ +STATIC int +handle_post_hs_descriptor(const char *url, const char *body) +{ + int version; + const char *end_pos; + + tor_assert(url); + tor_assert(body); + + version = parse_hs_version_from_post(url, "/tor/hs/", &end_pos); + if (version < 0) { + goto err; + } + + /* We have a valid version number, now make sure it's a publish request. Use + * the end position just after the version and check for the command. */ + if (strcmpstart(end_pos, "/publish")) { + goto err; + } + + switch (version) { + case HS_VERSION_THREE: + if (hs_cache_store_as_dir(body) < 0) { + goto err; + } + log_info(LD_REND, "Publish request for HS descriptor handled " + "successfully."); + break; + default: + /* Unsupported version, return a bad request. */ + goto err; + } + + return 200; + err: + /* Bad request. */ + return 400; +} + +/** Helper function: called when a dirserver gets a complete HTTP POST + * request. Look for an uploaded server descriptor or rendezvous + * service descriptor. On finding one, process it and write a + * response into conn-\>outbuf. If the request is unrecognized, send a + * 400. Always return 0. */ +MOCK_IMPL(STATIC int, +directory_handle_command_post,(dir_connection_t *conn, const char *headers, + const char *body, size_t body_len)) +{ + char *url = NULL; + const or_options_t *options = get_options(); + + log_debug(LD_DIRSERV,"Received POST command."); + + conn->base_.state = DIR_CONN_STATE_SERVER_WRITING; + + if (!public_server_mode(options)) { + log_info(LD_DIR, "Rejected dir post request from %s " + "since we're not a public relay.", conn->base_.address); + write_short_http_response(conn, 503, "Not acting as a public relay"); + goto done; + } + + if (parse_http_url(headers, &url) < 0) { + write_short_http_response(conn, 400, "Bad request"); + return 0; + } + log_debug(LD_DIRSERV,"rewritten url as '%s'.", escaped(url)); + + /* Handle v2 rendezvous service publish request. */ + if (connection_dir_is_encrypted(conn) && + !strcmpstart(url,"/tor/rendezvous2/publish")) { + if (rend_cache_store_v2_desc_as_dir(body) < 0) { + log_warn(LD_REND, "Rejected v2 rend descriptor (body size %d) from %s.", + (int)body_len, conn->base_.address); + write_short_http_response(conn, 400, + "Invalid v2 service descriptor rejected"); + } else { + write_short_http_response(conn, 200, "Service descriptor (v2) stored"); + log_info(LD_REND, "Handled v2 rendezvous descriptor post: accepted"); + } + goto done; + } + + /* Handle HS descriptor publish request. */ + /* XXX: This should be disabled with a consensus param until we want to + * the prop224 be deployed and thus use. */ + if (connection_dir_is_encrypted(conn) && !strcmpstart(url, "/tor/hs/")) { + const char *msg = "HS descriptor stored successfully."; + + /* We most probably have a publish request for an HS descriptor. */ + int code = handle_post_hs_descriptor(url, body); + if (code != 200) { + msg = "Invalid HS descriptor. Rejected."; + } + write_short_http_response(conn, code, msg); + goto done; + } + + if (!authdir_mode(options)) { + /* we just provide cached directories; we don't want to + * receive anything. */ + write_short_http_response(conn, 400, "Nonauthoritative directory does not " + "accept posted server descriptors"); + goto done; + } + + if (authdir_mode(options) && + !strcmp(url,"/tor/")) { /* server descriptor post */ + const char *msg = "[None]"; + uint8_t purpose = authdir_mode_bridge(options) ? + ROUTER_PURPOSE_BRIDGE : ROUTER_PURPOSE_GENERAL; + was_router_added_t r = dirserv_add_multiple_descriptors(body, purpose, + conn->base_.address, &msg); + tor_assert(msg); + + if (r == ROUTER_ADDED_SUCCESSFULLY) { + write_short_http_response(conn, 200, msg); + } else if (WRA_WAS_OUTDATED(r)) { + write_http_response_header_impl(conn, -1, NULL, NULL, + "X-Descriptor-Not-New: Yes\r\n", -1); + } else { + log_info(LD_DIRSERV, + "Rejected router descriptor or extra-info from %s " + "(\"%s\").", + conn->base_.address, msg); + write_short_http_response(conn, 400, msg); + } + goto done; + } + + if (authdir_mode_v3(options) && + !strcmp(url,"/tor/post/vote")) { /* v3 networkstatus vote */ + const char *msg = "OK"; + int status; + if (dirvote_add_vote(body, &msg, &status)) { + write_short_http_response(conn, status, "Vote stored"); + } else { + tor_assert(msg); + log_warn(LD_DIRSERV, "Rejected vote from %s (\"%s\").", + conn->base_.address, msg); + write_short_http_response(conn, status, msg); + } + goto done; + } + + if (authdir_mode_v3(options) && + !strcmp(url,"/tor/post/consensus-signature")) { /* sigs on consensus. */ + const char *msg = NULL; + if (dirvote_add_signatures(body, conn->base_.address, &msg)>=0) { + write_short_http_response(conn, 200, msg?msg:"Signatures stored"); + } else { + log_warn(LD_DIR, "Unable to store signatures posted by %s: %s", + conn->base_.address, msg?msg:"???"); + write_short_http_response(conn, 400, + msg?msg:"Unable to store signatures"); + } + goto done; + } + + /* we didn't recognize the url */ + write_short_http_response(conn, 404, "Not found"); + + done: + tor_free(url); + return 0; +} + +/** Called when a dirserver receives data on a directory connection; + * looks for an HTTP request. If the request is complete, remove it + * from the inbuf, try to process it; otherwise, leave it on the + * buffer. Return a 0 on success, or -1 on error. + */ +STATIC int +directory_handle_command(dir_connection_t *conn) +{ + char *headers=NULL, *body=NULL; + size_t body_len=0; + int r; + + tor_assert(conn); + tor_assert(conn->base_.type == CONN_TYPE_DIR); + + switch (connection_fetch_from_buf_http(TO_CONN(conn), + &headers, MAX_HEADERS_SIZE, + &body, &body_len, MAX_DIR_UL_SIZE, 0)) { + case -1: /* overflow */ + log_warn(LD_DIRSERV, + "Request too large from address '%s' to DirPort. Closing.", + safe_str(conn->base_.address)); + return -1; + case 0: + log_debug(LD_DIRSERV,"command not all here yet."); + return 0; + /* case 1, fall through */ + } + + http_set_address_origin(headers, TO_CONN(conn)); + // we should escape headers here as well, + // but we can't call escaped() twice, as it uses the same buffer + //log_debug(LD_DIRSERV,"headers %s, body %s.", headers, escaped(body)); + + if (!strncasecmp(headers,"GET",3)) + r = directory_handle_command_get(conn, headers, body, body_len); + else if (!strncasecmp(headers,"POST",4)) + r = directory_handle_command_post(conn, headers, body, body_len); + else { + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, + "Got headers %s with unknown command. Closing.", + escaped(headers)); + r = -1; + } + + tor_free(headers); tor_free(body); + return r; +} + +/** Write handler for directory connections; called when all data has + * been flushed. Close the connection or wait for a response as + * appropriate. + */ +int +connection_dir_finished_flushing(dir_connection_t *conn) +{ + tor_assert(conn); + tor_assert(conn->base_.type == CONN_TYPE_DIR); + + /* Note that we have finished writing the directory response. For direct + * connections this means we're done; for tunneled connections it's only + * an intermediate step. */ + if (conn->dirreq_id) + geoip_change_dirreq_state(conn->dirreq_id, DIRREQ_TUNNELED, + DIRREQ_FLUSHING_DIR_CONN_FINISHED); + else + geoip_change_dirreq_state(TO_CONN(conn)->global_identifier, + DIRREQ_DIRECT, + DIRREQ_FLUSHING_DIR_CONN_FINISHED); + switch (conn->base_.state) { + case DIR_CONN_STATE_CONNECTING: + case DIR_CONN_STATE_CLIENT_SENDING: + log_debug(LD_DIR,"client finished sending command."); + conn->base_.state = DIR_CONN_STATE_CLIENT_READING; + return 0; + case DIR_CONN_STATE_SERVER_WRITING: + if (conn->spool) { + log_warn(LD_BUG, "Emptied a dirserv buffer, but it's still spooling!"); + connection_mark_for_close(TO_CONN(conn)); + } else { + log_debug(LD_DIRSERV, "Finished writing server response. Closing."); + connection_mark_for_close(TO_CONN(conn)); + } + return 0; + default: + log_warn(LD_BUG,"called in unexpected state %d.", + conn->base_.state); + tor_fragile_assert(); + return -1; + } + return 0; +} + +/* We just got a new consensus! If there are other in-progress requests + * for this consensus flavor (for example because we launched several in + * parallel), cancel them. + * + * We do this check here (not just in + * connection_ap_handshake_attach_circuit()) to handle the edge case where + * a consensus fetch begins and ends before some other one tries to attach to + * a circuit, in which case the other one won't know that we're all happy now. + * + * Don't mark the conn that just gave us the consensus -- otherwise we + * would end up double-marking it when it cleans itself up. + */ +static void +connection_dir_close_consensus_fetches(dir_connection_t *except_this_one, + const char *resource) +{ + smartlist_t *conns_to_close = + connection_dir_list_by_purpose_and_resource(DIR_PURPOSE_FETCH_CONSENSUS, + resource); + SMARTLIST_FOREACH_BEGIN(conns_to_close, dir_connection_t *, d) { + if (d == except_this_one) + continue; + log_info(LD_DIR, "Closing consensus fetch (to %s) since one " + "has just arrived.", TO_CONN(d)->address); + connection_mark_for_close(TO_CONN(d)); + } SMARTLIST_FOREACH_END(d); + smartlist_free(conns_to_close); +} + +/** Connected handler for directory connections: begin sending data to the + * server, and return 0. + * Only used when connections don't immediately connect. */ +int +connection_dir_finished_connecting(dir_connection_t *conn) +{ + tor_assert(conn); + tor_assert(conn->base_.type == CONN_TYPE_DIR); + tor_assert(conn->base_.state == DIR_CONN_STATE_CONNECTING); + + log_debug(LD_HTTP,"Dir connection to router %s:%u established.", + conn->base_.address,conn->base_.port); + + /* start flushing conn */ + conn->base_.state = DIR_CONN_STATE_CLIENT_SENDING; + return 0; +} + +/** Decide which download schedule we want to use based on descriptor type + * in <b>dls</b> and <b>options</b>. + * + * Then, return the initial delay for that download schedule, in seconds. + * + * Helper function for download_status_increment_failure(), + * download_status_reset(), and download_status_increment_attempt(). */ +STATIC int +find_dl_min_delay(const download_status_t *dls, const or_options_t *options) +{ + tor_assert(dls); + tor_assert(options); + + switch (dls->schedule) { + case DL_SCHED_GENERIC: + /* Any other directory document */ + if (dir_server_mode(options)) { + /* A directory authority or directory mirror */ + return options->TestingServerDownloadInitialDelay; + } else { + return options->TestingClientDownloadInitialDelay; + } + case DL_SCHED_CONSENSUS: + if (!networkstatus_consensus_can_use_multiple_directories(options)) { + /* A public relay */ + return options->TestingServerConsensusDownloadInitialDelay; + } else { + /* A client or bridge */ + if (networkstatus_consensus_is_bootstrapping(time(NULL))) { + /* During bootstrapping */ + if (!networkstatus_consensus_can_use_extra_fallbacks(options)) { + /* A bootstrapping client without extra fallback directories */ + return options-> + ClientBootstrapConsensusAuthorityOnlyDownloadInitialDelay; + } else if (dls->want_authority) { + /* A bootstrapping client with extra fallback directories, but + * connecting to an authority */ + return + options->ClientBootstrapConsensusAuthorityDownloadInitialDelay; + } else { + /* A bootstrapping client connecting to extra fallback directories + */ + return + options->ClientBootstrapConsensusFallbackDownloadInitialDelay; + } + } else { + /* A client with a reasonably live consensus, with or without + * certificates */ + return options->TestingClientConsensusDownloadInitialDelay; + } + } + case DL_SCHED_BRIDGE: + if (options->UseBridges && num_bridges_usable(0) > 0) { + /* A bridge client that is sure that one or more of its bridges are + * running can afford to wait longer to update bridge descriptors. */ + return options->TestingBridgeDownloadInitialDelay; + } else { + /* A bridge client which might have no running bridges, must try to + * get bridge descriptors straight away. */ + return options->TestingBridgeBootstrapDownloadInitialDelay; + } + default: + tor_assert(0); + } + + /* Impossible, but gcc will fail with -Werror without a `return`. */ + return 0; +} + +/** As next_random_exponential_delay() below, but does not compute a random + * value. Instead, compute the range of values that + * next_random_exponential_delay() should use when computing its random value. + * Store the low bound into *<b>low_bound_out</b>, and the high bound into + * *<b>high_bound_out</b>. Guarantees that the low bound is strictly less + * than the high bound. */ +STATIC void +next_random_exponential_delay_range(int *low_bound_out, + int *high_bound_out, + int delay, + int base_delay) +{ + // This is the "decorrelated jitter" approach, from + // https://www.awsarchitectureblog.com/2015/03/backoff.html + // The formula is + // sleep = min(cap, random_between(base, sleep * 3)) + + const int delay_times_3 = delay < INT_MAX/3 ? delay * 3 : INT_MAX; + *low_bound_out = base_delay; + if (delay_times_3 > base_delay) { + *high_bound_out = delay_times_3; + } else { + *high_bound_out = base_delay+1; + } +} + +/** Advance one delay step. The algorithm will generate a random delay, + * such that each failure is possibly (random) longer than the ones before. + * + * We then clamp that value to be no larger than max_delay, and return it. + * + * The <b>base_delay</b> parameter is lowest possible delay time (can't be + * zero); the <b>backoff_position</b> parameter is the number of times we've + * generated a delay; and the <b>delay</b> argument is the most recently used + * delay. + */ +STATIC int +next_random_exponential_delay(int delay, + int base_delay) +{ + /* Check preconditions */ + if (BUG(delay < 0)) + delay = 0; + + if (base_delay < 1) + base_delay = 1; + + int low_bound=0, high_bound=INT_MAX; + + next_random_exponential_delay_range(&low_bound, &high_bound, + delay, base_delay); + + return crypto_rand_int_range(low_bound, high_bound); +} + +/** Find the current delay for dls based on min_delay. + * + * This function sets dls->next_attempt_at based on now, and returns the delay. + * Helper for download_status_increment_failure and + * download_status_increment_attempt. */ +STATIC int +download_status_schedule_get_delay(download_status_t *dls, + int min_delay, + time_t now) +{ + tor_assert(dls); + /* If we're using random exponential backoff, we do need min/max delay */ + tor_assert(min_delay >= 0); + + int delay = INT_MAX; + uint8_t dls_schedule_position = (dls->increment_on + == DL_SCHED_INCREMENT_ATTEMPT + ? dls->n_download_attempts + : dls->n_download_failures); + + /* Check if we missed a reset somehow */ + IF_BUG_ONCE(dls->last_backoff_position > dls_schedule_position) { + dls->last_backoff_position = 0; + dls->last_delay_used = 0; + } + + if (dls_schedule_position > 0) { + delay = dls->last_delay_used; + + while (dls->last_backoff_position < dls_schedule_position) { + /* Do one increment step */ + delay = next_random_exponential_delay(delay, min_delay); + /* Update our position */ + ++(dls->last_backoff_position); + } + } else { + /* If we're just starting out, use the minimum delay */ + delay = min_delay; + } + + /* Clamp it within min/max if we have them */ + if (min_delay >= 0 && delay < min_delay) delay = min_delay; + + /* Store it for next time */ + dls->last_backoff_position = dls_schedule_position; + dls->last_delay_used = delay; + + /* A negative delay makes no sense. Knowing that delay is + * non-negative allows us to safely do the wrapping check below. */ + tor_assert(delay >= 0); + + /* Avoid now+delay overflowing TIME_MAX, by comparing with a subtraction + * that won't overflow (since delay is non-negative). */ + if (delay < INT_MAX && now <= TIME_MAX - delay) { + dls->next_attempt_at = now+delay; + } else { + dls->next_attempt_at = TIME_MAX; + } + + return delay; +} + +/* Log a debug message about item, which increments on increment_action, has + * incremented dls_n_download_increments times. The message varies based on + * was_schedule_incremented (if not, not_incremented_response is logged), and + * the values of increment, dls_next_attempt_at, and now. + * Helper for download_status_increment_failure and + * download_status_increment_attempt. */ +static void +download_status_log_helper(const char *item, int was_schedule_incremented, + const char *increment_action, + const char *not_incremented_response, + uint8_t dls_n_download_increments, int increment, + time_t dls_next_attempt_at, time_t now) +{ + if (item) { + if (!was_schedule_incremented) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again %s.", + item, increment_action, (int)dls_n_download_increments, + not_incremented_response); + else if (increment == 0) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again immediately.", + item, increment_action, (int)dls_n_download_increments); + else if (dls_next_attempt_at < TIME_MAX) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again in %d seconds.", + item, increment_action, (int)dls_n_download_increments, + (int)(dls_next_attempt_at-now)); + else + log_debug(LD_DIR, "%s %s %d time(s); Giving up for a while.", + item, increment_action, (int)dls_n_download_increments); + } +} + +/** Determine when a failed download attempt should be retried. + * Called when an attempt to download <b>dls</b> has failed with HTTP status + * <b>status_code</b>. Increment the failure count (if the code indicates a + * real failure, or if we're a server) and set <b>dls</b>-\>next_attempt_at to + * an appropriate time in the future and return it. + * If <b>dls->increment_on</b> is DL_SCHED_INCREMENT_ATTEMPT, increment the + * failure count, and return a time in the far future for the next attempt (to + * avoid an immediate retry). */ +time_t +download_status_increment_failure(download_status_t *dls, int status_code, + const char *item, int server, time_t now) +{ + (void) status_code; // XXXX no longer used. + (void) server; // XXXX no longer used. + int increment = -1; + int min_delay = 0; + + tor_assert(dls); + + /* dls wasn't reset before it was used */ + if (dls->next_attempt_at == 0) { + download_status_reset(dls); + } + + /* count the failure */ + if (dls->n_download_failures < IMPOSSIBLE_TO_DOWNLOAD-1) { + ++dls->n_download_failures; + } + + if (dls->increment_on == DL_SCHED_INCREMENT_FAILURE) { + /* We don't find out that a failure-based schedule has attempted a + * connection until that connection fails. + * We'll never find out about successful connections, but this doesn't + * matter, because schedules are reset after a successful download. + */ + if (dls->n_download_attempts < IMPOSSIBLE_TO_DOWNLOAD-1) + ++dls->n_download_attempts; + + /* only return a failure retry time if this schedule increments on failures + */ + min_delay = find_dl_min_delay(dls, get_options()); + increment = download_status_schedule_get_delay(dls, min_delay, now); + } + + download_status_log_helper(item, !dls->increment_on, "failed", + "concurrently", dls->n_download_failures, + increment, + download_status_get_next_attempt_at(dls), + now); + + if (dls->increment_on == DL_SCHED_INCREMENT_ATTEMPT) { + /* stop this schedule retrying on failure, it will launch concurrent + * connections instead */ + return TIME_MAX; + } else { + return download_status_get_next_attempt_at(dls); + } +} + +/** Determine when the next download attempt should be made when using an + * attempt-based (potentially concurrent) download schedule. + * Called when an attempt to download <b>dls</b> is being initiated. + * Increment the attempt count and set <b>dls</b>-\>next_attempt_at to an + * appropriate time in the future and return it. + * If <b>dls->increment_on</b> is DL_SCHED_INCREMENT_FAILURE, don't increment + * the attempts, and return a time in the far future (to avoid launching a + * concurrent attempt). */ +time_t +download_status_increment_attempt(download_status_t *dls, const char *item, + time_t now) +{ + int delay = -1; + int min_delay = 0; + + tor_assert(dls); + + /* dls wasn't reset before it was used */ + if (dls->next_attempt_at == 0) { + download_status_reset(dls); + } + + if (dls->increment_on == DL_SCHED_INCREMENT_FAILURE) { + /* this schedule should retry on failure, and not launch any concurrent + attempts */ + log_warn(LD_BUG, "Tried to launch an attempt-based connection on a " + "failure-based schedule."); + return TIME_MAX; + } + + if (dls->n_download_attempts < IMPOSSIBLE_TO_DOWNLOAD-1) + ++dls->n_download_attempts; + + min_delay = find_dl_min_delay(dls, get_options()); + delay = download_status_schedule_get_delay(dls, min_delay, now); + + download_status_log_helper(item, dls->increment_on, "attempted", + "on failure", dls->n_download_attempts, + delay, download_status_get_next_attempt_at(dls), + now); + + return download_status_get_next_attempt_at(dls); +} + +static time_t +download_status_get_initial_delay_from_now(const download_status_t *dls) +{ + /* We use constant initial delays, even in exponential backoff + * schedules. */ + return time(NULL) + find_dl_min_delay(dls, get_options()); +} + +/** Reset <b>dls</b> so that it will be considered downloadable + * immediately, and/or to show that we don't need it anymore. + * + * Must be called to initialise a download schedule, otherwise the zeroth item + * in the schedule will never be used. + * + * (We find the zeroth element of the download schedule, and set + * next_attempt_at to be the appropriate offset from 'now'. In most + * cases this means setting it to 'now', so the item will be immediately + * downloadable; when using authorities with fallbacks, there is a few seconds' + * delay.) */ +void +download_status_reset(download_status_t *dls) +{ + if (dls->n_download_failures == IMPOSSIBLE_TO_DOWNLOAD + || dls->n_download_attempts == IMPOSSIBLE_TO_DOWNLOAD) + return; /* Don't reset this. */ + + dls->n_download_failures = 0; + dls->n_download_attempts = 0; + dls->next_attempt_at = download_status_get_initial_delay_from_now(dls); + dls->last_backoff_position = 0; + dls->last_delay_used = 0; + /* Don't reset dls->want_authority or dls->increment_on */ +} + +/** Return true iff, as of <b>now</b>, the resource tracked by <b>dls</b> is + * ready to get its download reattempted. */ +int +download_status_is_ready(download_status_t *dls, time_t now) +{ + /* dls wasn't reset before it was used */ + if (dls->next_attempt_at == 0) { + download_status_reset(dls); + } + + return download_status_get_next_attempt_at(dls) <= now; +} + +/** Mark <b>dl</b> as never downloadable. */ +void +download_status_mark_impossible(download_status_t *dl) +{ + dl->n_download_failures = IMPOSSIBLE_TO_DOWNLOAD; + dl->n_download_attempts = IMPOSSIBLE_TO_DOWNLOAD; +} + +/** Return the number of failures on <b>dls</b> since the last success (if + * any). */ +int +download_status_get_n_failures(const download_status_t *dls) +{ + return dls->n_download_failures; +} + +/** Return the number of attempts to download <b>dls</b> since the last success + * (if any). This can differ from download_status_get_n_failures() due to + * outstanding concurrent attempts. */ +int +download_status_get_n_attempts(const download_status_t *dls) +{ + return dls->n_download_attempts; +} + +/** Return the next time to attempt to download <b>dls</b>. */ +time_t +download_status_get_next_attempt_at(const download_status_t *dls) +{ + /* dls wasn't reset before it was used */ + if (dls->next_attempt_at == 0) { + /* so give the answer we would have given if it had been */ + return download_status_get_initial_delay_from_now(dls); + } + + return dls->next_attempt_at; +} + +/** Called when one or more routerdesc (or extrainfo, if <b>was_extrainfo</b>) + * fetches have failed (with uppercase fingerprints listed in <b>failed</b>, + * either as descriptor digests or as identity digests based on + * <b>was_descriptor_digests</b>). + */ +static void +dir_routerdesc_download_failed(smartlist_t *failed, int status_code, + int router_purpose, + int was_extrainfo, int was_descriptor_digests) +{ + char digest[DIGEST_LEN]; + time_t now = time(NULL); + int server = directory_fetches_from_authorities(get_options()); + if (!was_descriptor_digests) { + if (router_purpose == ROUTER_PURPOSE_BRIDGE) { + tor_assert(!was_extrainfo); + connection_dir_retry_bridges(failed); + } + return; /* FFFF should implement for other-than-router-purpose someday */ + } + SMARTLIST_FOREACH_BEGIN(failed, const char *, cp) { + download_status_t *dls = NULL; + if (base16_decode(digest, DIGEST_LEN, cp, strlen(cp)) != DIGEST_LEN) { + log_warn(LD_BUG, "Malformed fingerprint in list: %s", escaped(cp)); + continue; + } + if (was_extrainfo) { + signed_descriptor_t *sd = + router_get_by_extrainfo_digest(digest); + if (sd) + dls = &sd->ei_dl_status; + } else { + dls = router_get_dl_status_by_descriptor_digest(digest); + } + if (!dls) + continue; + download_status_increment_failure(dls, status_code, cp, server, now); + } SMARTLIST_FOREACH_END(cp); + + /* No need to relaunch descriptor downloads here: we already do it + * every 10 or 60 seconds (FOO_DESCRIPTOR_RETRY_INTERVAL) in main.c. */ +} + +/** Called when a connection to download microdescriptors from relay with + * <b>dir_id</b> has failed in whole or in part. <b>failed</b> is a list + * of every microdesc digest we didn't get. <b>status_code</b> is the http + * status code we received. Reschedule the microdesc downloads as + * appropriate. */ +static void +dir_microdesc_download_failed(smartlist_t *failed, + int status_code, const char *dir_id) +{ + networkstatus_t *consensus + = networkstatus_get_latest_consensus_by_flavor(FLAV_MICRODESC); + routerstatus_t *rs; + download_status_t *dls; + time_t now = time(NULL); + int server = directory_fetches_from_authorities(get_options()); + + if (! consensus) + return; + + /* We failed to fetch a microdescriptor from 'dir_id', note it down + * so that we don't try the same relay next time... */ + microdesc_note_outdated_dirserver(dir_id); + + SMARTLIST_FOREACH_BEGIN(failed, const char *, d) { + rs = router_get_mutable_consensus_status_by_descriptor_digest(consensus,d); + if (!rs) + continue; + dls = &rs->dl_status; + + { /* Increment the failure count for this md fetch */ + char buf[BASE64_DIGEST256_LEN+1]; + digest256_to_base64(buf, d); + log_info(LD_DIR, "Failed to download md %s from %s", + buf, hex_str(dir_id, DIGEST_LEN)); + download_status_increment_failure(dls, status_code, buf, + server, now); + } + } SMARTLIST_FOREACH_END(d); +} + +/** Helper. Compare two fp_pair_t objects, and return negative, 0, or + * positive as appropriate. */ +static int +compare_pairs_(const void **a, const void **b) +{ + const fp_pair_t *fp1 = *a, *fp2 = *b; + int r; + if ((r = fast_memcmp(fp1->first, fp2->first, DIGEST_LEN))) + return r; + else + return fast_memcmp(fp1->second, fp2->second, DIGEST_LEN); +} + +/** Divide a string <b>res</b> of the form FP1-FP2+FP3-FP4...[.z], where each + * FP is a hex-encoded fingerprint, into a sequence of distinct sorted + * fp_pair_t. Skip malformed pairs. On success, return 0 and add those + * fp_pair_t into <b>pairs_out</b>. On failure, return -1. */ +int +dir_split_resource_into_fingerprint_pairs(const char *res, + smartlist_t *pairs_out) +{ + smartlist_t *pairs_tmp = smartlist_new(); + smartlist_t *pairs_result = smartlist_new(); + + smartlist_split_string(pairs_tmp, res, "+", 0, 0); + if (smartlist_len(pairs_tmp)) { + char *last = smartlist_get(pairs_tmp,smartlist_len(pairs_tmp)-1); + size_t last_len = strlen(last); + if (last_len > 2 && !strcmp(last+last_len-2, ".z")) { + last[last_len-2] = '\0'; + } + } + SMARTLIST_FOREACH_BEGIN(pairs_tmp, char *, cp) { + if (strlen(cp) != HEX_DIGEST_LEN*2+1) { + log_info(LD_DIR, + "Skipping digest pair %s with non-standard length.", escaped(cp)); + } else if (cp[HEX_DIGEST_LEN] != '-') { + log_info(LD_DIR, + "Skipping digest pair %s with missing dash.", escaped(cp)); + } else { + fp_pair_t pair; + if (base16_decode(pair.first, DIGEST_LEN, + cp, HEX_DIGEST_LEN) != DIGEST_LEN || + base16_decode(pair.second,DIGEST_LEN, + cp+HEX_DIGEST_LEN+1, HEX_DIGEST_LEN) != DIGEST_LEN) { + log_info(LD_DIR, "Skipping non-decodable digest pair %s", escaped(cp)); + } else { + smartlist_add(pairs_result, tor_memdup(&pair, sizeof(pair))); + } + } + tor_free(cp); + } SMARTLIST_FOREACH_END(cp); + smartlist_free(pairs_tmp); + + /* Uniq-and-sort */ + smartlist_sort(pairs_result, compare_pairs_); + smartlist_uniq(pairs_result, compare_pairs_, tor_free_); + + smartlist_add_all(pairs_out, pairs_result); + smartlist_free(pairs_result); + return 0; +} + +/** Given a directory <b>resource</b> request, containing zero + * or more strings separated by plus signs, followed optionally by ".z", store + * the strings, in order, into <b>fp_out</b>. If <b>compressed_out</b> is + * non-NULL, set it to 1 if the resource ends in ".z", else set it to 0. + * + * If (flags & DSR_HEX), then delete all elements that aren't hex digests, and + * decode the rest. If (flags & DSR_BASE64), then use "-" rather than "+" as + * a separator, delete all the elements that aren't base64-encoded digests, + * and decode the rest. If (flags & DSR_DIGEST256), these digests should be + * 256 bits long; else they should be 160. + * + * If (flags & DSR_SORT_UNIQ), then sort the list and remove all duplicates. + */ +int +dir_split_resource_into_fingerprints(const char *resource, + smartlist_t *fp_out, int *compressed_out, + int flags) +{ + const int decode_hex = flags & DSR_HEX; + const int decode_base64 = flags & DSR_BASE64; + const int digests_are_256 = flags & DSR_DIGEST256; + const int sort_uniq = flags & DSR_SORT_UNIQ; + + const int digest_len = digests_are_256 ? DIGEST256_LEN : DIGEST_LEN; + const int hex_digest_len = digests_are_256 ? + HEX_DIGEST256_LEN : HEX_DIGEST_LEN; + const int base64_digest_len = digests_are_256 ? + BASE64_DIGEST256_LEN : BASE64_DIGEST_LEN; + smartlist_t *fp_tmp = smartlist_new(); + + tor_assert(!(decode_hex && decode_base64)); + tor_assert(fp_out); + + smartlist_split_string(fp_tmp, resource, decode_base64?"-":"+", 0, 0); + if (compressed_out) + *compressed_out = 0; + if (smartlist_len(fp_tmp)) { + char *last = smartlist_get(fp_tmp,smartlist_len(fp_tmp)-1); + size_t last_len = strlen(last); + if (last_len > 2 && !strcmp(last+last_len-2, ".z")) { + last[last_len-2] = '\0'; + if (compressed_out) + *compressed_out = 1; + } + } + if (decode_hex || decode_base64) { + const size_t encoded_len = decode_hex ? hex_digest_len : base64_digest_len; + int i; + char *cp, *d = NULL; + for (i = 0; i < smartlist_len(fp_tmp); ++i) { + cp = smartlist_get(fp_tmp, i); + if (strlen(cp) != encoded_len) { + log_info(LD_DIR, + "Skipping digest %s with non-standard length.", escaped(cp)); + smartlist_del_keeporder(fp_tmp, i--); + goto again; + } + d = tor_malloc_zero(digest_len); + if (decode_hex ? + (base16_decode(d, digest_len, cp, hex_digest_len) != digest_len) : + (base64_decode(d, digest_len, cp, base64_digest_len) + != digest_len)) { + log_info(LD_DIR, "Skipping non-decodable digest %s", escaped(cp)); + smartlist_del_keeporder(fp_tmp, i--); + goto again; + } + smartlist_set(fp_tmp, i, d); + d = NULL; + again: + tor_free(cp); + tor_free(d); + } + } + if (sort_uniq) { + if (decode_hex || decode_base64) { + if (digests_are_256) { + smartlist_sort_digests256(fp_tmp); + smartlist_uniq_digests256(fp_tmp); + } else { + smartlist_sort_digests(fp_tmp); + smartlist_uniq_digests(fp_tmp); + } + } else { + smartlist_sort_strings(fp_tmp); + smartlist_uniq_strings(fp_tmp); + } + } + smartlist_add_all(fp_out, fp_tmp); + smartlist_free(fp_tmp); + return 0; +} + +/** As dir_split_resource_into_fingerprints, but instead fills + * <b>spool_out</b> with a list of spoolable_resource_t for the resource + * identified through <b>source</b>. */ +int +dir_split_resource_into_spoolable(const char *resource, + dir_spool_source_t source, + smartlist_t *spool_out, + int *compressed_out, + int flags) +{ + smartlist_t *fingerprints = smartlist_new(); + + tor_assert(flags & (DSR_HEX|DSR_BASE64)); + const size_t digest_len = + (flags & DSR_DIGEST256) ? DIGEST256_LEN : DIGEST_LEN; + + int r = dir_split_resource_into_fingerprints(resource, fingerprints, + compressed_out, flags); + /* This is not a very efficient implementation XXXX */ + SMARTLIST_FOREACH_BEGIN(fingerprints, uint8_t *, digest) { + spooled_resource_t *spooled = + spooled_resource_new(source, digest, digest_len); + if (spooled) + smartlist_add(spool_out, spooled); + tor_free(digest); + } SMARTLIST_FOREACH_END(digest); + + smartlist_free(fingerprints); + return r; +} diff --git a/src/feature/dircache/directory.h b/src/feature/dircache/directory.h new file mode 100644 index 0000000000..54ea0c584d --- /dev/null +++ b/src/feature/dircache/directory.h @@ -0,0 +1,347 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file directory.h + * \brief Header file for directory.c. + **/ + +#ifndef TOR_DIRECTORY_H +#define TOR_DIRECTORY_H + +#include "feature/hs/hs_ident.h" +enum compress_method_t; + +dir_connection_t *TO_DIR_CONN(connection_t *c); + +#define DIR_CONN_STATE_MIN_ 1 +/** State for connection to directory server: waiting for connect(). */ +#define DIR_CONN_STATE_CONNECTING 1 +/** State for connection to directory server: sending HTTP request. */ +#define DIR_CONN_STATE_CLIENT_SENDING 2 +/** State for connection to directory server: reading HTTP response. */ +#define DIR_CONN_STATE_CLIENT_READING 3 +/** State for connection to directory server: happy and finished. */ +#define DIR_CONN_STATE_CLIENT_FINISHED 4 +/** State for connection at directory server: waiting for HTTP request. */ +#define DIR_CONN_STATE_SERVER_COMMAND_WAIT 5 +/** State for connection at directory server: sending HTTP response. */ +#define DIR_CONN_STATE_SERVER_WRITING 6 +#define DIR_CONN_STATE_MAX_ 6 + +#define DIR_PURPOSE_MIN_ 4 +/** A connection to a directory server: set after a v2 rendezvous + * descriptor is downloaded. */ +#define DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2 4 +/** A connection to a directory server: download one or more server + * descriptors. */ +#define DIR_PURPOSE_FETCH_SERVERDESC 6 +/** A connection to a directory server: download one or more extra-info + * documents. */ +#define DIR_PURPOSE_FETCH_EXTRAINFO 7 +/** A connection to a directory server: upload a server descriptor. */ +#define DIR_PURPOSE_UPLOAD_DIR 8 +/** A connection to a directory server: upload a v3 networkstatus vote. */ +#define DIR_PURPOSE_UPLOAD_VOTE 10 +/** A connection to a directory server: upload a v3 consensus signature */ +#define DIR_PURPOSE_UPLOAD_SIGNATURES 11 +/** A connection to a directory server: download one or more v3 networkstatus + * votes. */ +#define DIR_PURPOSE_FETCH_STATUS_VOTE 12 +/** A connection to a directory server: download a v3 detached signatures + * object for a consensus. */ +#define DIR_PURPOSE_FETCH_DETACHED_SIGNATURES 13 +/** A connection to a directory server: download a v3 networkstatus + * consensus. */ +#define DIR_PURPOSE_FETCH_CONSENSUS 14 +/** A connection to a directory server: download one or more directory + * authority certificates. */ +#define DIR_PURPOSE_FETCH_CERTIFICATE 15 + +/** Purpose for connection at a directory server. */ +#define DIR_PURPOSE_SERVER 16 +/** A connection to a hidden service directory server: upload a v2 rendezvous + * descriptor. */ +#define DIR_PURPOSE_UPLOAD_RENDDESC_V2 17 +/** A connection to a hidden service directory server: download a v2 rendezvous + * descriptor. */ +#define DIR_PURPOSE_FETCH_RENDDESC_V2 18 +/** A connection to a directory server: download a microdescriptor. */ +#define DIR_PURPOSE_FETCH_MICRODESC 19 +/** A connection to a hidden service directory: upload a v3 descriptor. */ +#define DIR_PURPOSE_UPLOAD_HSDESC 20 +/** A connection to a hidden service directory: fetch a v3 descriptor. */ +#define DIR_PURPOSE_FETCH_HSDESC 21 +/** A connection to a directory server: set after a hidden service descriptor + * is downloaded. */ +#define DIR_PURPOSE_HAS_FETCHED_HSDESC 22 +#define DIR_PURPOSE_MAX_ 22 + +/** True iff <b>p</b> is a purpose corresponding to uploading + * data to a directory server. */ +#define DIR_PURPOSE_IS_UPLOAD(p) \ + ((p)==DIR_PURPOSE_UPLOAD_DIR || \ + (p)==DIR_PURPOSE_UPLOAD_VOTE || \ + (p)==DIR_PURPOSE_UPLOAD_SIGNATURES || \ + (p)==DIR_PURPOSE_UPLOAD_RENDDESC_V2 || \ + (p)==DIR_PURPOSE_UPLOAD_HSDESC) + +int directories_have_accepted_server_descriptor(void); +void directory_post_to_dirservers(uint8_t dir_purpose, uint8_t router_purpose, + dirinfo_type_t type, const char *payload, + size_t payload_len, size_t extrainfo_len); +MOCK_DECL(void, directory_get_from_dirserver, ( + uint8_t dir_purpose, + uint8_t router_purpose, + const char *resource, + int pds_flags, + download_want_authority_t want_authority)); +void directory_get_from_all_authorities(uint8_t dir_purpose, + uint8_t router_purpose, + const char *resource); + +/** Enumeration of ways to connect to a directory server */ +typedef enum { + /** Default: connect over a one-hop Tor circuit. Relays fall back to direct + * DirPort connections, clients, onion services, and bridges do not */ + DIRIND_ONEHOP=0, + /** Connect over a multi-hop anonymizing Tor circuit */ + DIRIND_ANONYMOUS=1, + /** Connect to the DirPort directly */ + DIRIND_DIRECT_CONN, + /** Connect over a multi-hop anonymizing Tor circuit to our dirport */ + DIRIND_ANON_DIRPORT, +} dir_indirection_t; + +int directory_must_use_begindir(const or_options_t *options); + +/** + * A directory_request_t describes the information about a directory request + * at the client side. It describes what we're going to ask for, which + * directory we're going to ask for it, how we're going to contact that + * directory, and (in some cases) what to do with it when we're done. + */ +typedef struct directory_request_t directory_request_t; +directory_request_t *directory_request_new(uint8_t dir_purpose); +void directory_request_free_(directory_request_t *req); +#define directory_request_free(req) \ + FREE_AND_NULL(directory_request_t, directory_request_free_, (req)) +void directory_request_set_or_addr_port(directory_request_t *req, + const tor_addr_port_t *p); +void directory_request_set_dir_addr_port(directory_request_t *req, + const tor_addr_port_t *p); +void directory_request_set_directory_id_digest(directory_request_t *req, + const char *digest); +struct circuit_guard_state_t; +void directory_request_set_guard_state(directory_request_t *req, + struct circuit_guard_state_t *state); +void directory_request_set_router_purpose(directory_request_t *req, + uint8_t router_purpose); +void directory_request_set_indirection(directory_request_t *req, + dir_indirection_t indirection); +void directory_request_set_resource(directory_request_t *req, + const char *resource); +void directory_request_set_payload(directory_request_t *req, + const char *payload, + size_t payload_len); +void directory_request_set_if_modified_since(directory_request_t *req, + time_t if_modified_since); +void directory_request_set_rend_query(directory_request_t *req, + const rend_data_t *query); +void directory_request_upload_set_hs_ident(directory_request_t *req, + const hs_ident_dir_conn_t *ident); +void directory_request_fetch_set_hs_ident(directory_request_t *req, + const hs_ident_dir_conn_t *ident); + +void directory_request_set_routerstatus(directory_request_t *req, + const routerstatus_t *rs); +void directory_request_add_header(directory_request_t *req, + const char *key, + const char *val); +MOCK_DECL(void, directory_initiate_request, (directory_request_t *request)); + +int parse_http_response(const char *headers, int *code, time_t *date, + enum compress_method_t *compression, char **response); +int parse_http_command(const char *headers, + char **command_out, char **url_out); +char *http_get_header(const char *headers, const char *which); + +int connection_dir_is_encrypted(const dir_connection_t *conn); +int connection_dir_reached_eof(dir_connection_t *conn); +int connection_dir_process_inbuf(dir_connection_t *conn); +int connection_dir_finished_flushing(dir_connection_t *conn); +int connection_dir_finished_connecting(dir_connection_t *conn); +void connection_dir_about_to_close(dir_connection_t *dir_conn); + +#define DSR_HEX (1<<0) +#define DSR_BASE64 (1<<1) +#define DSR_DIGEST256 (1<<2) +#define DSR_SORT_UNIQ (1<<3) +int dir_split_resource_into_fingerprints(const char *resource, + smartlist_t *fp_out, int *compressed_out, + int flags); +enum dir_spool_source_t; +int dir_split_resource_into_spoolable(const char *resource, + enum dir_spool_source_t source, + smartlist_t *spool_out, + int *compressed_out, + int flags); +int dir_split_resource_into_fingerprint_pairs(const char *res, + smartlist_t *pairs_out); +char *directory_dump_request_log(void); +void note_request(const char *key, size_t bytes); +int router_supports_extrainfo(const char *identity_digest, int is_authority); + +time_t download_status_increment_failure(download_status_t *dls, + int status_code, const char *item, + int server, time_t now); +time_t download_status_increment_attempt(download_status_t *dls, + const char *item, time_t now); +/** Increment the failure count of the download_status_t <b>dls</b>, with + * the optional status code <b>sc</b>. */ +#define download_status_failed(dls, sc) \ + download_status_increment_failure((dls), (sc), NULL, \ + dir_server_mode(get_options()), \ + time(NULL)) + +void download_status_reset(download_status_t *dls); +int download_status_is_ready(download_status_t *dls, time_t now); +time_t download_status_get_next_attempt_at(const download_status_t *dls); +void download_status_mark_impossible(download_status_t *dl); + +int download_status_get_n_failures(const download_status_t *dls); +int download_status_get_n_attempts(const download_status_t *dls); + +int purpose_needs_anonymity(uint8_t dir_purpose, uint8_t router_purpose, + const char *resource); + +#ifdef DIRECTORY_PRIVATE + +/** A structure to hold arguments passed into each directory response + * handler */ +typedef struct response_handler_args_t { + int status_code; + const char *reason; + const char *body; + size_t body_len; + const char *headers; +} response_handler_args_t; + +struct directory_request_t { + /** + * These fields specify which directory we're contacting. Routerstatus, + * if present, overrides the other fields. + * + * @{ */ + tor_addr_port_t or_addr_port; + tor_addr_port_t dir_addr_port; + char digest[DIGEST_LEN]; + + const routerstatus_t *routerstatus; + /** @} */ + /** One of DIR_PURPOSE_* other than DIR_PURPOSE_SERVER. Describes what + * kind of operation we'll be doing (upload/download), and of what kind + * of document. */ + uint8_t dir_purpose; + /** One of ROUTER_PURPOSE_*; used for uploads and downloads of routerinfo + * and extrainfo docs. */ + uint8_t router_purpose; + /** Enum: determines whether to anonymize, and whether to use dirport or + * orport. */ + dir_indirection_t indirection; + /** Alias to the variable part of the URL for this request */ + const char *resource; + /** Alias to the payload to upload (if any) */ + const char *payload; + /** Number of bytes to upload from payload</b> */ + size_t payload_len; + /** Value to send in an if-modified-since header, or 0 for none. */ + time_t if_modified_since; + /** Hidden-service-specific information v2. */ + const rend_data_t *rend_query; + /** Extra headers to append to the request */ + struct config_line_t *additional_headers; + /** Hidden-service-specific information for v3+. */ + const hs_ident_dir_conn_t *hs_ident; + /** Used internally to directory.c: gets informed when the attempt to + * connect to the directory succeeds or fails, if that attempt bears on the + * directory's usability as a directory guard. */ + struct circuit_guard_state_t *guard_state; +}; + +struct get_handler_args_t; +STATIC int handle_get_hs_descriptor_v3(dir_connection_t *conn, + const struct get_handler_args_t *args); +STATIC int directory_handle_command(dir_connection_t *conn); +STATIC char *accept_encoding_header(void); +STATIC int allowed_anonymous_connection_compression_method( + enum compress_method_t); +STATIC void warn_disallowed_anonymous_compression_method( + enum compress_method_t); + +STATIC int handle_response_fetch_hsdesc_v3(dir_connection_t *conn, + const response_handler_args_t *args); +STATIC int handle_response_fetch_microdesc(dir_connection_t *conn, + const response_handler_args_t *args); + +STATIC int handle_response_fetch_consensus(dir_connection_t *conn, + const response_handler_args_t *args); + +#endif /* defined(DIRECTORY_PRIVATE) */ + +#ifdef TOR_UNIT_TESTS +/* Used only by test_dir.c and test_hs_cache.c */ + +STATIC int parse_http_url(const char *headers, char **url); +STATIC dirinfo_type_t dir_fetch_type(int dir_purpose, int router_purpose, + const char *resource); +MOCK_DECL(STATIC int, directory_handle_command_get,(dir_connection_t *conn, + const char *headers, + const char *req_body, + size_t req_body_len)); +MOCK_DECL(STATIC int, directory_handle_command_post,(dir_connection_t *conn, + const char *headers, + const char *body, + size_t body_len)); +STATIC int download_status_schedule_get_delay(download_status_t *dls, + int min_delay, + time_t now); + +STATIC int handle_post_hs_descriptor(const char *url, const char *body); + +STATIC char* authdir_type_to_string(dirinfo_type_t auth); +STATIC const char * dir_conn_purpose_to_string(int purpose); +STATIC int should_use_directory_guards(const or_options_t *options); +enum compression_level_t; +STATIC enum compression_level_t choose_compression_level(ssize_t n_bytes); +STATIC int find_dl_min_delay(const download_status_t *dls, + const or_options_t *options); + +STATIC int next_random_exponential_delay(int delay, + int base_delay); + +STATIC void next_random_exponential_delay_range(int *low_bound_out, + int *high_bound_out, + int delay, + int base_delay); + +STATIC int parse_hs_version_from_post(const char *url, const char *prefix, + const char **end_pos); + +STATIC unsigned parse_accept_encoding_header(const char *h); +#endif /* defined(TOR_UNIT_TESTS) */ + +#if defined(TOR_UNIT_TESTS) || defined(DIRECTORY_PRIVATE) +/* Used only by directory.c and test_dir.c */ + +/* no more than quadruple the previous delay (multiplier + 1) */ +#define DIR_DEFAULT_RANDOM_MULTIPLIER (3) +/* no more than triple the previous delay */ +#define DIR_TEST_NET_RANDOM_MULTIPLIER (2) + +#endif /* defined(TOR_UNIT_TESTS) || defined(DIRECTORY_PRIVATE) */ + +#endif /* !defined(TOR_DIRECTORY_H) */ diff --git a/src/feature/dircache/dirserv.c b/src/feature/dircache/dirserv.c new file mode 100644 index 0000000000..c5286b0cbf --- /dev/null +++ b/src/feature/dircache/dirserv.c @@ -0,0 +1,3598 @@ +/* Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#define DIRSERV_PRIVATE +#include "core/or/or.h" +#include "lib/container/buffers.h" +#include "app/config/config.h" +#include "app/config/confparse.h" +#include "core/or/channel.h" +#include "core/or/channeltls.h" +#include "core/or/command.h" +#include "core/mainloop/connection.h" +#include "core/or/connection_or.h" +#include "feature/dircache/conscache.h" +#include "feature/dircache/consdiffmgr.h" +#include "feature/control/control.h" +#include "feature/dircache/directory.h" +#include "feature/dircache/dirserv.h" +#include "feature/hibernate/hibernate.h" +#include "feature/dirauth/keypin.h" +#include "core/mainloop/main.h" +#include "feature/nodelist/microdesc.h" +#include "feature/nodelist/networkstatus.h" +#include "feature/nodelist/nodelist.h" +#include "core/or/policies.h" +#include "core/or/protover.h" +#include "feature/stats/rephist.h" +#include "feature/relay/router.h" +#include "feature/nodelist/routerlist.h" +#include "feature/nodelist/routerparse.h" +#include "feature/nodelist/routerset.h" +#include "feature/nodelist/torcert.h" +#include "feature/dircommon/voting_schedule.h" + +#include "feature/dirauth/dirvote.h" + +#include "feature/dircache/cached_dir_st.h" +#include "feature/dircommon/dir_connection_st.h" +#include "feature/nodelist/extrainfo_st.h" +#include "feature/nodelist/microdesc_st.h" +#include "feature/nodelist/node_st.h" +#include "feature/nodelist/routerinfo_st.h" +#include "feature/nodelist/routerlist_st.h" +#include "core/or/tor_version_st.h" +#include "feature/nodelist/vote_routerstatus_st.h" + +#include "lib/compress/compress.h" +#include "lib/container/order.h" +#include "lib/crypt_ops/crypto_format.h" +#include "lib/encoding/confline.h" + +/** + * \file dirserv.c + * \brief Directory server core implementation. Manages directory + * contents and generates directory documents. + * + * This module implements most of directory cache functionality, and some of + * the directory authority functionality. The directory.c module delegates + * here in order to handle incoming requests from clients, via + * connection_dirserv_flushed_some() and its kin. In order to save RAM, this + * module is responsible for spooling directory objects (in whole or in part) + * onto buf_t instances, and then closing the dir_connection_t once the + * objects are totally flushed. + * + * The directory.c module also delegates here for handling descriptor uploads + * via dirserv_add_multiple_descriptors(). + * + * Additionally, this module handles some aspects of voting, including: + * deciding how to vote on individual flags (based on decisions reached in + * rephist.c), of formatting routerstatus lines, and deciding what relays to + * include in an authority's vote. (TODO: Those functions could profitably be + * split off. They only live in this file because historically they were + * shared among the v1, v2, and v3 directory code.) + */ + +/** How far in the future do we allow a router to get? (seconds) */ +#define ROUTER_ALLOW_SKEW (60*60*12) +/** How many seconds do we wait before regenerating the directory? */ +#define DIR_REGEN_SLACK_TIME 30 +/** If we're a cache, keep this many networkstatuses around from non-trusted + * directory authorities. */ +#define MAX_UNTRUSTED_NETWORKSTATUSES 16 + +/** Total number of routers with measured bandwidth; this is set by + * dirserv_count_measured_bws() before the loop in + * dirserv_generate_networkstatus_vote_obj() and checked by + * dirserv_get_credible_bandwidth() and + * dirserv_compute_performance_thresholds() */ +static int routers_with_measured_bw = 0; + +static void directory_remove_invalid(void); +struct authdir_config_t; +static uint32_t +dirserv_get_status_impl(const char *fp, const char *nickname, + uint32_t addr, uint16_t or_port, + const char *platform, const char **msg, + int severity); +static void clear_cached_dir(cached_dir_t *d); +static const signed_descriptor_t *get_signed_descriptor_by_fp( + const uint8_t *fp, + int extrainfo); +static was_router_added_t dirserv_add_extrainfo(extrainfo_t *ei, + const char **msg); +static uint32_t dirserv_get_credible_bandwidth_kb(const routerinfo_t *ri); + +static int spooled_resource_lookup_body(const spooled_resource_t *spooled, + int conn_is_encrypted, + const uint8_t **body_out, + size_t *size_out, + time_t *published_out); +static cached_dir_t *spooled_resource_lookup_cached_dir( + const spooled_resource_t *spooled, + time_t *published_out); +static cached_dir_t *lookup_cached_dir_by_fp(const uint8_t *fp); + +/************** Fingerprint handling code ************/ + +/* 1 Historically used to indicate Named */ +#define FP_INVALID 2 /**< Believed invalid. */ +#define FP_REJECT 4 /**< We will not publish this router. */ +/* 8 Historically used to avoid using this as a dir. */ +#define FP_BADEXIT 16 /**< We'll tell clients not to use this as an exit. */ +/* 32 Historically used to indicade Unnamed */ + +/** Target of status_by_digest map. */ +typedef uint32_t router_status_t; + +static void add_fingerprint_to_dir(const char *fp, + struct authdir_config_t *list, + router_status_t add_status); + +/** List of nickname-\>identity fingerprint mappings for all the routers + * that we name. Used to prevent router impersonation. */ +typedef struct authdir_config_t { + strmap_t *fp_by_name; /**< Map from lc nickname to fingerprint. */ + digestmap_t *status_by_digest; /**< Map from digest to router_status_t. */ +} authdir_config_t; + +/** Should be static; exposed for testing. */ +static authdir_config_t *fingerprint_list = NULL; + +/** Allocate and return a new, empty, authdir_config_t. */ +static authdir_config_t * +authdir_config_new(void) +{ + authdir_config_t *list = tor_malloc_zero(sizeof(authdir_config_t)); + list->fp_by_name = strmap_new(); + list->status_by_digest = digestmap_new(); + return list; +} + +/** Add the fingerprint <b>fp</b> to the smartlist of fingerprint_entry_t's + * <b>list</b>, or-ing the currently set status flags with + * <b>add_status</b>. + */ +/* static */ void +add_fingerprint_to_dir(const char *fp, authdir_config_t *list, + router_status_t add_status) +{ + char *fingerprint; + char d[DIGEST_LEN]; + router_status_t *status; + tor_assert(fp); + tor_assert(list); + + fingerprint = tor_strdup(fp); + tor_strstrip(fingerprint, " "); + if (base16_decode(d, DIGEST_LEN, + fingerprint, strlen(fingerprint)) != DIGEST_LEN) { + log_warn(LD_DIRSERV, "Couldn't decode fingerprint \"%s\"", + escaped(fp)); + tor_free(fingerprint); + return; + } + + status = digestmap_get(list->status_by_digest, d); + if (!status) { + status = tor_malloc_zero(sizeof(router_status_t)); + digestmap_set(list->status_by_digest, d, status); + } + + tor_free(fingerprint); + *status |= add_status; + return; +} + +/** Add the fingerprint for this OR to the global list of recognized + * identity key fingerprints. */ +int +dirserv_add_own_fingerprint(crypto_pk_t *pk) +{ + char fp[FINGERPRINT_LEN+1]; + if (crypto_pk_get_fingerprint(pk, fp, 0)<0) { + log_err(LD_BUG, "Error computing fingerprint"); + return -1; + } + if (!fingerprint_list) + fingerprint_list = authdir_config_new(); + add_fingerprint_to_dir(fp, fingerprint_list, 0); + return 0; +} + +/** Load the nickname-\>fingerprint mappings stored in the approved-routers + * file. The file format is line-based, with each non-blank holding one + * nickname, some space, and a fingerprint for that nickname. On success, + * replace the current fingerprint list with the new list and return 0. On + * failure, leave the current fingerprint list untouched, and return -1. */ +int +dirserv_load_fingerprint_file(void) +{ + char *fname; + char *cf; + char *nickname, *fingerprint; + authdir_config_t *fingerprint_list_new; + int result; + config_line_t *front=NULL, *list; + + fname = get_datadir_fname("approved-routers"); + log_info(LD_GENERAL, + "Reloading approved fingerprints from \"%s\"...", fname); + + cf = read_file_to_str(fname, RFTS_IGNORE_MISSING, NULL); + if (!cf) { + log_warn(LD_FS, "Cannot open fingerprint file '%s'. That's ok.", fname); + tor_free(fname); + return 0; + } + tor_free(fname); + + result = config_get_lines(cf, &front, 0); + tor_free(cf); + if (result < 0) { + log_warn(LD_CONFIG, "Error reading from fingerprint file"); + return -1; + } + + fingerprint_list_new = authdir_config_new(); + + for (list=front; list; list=list->next) { + char digest_tmp[DIGEST_LEN]; + router_status_t add_status = 0; + nickname = list->key; fingerprint = list->value; + tor_strstrip(fingerprint, " "); /* remove spaces */ + if (strlen(fingerprint) != HEX_DIGEST_LEN || + base16_decode(digest_tmp, sizeof(digest_tmp), + fingerprint, HEX_DIGEST_LEN) != sizeof(digest_tmp)) { + log_notice(LD_CONFIG, + "Invalid fingerprint (nickname '%s', " + "fingerprint %s). Skipping.", + nickname, fingerprint); + continue; + } + if (!strcasecmp(nickname, "!reject")) { + add_status = FP_REJECT; + } else if (!strcasecmp(nickname, "!badexit")) { + add_status = FP_BADEXIT; + } else if (!strcasecmp(nickname, "!invalid")) { + add_status = FP_INVALID; + } + add_fingerprint_to_dir(fingerprint, fingerprint_list_new, add_status); + } + + config_free_lines(front); + dirserv_free_fingerprint_list(); + fingerprint_list = fingerprint_list_new; + /* Delete any routers whose fingerprints we no longer recognize */ + directory_remove_invalid(); + return 0; +} + +/* If this is set, then we don't allow routers that have advertised an Ed25519 + * identity to stop doing so. This is going to be essential for good identity + * security: otherwise anybody who can attack RSA-1024 but not Ed25519 could + * just sign fake descriptors missing the Ed25519 key. But we won't actually + * be able to prevent that kind of thing until we're confident that there isn't + * actually a legit reason to downgrade to 0.2.5. Now we are not recommending + * 0.2.5 anymore so there is no reason to keep the #undef. + */ + +#define DISABLE_DISABLING_ED25519 + +/** Check whether <b>router</b> has a nickname/identity key combination that + * we recognize from the fingerprint list, or an IP we automatically act on + * according to our configuration. Return the appropriate router status. + * + * If the status is 'FP_REJECT' and <b>msg</b> is provided, set + * *<b>msg</b> to an explanation of why. */ +uint32_t +dirserv_router_get_status(const routerinfo_t *router, const char **msg, + int severity) +{ + char d[DIGEST_LEN]; + const int key_pinning = get_options()->AuthDirPinKeys; + + if (crypto_pk_get_digest(router->identity_pkey, d)) { + log_warn(LD_BUG,"Error computing fingerprint"); + if (msg) + *msg = "Bug: Error computing fingerprint"; + return FP_REJECT; + } + + /* Check for the more usual versions to reject a router first. */ + const uint32_t r = dirserv_get_status_impl(d, router->nickname, + router->addr, router->or_port, + router->platform, msg, severity); + if (r) + return r; + + /* dirserv_get_status_impl already rejects versions older than 0.2.4.18-rc, + * and onion_curve25519_pkey was introduced in 0.2.4.8-alpha. + * But just in case a relay doesn't provide or lies about its version, or + * doesn't include an ntor key in its descriptor, check that it exists, + * and is non-zero (clients check that it's non-zero before using it). */ + if (!routerinfo_has_curve25519_onion_key(router)) { + log_fn(severity, LD_DIR, + "Descriptor from router %s is missing an ntor curve25519 onion " + "key.", router_describe(router)); + if (msg) + *msg = "Missing ntor curve25519 onion key. Please upgrade!"; + return FP_REJECT; + } + + if (router->cache_info.signing_key_cert) { + /* This has an ed25519 identity key. */ + if (KEYPIN_MISMATCH == + keypin_check((const uint8_t*)router->cache_info.identity_digest, + router->cache_info.signing_key_cert->signing_key.pubkey)) { + log_fn(severity, LD_DIR, + "Descriptor from router %s has an Ed25519 key, " + "but the <rsa,ed25519> keys don't match what they were before.", + router_describe(router)); + if (key_pinning) { + if (msg) { + *msg = "Ed25519 identity key or RSA identity key has changed."; + } + return FP_REJECT; + } + } + } else { + /* No ed25519 key */ + if (KEYPIN_MISMATCH == keypin_check_lone_rsa( + (const uint8_t*)router->cache_info.identity_digest)) { + log_fn(severity, LD_DIR, + "Descriptor from router %s has no Ed25519 key, " + "when we previously knew an Ed25519 for it. Ignoring for now, " + "since Ed25519 keys are fairly new.", + router_describe(router)); +#ifdef DISABLE_DISABLING_ED25519 + if (key_pinning) { + if (msg) { + *msg = "Ed25519 identity key has disappeared."; + } + return FP_REJECT; + } +#endif /* defined(DISABLE_DISABLING_ED25519) */ + } + } + + return 0; +} + +/** Return true if there is no point in downloading the router described by + * <b>rs</b> because this directory would reject it. */ +int +dirserv_would_reject_router(const routerstatus_t *rs) +{ + uint32_t res; + + res = dirserv_get_status_impl(rs->identity_digest, rs->nickname, + rs->addr, rs->or_port, + NULL, NULL, LOG_DEBUG); + + return (res & FP_REJECT) != 0; +} + +/** Helper: As dirserv_router_get_status, but takes the router fingerprint + * (hex, no spaces), nickname, address (used for logging only), IP address, OR + * port and platform (logging only) as arguments. + * + * Log messages at 'severity'. (There's not much point in + * logging that we're rejecting servers we'll not download.) + */ +static uint32_t +dirserv_get_status_impl(const char *id_digest, const char *nickname, + uint32_t addr, uint16_t or_port, + const char *platform, const char **msg, int severity) +{ + uint32_t result = 0; + router_status_t *status_by_digest; + + if (!fingerprint_list) + fingerprint_list = authdir_config_new(); + + log_debug(LD_DIRSERV, "%d fingerprints, %d digests known.", + strmap_size(fingerprint_list->fp_by_name), + digestmap_size(fingerprint_list->status_by_digest)); + + if (platform) { + tor_version_t ver_tmp; + if (tor_version_parse_platform(platform, &ver_tmp, 1) < 0) { + if (msg) { + *msg = "Malformed platform string."; + } + return FP_REJECT; + } + } + + /* Versions before Tor 0.2.4.18-rc are too old to support, and are + * missing some important security fixes too. Disable them. */ + if (platform && !tor_version_as_new_as(platform,"0.2.4.18-rc")) { + if (msg) + *msg = "Tor version is insecure or unsupported. Please upgrade!"; + return FP_REJECT; + } + + /* Tor 0.2.9.x where x<5 suffers from bug #20499, where relays don't + * keep their consensus up to date so they make bad guards. + * The simple fix is to just drop them from the network. */ + if (platform && + tor_version_as_new_as(platform,"0.2.9.0-alpha") && + !tor_version_as_new_as(platform,"0.2.9.5-alpha")) { + if (msg) + *msg = "Tor version contains bug 20499. Please upgrade!"; + return FP_REJECT; + } + + status_by_digest = digestmap_get(fingerprint_list->status_by_digest, + id_digest); + if (status_by_digest) + result |= *status_by_digest; + + if (result & FP_REJECT) { + if (msg) + *msg = "Fingerprint is marked rejected -- if you think this is a " + "mistake please set a valid email address in ContactInfo and " + "send an email to bad-relays@lists.torproject.org mentioning " + "your fingerprint(s)?"; + return FP_REJECT; + } else if (result & FP_INVALID) { + if (msg) + *msg = "Fingerprint is marked invalid"; + } + + if (authdir_policy_badexit_address(addr, or_port)) { + log_fn(severity, LD_DIRSERV, + "Marking '%s' as bad exit because of address '%s'", + nickname, fmt_addr32(addr)); + result |= FP_BADEXIT; + } + + if (!authdir_policy_permits_address(addr, or_port)) { + log_fn(severity, LD_DIRSERV, "Rejecting '%s' because of address '%s'", + nickname, fmt_addr32(addr)); + if (msg) + *msg = "Suspicious relay address range -- if you think this is a " + "mistake please set a valid email address in ContactInfo and " + "send an email to bad-relays@lists.torproject.org mentioning " + "your address(es) and fingerprint(s)?"; + return FP_REJECT; + } + if (!authdir_policy_valid_address(addr, or_port)) { + log_fn(severity, LD_DIRSERV, + "Not marking '%s' valid because of address '%s'", + nickname, fmt_addr32(addr)); + result |= FP_INVALID; + } + + return result; +} + +/** Clear the current fingerprint list. */ +void +dirserv_free_fingerprint_list(void) +{ + if (!fingerprint_list) + return; + + strmap_free(fingerprint_list->fp_by_name, tor_free_); + digestmap_free(fingerprint_list->status_by_digest, tor_free_); + tor_free(fingerprint_list); +} + +/* + * Descriptor list + */ + +/** Return -1 if <b>ri</b> has a private or otherwise bad address, + * unless we're configured to not care. Return 0 if all ok. */ +static int +dirserv_router_has_valid_address(routerinfo_t *ri) +{ + tor_addr_t addr; + if (get_options()->DirAllowPrivateAddresses) + return 0; /* whatever it is, we're fine with it */ + tor_addr_from_ipv4h(&addr, ri->addr); + + if (tor_addr_is_internal(&addr, 0)) { + log_info(LD_DIRSERV, + "Router %s published internal IP address. Refusing.", + router_describe(ri)); + return -1; /* it's a private IP, we should reject it */ + } + return 0; +} + +/** Check whether we, as a directory server, want to accept <b>ri</b>. If so, + * set its is_valid,running fields and return 0. Otherwise, return -1. + * + * If the router is rejected, set *<b>msg</b> to an explanation of why. + * + * If <b>complain</b> then explain at log-level 'notice' why we refused + * a descriptor; else explain at log-level 'info'. + */ +int +authdir_wants_to_reject_router(routerinfo_t *ri, const char **msg, + int complain, int *valid_out) +{ + /* Okay. Now check whether the fingerprint is recognized. */ + time_t now; + int severity = (complain && ri->contact_info) ? LOG_NOTICE : LOG_INFO; + uint32_t status = dirserv_router_get_status(ri, msg, severity); + tor_assert(msg); + if (status & FP_REJECT) + return -1; /* msg is already set. */ + + /* Is there too much clock skew? */ + now = time(NULL); + if (ri->cache_info.published_on > now+ROUTER_ALLOW_SKEW) { + log_fn(severity, LD_DIRSERV, "Publication time for %s is too " + "far (%d minutes) in the future; possible clock skew. Not adding " + "(%s)", + router_describe(ri), + (int)((ri->cache_info.published_on-now)/60), + esc_router_info(ri)); + *msg = "Rejected: Your clock is set too far in the future, or your " + "timezone is not correct."; + return -1; + } + if (ri->cache_info.published_on < now-ROUTER_MAX_AGE_TO_PUBLISH) { + log_fn(severity, LD_DIRSERV, + "Publication time for %s is too far " + "(%d minutes) in the past. Not adding (%s)", + router_describe(ri), + (int)((now-ri->cache_info.published_on)/60), + esc_router_info(ri)); + *msg = "Rejected: Server is expired, or your clock is too far in the past," + " or your timezone is not correct."; + return -1; + } + if (dirserv_router_has_valid_address(ri) < 0) { + log_fn(severity, LD_DIRSERV, + "Router %s has invalid address. Not adding (%s).", + router_describe(ri), + esc_router_info(ri)); + *msg = "Rejected: Address is a private address."; + return -1; + } + + *valid_out = ! (status & FP_INVALID); + + return 0; +} + +/** Update the relevant flags of <b>node</b> based on our opinion as a + * directory authority in <b>authstatus</b>, as returned by + * dirserv_router_get_status or equivalent. */ +void +dirserv_set_node_flags_from_authoritative_status(node_t *node, + uint32_t authstatus) +{ + node->is_valid = (authstatus & FP_INVALID) ? 0 : 1; + node->is_bad_exit = (authstatus & FP_BADEXIT) ? 1 : 0; +} + +/** True iff <b>a</b> is more severe than <b>b</b>. */ +static int +WRA_MORE_SEVERE(was_router_added_t a, was_router_added_t b) +{ + return a < b; +} + +/** As for dirserv_add_descriptor(), but accepts multiple documents, and + * returns the most severe error that occurred for any one of them. */ +was_router_added_t +dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose, + const char *source, + const char **msg) +{ + was_router_added_t r, r_tmp; + const char *msg_out; + smartlist_t *list; + const char *s; + int n_parsed = 0; + time_t now = time(NULL); + char annotation_buf[ROUTER_ANNOTATION_BUF_LEN]; + char time_buf[ISO_TIME_LEN+1]; + int general = purpose == ROUTER_PURPOSE_GENERAL; + tor_assert(msg); + + r=ROUTER_ADDED_SUCCESSFULLY; /*Least severe return value. */ + + format_iso_time(time_buf, now); + if (tor_snprintf(annotation_buf, sizeof(annotation_buf), + "@uploaded-at %s\n" + "@source %s\n" + "%s%s%s", time_buf, escaped(source), + !general ? "@purpose " : "", + !general ? router_purpose_to_string(purpose) : "", + !general ? "\n" : "")<0) { + *msg = "Couldn't format annotations"; + /* XXX Not cool: we return -1 below, but (was_router_added_t)-1 is + * ROUTER_BAD_EI, which isn't what's gone wrong here. :( */ + return -1; + } + + s = desc; + list = smartlist_new(); + if (!router_parse_list_from_string(&s, NULL, list, SAVED_NOWHERE, 0, 0, + annotation_buf, NULL)) { + SMARTLIST_FOREACH(list, routerinfo_t *, ri, { + msg_out = NULL; + tor_assert(ri->purpose == purpose); + r_tmp = dirserv_add_descriptor(ri, &msg_out, source); + if (WRA_MORE_SEVERE(r_tmp, r)) { + r = r_tmp; + *msg = msg_out; + } + }); + } + n_parsed += smartlist_len(list); + smartlist_clear(list); + + s = desc; + if (!router_parse_list_from_string(&s, NULL, list, SAVED_NOWHERE, 1, 0, + NULL, NULL)) { + SMARTLIST_FOREACH(list, extrainfo_t *, ei, { + msg_out = NULL; + + r_tmp = dirserv_add_extrainfo(ei, &msg_out); + if (WRA_MORE_SEVERE(r_tmp, r)) { + r = r_tmp; + *msg = msg_out; + } + }); + } + n_parsed += smartlist_len(list); + smartlist_free(list); + + if (! *msg) { + if (!n_parsed) { + *msg = "No descriptors found in your POST."; + if (WRA_WAS_ADDED(r)) + r = ROUTER_IS_ALREADY_KNOWN; + } else { + *msg = "(no message)"; + } + } + + return r; +} + +/** Examine the parsed server descriptor in <b>ri</b> and maybe insert it into + * the list of server descriptors. Set *<b>msg</b> to a message that should be + * passed back to the origin of this descriptor, or NULL if there is no such + * message. Use <b>source</b> to produce better log messages. + * + * If <b>ri</b> is not added to the list of server descriptors, free it. + * That means the caller must not access <b>ri</b> after this function + * returns, since it might have been freed. + * + * Return the status of the operation. + * + * This function is only called when fresh descriptors are posted, not when + * we re-load the cache. + */ +was_router_added_t +dirserv_add_descriptor(routerinfo_t *ri, const char **msg, const char *source) +{ + was_router_added_t r; + routerinfo_t *ri_old; + char *desc, *nickname; + const size_t desclen = ri->cache_info.signed_descriptor_len + + ri->cache_info.annotations_len; + const int key_pinning = get_options()->AuthDirPinKeys; + *msg = NULL; + + /* If it's too big, refuse it now. Otherwise we'll cache it all over the + * network and it'll clog everything up. */ + if (ri->cache_info.signed_descriptor_len > MAX_DESCRIPTOR_UPLOAD_SIZE) { + log_notice(LD_DIR, "Somebody attempted to publish a router descriptor '%s'" + " (source: %s) with size %d. Either this is an attack, or the " + "MAX_DESCRIPTOR_UPLOAD_SIZE (%d) constant is too low.", + ri->nickname, source, (int)ri->cache_info.signed_descriptor_len, + MAX_DESCRIPTOR_UPLOAD_SIZE); + *msg = "Router descriptor was too large."; + r = ROUTER_AUTHDIR_REJECTS; + goto fail; + } + + /* Check whether this descriptor is semantically identical to the last one + * from this server. (We do this here and not in router_add_to_routerlist + * because we want to be able to accept the newest router descriptor that + * another authority has, so we all converge on the same one.) */ + ri_old = router_get_mutable_by_digest(ri->cache_info.identity_digest); + if (ri_old && ri_old->cache_info.published_on < ri->cache_info.published_on + && router_differences_are_cosmetic(ri_old, ri) + && !router_is_me(ri)) { + log_info(LD_DIRSERV, + "Not replacing descriptor from %s (source: %s); " + "differences are cosmetic.", + router_describe(ri), source); + *msg = "Not replacing router descriptor; no information has changed since " + "the last one with this identity."; + r = ROUTER_IS_ALREADY_KNOWN; + goto fail; + } + + /* Do keypinning again ... this time, to add the pin if appropriate */ + int keypin_status; + if (ri->cache_info.signing_key_cert) { + ed25519_public_key_t *pkey = &ri->cache_info.signing_key_cert->signing_key; + /* First let's validate this pubkey before pinning it */ + if (ed25519_validate_pubkey(pkey) < 0) { + log_warn(LD_DIRSERV, "Received bad key from %s (source %s)", + router_describe(ri), source); + routerinfo_free(ri); + return ROUTER_AUTHDIR_REJECTS; + } + + /* Now pin it! */ + keypin_status = keypin_check_and_add( + (const uint8_t*)ri->cache_info.identity_digest, + pkey->pubkey, ! key_pinning); + } else { + keypin_status = keypin_check_lone_rsa( + (const uint8_t*)ri->cache_info.identity_digest); +#ifndef DISABLE_DISABLING_ED25519 + if (keypin_status == KEYPIN_MISMATCH) + keypin_status = KEYPIN_NOT_FOUND; +#endif + } + if (keypin_status == KEYPIN_MISMATCH && key_pinning) { + log_info(LD_DIRSERV, "Dropping descriptor from %s (source: %s) because " + "its key did not match an older RSA/Ed25519 keypair", + router_describe(ri), source); + *msg = "Looks like your keypair has changed? This authority previously " + "recorded a different RSA identity for this Ed25519 identity (or vice " + "versa.) Did you replace or copy some of your key files, but not " + "the others? You should either restore the expected keypair, or " + "delete your keys and restart Tor to start your relay with a new " + "identity."; + r = ROUTER_AUTHDIR_REJECTS; + goto fail; + } + + /* Make a copy of desc, since router_add_to_routerlist might free + * ri and its associated signed_descriptor_t. */ + desc = tor_strndup(ri->cache_info.signed_descriptor_body, desclen); + nickname = tor_strdup(ri->nickname); + + /* Tell if we're about to need to launch a test if we add this. */ + ri->needs_retest_if_added = + dirserv_should_launch_reachability_test(ri, ri_old); + + r = router_add_to_routerlist(ri, msg, 0, 0); + if (!WRA_WAS_ADDED(r)) { + /* unless the routerinfo was fine, just out-of-date */ + log_info(LD_DIRSERV, + "Did not add descriptor from '%s' (source: %s): %s.", + nickname, source, *msg ? *msg : "(no message)"); + } else { + smartlist_t *changed; + + changed = smartlist_new(); + smartlist_add(changed, ri); + routerlist_descriptors_added(changed, 0); + smartlist_free(changed); + if (!*msg) { + *msg = "Descriptor accepted"; + } + log_info(LD_DIRSERV, + "Added descriptor from '%s' (source: %s): %s.", + nickname, source, *msg); + } + tor_free(desc); + tor_free(nickname); + return r; + fail: + { + const char *desc_digest = ri->cache_info.signed_descriptor_digest; + download_status_t *dls = + router_get_dl_status_by_descriptor_digest(desc_digest); + if (dls) { + log_info(LD_GENERAL, "Marking router with descriptor %s as rejected, " + "and therefore undownloadable", + hex_str(desc_digest, DIGEST_LEN)); + download_status_mark_impossible(dls); + } + routerinfo_free(ri); + } + return r; +} + +/** As dirserv_add_descriptor, but for an extrainfo_t <b>ei</b>. */ +static was_router_added_t +dirserv_add_extrainfo(extrainfo_t *ei, const char **msg) +{ + routerinfo_t *ri; + int r; + was_router_added_t rv; + tor_assert(msg); + *msg = NULL; + + /* Needs to be mutable so routerinfo_incompatible_with_extrainfo + * can mess with some of the flags in ri->cache_info. */ + ri = router_get_mutable_by_digest(ei->cache_info.identity_digest); + if (!ri) { + *msg = "No corresponding router descriptor for extra-info descriptor"; + rv = ROUTER_BAD_EI; + goto fail; + } + + /* If it's too big, refuse it now. Otherwise we'll cache it all over the + * network and it'll clog everything up. */ + if (ei->cache_info.signed_descriptor_len > MAX_EXTRAINFO_UPLOAD_SIZE) { + log_notice(LD_DIR, "Somebody attempted to publish an extrainfo " + "with size %d. Either this is an attack, or the " + "MAX_EXTRAINFO_UPLOAD_SIZE (%d) constant is too low.", + (int)ei->cache_info.signed_descriptor_len, + MAX_EXTRAINFO_UPLOAD_SIZE); + *msg = "Extrainfo document was too large"; + rv = ROUTER_BAD_EI; + goto fail; + } + + if ((r = routerinfo_incompatible_with_extrainfo(ri->identity_pkey, ei, + &ri->cache_info, msg))) { + if (r<0) { + extrainfo_free(ei); + return ROUTER_IS_ALREADY_KNOWN; + } + rv = ROUTER_BAD_EI; + goto fail; + } + router_add_extrainfo_to_routerlist(ei, msg, 0, 0); + return ROUTER_ADDED_SUCCESSFULLY; + fail: + { + const char *d = ei->cache_info.signed_descriptor_digest; + signed_descriptor_t *sd = router_get_by_extrainfo_digest((char*)d); + if (sd) { + log_info(LD_GENERAL, "Marking extrainfo with descriptor %s as " + "rejected, and therefore undownloadable", + hex_str((char*)d,DIGEST_LEN)); + download_status_mark_impossible(&sd->ei_dl_status); + } + extrainfo_free(ei); + } + return rv; +} + +/** Remove all descriptors whose nicknames or fingerprints no longer + * are allowed by our fingerprint list. (Descriptors that used to be + * good can become bad when we reload the fingerprint list.) + */ +static void +directory_remove_invalid(void) +{ + routerlist_t *rl = router_get_routerlist(); + smartlist_t *nodes = smartlist_new(); + smartlist_add_all(nodes, nodelist_get_list()); + + SMARTLIST_FOREACH_BEGIN(nodes, node_t *, node) { + const char *msg = NULL; + const char *description; + routerinfo_t *ent = node->ri; + uint32_t r; + if (!ent) + continue; + r = dirserv_router_get_status(ent, &msg, LOG_INFO); + description = router_describe(ent); + if (r & FP_REJECT) { + log_info(LD_DIRSERV, "Router %s is now rejected: %s", + description, msg?msg:""); + routerlist_remove(rl, ent, 0, time(NULL)); + continue; + } + if (bool_neq((r & FP_INVALID), !node->is_valid)) { + log_info(LD_DIRSERV, "Router '%s' is now %svalid.", description, + (r&FP_INVALID) ? "in" : ""); + node->is_valid = (r&FP_INVALID)?0:1; + } + if (bool_neq((r & FP_BADEXIT), node->is_bad_exit)) { + log_info(LD_DIRSERV, "Router '%s' is now a %s exit", description, + (r & FP_BADEXIT) ? "bad" : "good"); + node->is_bad_exit = (r&FP_BADEXIT) ? 1: 0; + } + } SMARTLIST_FOREACH_END(node); + + routerlist_assert_ok(rl); + smartlist_free(nodes); +} + +/** + * Allocate and return a description of the status of the server <b>desc</b>, + * for use in a v1-style router-status line. The server is listed + * as running iff <b>is_live</b> is true. + * + * This is deprecated: it's only used for controllers that want outputs in + * the old format. + */ +static char * +list_single_server_status(const routerinfo_t *desc, int is_live) +{ + char buf[MAX_NICKNAME_LEN+HEX_DIGEST_LEN+4]; /* !nickname=$hexdigest\0 */ + char *cp; + const node_t *node; + + tor_assert(desc); + + cp = buf; + if (!is_live) { + *cp++ = '!'; + } + node = node_get_by_id(desc->cache_info.identity_digest); + if (node && node->is_valid) { + strlcpy(cp, desc->nickname, sizeof(buf)-(cp-buf)); + cp += strlen(cp); + *cp++ = '='; + } + *cp++ = '$'; + base16_encode(cp, HEX_DIGEST_LEN+1, desc->cache_info.identity_digest, + DIGEST_LEN); + return tor_strdup(buf); +} + +/* DOCDOC running_long_enough_to_decide_unreachable */ +int +running_long_enough_to_decide_unreachable(void) +{ + return time_of_process_start + + get_options()->TestingAuthDirTimeToLearnReachability < approx_time(); +} + +/** Each server needs to have passed a reachability test no more + * than this number of seconds ago, or it is listed as down in + * the directory. */ +#define REACHABLE_TIMEOUT (45*60) + +/** If we tested a router and found it reachable _at least this long_ after it + * declared itself hibernating, it is probably done hibernating and we just + * missed a descriptor from it. */ +#define HIBERNATION_PUBLICATION_SKEW (60*60) + +/** Treat a router as alive if + * - It's me, and I'm not hibernating. + * or - We've found it reachable recently. */ +void +dirserv_set_router_is_running(routerinfo_t *router, time_t now) +{ + /*XXXX This function is a mess. Separate out the part that calculates + whether it's reachable and the part that tells rephist that the router was + unreachable. + */ + int answer; + const or_options_t *options = get_options(); + node_t *node = node_get_mutable_by_id(router->cache_info.identity_digest); + tor_assert(node); + + if (router_is_me(router)) { + /* We always know if we are shutting down or hibernating ourselves. */ + answer = ! we_are_hibernating(); + } else if (router->is_hibernating && + (router->cache_info.published_on + + HIBERNATION_PUBLICATION_SKEW) > node->last_reachable) { + /* A hibernating router is down unless we (somehow) had contact with it + * since it declared itself to be hibernating. */ + answer = 0; + } else if (options->AssumeReachable) { + /* If AssumeReachable, everybody is up unless they say they are down! */ + answer = 1; + } else { + /* Otherwise, a router counts as up if we found all announced OR + ports reachable in the last REACHABLE_TIMEOUT seconds. + + XXX prop186 For now there's always one IPv4 and at most one + IPv6 OR port. + + If we're not on IPv6, don't consider reachability of potential + IPv6 OR port since that'd kill all dual stack relays until a + majority of the dir auths have IPv6 connectivity. */ + answer = (now < node->last_reachable + REACHABLE_TIMEOUT && + (options->AuthDirHasIPv6Connectivity != 1 || + tor_addr_is_null(&router->ipv6_addr) || + now < node->last_reachable6 + REACHABLE_TIMEOUT)); + } + + if (!answer && running_long_enough_to_decide_unreachable()) { + /* Not considered reachable. tell rephist about that. + + Because we launch a reachability test for each router every + REACHABILITY_TEST_CYCLE_PERIOD seconds, then the router has probably + been down since at least that time after we last successfully reached + it. + + XXX ipv6 + */ + time_t when = now; + if (node->last_reachable && + node->last_reachable + REACHABILITY_TEST_CYCLE_PERIOD < now) + when = node->last_reachable + REACHABILITY_TEST_CYCLE_PERIOD; + rep_hist_note_router_unreachable(router->cache_info.identity_digest, when); + } + + node->is_running = answer; +} + +/** Based on the routerinfo_ts in <b>routers</b>, allocate the + * contents of a v1-style router-status line, and store it in + * *<b>router_status_out</b>. Return 0 on success, -1 on failure. + * + * If for_controller is true, include the routers with very old descriptors. + * + * This is deprecated: it's only used for controllers that want outputs in + * the old format. + */ +int +list_server_status_v1(smartlist_t *routers, char **router_status_out, + int for_controller) +{ + /* List of entries in a router-status style: An optional !, then an optional + * equals-suffixed nickname, then a dollar-prefixed hexdigest. */ + smartlist_t *rs_entries; + time_t now = time(NULL); + time_t cutoff = now - ROUTER_MAX_AGE_TO_PUBLISH; + const or_options_t *options = get_options(); + /* We include v2 dir auths here too, because they need to answer + * controllers. Eventually we'll deprecate this whole function; + * see also networkstatus_getinfo_by_purpose(). */ + int authdir = authdir_mode_publishes_statuses(options); + tor_assert(router_status_out); + + rs_entries = smartlist_new(); + + SMARTLIST_FOREACH_BEGIN(routers, routerinfo_t *, ri) { + const node_t *node = node_get_by_id(ri->cache_info.identity_digest); + tor_assert(node); + if (authdir) { + /* Update router status in routerinfo_t. */ + dirserv_set_router_is_running(ri, now); + } + if (for_controller) { + char name_buf[MAX_VERBOSE_NICKNAME_LEN+2]; + char *cp = name_buf; + if (!node->is_running) + *cp++ = '!'; + router_get_verbose_nickname(cp, ri); + smartlist_add_strdup(rs_entries, name_buf); + } else if (ri->cache_info.published_on >= cutoff) { + smartlist_add(rs_entries, list_single_server_status(ri, + node->is_running)); + } + } SMARTLIST_FOREACH_END(ri); + + *router_status_out = smartlist_join_strings(rs_entries, " ", 0, NULL); + + SMARTLIST_FOREACH(rs_entries, char *, cp, tor_free(cp)); + smartlist_free(rs_entries); + + return 0; +} + +/** Return 1 if <b>ri</b>'s descriptor is "active" -- running, valid, + * not hibernating, having observed bw greater 0, and not too old. Else + * return 0. + */ +static int +router_is_active(const routerinfo_t *ri, const node_t *node, time_t now) +{ + time_t cutoff = now - ROUTER_MAX_AGE_TO_PUBLISH; + if (ri->cache_info.published_on < cutoff) { + return 0; + } + if (!node->is_running || !node->is_valid || ri->is_hibernating) { + return 0; + } + /* Only require bandwidth capacity in non-test networks, or + * if TestingTorNetwork, and TestingMinExitFlagThreshold is non-zero */ + if (!ri->bandwidthcapacity) { + if (get_options()->TestingTorNetwork) { + if (get_options()->TestingMinExitFlagThreshold > 0) { + /* If we're in a TestingTorNetwork, and TestingMinExitFlagThreshold is, + * then require bandwidthcapacity */ + return 0; + } + } else { + /* If we're not in a TestingTorNetwork, then require bandwidthcapacity */ + return 0; + } + } + return 1; +} + +/********************************************************************/ + +/* A set of functions to answer questions about how we'd like to behave + * as a directory mirror/client. */ + +/** Return 1 if we fetch our directory material directly from the + * authorities, rather than from a mirror. */ +int +directory_fetches_from_authorities(const or_options_t *options) +{ + const routerinfo_t *me; + uint32_t addr; + int refuseunknown; + if (options->FetchDirInfoEarly) + return 1; + if (options->BridgeRelay == 1) + return 0; + if (server_mode(options) && + router_pick_published_address(options, &addr, 1) < 0) + return 1; /* we don't know our IP address; ask an authority. */ + refuseunknown = ! router_my_exit_policy_is_reject_star() && + should_refuse_unknown_exits(options); + if (!dir_server_mode(options) && !refuseunknown) + return 0; + if (!server_mode(options) || !advertised_server_mode()) + return 0; + me = router_get_my_routerinfo(); + if (!me || (!me->supports_tunnelled_dir_requests && !refuseunknown)) + return 0; /* if we don't service directory requests, return 0 too */ + return 1; +} + +/** Return 1 if we should fetch new networkstatuses, descriptors, etc + * on the "mirror" schedule rather than the "client" schedule. + */ +int +directory_fetches_dir_info_early(const or_options_t *options) +{ + return directory_fetches_from_authorities(options); +} + +/** Return 1 if we should fetch new networkstatuses, descriptors, etc + * on a very passive schedule -- waiting long enough for ordinary clients + * to probably have the info we want. These would include bridge users, + * and maybe others in the future e.g. if a Tor client uses another Tor + * client as a directory guard. + */ +int +directory_fetches_dir_info_later(const or_options_t *options) +{ + return options->UseBridges != 0; +} + +/** Return true iff we want to serve certificates for authorities + * that we don't acknowledge as authorities ourself. + * Use we_want_to_fetch_unknown_auth_certs to check if we want to fetch + * and keep these certificates. + */ +int +directory_caches_unknown_auth_certs(const or_options_t *options) +{ + return dir_server_mode(options) || options->BridgeRelay; +} + +/** Return 1 if we want to fetch and serve descriptors, networkstatuses, etc + * Else return 0. + * Check options->DirPort_set and directory_permits_begindir_requests() + * to see if we are willing to serve these directory documents to others via + * the DirPort and begindir-over-ORPort, respectively. + * + * To check if we should fetch documents, use we_want_to_fetch_flavor and + * we_want_to_fetch_unknown_auth_certs instead of this function. + */ +int +directory_caches_dir_info(const or_options_t *options) +{ + if (options->BridgeRelay || dir_server_mode(options)) + return 1; + if (!server_mode(options) || !advertised_server_mode()) + return 0; + /* We need an up-to-date view of network info if we're going to try to + * block exit attempts from unknown relays. */ + return ! router_my_exit_policy_is_reject_star() && + should_refuse_unknown_exits(options); +} + +/** Return 1 if we want to allow remote clients to ask us directory + * requests via the "begin_dir" interface, which doesn't require + * having any separate port open. */ +int +directory_permits_begindir_requests(const or_options_t *options) +{ + return options->BridgeRelay != 0 || dir_server_mode(options); +} + +/** Return 1 if we have no need to fetch new descriptors. This generally + * happens when we're not a dir cache and we haven't built any circuits + * lately. + */ +int +directory_too_idle_to_fetch_descriptors(const or_options_t *options, + time_t now) +{ + return !directory_caches_dir_info(options) && + !options->FetchUselessDescriptors && + rep_hist_circbuilding_dormant(now); +} + +/********************************************************************/ + +/** Map from flavor name to the cached_dir_t for the v3 consensuses that we're + * currently serving. */ +static strmap_t *cached_consensuses = NULL; + +/** Decrement the reference count on <b>d</b>, and free it if it no longer has + * any references. */ +void +cached_dir_decref(cached_dir_t *d) +{ + if (!d || --d->refcnt > 0) + return; + clear_cached_dir(d); + tor_free(d); +} + +/** Allocate and return a new cached_dir_t containing the string <b>s</b>, + * published at <b>published</b>. */ +cached_dir_t * +new_cached_dir(char *s, time_t published) +{ + cached_dir_t *d = tor_malloc_zero(sizeof(cached_dir_t)); + d->refcnt = 1; + d->dir = s; + d->dir_len = strlen(s); + d->published = published; + if (tor_compress(&(d->dir_compressed), &(d->dir_compressed_len), + d->dir, d->dir_len, ZLIB_METHOD)) { + log_warn(LD_BUG, "Error compressing directory"); + } + return d; +} + +/** Remove all storage held in <b>d</b>, but do not free <b>d</b> itself. */ +static void +clear_cached_dir(cached_dir_t *d) +{ + tor_free(d->dir); + tor_free(d->dir_compressed); + memset(d, 0, sizeof(cached_dir_t)); +} + +/** Free all storage held by the cached_dir_t in <b>d</b>. */ +static void +free_cached_dir_(void *_d) +{ + cached_dir_t *d; + if (!_d) + return; + + d = (cached_dir_t *)_d; + cached_dir_decref(d); +} + +/** Replace the v3 consensus networkstatus of type <b>flavor_name</b> that + * we're serving with <b>networkstatus</b>, published at <b>published</b>. No + * validation is performed. */ +void +dirserv_set_cached_consensus_networkstatus(const char *networkstatus, + const char *flavor_name, + const common_digests_t *digests, + const uint8_t *sha3_as_signed, + time_t published) +{ + cached_dir_t *new_networkstatus; + cached_dir_t *old_networkstatus; + if (!cached_consensuses) + cached_consensuses = strmap_new(); + + new_networkstatus = new_cached_dir(tor_strdup(networkstatus), published); + memcpy(&new_networkstatus->digests, digests, sizeof(common_digests_t)); + memcpy(&new_networkstatus->digest_sha3_as_signed, sha3_as_signed, + DIGEST256_LEN); + old_networkstatus = strmap_set(cached_consensuses, flavor_name, + new_networkstatus); + if (old_networkstatus) + cached_dir_decref(old_networkstatus); +} + +/** Return the latest downloaded consensus networkstatus in encoded, signed, + * optionally compressed format, suitable for sending to clients. */ +cached_dir_t * +dirserv_get_consensus(const char *flavor_name) +{ + if (!cached_consensuses) + return NULL; + return strmap_get(cached_consensuses, flavor_name); +} + +/** If a router's uptime is at least this value, then it is always + * considered stable, regardless of the rest of the network. This + * way we resist attacks where an attacker doubles the size of the + * network using allegedly high-uptime nodes, displacing all the + * current guards. */ +#define UPTIME_TO_GUARANTEE_STABLE (3600*24*30) +/** If a router's MTBF is at least this value, then it is always stable. + * See above. (Corresponds to about 7 days for current decay rates.) */ +#define MTBF_TO_GUARANTEE_STABLE (60*60*24*5) +/** Similarly, every node with at least this much weighted time known can be + * considered familiar enough to be a guard. Corresponds to about 20 days for + * current decay rates. + */ +#define TIME_KNOWN_TO_GUARANTEE_FAMILIAR (8*24*60*60) +/** Similarly, every node with sufficient WFU is around enough to be a guard. + */ +#define WFU_TO_GUARANTEE_GUARD (0.98) + +/* Thresholds for server performance: set by + * dirserv_compute_performance_thresholds, and used by + * generate_v2_networkstatus */ + +/** Any router with an uptime of at least this value is stable. */ +static uint32_t stable_uptime = 0; /* start at a safe value */ +/** Any router with an mtbf of at least this value is stable. */ +static double stable_mtbf = 0.0; +/** If true, we have measured enough mtbf info to look at stable_mtbf rather + * than stable_uptime. */ +static int enough_mtbf_info = 0; +/** Any router with a weighted fractional uptime of at least this much might + * be good as a guard. */ +static double guard_wfu = 0.0; +/** Don't call a router a guard unless we've known about it for at least this + * many seconds. */ +static long guard_tk = 0; +/** Any router with a bandwidth at least this high is "Fast" */ +static uint32_t fast_bandwidth_kb = 0; +/** If exits can be guards, then all guards must have a bandwidth this + * high. */ +static uint32_t guard_bandwidth_including_exits_kb = 0; +/** If exits can't be guards, then all guards must have a bandwidth this + * high. */ +static uint32_t guard_bandwidth_excluding_exits_kb = 0; + +/** Helper: estimate the uptime of a router given its stated uptime and the + * amount of time since it last stated its stated uptime. */ +static inline long +real_uptime(const routerinfo_t *router, time_t now) +{ + if (now < router->cache_info.published_on) + return router->uptime; + else + return router->uptime + (now - router->cache_info.published_on); +} + +/** Return 1 if <b>router</b> is not suitable for these parameters, else 0. + * If <b>need_uptime</b> is non-zero, we require a minimum uptime. + * If <b>need_capacity</b> is non-zero, we require a minimum advertised + * bandwidth. + */ +static int +dirserv_thinks_router_is_unreliable(time_t now, + routerinfo_t *router, + int need_uptime, int need_capacity) +{ + if (need_uptime) { + if (!enough_mtbf_info) { + /* XXXX We should change the rule from + * "use uptime if we don't have mtbf data" to "don't advertise Stable on + * v3 if we don't have enough mtbf data." Or maybe not, since if we ever + * hit a point where we need to reset a lot of authorities at once, + * none of them would be in a position to declare Stable. + */ + long uptime = real_uptime(router, now); + if ((unsigned)uptime < stable_uptime && + (unsigned)uptime < UPTIME_TO_GUARANTEE_STABLE) + return 1; + } else { + double mtbf = + rep_hist_get_stability(router->cache_info.identity_digest, now); + if (mtbf < stable_mtbf && + mtbf < MTBF_TO_GUARANTEE_STABLE) + return 1; + } + } + if (need_capacity) { + uint32_t bw_kb = dirserv_get_credible_bandwidth_kb(router); + if (bw_kb < fast_bandwidth_kb) + return 1; + } + return 0; +} + +/** Return true iff <b>router</b> should be assigned the "HSDir" flag. + * + * Right now this means it advertises support for it, it has a high uptime, + * it's a directory cache, it has the Stable and Fast flags, and it's currently + * considered Running. + * + * This function needs to be called after router-\>is_running has + * been set. + */ +static int +dirserv_thinks_router_is_hs_dir(const routerinfo_t *router, + const node_t *node, time_t now) +{ + + long uptime; + + /* If we haven't been running for at least + * get_options()->MinUptimeHidServDirectoryV2 seconds, we can't + * have accurate data telling us a relay has been up for at least + * that long. We also want to allow a bit of slack: Reachability + * tests aren't instant. If we haven't been running long enough, + * trust the relay. */ + + if (get_uptime() > + get_options()->MinUptimeHidServDirectoryV2 * 1.1) + uptime = MIN(rep_hist_get_uptime(router->cache_info.identity_digest, now), + real_uptime(router, now)); + else + uptime = real_uptime(router, now); + + return (router->wants_to_be_hs_dir && + router->supports_tunnelled_dir_requests && + node->is_stable && node->is_fast && + uptime >= get_options()->MinUptimeHidServDirectoryV2 && + router_is_active(router, node, now)); +} + +/** Don't consider routers with less bandwidth than this when computing + * thresholds. */ +#define ABSOLUTE_MIN_BW_VALUE_TO_CONSIDER_KB 4 + +/** Helper for dirserv_compute_performance_thresholds(): Decide whether to + * include a router in our calculations, and return true iff we should; the + * require_mbw parameter is passed in by + * dirserv_compute_performance_thresholds() and controls whether we ever + * count routers with only advertised bandwidths */ +static int +router_counts_toward_thresholds(const node_t *node, time_t now, + const digestmap_t *omit_as_sybil, + int require_mbw) +{ + /* Have measured bw? */ + int have_mbw = + dirserv_has_measured_bw(node->identity); + uint64_t min_bw_kb = ABSOLUTE_MIN_BW_VALUE_TO_CONSIDER_KB; + const or_options_t *options = get_options(); + + if (options->TestingTorNetwork) { + min_bw_kb = (int64_t)options->TestingMinExitFlagThreshold / 1000; + } + + return node->ri && router_is_active(node->ri, node, now) && + !digestmap_get(omit_as_sybil, node->identity) && + (dirserv_get_credible_bandwidth_kb(node->ri) >= min_bw_kb) && + (have_mbw || !require_mbw); +} + +/** Look through the routerlist, and using the measured bandwidth cache count + * how many measured bandwidths we know. This is used to decide whether we + * ever trust advertised bandwidths for purposes of assigning flags. */ +void +dirserv_count_measured_bws(const smartlist_t *routers) +{ + /* Initialize this first */ + routers_with_measured_bw = 0; + + /* Iterate over the routerlist and count measured bandwidths */ + SMARTLIST_FOREACH_BEGIN(routers, const routerinfo_t *, ri) { + /* Check if we know a measured bandwidth for this one */ + if (dirserv_has_measured_bw(ri->cache_info.identity_digest)) { + ++routers_with_measured_bw; + } + } SMARTLIST_FOREACH_END(ri); +} + +/** Look through the routerlist, the Mean Time Between Failure history, and + * the Weighted Fractional Uptime history, and use them to set thresholds for + * the Stable, Fast, and Guard flags. Update the fields stable_uptime, + * stable_mtbf, enough_mtbf_info, guard_wfu, guard_tk, fast_bandwidth, + * guard_bandwidth_including_exits, and guard_bandwidth_excluding_exits. + * + * Also, set the is_exit flag of each router appropriately. */ +void +dirserv_compute_performance_thresholds(digestmap_t *omit_as_sybil) +{ + int n_active, n_active_nonexit, n_familiar; + uint32_t *uptimes, *bandwidths_kb, *bandwidths_excluding_exits_kb; + long *tks; + double *mtbfs, *wfus; + smartlist_t *nodelist; + time_t now = time(NULL); + const or_options_t *options = get_options(); + + /* Require mbw? */ + int require_mbw = + (routers_with_measured_bw > + options->MinMeasuredBWsForAuthToIgnoreAdvertised) ? 1 : 0; + + /* initialize these all here, in case there are no routers */ + stable_uptime = 0; + stable_mtbf = 0; + fast_bandwidth_kb = 0; + guard_bandwidth_including_exits_kb = 0; + guard_bandwidth_excluding_exits_kb = 0; + guard_tk = 0; + guard_wfu = 0; + + nodelist_assert_ok(); + nodelist = nodelist_get_list(); + + /* Initialize arrays that will hold values for each router. We'll + * sort them and use that to compute thresholds. */ + n_active = n_active_nonexit = 0; + /* Uptime for every active router. */ + uptimes = tor_calloc(smartlist_len(nodelist), sizeof(uint32_t)); + /* Bandwidth for every active router. */ + bandwidths_kb = tor_calloc(smartlist_len(nodelist), sizeof(uint32_t)); + /* Bandwidth for every active non-exit router. */ + bandwidths_excluding_exits_kb = + tor_calloc(smartlist_len(nodelist), sizeof(uint32_t)); + /* Weighted mean time between failure for each active router. */ + mtbfs = tor_calloc(smartlist_len(nodelist), sizeof(double)); + /* Time-known for each active router. */ + tks = tor_calloc(smartlist_len(nodelist), sizeof(long)); + /* Weighted fractional uptime for each active router. */ + wfus = tor_calloc(smartlist_len(nodelist), sizeof(double)); + + /* Now, fill in the arrays. */ + SMARTLIST_FOREACH_BEGIN(nodelist, node_t *, node) { + if (options->BridgeAuthoritativeDir && + node->ri && + node->ri->purpose != ROUTER_PURPOSE_BRIDGE) + continue; + + routerinfo_t *ri = node->ri; + if (ri) { + node->is_exit = (!router_exit_policy_rejects_all(ri) && + exit_policy_is_general_exit(ri->exit_policy)); + } + + if (router_counts_toward_thresholds(node, now, omit_as_sybil, + require_mbw)) { + const char *id = node->identity; + uint32_t bw_kb; + + /* resolve spurious clang shallow analysis null pointer errors */ + tor_assert(ri); + + uptimes[n_active] = (uint32_t)real_uptime(ri, now); + mtbfs[n_active] = rep_hist_get_stability(id, now); + tks [n_active] = rep_hist_get_weighted_time_known(id, now); + bandwidths_kb[n_active] = bw_kb = dirserv_get_credible_bandwidth_kb(ri); + if (!node->is_exit || node->is_bad_exit) { + bandwidths_excluding_exits_kb[n_active_nonexit] = bw_kb; + ++n_active_nonexit; + } + ++n_active; + } + } SMARTLIST_FOREACH_END(node); + + /* Now, compute thresholds. */ + if (n_active) { + /* The median uptime is stable. */ + stable_uptime = median_uint32(uptimes, n_active); + /* The median mtbf is stable, if we have enough mtbf info */ + stable_mtbf = median_double(mtbfs, n_active); + /* The 12.5th percentile bandwidth is fast. */ + fast_bandwidth_kb = find_nth_uint32(bandwidths_kb, n_active, n_active/8); + /* (Now bandwidths is sorted.) */ + if (fast_bandwidth_kb < RELAY_REQUIRED_MIN_BANDWIDTH/(2 * 1000)) + fast_bandwidth_kb = bandwidths_kb[n_active/4]; + guard_bandwidth_including_exits_kb = + third_quartile_uint32(bandwidths_kb, n_active); + guard_tk = find_nth_long(tks, n_active, n_active/8); + } + + if (guard_tk > TIME_KNOWN_TO_GUARANTEE_FAMILIAR) + guard_tk = TIME_KNOWN_TO_GUARANTEE_FAMILIAR; + + { + /* We can vote on a parameter for the minimum and maximum. */ +#define ABSOLUTE_MIN_VALUE_FOR_FAST_FLAG 4 + int32_t min_fast_kb, max_fast_kb, min_fast, max_fast; + min_fast = networkstatus_get_param(NULL, "FastFlagMinThreshold", + ABSOLUTE_MIN_VALUE_FOR_FAST_FLAG, + ABSOLUTE_MIN_VALUE_FOR_FAST_FLAG, + INT32_MAX); + if (options->TestingTorNetwork) { + min_fast = (int32_t)options->TestingMinFastFlagThreshold; + } + max_fast = networkstatus_get_param(NULL, "FastFlagMaxThreshold", + INT32_MAX, min_fast, INT32_MAX); + min_fast_kb = min_fast / 1000; + max_fast_kb = max_fast / 1000; + + if (fast_bandwidth_kb < (uint32_t)min_fast_kb) + fast_bandwidth_kb = min_fast_kb; + if (fast_bandwidth_kb > (uint32_t)max_fast_kb) + fast_bandwidth_kb = max_fast_kb; + } + /* Protect sufficiently fast nodes from being pushed out of the set + * of Fast nodes. */ + if (options->AuthDirFastGuarantee && + fast_bandwidth_kb > options->AuthDirFastGuarantee/1000) + fast_bandwidth_kb = (uint32_t)options->AuthDirFastGuarantee/1000; + + /* Now that we have a time-known that 7/8 routers are known longer than, + * fill wfus with the wfu of every such "familiar" router. */ + n_familiar = 0; + + SMARTLIST_FOREACH_BEGIN(nodelist, node_t *, node) { + if (router_counts_toward_thresholds(node, now, + omit_as_sybil, require_mbw)) { + routerinfo_t *ri = node->ri; + const char *id = ri->cache_info.identity_digest; + long tk = rep_hist_get_weighted_time_known(id, now); + if (tk < guard_tk) + continue; + wfus[n_familiar++] = rep_hist_get_weighted_fractional_uptime(id, now); + } + } SMARTLIST_FOREACH_END(node); + if (n_familiar) + guard_wfu = median_double(wfus, n_familiar); + if (guard_wfu > WFU_TO_GUARANTEE_GUARD) + guard_wfu = WFU_TO_GUARANTEE_GUARD; + + enough_mtbf_info = rep_hist_have_measured_enough_stability(); + + if (n_active_nonexit) { + guard_bandwidth_excluding_exits_kb = + find_nth_uint32(bandwidths_excluding_exits_kb, + n_active_nonexit, n_active_nonexit*3/4); + } + + log_info(LD_DIRSERV, + "Cutoffs: For Stable, %lu sec uptime, %lu sec MTBF. " + "For Fast: %lu kilobytes/sec. " + "For Guard: WFU %.03f%%, time-known %lu sec, " + "and bandwidth %lu or %lu kilobytes/sec. " + "We%s have enough stability data.", + (unsigned long)stable_uptime, + (unsigned long)stable_mtbf, + (unsigned long)fast_bandwidth_kb, + guard_wfu*100, + (unsigned long)guard_tk, + (unsigned long)guard_bandwidth_including_exits_kb, + (unsigned long)guard_bandwidth_excluding_exits_kb, + enough_mtbf_info ? "" : " don't"); + + tor_free(uptimes); + tor_free(mtbfs); + tor_free(bandwidths_kb); + tor_free(bandwidths_excluding_exits_kb); + tor_free(tks); + tor_free(wfus); +} + +/* Use dirserv_compute_performance_thresholds() to compute the thresholds + * for the status flags, specifically for bridges. + * + * This is only called by a Bridge Authority from + * networkstatus_getinfo_by_purpose(). + */ +void +dirserv_compute_bridge_flag_thresholds(void) +{ + digestmap_t *omit_as_sybil = digestmap_new(); + dirserv_compute_performance_thresholds(omit_as_sybil); + digestmap_free(omit_as_sybil, NULL); +} + +/** Measured bandwidth cache entry */ +typedef struct mbw_cache_entry_s { + long mbw_kb; + time_t as_of; +} mbw_cache_entry_t; + +/** Measured bandwidth cache - keys are identity_digests, values are + * mbw_cache_entry_t *. */ +static digestmap_t *mbw_cache = NULL; + +/** Store a measured bandwidth cache entry when reading the measured + * bandwidths file. */ +STATIC void +dirserv_cache_measured_bw(const measured_bw_line_t *parsed_line, + time_t as_of) +{ + mbw_cache_entry_t *e = NULL; + + tor_assert(parsed_line); + + /* Allocate a cache if we need */ + if (!mbw_cache) mbw_cache = digestmap_new(); + + /* Check if we have an existing entry */ + e = digestmap_get(mbw_cache, parsed_line->node_id); + /* If we do, we can re-use it */ + if (e) { + /* Check that we really are newer, and update */ + if (as_of > e->as_of) { + e->mbw_kb = parsed_line->bw_kb; + e->as_of = as_of; + } + } else { + /* We'll have to insert a new entry */ + e = tor_malloc(sizeof(*e)); + e->mbw_kb = parsed_line->bw_kb; + e->as_of = as_of; + digestmap_set(mbw_cache, parsed_line->node_id, e); + } +} + +/** Clear and free the measured bandwidth cache */ +void +dirserv_clear_measured_bw_cache(void) +{ + if (mbw_cache) { + /* Free the map and all entries */ + digestmap_free(mbw_cache, tor_free_); + mbw_cache = NULL; + } +} + +/** Scan the measured bandwidth cache and remove expired entries */ +STATIC void +dirserv_expire_measured_bw_cache(time_t now) +{ + + if (mbw_cache) { + /* Iterate through the cache and check each entry */ + DIGESTMAP_FOREACH_MODIFY(mbw_cache, k, mbw_cache_entry_t *, e) { + if (now > e->as_of + MAX_MEASUREMENT_AGE) { + tor_free(e); + MAP_DEL_CURRENT(k); + } + } DIGESTMAP_FOREACH_END; + + /* Check if we cleared the whole thing and free if so */ + if (digestmap_size(mbw_cache) == 0) { + digestmap_free(mbw_cache, tor_free_); + mbw_cache = 0; + } + } +} + +/** Query the cache by identity digest, return value indicates whether + * we found it. The bw_out and as_of_out pointers receive the cached + * bandwidth value and the time it was cached if not NULL. */ +int +dirserv_query_measured_bw_cache_kb(const char *node_id, long *bw_kb_out, + time_t *as_of_out) +{ + mbw_cache_entry_t *v = NULL; + int rv = 0; + + if (mbw_cache && node_id) { + v = digestmap_get(mbw_cache, node_id); + if (v) { + /* Found something */ + rv = 1; + if (bw_kb_out) *bw_kb_out = v->mbw_kb; + if (as_of_out) *as_of_out = v->as_of; + } + } + + return rv; +} + +/** Predicate wrapper for dirserv_query_measured_bw_cache() */ +int +dirserv_has_measured_bw(const char *node_id) +{ + return dirserv_query_measured_bw_cache_kb(node_id, NULL, NULL); +} + +/** Get the current size of the measured bandwidth cache */ +int +dirserv_get_measured_bw_cache_size(void) +{ + if (mbw_cache) return digestmap_size(mbw_cache); + else return 0; +} + +/** Return the bandwidth we believe for assigning flags; prefer measured + * over advertised, and if we have above a threshold quantity of measured + * bandwidths, we don't want to ever give flags to unmeasured routers, so + * return 0. */ +static uint32_t +dirserv_get_credible_bandwidth_kb(const routerinfo_t *ri) +{ + int threshold; + uint32_t bw_kb = 0; + long mbw_kb; + + tor_assert(ri); + /* Check if we have a measured bandwidth, and check the threshold if not */ + if (!(dirserv_query_measured_bw_cache_kb(ri->cache_info.identity_digest, + &mbw_kb, NULL))) { + threshold = get_options()->MinMeasuredBWsForAuthToIgnoreAdvertised; + if (routers_with_measured_bw > threshold) { + /* Return zero for unmeasured bandwidth if we are above threshold */ + bw_kb = 0; + } else { + /* Return an advertised bandwidth otherwise */ + bw_kb = router_get_advertised_bandwidth_capped(ri) / 1000; + } + } else { + /* We have the measured bandwidth in mbw */ + bw_kb = (uint32_t)mbw_kb; + } + + return bw_kb; +} + +/** Give a statement of our current performance thresholds for inclusion + * in a vote document. */ +char * +dirserv_get_flag_thresholds_line(void) +{ + char *result=NULL; + const int measured_threshold = + get_options()->MinMeasuredBWsForAuthToIgnoreAdvertised; + const int enough_measured_bw = routers_with_measured_bw > measured_threshold; + + tor_asprintf(&result, + "stable-uptime=%lu stable-mtbf=%lu " + "fast-speed=%lu " + "guard-wfu=%.03f%% guard-tk=%lu " + "guard-bw-inc-exits=%lu guard-bw-exc-exits=%lu " + "enough-mtbf=%d ignoring-advertised-bws=%d", + (unsigned long)stable_uptime, + (unsigned long)stable_mtbf, + (unsigned long)fast_bandwidth_kb*1000, + guard_wfu*100, + (unsigned long)guard_tk, + (unsigned long)guard_bandwidth_including_exits_kb*1000, + (unsigned long)guard_bandwidth_excluding_exits_kb*1000, + enough_mtbf_info ? 1 : 0, + enough_measured_bw ? 1 : 0); + + return result; +} + +/** Helper: write the router-status information in <b>rs</b> into a newly + * allocated character buffer. Use the same format as in network-status + * documents. If <b>version</b> is non-NULL, add a "v" line for the platform. + * + * consensus_method is the current consensus method when format is + * NS_V3_CONSENSUS or NS_V3_CONSENSUS_MICRODESC. It is ignored for other + * formats: pass ROUTERSTATUS_FORMAT_NO_CONSENSUS_METHOD. + * + * Return 0 on success, -1 on failure. + * + * The format argument has one of the following values: + * NS_V2 - Output an entry suitable for a V2 NS opinion document + * NS_V3_CONSENSUS - Output the first portion of a V3 NS consensus entry + * for consensus_method. + * NS_V3_CONSENSUS_MICRODESC - Output the first portion of a V3 microdesc + * consensus entry for consensus_method. + * NS_V3_VOTE - Output a complete V3 NS vote. If <b>vrs</b> is present, + * it contains additional information for the vote. + * NS_CONTROL_PORT - Output a NS document for the control port. + */ +char * +routerstatus_format_entry(const routerstatus_t *rs, const char *version, + const char *protocols, + routerstatus_format_type_t format, + int consensus_method, + const vote_routerstatus_t *vrs) +{ + char *summary; + char *result = NULL; + + char published[ISO_TIME_LEN+1]; + char identity64[BASE64_DIGEST_LEN+1]; + char digest64[BASE64_DIGEST_LEN+1]; + smartlist_t *chunks = smartlist_new(); + + format_iso_time(published, rs->published_on); + digest_to_base64(identity64, rs->identity_digest); + digest_to_base64(digest64, rs->descriptor_digest); + + smartlist_add_asprintf(chunks, + "r %s %s %s%s%s %s %d %d\n", + rs->nickname, + identity64, + (format==NS_V3_CONSENSUS_MICRODESC)?"":digest64, + (format==NS_V3_CONSENSUS_MICRODESC)?"":" ", + published, + fmt_addr32(rs->addr), + (int)rs->or_port, + (int)rs->dir_port); + + /* TODO: Maybe we want to pass in what we need to build the rest of + * this here, instead of in the caller. Then we could use the + * networkstatus_type_t values, with an additional control port value + * added -MP */ + + /* V3 microdesc consensuses only have "a" lines in later consensus methods + */ + if (format == NS_V3_CONSENSUS_MICRODESC && + consensus_method < MIN_METHOD_FOR_A_LINES_IN_MICRODESC_CONSENSUS) + goto done; + + /* Possible "a" line. At most one for now. */ + if (!tor_addr_is_null(&rs->ipv6_addr)) { + smartlist_add_asprintf(chunks, "a %s\n", + fmt_addrport(&rs->ipv6_addr, rs->ipv6_orport)); + } + + if (format == NS_V3_CONSENSUS || format == NS_V3_CONSENSUS_MICRODESC) + goto done; + + smartlist_add_asprintf(chunks, + "s%s%s%s%s%s%s%s%s%s%s\n", + /* These must stay in alphabetical order. */ + rs->is_authority?" Authority":"", + rs->is_bad_exit?" BadExit":"", + rs->is_exit?" Exit":"", + rs->is_fast?" Fast":"", + rs->is_possible_guard?" Guard":"", + rs->is_hs_dir?" HSDir":"", + rs->is_flagged_running?" Running":"", + rs->is_stable?" Stable":"", + rs->is_v2_dir?" V2Dir":"", + rs->is_valid?" Valid":""); + + /* length of "opt v \n" */ +#define V_LINE_OVERHEAD 7 + if (version && strlen(version) < MAX_V_LINE_LEN - V_LINE_OVERHEAD) { + smartlist_add_asprintf(chunks, "v %s\n", version); + } + if (protocols) { + smartlist_add_asprintf(chunks, "pr %s\n", protocols); + } + + if (format != NS_V2) { + const routerinfo_t* desc = router_get_by_id_digest(rs->identity_digest); + uint32_t bw_kb; + + if (format != NS_CONTROL_PORT) { + /* Blow up more or less nicely if we didn't get anything or not the + * thing we expected. + */ + if (!desc) { + char id[HEX_DIGEST_LEN+1]; + char dd[HEX_DIGEST_LEN+1]; + + base16_encode(id, sizeof(id), rs->identity_digest, DIGEST_LEN); + base16_encode(dd, sizeof(dd), rs->descriptor_digest, DIGEST_LEN); + log_warn(LD_BUG, "Cannot get any descriptor for %s " + "(wanted descriptor %s).", + id, dd); + goto err; + } + + /* This assert could fire for the control port, because + * it can request NS documents before all descriptors + * have been fetched. Therefore, we only do this test when + * format != NS_CONTROL_PORT. */ + if (tor_memneq(desc->cache_info.signed_descriptor_digest, + rs->descriptor_digest, + DIGEST_LEN)) { + char rl_d[HEX_DIGEST_LEN+1]; + char rs_d[HEX_DIGEST_LEN+1]; + char id[HEX_DIGEST_LEN+1]; + + base16_encode(rl_d, sizeof(rl_d), + desc->cache_info.signed_descriptor_digest, DIGEST_LEN); + base16_encode(rs_d, sizeof(rs_d), rs->descriptor_digest, DIGEST_LEN); + base16_encode(id, sizeof(id), rs->identity_digest, DIGEST_LEN); + log_err(LD_BUG, "descriptor digest in routerlist does not match " + "the one in routerstatus: %s vs %s " + "(router %s)\n", + rl_d, rs_d, id); + + tor_assert(tor_memeq(desc->cache_info.signed_descriptor_digest, + rs->descriptor_digest, + DIGEST_LEN)); + } + } + + if (format == NS_CONTROL_PORT && rs->has_bandwidth) { + bw_kb = rs->bandwidth_kb; + } else { + tor_assert(desc); + bw_kb = router_get_advertised_bandwidth_capped(desc) / 1000; + } + smartlist_add_asprintf(chunks, + "w Bandwidth=%d", bw_kb); + + if (format == NS_V3_VOTE && vrs && vrs->has_measured_bw) { + smartlist_add_asprintf(chunks, + " Measured=%d", vrs->measured_bw_kb); + } + /* Write down guardfraction information if we have it. */ + if (format == NS_V3_VOTE && vrs && vrs->status.has_guardfraction) { + smartlist_add_asprintf(chunks, + " GuardFraction=%d", + vrs->status.guardfraction_percentage); + } + + smartlist_add_strdup(chunks, "\n"); + + if (desc) { + summary = policy_summarize(desc->exit_policy, AF_INET); + smartlist_add_asprintf(chunks, "p %s\n", summary); + tor_free(summary); + } + + if (format == NS_V3_VOTE && vrs) { + if (tor_mem_is_zero((char*)vrs->ed25519_id, ED25519_PUBKEY_LEN)) { + smartlist_add_strdup(chunks, "id ed25519 none\n"); + } else { + char ed_b64[BASE64_DIGEST256_LEN+1]; + digest256_to_base64(ed_b64, (const char*)vrs->ed25519_id); + smartlist_add_asprintf(chunks, "id ed25519 %s\n", ed_b64); + } + } + } + + done: + result = smartlist_join_strings(chunks, "", 0, NULL); + + err: + SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp)); + smartlist_free(chunks); + + return result; +} + +/** Extract status information from <b>ri</b> and from other authority + * functions and store it in <b>rs</b>. <b>rs</b> is zeroed out before it is + * set. + * + * We assume that ri-\>is_running has already been set, e.g. by + * dirserv_set_router_is_running(ri, now); + */ +void +set_routerstatus_from_routerinfo(routerstatus_t *rs, + node_t *node, + routerinfo_t *ri, + time_t now, + int listbadexits) +{ + const or_options_t *options = get_options(); + uint32_t routerbw_kb = dirserv_get_credible_bandwidth_kb(ri); + + memset(rs, 0, sizeof(routerstatus_t)); + + rs->is_authority = + router_digest_is_trusted_dir(ri->cache_info.identity_digest); + + /* Already set by compute_performance_thresholds. */ + rs->is_exit = node->is_exit; + rs->is_stable = node->is_stable = + !dirserv_thinks_router_is_unreliable(now, ri, 1, 0); + rs->is_fast = node->is_fast = + !dirserv_thinks_router_is_unreliable(now, ri, 0, 1); + rs->is_flagged_running = node->is_running; /* computed above */ + + rs->is_valid = node->is_valid; + + if (node->is_fast && node->is_stable && + ri->supports_tunnelled_dir_requests && + ((options->AuthDirGuardBWGuarantee && + routerbw_kb >= options->AuthDirGuardBWGuarantee/1000) || + routerbw_kb >= MIN(guard_bandwidth_including_exits_kb, + guard_bandwidth_excluding_exits_kb))) { + long tk = rep_hist_get_weighted_time_known( + node->identity, now); + double wfu = rep_hist_get_weighted_fractional_uptime( + node->identity, now); + rs->is_possible_guard = (wfu >= guard_wfu && tk >= guard_tk) ? 1 : 0; + } else { + rs->is_possible_guard = 0; + } + + rs->is_bad_exit = listbadexits && node->is_bad_exit; + rs->is_hs_dir = node->is_hs_dir = + dirserv_thinks_router_is_hs_dir(ri, node, now); + + rs->is_named = rs->is_unnamed = 0; + + rs->published_on = ri->cache_info.published_on; + memcpy(rs->identity_digest, node->identity, DIGEST_LEN); + memcpy(rs->descriptor_digest, ri->cache_info.signed_descriptor_digest, + DIGEST_LEN); + rs->addr = ri->addr; + strlcpy(rs->nickname, ri->nickname, sizeof(rs->nickname)); + rs->or_port = ri->or_port; + rs->dir_port = ri->dir_port; + rs->is_v2_dir = ri->supports_tunnelled_dir_requests; + if (options->AuthDirHasIPv6Connectivity == 1 && + !tor_addr_is_null(&ri->ipv6_addr) && + node->last_reachable6 >= now - REACHABLE_TIMEOUT) { + /* We're configured as having IPv6 connectivity. There's an IPv6 + OR port and it's reachable so copy it to the routerstatus. */ + tor_addr_copy(&rs->ipv6_addr, &ri->ipv6_addr); + rs->ipv6_orport = ri->ipv6_orport; + } else { + tor_addr_make_null(&rs->ipv6_addr, AF_INET6); + rs->ipv6_orport = 0; + } + + if (options->TestingTorNetwork) { + dirserv_set_routerstatus_testing(rs); + } +} + +/** Use TestingDirAuthVoteExit, TestingDirAuthVoteGuard, and + * TestingDirAuthVoteHSDir to give out the Exit, Guard, and HSDir flags, + * respectively. But don't set the corresponding node flags. + * Should only be called if TestingTorNetwork is set. */ +STATIC void +dirserv_set_routerstatus_testing(routerstatus_t *rs) +{ + const or_options_t *options = get_options(); + + tor_assert(options->TestingTorNetwork); + + if (routerset_contains_routerstatus(options->TestingDirAuthVoteExit, + rs, 0)) { + rs->is_exit = 1; + } else if (options->TestingDirAuthVoteExitIsStrict) { + rs->is_exit = 0; + } + + if (routerset_contains_routerstatus(options->TestingDirAuthVoteGuard, + rs, 0)) { + rs->is_possible_guard = 1; + } else if (options->TestingDirAuthVoteGuardIsStrict) { + rs->is_possible_guard = 0; + } + + if (routerset_contains_routerstatus(options->TestingDirAuthVoteHSDir, + rs, 0)) { + rs->is_hs_dir = 1; + } else if (options->TestingDirAuthVoteHSDirIsStrict) { + rs->is_hs_dir = 0; + } +} + +/** The guardfraction of the guard with identity fingerprint <b>guard_id</b> + * is <b>guardfraction_percentage</b>. See if we have a vote routerstatus for + * this guard in <b>vote_routerstatuses</b>, and if we do, register the + * information to it. + * + * Return 1 if we applied the information and 0 if we couldn't find a + * matching guard. + * + * Requires that <b>vote_routerstatuses</b> be sorted. + */ +static int +guardfraction_line_apply(const char *guard_id, + uint32_t guardfraction_percentage, + smartlist_t *vote_routerstatuses) +{ + vote_routerstatus_t *vrs = NULL; + + tor_assert(vote_routerstatuses); + + vrs = smartlist_bsearch(vote_routerstatuses, guard_id, + compare_digest_to_vote_routerstatus_entry); + + if (!vrs) { + return 0; + } + + vrs->status.has_guardfraction = 1; + vrs->status.guardfraction_percentage = guardfraction_percentage; + + return 1; +} + +/* Given a guard line from a guardfraction file, parse it and register + * its information to <b>vote_routerstatuses</b>. + * + * Return: + * * 1 if the line was proper and its information got registered. + * * 0 if the line was proper but no currently active guard was found + * to register the guardfraction information to. + * * -1 if the line could not be parsed and set <b>err_msg</b> to a + newly allocated string containing the error message. + */ +static int +guardfraction_file_parse_guard_line(const char *guard_line, + smartlist_t *vote_routerstatuses, + char **err_msg) +{ + char guard_id[DIGEST_LEN]; + uint32_t guardfraction; + char *inputs_tmp = NULL; + int num_ok = 1; + + smartlist_t *sl = smartlist_new(); + int retval = -1; + + tor_assert(err_msg); + + /* guard_line should contain something like this: + <hex digest> <guardfraction> <appearances> */ + smartlist_split_string(sl, guard_line, " ", + SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 3); + if (smartlist_len(sl) < 3) { + tor_asprintf(err_msg, "bad line '%s'", guard_line); + goto done; + } + + inputs_tmp = smartlist_get(sl, 0); + if (strlen(inputs_tmp) != HEX_DIGEST_LEN || + base16_decode(guard_id, DIGEST_LEN, + inputs_tmp, HEX_DIGEST_LEN) != DIGEST_LEN) { + tor_asprintf(err_msg, "bad digest '%s'", inputs_tmp); + goto done; + } + + inputs_tmp = smartlist_get(sl, 1); + /* Guardfraction is an integer in [0, 100]. */ + guardfraction = + (uint32_t) tor_parse_long(inputs_tmp, 10, 0, 100, &num_ok, NULL); + if (!num_ok) { + tor_asprintf(err_msg, "wrong percentage '%s'", inputs_tmp); + goto done; + } + + /* If routerstatuses were provided, apply this info to actual routers. */ + if (vote_routerstatuses) { + retval = guardfraction_line_apply(guard_id, guardfraction, + vote_routerstatuses); + } else { + retval = 0; /* If we got this far, line was correctly formatted. */ + } + + done: + + SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp)); + smartlist_free(sl); + + return retval; +} + +/** Given an inputs line from a guardfraction file, parse it and + * register its information to <b>total_consensuses</b> and + * <b>total_days</b>. + * + * Return 0 if it parsed well. Return -1 if there was an error, and + * set <b>err_msg</b> to a newly allocated string containing the + * error message. + */ +static int +guardfraction_file_parse_inputs_line(const char *inputs_line, + int *total_consensuses, + int *total_days, + char **err_msg) +{ + int retval = -1; + char *inputs_tmp = NULL; + int num_ok = 1; + smartlist_t *sl = smartlist_new(); + + tor_assert(err_msg); + + /* Second line is inputs information: + * n-inputs <total_consensuses> <total_days>. */ + smartlist_split_string(sl, inputs_line, " ", + SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 3); + if (smartlist_len(sl) < 2) { + tor_asprintf(err_msg, "incomplete line '%s'", inputs_line); + goto done; + } + + inputs_tmp = smartlist_get(sl, 0); + *total_consensuses = + (int) tor_parse_long(inputs_tmp, 10, 0, INT_MAX, &num_ok, NULL); + if (!num_ok) { + tor_asprintf(err_msg, "unparseable consensus '%s'", inputs_tmp); + goto done; + } + + inputs_tmp = smartlist_get(sl, 1); + *total_days = + (int) tor_parse_long(inputs_tmp, 10, 0, INT_MAX, &num_ok, NULL); + if (!num_ok) { + tor_asprintf(err_msg, "unparseable days '%s'", inputs_tmp); + goto done; + } + + retval = 0; + + done: + SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp)); + smartlist_free(sl); + + return retval; +} + +/* Maximum age of a guardfraction file that we are willing to accept. */ +#define MAX_GUARDFRACTION_FILE_AGE (7*24*60*60) /* approx a week */ + +/** Static strings of guardfraction files. */ +#define GUARDFRACTION_DATE_STR "written-at" +#define GUARDFRACTION_INPUTS "n-inputs" +#define GUARDFRACTION_GUARD "guard-seen" +#define GUARDFRACTION_VERSION "guardfraction-file-version" + +/** Given a guardfraction file in a string, parse it and register the + * guardfraction information to the provided vote routerstatuses. + * + * This is the rough format of the guardfraction file: + * + * guardfraction-file-version 1 + * written-at <date and time> + * n-inputs <number of consesuses parsed> <number of days considered> + * + * guard-seen <fpr 1> <guardfraction percentage> <consensus appearances> + * guard-seen <fpr 2> <guardfraction percentage> <consensus appearances> + * guard-seen <fpr 3> <guardfraction percentage> <consensus appearances> + * guard-seen <fpr 4> <guardfraction percentage> <consensus appearances> + * guard-seen <fpr 5> <guardfraction percentage> <consensus appearances> + * ... + * + * Return -1 if the parsing failed and 0 if it went smoothly. Parsing + * should tolerate errors in all lines but the written-at header. + */ +STATIC int +dirserv_read_guardfraction_file_from_str(const char *guardfraction_file_str, + smartlist_t *vote_routerstatuses) +{ + config_line_t *front=NULL, *line; + int ret_tmp; + int retval = -1; + int current_line_n = 0; /* line counter for better log messages */ + + /* Guardfraction info to be parsed */ + int total_consensuses = 0; + int total_days = 0; + + /* Stats */ + int guards_read_n = 0; + int guards_applied_n = 0; + + /* Parse file and split it in lines */ + ret_tmp = config_get_lines(guardfraction_file_str, &front, 0); + if (ret_tmp < 0) { + log_warn(LD_CONFIG, "Error reading from guardfraction file"); + goto done; + } + + /* Sort routerstatuses (needed later when applying guardfraction info) */ + if (vote_routerstatuses) + smartlist_sort(vote_routerstatuses, compare_vote_routerstatus_entries); + + for (line = front; line; line=line->next) { + current_line_n++; + + if (!strcmp(line->key, GUARDFRACTION_VERSION)) { + int num_ok = 1; + unsigned int version; + + version = + (unsigned int) tor_parse_long(line->value, + 10, 0, INT_MAX, &num_ok, NULL); + + if (!num_ok || version != 1) { + log_warn(LD_GENERAL, "Got unknown guardfraction version %d.", version); + goto done; + } + } else if (!strcmp(line->key, GUARDFRACTION_DATE_STR)) { + time_t file_written_at; + time_t now = time(NULL); + + /* First line is 'written-at <date>' */ + if (parse_iso_time(line->value, &file_written_at) < 0) { + log_warn(LD_CONFIG, "Guardfraction:%d: Bad date '%s'. Ignoring", + current_line_n, line->value); + goto done; /* don't tolerate failure here. */ + } + if (file_written_at < now - MAX_GUARDFRACTION_FILE_AGE) { + log_warn(LD_CONFIG, "Guardfraction:%d: was written very long ago '%s'", + current_line_n, line->value); + goto done; /* don't tolerate failure here. */ + } + } else if (!strcmp(line->key, GUARDFRACTION_INPUTS)) { + char *err_msg = NULL; + + if (guardfraction_file_parse_inputs_line(line->value, + &total_consensuses, + &total_days, + &err_msg) < 0) { + log_warn(LD_CONFIG, "Guardfraction:%d: %s", + current_line_n, err_msg); + tor_free(err_msg); + continue; + } + + } else if (!strcmp(line->key, GUARDFRACTION_GUARD)) { + char *err_msg = NULL; + + ret_tmp = guardfraction_file_parse_guard_line(line->value, + vote_routerstatuses, + &err_msg); + if (ret_tmp < 0) { /* failed while parsing the guard line */ + log_warn(LD_CONFIG, "Guardfraction:%d: %s", + current_line_n, err_msg); + tor_free(err_msg); + continue; + } + + /* Successfully parsed guard line. Check if it was applied properly. */ + guards_read_n++; + if (ret_tmp > 0) { + guards_applied_n++; + } + } else { + log_warn(LD_CONFIG, "Unknown guardfraction line %d (%s %s)", + current_line_n, line->key, line->value); + } + } + + retval = 0; + + log_info(LD_CONFIG, + "Successfully parsed guardfraction file with %d consensuses over " + "%d days. Parsed %d nodes and applied %d of them%s.", + total_consensuses, total_days, guards_read_n, guards_applied_n, + vote_routerstatuses ? "" : " (no routerstatus provided)" ); + + done: + config_free_lines(front); + + if (retval < 0) { + return retval; + } else { + return guards_read_n; + } +} + +/** Read a guardfraction file at <b>fname</b> and load all its + * information to <b>vote_routerstatuses</b>. */ +int +dirserv_read_guardfraction_file(const char *fname, + smartlist_t *vote_routerstatuses) +{ + char *guardfraction_file_str; + + /* Read file to a string */ + guardfraction_file_str = read_file_to_str(fname, RFTS_IGNORE_MISSING, NULL); + if (!guardfraction_file_str) { + log_warn(LD_FS, "Cannot open guardfraction file '%s'. Failing.", fname); + return -1; + } + + return dirserv_read_guardfraction_file_from_str(guardfraction_file_str, + vote_routerstatuses); +} + +/** + * Helper function to parse out a line in the measured bandwidth file + * into a measured_bw_line_t output structure. + * + * If <b>line_is_after_headers</b> is true, then if we encounter an incomplete + * bw line, return -1 and warn, since we are after the headers and we should + * only parse bw lines. Return 0 otherwise. + * + * If <b>line_is_after_headers</b> is false then it means that we are not past + * the header block yet. If we encounter an incomplete bw line, return -1 but + * don't warn since there could be additional header lines coming. If we + * encounter a proper bw line, return 0 (and we got past the headers). + */ +STATIC int +measured_bw_line_parse(measured_bw_line_t *out, const char *orig_line, + int line_is_after_headers) +{ + char *line = tor_strdup(orig_line); + char *cp = line; + int got_bw = 0; + int got_node_id = 0; + char *strtok_state; /* lame sauce d'jour */ + + if (strlen(line) == 0) { + log_warn(LD_DIRSERV, "Empty line in bandwidth file"); + tor_free(line); + return -1; + } + + /* Remove end of line character, so that is not part of the token */ + if (line[strlen(line) - 1] == '\n') { + line[strlen(line) - 1] = '\0'; + } + + cp = tor_strtok_r(cp, " \t", &strtok_state); + + if (!cp) { + log_warn(LD_DIRSERV, "Invalid line in bandwidth file: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + + if (orig_line[strlen(orig_line)-1] != '\n') { + log_warn(LD_DIRSERV, "Incomplete line in bandwidth file: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + + do { + if (strcmpstart(cp, "bw=") == 0) { + int parse_ok = 0; + char *endptr; + if (got_bw) { + log_warn(LD_DIRSERV, "Double bw= in bandwidth file line: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + cp+=strlen("bw="); + + out->bw_kb = tor_parse_long(cp, 10, 0, LONG_MAX, &parse_ok, &endptr); + if (!parse_ok || (*endptr && !TOR_ISSPACE(*endptr))) { + log_warn(LD_DIRSERV, "Invalid bandwidth in bandwidth file line: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + got_bw=1; + } else if (strcmpstart(cp, "node_id=$") == 0) { + if (got_node_id) { + log_warn(LD_DIRSERV, "Double node_id= in bandwidth file line: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + cp+=strlen("node_id=$"); + + if (strlen(cp) != HEX_DIGEST_LEN || + base16_decode(out->node_id, DIGEST_LEN, + cp, HEX_DIGEST_LEN) != DIGEST_LEN) { + log_warn(LD_DIRSERV, "Invalid node_id in bandwidth file line: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } + strlcpy(out->node_hex, cp, sizeof(out->node_hex)); + got_node_id=1; + } + } while ((cp = tor_strtok_r(NULL, " \t", &strtok_state))); + + if (got_bw && got_node_id) { + tor_free(line); + return 0; + } else if (line_is_after_headers == 0) { + /* There could be additional header lines, therefore do not give warnings + * but returns -1 since it's not a complete bw line. */ + log_debug(LD_DIRSERV, "Missing bw or node_id in bandwidth file line: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } else { + log_warn(LD_DIRSERV, "Incomplete line in bandwidth file: %s", + escaped(orig_line)); + tor_free(line); + return -1; + } +} + +/** + * Helper function to apply a parsed measurement line to a list + * of bandwidth statuses. Returns true if a line is found, + * false otherwise. + */ +STATIC int +measured_bw_line_apply(measured_bw_line_t *parsed_line, + smartlist_t *routerstatuses) +{ + vote_routerstatus_t *rs = NULL; + if (!routerstatuses) + return 0; + + rs = smartlist_bsearch(routerstatuses, parsed_line->node_id, + compare_digest_to_vote_routerstatus_entry); + + if (rs) { + rs->has_measured_bw = 1; + rs->measured_bw_kb = (uint32_t)parsed_line->bw_kb; + } else { + log_info(LD_DIRSERV, "Node ID %s not found in routerstatus list", + parsed_line->node_hex); + } + + return rs != NULL; +} + +/** + * Read the measured bandwidth file and apply it to the list of + * vote_routerstatus_t. Returns -1 on error, 0 otherwise. + */ +int +dirserv_read_measured_bandwidths(const char *from_file, + smartlist_t *routerstatuses) +{ + char line[512]; + FILE *fp = tor_fopen_cloexec(from_file, "r"); + int applied_lines = 0; + time_t file_time, now; + int ok; + /* This flag will be 1 only when the first successful bw measurement line + * has been encountered, so that measured_bw_line_parse don't give warnings + * if there are additional header lines, as introduced in Bandwidth List spec + * version 1.1.0 */ + int line_is_after_headers = 0; + + /* Initialise line, so that we can't possibly run off the end. */ + memset(line, 0, sizeof(line)); + + if (fp == NULL) { + log_warn(LD_CONFIG, "Can't open bandwidth file at configured location: %s", + from_file); + return -1; + } + + /* If fgets fails, line is either unmodified, or indeterminate. */ + if (!fgets(line, sizeof(line), fp)) { + log_warn(LD_DIRSERV, "Empty bandwidth file"); + fclose(fp); + return -1; + } + + if (!strlen(line) || line[strlen(line)-1] != '\n') { + log_warn(LD_DIRSERV, "Long or truncated time in bandwidth file: %s", + escaped(line)); + fclose(fp); + return -1; + } + + line[strlen(line)-1] = '\0'; + file_time = (time_t)tor_parse_ulong(line, 10, 0, ULONG_MAX, &ok, NULL); + if (!ok) { + log_warn(LD_DIRSERV, "Non-integer time in bandwidth file: %s", + escaped(line)); + fclose(fp); + return -1; + } + + now = time(NULL); + if ((now - file_time) > MAX_MEASUREMENT_AGE) { + log_warn(LD_DIRSERV, "Bandwidth measurement file stale. Age: %u", + (unsigned)(time(NULL) - file_time)); + fclose(fp); + return -1; + } + + if (routerstatuses) + smartlist_sort(routerstatuses, compare_vote_routerstatus_entries); + + while (!feof(fp)) { + measured_bw_line_t parsed_line; + if (fgets(line, sizeof(line), fp) && strlen(line)) { + if (measured_bw_line_parse(&parsed_line, line, + line_is_after_headers) != -1) { + /* This condition will be true when the first complete valid bw line + * has been encountered, which means the end of the header lines. */ + line_is_after_headers = 1; + /* Also cache the line for dirserv_get_bandwidth_for_router() */ + dirserv_cache_measured_bw(&parsed_line, file_time); + if (measured_bw_line_apply(&parsed_line, routerstatuses) > 0) + applied_lines++; + } + } + } + + /* Now would be a nice time to clean the cache, too */ + dirserv_expire_measured_bw_cache(now); + + fclose(fp); + log_info(LD_DIRSERV, + "Bandwidth measurement file successfully read. " + "Applied %d measurements.", applied_lines); + return 0; +} + +/** As dirserv_get_routerdescs(), but instead of getting signed_descriptor_t + * pointers, adds copies of digests to fps_out, and doesn't use the + * /tor/server/ prefix. For a /d/ request, adds descriptor digests; for other + * requests, adds identity digests. + */ +int +dirserv_get_routerdesc_spool(smartlist_t *spool_out, + const char *key, + dir_spool_source_t source, + int conn_is_encrypted, + const char **msg_out) +{ + *msg_out = NULL; + + if (!strcmp(key, "all")) { + const routerlist_t *rl = router_get_routerlist(); + SMARTLIST_FOREACH_BEGIN(rl->routers, const routerinfo_t *, r) { + spooled_resource_t *spooled; + spooled = spooled_resource_new(source, + (const uint8_t *)r->cache_info.identity_digest, + DIGEST_LEN); + /* Treat "all" requests as if they were unencrypted */ + conn_is_encrypted = 0; + smartlist_add(spool_out, spooled); + } SMARTLIST_FOREACH_END(r); + } else if (!strcmp(key, "authority")) { + const routerinfo_t *ri = router_get_my_routerinfo(); + if (ri) + smartlist_add(spool_out, + spooled_resource_new(source, + (const uint8_t *)ri->cache_info.identity_digest, + DIGEST_LEN)); + } else if (!strcmpstart(key, "d/")) { + key += strlen("d/"); + dir_split_resource_into_spoolable(key, source, spool_out, NULL, + DSR_HEX|DSR_SORT_UNIQ); + } else if (!strcmpstart(key, "fp/")) { + key += strlen("fp/"); + dir_split_resource_into_spoolable(key, source, spool_out, NULL, + DSR_HEX|DSR_SORT_UNIQ); + } else { + *msg_out = "Not found"; + return -1; + } + + if (! conn_is_encrypted) { + /* Remove anything that insists it not be sent unencrypted. */ + SMARTLIST_FOREACH_BEGIN(spool_out, spooled_resource_t *, spooled) { + const uint8_t *body = NULL; + size_t bodylen = 0; + int r = spooled_resource_lookup_body(spooled, conn_is_encrypted, + &body, &bodylen, NULL); + if (r < 0 || body == NULL || bodylen == 0) { + SMARTLIST_DEL_CURRENT(spool_out, spooled); + spooled_resource_free(spooled); + } + } SMARTLIST_FOREACH_END(spooled); + } + + if (!smartlist_len(spool_out)) { + *msg_out = "Servers unavailable"; + return -1; + } + return 0; +} + +/** Add a signed_descriptor_t to <b>descs_out</b> for each router matching + * <b>key</b>. The key should be either + * - "/tor/server/authority" for our own routerinfo; + * - "/tor/server/all" for all the routerinfos we have, concatenated; + * - "/tor/server/fp/FP" where FP is a plus-separated sequence of + * hex identity digests; or + * - "/tor/server/d/D" where D is a plus-separated sequence + * of server descriptor digests, in hex. + * + * Return 0 if we found some matching descriptors, or -1 if we do not + * have any descriptors, no matching descriptors, or if we did not + * recognize the key (URL). + * If -1 is returned *<b>msg</b> will be set to an appropriate error + * message. + * + * XXXX rename this function. It's only called from the controller. + * XXXX in fact, refactor this function, merging as much as possible. + */ +int +dirserv_get_routerdescs(smartlist_t *descs_out, const char *key, + const char **msg) +{ + *msg = NULL; + + if (!strcmp(key, "/tor/server/all")) { + routerlist_t *rl = router_get_routerlist(); + SMARTLIST_FOREACH(rl->routers, routerinfo_t *, r, + smartlist_add(descs_out, &(r->cache_info))); + } else if (!strcmp(key, "/tor/server/authority")) { + const routerinfo_t *ri = router_get_my_routerinfo(); + if (ri) + smartlist_add(descs_out, (void*) &(ri->cache_info)); + } else if (!strcmpstart(key, "/tor/server/d/")) { + smartlist_t *digests = smartlist_new(); + key += strlen("/tor/server/d/"); + dir_split_resource_into_fingerprints(key, digests, NULL, + DSR_HEX|DSR_SORT_UNIQ); + SMARTLIST_FOREACH(digests, const char *, d, + { + signed_descriptor_t *sd = router_get_by_descriptor_digest(d); + if (sd) + smartlist_add(descs_out,sd); + }); + SMARTLIST_FOREACH(digests, char *, d, tor_free(d)); + smartlist_free(digests); + } else if (!strcmpstart(key, "/tor/server/fp/")) { + smartlist_t *digests = smartlist_new(); + time_t cutoff = time(NULL) - ROUTER_MAX_AGE_TO_PUBLISH; + key += strlen("/tor/server/fp/"); + dir_split_resource_into_fingerprints(key, digests, NULL, + DSR_HEX|DSR_SORT_UNIQ); + SMARTLIST_FOREACH_BEGIN(digests, const char *, d) { + if (router_digest_is_me(d)) { + /* calling router_get_my_routerinfo() to make sure it exists */ + const routerinfo_t *ri = router_get_my_routerinfo(); + if (ri) + smartlist_add(descs_out, (void*) &(ri->cache_info)); + } else { + const routerinfo_t *ri = router_get_by_id_digest(d); + /* Don't actually serve a descriptor that everyone will think is + * expired. This is an (ugly) workaround to keep buggy 0.1.1.10 + * Tors from downloading descriptors that they will throw away. + */ + if (ri && ri->cache_info.published_on > cutoff) + smartlist_add(descs_out, (void*) &(ri->cache_info)); + } + } SMARTLIST_FOREACH_END(d); + SMARTLIST_FOREACH(digests, char *, d, tor_free(d)); + smartlist_free(digests); + } else { + *msg = "Key not recognized"; + return -1; + } + + if (!smartlist_len(descs_out)) { + *msg = "Servers unavailable"; + return -1; + } + return 0; +} + +/** Called when a TLS handshake has completed successfully with a + * router listening at <b>address</b>:<b>or_port</b>, and has yielded + * a certificate with digest <b>digest_rcvd</b>. + * + * Inform the reachability checker that we could get to this relay. + */ +void +dirserv_orconn_tls_done(const tor_addr_t *addr, + uint16_t or_port, + const char *digest_rcvd, + const ed25519_public_key_t *ed_id_rcvd) +{ + node_t *node = NULL; + tor_addr_port_t orport; + routerinfo_t *ri = NULL; + time_t now = time(NULL); + tor_assert(addr); + tor_assert(digest_rcvd); + + node = node_get_mutable_by_id(digest_rcvd); + if (node == NULL || node->ri == NULL) + return; + + ri = node->ri; + + if (get_options()->AuthDirTestEd25519LinkKeys && + node_supports_ed25519_link_authentication(node, 1) && + ri->cache_info.signing_key_cert) { + /* We allow the node to have an ed25519 key if we haven't been told one in + * the routerinfo, but if we *HAVE* been told one in the routerinfo, it + * needs to match. */ + const ed25519_public_key_t *expected_id = + &ri->cache_info.signing_key_cert->signing_key; + tor_assert(!ed25519_public_key_is_zero(expected_id)); + if (! ed_id_rcvd || ! ed25519_pubkey_eq(ed_id_rcvd, expected_id)) { + log_info(LD_DIRSERV, "Router at %s:%d with RSA ID %s " + "did not present expected Ed25519 ID.", + fmt_addr(addr), or_port, hex_str(digest_rcvd, DIGEST_LEN)); + return; /* Don't mark it as reachable. */ + } + } + + tor_addr_copy(&orport.addr, addr); + orport.port = or_port; + if (router_has_orport(ri, &orport)) { + /* Found the right router. */ + if (!authdir_mode_bridge(get_options()) || + ri->purpose == ROUTER_PURPOSE_BRIDGE) { + char addrstr[TOR_ADDR_BUF_LEN]; + /* This is a bridge or we're not a bridge authority -- + mark it as reachable. */ + log_info(LD_DIRSERV, "Found router %s to be reachable at %s:%d. Yay.", + router_describe(ri), + tor_addr_to_str(addrstr, addr, sizeof(addrstr), 1), + ri->or_port); + if (tor_addr_family(addr) == AF_INET) { + rep_hist_note_router_reachable(digest_rcvd, addr, or_port, now); + node->last_reachable = now; + } else if (tor_addr_family(addr) == AF_INET6) { + /* No rephist for IPv6. */ + node->last_reachable6 = now; + } + } + } +} + +/** Called when we, as an authority, receive a new router descriptor either as + * an upload or a download. Used to decide whether to relaunch reachability + * testing for the server. */ +int +dirserv_should_launch_reachability_test(const routerinfo_t *ri, + const routerinfo_t *ri_old) +{ + if (!authdir_mode_handles_descs(get_options(), ri->purpose)) + return 0; + if (!ri_old) { + /* New router: Launch an immediate reachability test, so we will have an + * opinion soon in case we're generating a consensus soon */ + return 1; + } + if (ri_old->is_hibernating && !ri->is_hibernating) { + /* It just came out of hibernation; launch a reachability test */ + return 1; + } + if (! routers_have_same_or_addrs(ri, ri_old)) { + /* Address or port changed; launch a reachability test */ + return 1; + } + return 0; +} + +/** Helper function for dirserv_test_reachability(). Start a TLS + * connection to <b>router</b>, and annotate it with when we started + * the test. */ +void +dirserv_single_reachability_test(time_t now, routerinfo_t *router) +{ + const or_options_t *options = get_options(); + channel_t *chan = NULL; + const node_t *node = NULL; + tor_addr_t router_addr; + const ed25519_public_key_t *ed_id_key; + (void) now; + + tor_assert(router); + node = node_get_by_id(router->cache_info.identity_digest); + tor_assert(node); + + if (options->AuthDirTestEd25519LinkKeys && + node_supports_ed25519_link_authentication(node, 1) && + router->cache_info.signing_key_cert) { + ed_id_key = &router->cache_info.signing_key_cert->signing_key; + } else { + ed_id_key = NULL; + } + + /* IPv4. */ + log_debug(LD_OR,"Testing reachability of %s at %s:%u.", + router->nickname, fmt_addr32(router->addr), router->or_port); + tor_addr_from_ipv4h(&router_addr, router->addr); + chan = channel_tls_connect(&router_addr, router->or_port, + router->cache_info.identity_digest, + ed_id_key); + if (chan) command_setup_channel(chan); + + /* Possible IPv6. */ + if (get_options()->AuthDirHasIPv6Connectivity == 1 && + !tor_addr_is_null(&router->ipv6_addr)) { + char addrstr[TOR_ADDR_BUF_LEN]; + log_debug(LD_OR, "Testing reachability of %s at %s:%u.", + router->nickname, + tor_addr_to_str(addrstr, &router->ipv6_addr, sizeof(addrstr), 1), + router->ipv6_orport); + chan = channel_tls_connect(&router->ipv6_addr, router->ipv6_orport, + router->cache_info.identity_digest, + ed_id_key); + if (chan) command_setup_channel(chan); + } +} + +/** Auth dir server only: load balance such that we only + * try a few connections per call. + * + * The load balancing is such that if we get called once every ten + * seconds, we will cycle through all the tests in + * REACHABILITY_TEST_CYCLE_PERIOD seconds (a bit over 20 minutes). + */ +void +dirserv_test_reachability(time_t now) +{ + /* XXX decide what to do here; see or-talk thread "purging old router + * information, revocation." -NM + * We can't afford to mess with this in 0.1.2.x. The reason is that + * if we stop doing reachability tests on some of routerlist, then + * we'll for-sure think they're down, which may have unexpected + * effects in other parts of the code. It doesn't hurt much to do + * the testing, and directory authorities are easy to upgrade. Let's + * wait til 0.2.0. -RD */ +// time_t cutoff = now - ROUTER_MAX_AGE_TO_PUBLISH; + routerlist_t *rl = router_get_routerlist(); + static char ctr = 0; + int bridge_auth = authdir_mode_bridge(get_options()); + + SMARTLIST_FOREACH_BEGIN(rl->routers, routerinfo_t *, router) { + const char *id_digest = router->cache_info.identity_digest; + if (router_is_me(router)) + continue; + if (bridge_auth && router->purpose != ROUTER_PURPOSE_BRIDGE) + continue; /* bridge authorities only test reachability on bridges */ +// if (router->cache_info.published_on > cutoff) +// continue; + if ((((uint8_t)id_digest[0]) % REACHABILITY_MODULO_PER_TEST) == ctr) { + dirserv_single_reachability_test(now, router); + } + } SMARTLIST_FOREACH_END(router); + ctr = (ctr + 1) % REACHABILITY_MODULO_PER_TEST; /* increment ctr */ +} + +/* ========== + * Spooling code. + * ========== */ + +spooled_resource_t * +spooled_resource_new(dir_spool_source_t source, + const uint8_t *digest, size_t digestlen) +{ + spooled_resource_t *spooled = tor_malloc_zero(sizeof(spooled_resource_t)); + spooled->spool_source = source; + switch (source) { + case DIR_SPOOL_NETWORKSTATUS: + spooled->spool_eagerly = 0; + break; + case DIR_SPOOL_SERVER_BY_DIGEST: + case DIR_SPOOL_SERVER_BY_FP: + case DIR_SPOOL_EXTRA_BY_DIGEST: + case DIR_SPOOL_EXTRA_BY_FP: + case DIR_SPOOL_MICRODESC: + default: + spooled->spool_eagerly = 1; + break; + case DIR_SPOOL_CONSENSUS_CACHE_ENTRY: + tor_assert_unreached(); + break; + } + tor_assert(digestlen <= sizeof(spooled->digest)); + if (digest) + memcpy(spooled->digest, digest, digestlen); + return spooled; +} + +/** + * Create a new spooled_resource_t to spool the contents of <b>entry</b> to + * the user. Return the spooled object on success, or NULL on failure (which + * is probably caused by a failure to map the body of the item from disk). + * + * Adds a reference to entry's reference counter. + */ +spooled_resource_t * +spooled_resource_new_from_cache_entry(consensus_cache_entry_t *entry) +{ + spooled_resource_t *spooled = tor_malloc_zero(sizeof(spooled_resource_t)); + spooled->spool_source = DIR_SPOOL_CONSENSUS_CACHE_ENTRY; + spooled->spool_eagerly = 0; + consensus_cache_entry_incref(entry); + spooled->consensus_cache_entry = entry; + + int r = consensus_cache_entry_get_body(entry, + &spooled->cce_body, + &spooled->cce_len); + if (r == 0) { + return spooled; + } else { + spooled_resource_free(spooled); + return NULL; + } +} + +/** Release all storage held by <b>spooled</b>. */ +void +spooled_resource_free_(spooled_resource_t *spooled) +{ + if (spooled == NULL) + return; + + if (spooled->cached_dir_ref) { + cached_dir_decref(spooled->cached_dir_ref); + } + + if (spooled->consensus_cache_entry) { + consensus_cache_entry_decref(spooled->consensus_cache_entry); + } + + tor_free(spooled); +} + +/** When spooling data from a cached_dir_t object, we always add + * at least this much. */ +#define DIRSERV_CACHED_DIR_CHUNK_SIZE 8192 + +/** Return an compression ratio for compressing objects from <b>source</b>. + */ +static double +estimate_compression_ratio(dir_spool_source_t source) +{ + /* We should put in better estimates here, depending on the number of + objects and their type */ + (void) source; + return 0.5; +} + +/** Return an estimated number of bytes needed for transmitting the + * resource in <b>spooled</b> on <b>conn</b> + * + * As a convenient side-effect, set *<b>published_out</b> to the resource's + * publication time. + */ +static size_t +spooled_resource_estimate_size(const spooled_resource_t *spooled, + dir_connection_t *conn, + int compressed, + time_t *published_out) +{ + if (spooled->spool_eagerly) { + const uint8_t *body = NULL; + size_t bodylen = 0; + int r = spooled_resource_lookup_body(spooled, + connection_dir_is_encrypted(conn), + &body, &bodylen, + published_out); + if (r == -1 || body == NULL || bodylen == 0) + return 0; + if (compressed) { + double ratio = estimate_compression_ratio(spooled->spool_source); + bodylen = (size_t)(bodylen * ratio); + } + return bodylen; + } else { + cached_dir_t *cached; + if (spooled->consensus_cache_entry) { + if (published_out) { + consensus_cache_entry_get_valid_after( + spooled->consensus_cache_entry, published_out); + } + + return spooled->cce_len; + } + if (spooled->cached_dir_ref) { + cached = spooled->cached_dir_ref; + } else { + cached = spooled_resource_lookup_cached_dir(spooled, + published_out); + } + if (cached == NULL) { + return 0; + } + size_t result = compressed ? cached->dir_compressed_len : cached->dir_len; + return result; + } +} + +/** Return code for spooled_resource_flush_some */ +typedef enum { + SRFS_ERR = -1, + SRFS_MORE = 0, + SRFS_DONE +} spooled_resource_flush_status_t; + +/** Flush some or all of the bytes from <b>spooled</b> onto <b>conn</b>. + * Return SRFS_ERR on error, SRFS_MORE if there are more bytes to flush from + * this spooled resource, or SRFS_DONE if we are done flushing this spooled + * resource. + */ +static spooled_resource_flush_status_t +spooled_resource_flush_some(spooled_resource_t *spooled, + dir_connection_t *conn) +{ + if (spooled->spool_eagerly) { + /* Spool_eagerly resources are sent all-at-once. */ + const uint8_t *body = NULL; + size_t bodylen = 0; + int r = spooled_resource_lookup_body(spooled, + connection_dir_is_encrypted(conn), + &body, &bodylen, NULL); + if (r == -1 || body == NULL || bodylen == 0) { + /* Absent objects count as "done". */ + return SRFS_DONE; + } + if (conn->compress_state) { + connection_buf_add_compress((const char*)body, bodylen, conn, 0); + } else { + connection_buf_add((const char*)body, bodylen, TO_CONN(conn)); + } + return SRFS_DONE; + } else { + cached_dir_t *cached = spooled->cached_dir_ref; + consensus_cache_entry_t *cce = spooled->consensus_cache_entry; + if (cached == NULL && cce == NULL) { + /* The cached_dir_t hasn't been materialized yet. So let's look it up. */ + cached = spooled->cached_dir_ref = + spooled_resource_lookup_cached_dir(spooled, NULL); + if (!cached) { + /* Absent objects count as done. */ + return SRFS_DONE; + } + ++cached->refcnt; + tor_assert_nonfatal(spooled->cached_dir_offset == 0); + } + + if (BUG(!cached && !cce)) + return SRFS_DONE; + + int64_t total_len; + const char *ptr; + if (cached) { + total_len = cached->dir_compressed_len; + ptr = cached->dir_compressed; + } else { + total_len = spooled->cce_len; + ptr = (const char *)spooled->cce_body; + } + /* How many bytes left to flush? */ + int64_t remaining; + remaining = total_len - spooled->cached_dir_offset; + if (BUG(remaining < 0)) + return SRFS_ERR; + ssize_t bytes = (ssize_t) MIN(DIRSERV_CACHED_DIR_CHUNK_SIZE, remaining); + if (conn->compress_state) { + connection_buf_add_compress( + ptr + spooled->cached_dir_offset, + bytes, conn, 0); + } else { + connection_buf_add(ptr + spooled->cached_dir_offset, + bytes, TO_CONN(conn)); + } + spooled->cached_dir_offset += bytes; + if (spooled->cached_dir_offset >= (off_t)total_len) { + return SRFS_DONE; + } else { + return SRFS_MORE; + } + } +} + +/** Helper: find the cached_dir_t for a spooled_resource_t, for + * sending it to <b>conn</b>. Set *<b>published_out</b>, if provided, + * to the published time of the cached_dir_t. + * + * DOES NOT increase the reference count on the result. Callers must do that + * themselves if they mean to hang on to it. + */ +static cached_dir_t * +spooled_resource_lookup_cached_dir(const spooled_resource_t *spooled, + time_t *published_out) +{ + tor_assert(spooled->spool_eagerly == 0); + cached_dir_t *d = lookup_cached_dir_by_fp(spooled->digest); + if (d != NULL) { + if (published_out) + *published_out = d->published; + } + return d; +} + +/** Helper: Look up the body for an eagerly-served spooled_resource. If + * <b>conn_is_encrypted</b> is false, don't look up any resource that + * shouldn't be sent over an unencrypted connection. On success, set + * <b>body_out</b>, <b>size_out</b>, and <b>published_out</b> to refer + * to the resource's body, size, and publication date, and return 0. + * On failure return -1. */ +static int +spooled_resource_lookup_body(const spooled_resource_t *spooled, + int conn_is_encrypted, + const uint8_t **body_out, + size_t *size_out, + time_t *published_out) +{ + tor_assert(spooled->spool_eagerly == 1); + + const signed_descriptor_t *sd = NULL; + + switch (spooled->spool_source) { + case DIR_SPOOL_EXTRA_BY_FP: { + sd = get_signed_descriptor_by_fp(spooled->digest, 1); + break; + } + case DIR_SPOOL_SERVER_BY_FP: { + sd = get_signed_descriptor_by_fp(spooled->digest, 0); + break; + } + case DIR_SPOOL_SERVER_BY_DIGEST: { + sd = router_get_by_descriptor_digest((const char *)spooled->digest); + break; + } + case DIR_SPOOL_EXTRA_BY_DIGEST: { + sd = extrainfo_get_by_descriptor_digest((const char *)spooled->digest); + break; + } + case DIR_SPOOL_MICRODESC: { + microdesc_t *md = microdesc_cache_lookup_by_digest256( + get_microdesc_cache(), + (const char *)spooled->digest); + if (! md || ! md->body) { + return -1; + } + *body_out = (const uint8_t *)md->body; + *size_out = md->bodylen; + if (published_out) + *published_out = TIME_MAX; + return 0; + } + case DIR_SPOOL_NETWORKSTATUS: + case DIR_SPOOL_CONSENSUS_CACHE_ENTRY: + default: + /* LCOV_EXCL_START */ + tor_assert_nonfatal_unreached(); + return -1; + /* LCOV_EXCL_STOP */ + } + + /* If we get here, then we tried to set "sd" to a signed_descriptor_t. */ + + if (sd == NULL) { + return -1; + } + if (sd->send_unencrypted == 0 && ! conn_is_encrypted) { + /* we did this check once before (so we could have an accurate size + * estimate and maybe send a 404 if somebody asked for only bridges on + * a connection), but we need to do it again in case a previously + * unknown bridge descriptor has shown up between then and now. */ + return -1; + } + *body_out = (const uint8_t *) signed_descriptor_get_body(sd); + *size_out = sd->signed_descriptor_len; + if (published_out) + *published_out = sd->published_on; + return 0; +} + +/** Given a fingerprint <b>fp</b> which is either set if we're looking for a + * v2 status, or zeroes if we're looking for a v3 status, or a NUL-padded + * flavor name if we want a flavored v3 status, return a pointer to the + * appropriate cached dir object, or NULL if there isn't one available. */ +static cached_dir_t * +lookup_cached_dir_by_fp(const uint8_t *fp) +{ + cached_dir_t *d = NULL; + if (tor_digest_is_zero((const char *)fp) && cached_consensuses) { + d = strmap_get(cached_consensuses, "ns"); + } else if (memchr(fp, '\0', DIGEST_LEN) && cached_consensuses) { + /* this here interface is a nasty hack: we're shoving a flavor into + * a digest field. */ + d = strmap_get(cached_consensuses, (const char *)fp); + } + return d; +} + +/** Try to guess the number of bytes that will be needed to send the + * spooled objects for <b>conn</b>'s outgoing spool. In the process, + * remove every element of the spool that refers to an absent object, or + * which was published earlier than <b>cutoff</b>. Set *<b>size_out</b> + * to the number of bytes, and *<b>n_expired_out</b> to the number of + * objects removed for being too old. */ +void +dirserv_spool_remove_missing_and_guess_size(dir_connection_t *conn, + time_t cutoff, + int compression, + size_t *size_out, + int *n_expired_out) +{ + if (BUG(!conn)) + return; + + smartlist_t *spool = conn->spool; + if (!spool) { + if (size_out) + *size_out = 0; + if (n_expired_out) + *n_expired_out = 0; + return; + } + int n_expired = 0; + uint64_t total = 0; + SMARTLIST_FOREACH_BEGIN(spool, spooled_resource_t *, spooled) { + time_t published = TIME_MAX; + size_t sz = spooled_resource_estimate_size(spooled, conn, + compression, &published); + if (published < cutoff) { + ++n_expired; + SMARTLIST_DEL_CURRENT(spool, spooled); + spooled_resource_free(spooled); + } else if (sz == 0) { + SMARTLIST_DEL_CURRENT(spool, spooled); + spooled_resource_free(spooled); + } else { + total += sz; + } + } SMARTLIST_FOREACH_END(spooled); + + if (size_out) { + *size_out = (total > SIZE_MAX) ? SIZE_MAX : (size_t)total; + } + if (n_expired_out) + *n_expired_out = n_expired; +} + +/** Helper: used to sort a connection's spool. */ +static int +dirserv_spool_sort_comparison_(const void **a_, const void **b_) +{ + const spooled_resource_t *a = *a_; + const spooled_resource_t *b = *b_; + return fast_memcmp(a->digest, b->digest, sizeof(a->digest)); +} + +/** Sort all the entries in <b>conn</b> by digest. */ +void +dirserv_spool_sort(dir_connection_t *conn) +{ + if (conn->spool == NULL) + return; + smartlist_sort(conn->spool, dirserv_spool_sort_comparison_); +} + +/** Return the cache-info for identity fingerprint <b>fp</b>, or + * its extra-info document if <b>extrainfo</b> is true. Return + * NULL if not found or if the descriptor is older than + * <b>publish_cutoff</b>. */ +static const signed_descriptor_t * +get_signed_descriptor_by_fp(const uint8_t *fp, int extrainfo) +{ + if (router_digest_is_me((const char *)fp)) { + if (extrainfo) + return &(router_get_my_extrainfo()->cache_info); + else + return &(router_get_my_routerinfo()->cache_info); + } else { + const routerinfo_t *ri = router_get_by_id_digest((const char *)fp); + if (ri) { + if (extrainfo) + return extrainfo_get_by_descriptor_digest( + ri->cache_info.extra_info_digest); + else + return &ri->cache_info; + } + } + return NULL; +} + +/** When we're spooling data onto our outbuf, add more whenever we dip + * below this threshold. */ +#define DIRSERV_BUFFER_MIN 16384 + +/** + * Called whenever we have flushed some directory data in state + * SERVER_WRITING, or whenever we want to fill the buffer with initial + * directory data (so that subsequent writes will occur, and trigger this + * function again.) + * + * Return 0 on success, and -1 on failure. + */ +int +connection_dirserv_flushed_some(dir_connection_t *conn) +{ + tor_assert(conn->base_.state == DIR_CONN_STATE_SERVER_WRITING); + if (conn->spool == NULL) + return 0; + + while (connection_get_outbuf_len(TO_CONN(conn)) < DIRSERV_BUFFER_MIN && + smartlist_len(conn->spool)) { + spooled_resource_t *spooled = + smartlist_get(conn->spool, smartlist_len(conn->spool)-1); + spooled_resource_flush_status_t status; + status = spooled_resource_flush_some(spooled, conn); + if (status == SRFS_ERR) { + return -1; + } else if (status == SRFS_MORE) { + return 0; + } + tor_assert(status == SRFS_DONE); + + /* If we're here, we're done flushing this resource. */ + tor_assert(smartlist_pop_last(conn->spool) == spooled); + spooled_resource_free(spooled); + } + + if (smartlist_len(conn->spool) > 0) { + /* We're still spooling something. */ + return 0; + } + + /* If we get here, we're done. */ + smartlist_free(conn->spool); + conn->spool = NULL; + if (conn->compress_state) { + /* Flush the compression state: there could be more bytes pending in there, + * and we don't want to omit bytes. */ + connection_buf_add_compress("", 0, conn, 1); + tor_compress_free(conn->compress_state); + conn->compress_state = NULL; + } + return 0; +} + +/** Remove every element from <b>conn</b>'s outgoing spool, and delete + * the spool. */ +void +dir_conn_clear_spool(dir_connection_t *conn) +{ + if (!conn || ! conn->spool) + return; + SMARTLIST_FOREACH(conn->spool, spooled_resource_t *, s, + spooled_resource_free(s)); + smartlist_free(conn->spool); + conn->spool = NULL; +} + +/** Return true iff <b>line</b> is a valid RecommendedPackages line. + */ +/* + The grammar is: + + "package" SP PACKAGENAME SP VERSION SP URL SP DIGESTS NL + + PACKAGENAME = NONSPACE + VERSION = NONSPACE + URL = NONSPACE + DIGESTS = DIGEST | DIGESTS SP DIGEST + DIGEST = DIGESTTYPE "=" DIGESTVAL + + NONSPACE = one or more non-space printing characters + + DIGESTVAL = DIGESTTYPE = one or more non-=, non-" " characters. + + SP = " " + NL = a newline + + */ +int +validate_recommended_package_line(const char *line) +{ + const char *cp = line; + +#define WORD() \ + do { \ + if (*cp == ' ') \ + return 0; \ + cp = strchr(cp, ' '); \ + if (!cp) \ + return 0; \ + } while (0) + + WORD(); /* skip packagename */ + ++cp; + WORD(); /* skip version */ + ++cp; + WORD(); /* Skip URL */ + ++cp; + + /* Skip digesttype=digestval + */ + int n_entries = 0; + while (1) { + const char *start_of_word = cp; + const char *end_of_word = strchr(cp, ' '); + if (! end_of_word) + end_of_word = cp + strlen(cp); + + if (start_of_word == end_of_word) + return 0; + + const char *eq = memchr(start_of_word, '=', end_of_word - start_of_word); + + if (!eq) + return 0; + if (eq == start_of_word) + return 0; + if (eq == end_of_word - 1) + return 0; + if (memchr(eq+1, '=', end_of_word - (eq+1))) + return 0; + + ++n_entries; + if (0 == *end_of_word) + break; + + cp = end_of_word + 1; + } + + /* If we reach this point, we have at least 1 entry. */ + tor_assert(n_entries > 0); + return 1; +} + +/** Release all storage used by the directory server. */ +void +dirserv_free_all(void) +{ + dirserv_free_fingerprint_list(); + + strmap_free(cached_consensuses, free_cached_dir_); + cached_consensuses = NULL; + + dirserv_clear_measured_bw_cache(); +} diff --git a/src/feature/dircache/dirserv.h b/src/feature/dircache/dirserv.h new file mode 100644 index 0000000000..3b4a646094 --- /dev/null +++ b/src/feature/dircache/dirserv.h @@ -0,0 +1,239 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file dirserv.h + * \brief Header file for dirserv.c. + **/ + +#ifndef TOR_DIRSERV_H +#define TOR_DIRSERV_H + +struct ed25519_public_key_t; + +#include "lib/testsupport/testsupport.h" + +/** An enum to describe what format we're generating a routerstatus line in. + */ +typedef enum { + /** For use in a v2 opinion */ + NS_V2, + /** For use in a consensus networkstatus document (ns flavor) */ + NS_V3_CONSENSUS, + /** For use in a vote networkstatus document */ + NS_V3_VOTE, + /** For passing to the controlport in response to a GETINFO request */ + NS_CONTROL_PORT, + /** For use in a consensus networkstatus document (microdesc flavor) */ + NS_V3_CONSENSUS_MICRODESC +} routerstatus_format_type_t; + +/** What fraction (1 over this number) of the relay ID space do we + * (as a directory authority) launch connections to at each reachability + * test? */ +#define REACHABILITY_MODULO_PER_TEST 128 + +/** How often (in seconds) do we launch reachability tests? */ +#define REACHABILITY_TEST_INTERVAL 10 + +/** How many seconds apart are the reachability tests for a given relay? */ +#define REACHABILITY_TEST_CYCLE_PERIOD \ + (REACHABILITY_TEST_INTERVAL*REACHABILITY_MODULO_PER_TEST) + +/** Maximum length of an exit policy summary. */ +#define MAX_EXITPOLICY_SUMMARY_LEN 1000 + +/** Maximum allowable length of a version line in a networkstatus. */ +#define MAX_V_LINE_LEN 128 + +/** Ways to convert a spoolable_resource_t to a bunch of bytes. */ +typedef enum dir_spool_source_t { + DIR_SPOOL_SERVER_BY_DIGEST=1, DIR_SPOOL_SERVER_BY_FP, + DIR_SPOOL_EXTRA_BY_DIGEST, DIR_SPOOL_EXTRA_BY_FP, + DIR_SPOOL_MICRODESC, + DIR_SPOOL_NETWORKSTATUS, + DIR_SPOOL_CONSENSUS_CACHE_ENTRY, +} dir_spool_source_t; +#define dir_spool_source_bitfield_t ENUM_BF(dir_spool_source_t) + +/** Object to remember the identity of an object that we are spooling, + * or about to spool, in response to a directory request. + * + * (Why do we spool? Because some directory responses are very large, + * and we don't want to just shove the complete answer into the output + * buffer: that would take a ridiculous amount of RAM.) + * + * If the spooled resource is relatively small (like microdescriptors, + * descriptors, etc), we look them up by ID as needed, and add the whole + * thing onto the output buffer at once. If the spooled reseource is + * big (like networkstatus documents), we reference-count it, and add it + * a few K at a time. + */ +typedef struct spooled_resource_t { + /** + * If true, we add the entire object to the outbuf. If false, + * we spool the object a few K at a time. + */ + unsigned spool_eagerly : 1; + /** + * Tells us what kind of object to get, and how to look it up. + */ + dir_spool_source_bitfield_t spool_source : 7; + /** + * Tells us the specific object to spool. + */ + uint8_t digest[DIGEST256_LEN]; + /** + * A large object that we're spooling. Holds a reference count. Only + * used when spool_eagerly is false. + */ + struct cached_dir_t *cached_dir_ref; + /** + * A different kind of large object that we might be spooling. Also + * reference-counted. Also only used when spool_eagerly is false. + */ + struct consensus_cache_entry_t *consensus_cache_entry; + const uint8_t *cce_body; + size_t cce_len; + /** + * The current offset into cached_dir or cce_body. Only used when + * spool_eagerly is false */ + off_t cached_dir_offset; +} spooled_resource_t; + +#ifdef DIRSERV_PRIVATE +typedef struct measured_bw_line_t { + char node_id[DIGEST_LEN]; + char node_hex[MAX_HEX_NICKNAME_LEN+1]; + long int bw_kb; +} measured_bw_line_t; +#endif /* defined(DIRSERV_PRIVATE) */ + +int connection_dirserv_flushed_some(dir_connection_t *conn); + +int dirserv_add_own_fingerprint(crypto_pk_t *pk); +int dirserv_load_fingerprint_file(void); +void dirserv_free_fingerprint_list(void); +enum was_router_added_t dirserv_add_multiple_descriptors( + const char *desc, uint8_t purpose, + const char *source, + const char **msg); +enum was_router_added_t dirserv_add_descriptor(routerinfo_t *ri, + const char **msg, + const char *source); +void dirserv_set_router_is_running(routerinfo_t *router, time_t now); +int list_server_status_v1(smartlist_t *routers, char **router_status_out, + int for_controller); +char *dirserv_get_flag_thresholds_line(void); +void dirserv_compute_bridge_flag_thresholds(void); + +int directory_fetches_from_authorities(const or_options_t *options); +int directory_fetches_dir_info_early(const or_options_t *options); +int directory_fetches_dir_info_later(const or_options_t *options); +int directory_caches_unknown_auth_certs(const or_options_t *options); +int directory_caches_dir_info(const or_options_t *options); +int directory_permits_begindir_requests(const or_options_t *options); +int directory_too_idle_to_fetch_descriptors(const or_options_t *options, + time_t now); + +cached_dir_t *dirserv_get_consensus(const char *flavor_name); +void dirserv_set_cached_consensus_networkstatus(const char *consensus, + const char *flavor_name, + const common_digests_t *digests, + const uint8_t *sha3_as_signed, + time_t published); +void dirserv_clear_old_networkstatuses(time_t cutoff); +int dirserv_get_routerdesc_spool(smartlist_t *spools_out, const char *key, + dir_spool_source_t source, + int conn_is_encrypted, + const char **msg_out); +int dirserv_get_routerdescs(smartlist_t *descs_out, const char *key, + const char **msg); +void dirserv_orconn_tls_done(const tor_addr_t *addr, + uint16_t or_port, + const char *digest_rcvd, + const struct ed25519_public_key_t *ed_id_rcvd); +int dirserv_should_launch_reachability_test(const routerinfo_t *ri, + const routerinfo_t *ri_old); +void dirserv_single_reachability_test(time_t now, routerinfo_t *router); +void dirserv_test_reachability(time_t now); +int authdir_wants_to_reject_router(routerinfo_t *ri, const char **msg, + int complain, + int *valid_out); +uint32_t dirserv_router_get_status(const routerinfo_t *router, + const char **msg, + int severity); +void dirserv_set_node_flags_from_authoritative_status(node_t *node, + uint32_t authstatus); + +int dirserv_would_reject_router(const routerstatus_t *rs); +char *routerstatus_format_entry( + const routerstatus_t *rs, + const char *version, + const char *protocols, + routerstatus_format_type_t format, + int consensus_method, + const vote_routerstatus_t *vrs); +void dirserv_free_all(void); +void cached_dir_decref(cached_dir_t *d); +cached_dir_t *new_cached_dir(char *s, time_t published); + +int validate_recommended_package_line(const char *line); +int dirserv_query_measured_bw_cache_kb(const char *node_id, + long *bw_out, + time_t *as_of_out); +void dirserv_clear_measured_bw_cache(void); +int dirserv_has_measured_bw(const char *node_id); +int dirserv_get_measured_bw_cache_size(void); +void dirserv_count_measured_bws(const smartlist_t *routers); +int running_long_enough_to_decide_unreachable(void); +void dirserv_compute_performance_thresholds(digestmap_t *omit_as_sybil); + +#ifdef DIRSERV_PRIVATE + +STATIC void dirserv_set_routerstatus_testing(routerstatus_t *rs); + +/* Put the MAX_MEASUREMENT_AGE #define here so unit tests can see it */ +#define MAX_MEASUREMENT_AGE (3*24*60*60) /* 3 days */ + +STATIC int measured_bw_line_parse(measured_bw_line_t *out, const char *line, + int line_is_after_headers); + +STATIC int measured_bw_line_apply(measured_bw_line_t *parsed_line, + smartlist_t *routerstatuses); + +STATIC void dirserv_cache_measured_bw(const measured_bw_line_t *parsed_line, + time_t as_of); +STATIC void dirserv_expire_measured_bw_cache(time_t now); + +STATIC int +dirserv_read_guardfraction_file_from_str(const char *guardfraction_file_str, + smartlist_t *vote_routerstatuses); +#endif /* defined(DIRSERV_PRIVATE) */ + +int dirserv_read_measured_bandwidths(const char *from_file, + smartlist_t *routerstatuses); + +int dirserv_read_guardfraction_file(const char *fname, + smartlist_t *vote_routerstatuses); + +spooled_resource_t *spooled_resource_new(dir_spool_source_t source, + const uint8_t *digest, + size_t digestlen); +spooled_resource_t *spooled_resource_new_from_cache_entry( + struct consensus_cache_entry_t *entry); +void spooled_resource_free_(spooled_resource_t *spooled); +#define spooled_resource_free(sp) \ + FREE_AND_NULL(spooled_resource_t, spooled_resource_free_, (sp)) +void dirserv_spool_remove_missing_and_guess_size(dir_connection_t *conn, + time_t cutoff, + int compression, + size_t *size_out, + int *n_expired_out); +void dirserv_spool_sort(dir_connection_t *conn); +void dir_conn_clear_spool(dir_connection_t *conn); + +#endif /* !defined(TOR_DIRSERV_H) */ |