summaryrefslogtreecommitdiff
path: root/src/feature/dircache
diff options
context:
space:
mode:
Diffstat (limited to 'src/feature/dircache')
-rw-r--r--src/feature/dircache/cached_dir_st.h25
-rw-r--r--src/feature/dircache/conscache.c627
-rw-r--r--src/feature/dircache/conscache.h66
-rw-r--r--src/feature/dircache/consdiffmgr.c1945
-rw-r--r--src/feature/dircache/consdiffmgr.h75
-rw-r--r--src/feature/dircache/dircache.c1740
-rw-r--r--src/feature/dircache/dircache.h43
-rw-r--r--src/feature/dircache/dirserv.c918
-rw-r--r--src/feature/dircache/dirserv.h119
9 files changed, 5558 insertions, 0 deletions
diff --git a/src/feature/dircache/cached_dir_st.h b/src/feature/dircache/cached_dir_st.h
new file mode 100644
index 0000000000..38ae86d975
--- /dev/null
+++ b/src/feature/dircache/cached_dir_st.h
@@ -0,0 +1,25 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef CACHED_DIR_ST_H
+#define CACHED_DIR_ST_H
+
+/** A cached_dir_t represents a cacheable directory object, along with its
+ * compressed form. */
+struct cached_dir_t {
+ char *dir; /**< Contents of this object, NUL-terminated. */
+ char *dir_compressed; /**< Compressed contents of this object. */
+ size_t dir_len; /**< Length of <b>dir</b> (not counting its NUL). */
+ size_t dir_compressed_len; /**< Length of <b>dir_compressed</b>. */
+ time_t published; /**< When this object was published. */
+ common_digests_t digests; /**< Digests of this object (networkstatus only) */
+ /** SHA3 digest-as-signed of this object (networkstatus only). */
+ uint8_t digest_sha3_as_signed[DIGEST256_LEN];
+ int refcnt; /**< Reference count for this cached_dir_t. */
+};
+
+#endif
+
diff --git a/src/feature/dircache/conscache.c b/src/feature/dircache/conscache.c
new file mode 100644
index 0000000000..e9bf58a180
--- /dev/null
+++ b/src/feature/dircache/conscache.c
@@ -0,0 +1,627 @@
+/* Copyright (c) 2017-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#include "core/or/or.h"
+
+#include "app/config/config.h"
+#include "feature/dircache/conscache.h"
+#include "lib/crypt_ops/crypto_util.h"
+#include "lib/fs/storagedir.h"
+#include "lib/encoding/confline.h"
+
+#define CCE_MAGIC 0x17162253
+
+#ifdef _WIN32
+/* On Windows, unlink won't work on a file if the file is actively mmap()ed.
+ * That forces us to be less aggressive about unlinking files, and causes other
+ * changes throughout our logic.
+ */
+#define MUST_UNMAP_TO_UNLINK
+#endif /* defined(_WIN32) */
+
+/**
+ * A consensus_cache_entry_t is a reference-counted handle to an
+ * item in a consensus_cache_t. It can be mmapped into RAM, or not,
+ * depending whether it's currently in use.
+ */
+struct consensus_cache_entry_t {
+ uint32_t magic; /**< Must be set to CCE_MAGIC */
+ /* Bookkeeping for the consensus_cache_entry_handle_t handles that refer
+ * to this entry. */
+ HANDLE_ENTRY(consensus_cache_entry, consensus_cache_entry_t);
+ int32_t refcnt; /**< Reference count. */
+ unsigned can_remove : 1; /**< If true, we want to delete this file. */
+ /** If true, we intend to unmap this file as soon as we're done with it. */
+ unsigned release_aggressively : 1;
+
+ /** Filename for this object within the storage_dir_t */
+ char *fname;
+ /** Labels associated with this object. Immutable once the object
+ * is created. */
+ config_line_t *labels;
+ /** Pointer to the cache that includes this entry (if any). */
+ consensus_cache_t *in_cache;
+
+ /** Since what time has this object been mapped into RAM while the cache
+ * was the only thing holding a reference to it? Set to TIME_MAX whenever
+ * that is not the case. */
+ time_t unused_since;
+ /** mmaped contents of the underlying file. May be NULL */
+ tor_mmap_t *map;
+ /** Length of the body within <b>map</b>. */
+ size_t bodylen;
+ /** Pointer to the body within <b>map</b>. */
+ const uint8_t *body;
+};
+
+/**
+ * A consensus_cache_t holds a directory full of labeled items.
+ */
+struct consensus_cache_t {
+ /** Underlying storage_dir_t to handle persistence */
+ storage_dir_t *dir;
+ /** List of all the entries in the directory. Each element is a
+ * consensus_cache_entry_t, and the list owns one reference to it. */
+ smartlist_t *entries;
+
+ /** The maximum number of entries that we'd like to allow in this cache.
+ * This is the same as the storagedir limit when MUST_UNMAP_TO_UNLINK is
+ * not defined. */
+ unsigned max_entries;
+};
+
+static void consensus_cache_clear(consensus_cache_t *cache);
+static void consensus_cache_rescan(consensus_cache_t *);
+static void consensus_cache_entry_map(consensus_cache_t *,
+ consensus_cache_entry_t *);
+static void consensus_cache_entry_unmap(consensus_cache_entry_t *ent);
+
+/**
+ * Helper: open the consensus cache kept in the directory that
+ * get_cachedir_fname() returns for <b>subdir</b>, sized to hold up to
+ * <b>max_entries</b> items.
+ *
+ * Returns a newly allocated cache whose entry list has already been
+ * populated by a rescan, or NULL if the storage directory could not be
+ * set up.
+ */
+consensus_cache_t *
+consensus_cache_open(const char *subdir, int max_entries)
+{
+#ifdef MUST_UNMAP_TO_UNLINK
+  /* Files that are still in use cannot be unlinked here, so the storagedir
+   * backend has to tolerate far more files than this cache really wants:
+   * some of them will be ones the cache already considers useless.
+   */
+#define VERY_LARGE_STORAGEDIR_LIMIT (1000*1000)
+  const int dir_limit = VERY_LARGE_STORAGEDIR_LIMIT;
+#else /* !(defined(MUST_UNMAP_TO_UNLINK)) */
+  /* The storagedir can simply share this cache's own limit. */
+  const int dir_limit = max_entries;
+#endif /* defined(MUST_UNMAP_TO_UNLINK) */
+
+  consensus_cache_t *result = tor_malloc_zero(sizeof(consensus_cache_t));
+  char *path = get_cachedir_fname(subdir);
+  result->max_entries = max_entries;
+
+  result->dir = storage_dir_new(path, dir_limit);
+  tor_free(path);
+
+  if (result->dir == NULL) {
+    tor_free(result);
+    return NULL;
+  }
+
+  consensus_cache_rescan(result);
+  return result;
+}
+
+/** Report whether this cache is allowed to hold more entries than its
+ * official file limit: 1 if so, 0 if not.
+ *
+ * (This matters on Windows, where a file that is still in use cannot be
+ * unlinked, so the cache may need to exceed its limit for a while until
+ * the unwanted files become deletable.)
+ */
+int
+consensus_cache_may_overallocate(consensus_cache_t *cache)
+{
+  (void) cache;
+#ifdef MUST_UNMAP_TO_UNLINK
+  const int may_exceed_limit = 1;
+#else
+  const int may_exceed_limit = 0;
+#endif
+  return may_exceed_limit;
+}
+
+/**
+ * Ask the sandbox (if any) described by <b>cfg</b> to permit the operations
+ * that <b>cache</b> needs. The result is whatever
+ * storage_dir_register_with_sandbox() returns for the cache's directory.
+ */
+int
+consensus_cache_register_with_sandbox(consensus_cache_t *cache,
+                                      struct sandbox_cfg_elem **cfg)
+{
+#ifdef MUST_UNMAP_TO_UNLINK
+  /* The Linux sandbox cannot cope with a file list as large as the one
+   * implied by VERY_LARGE_STORAGEDIR_LIMIT in consensus_cache_open(). It is
+   * currently the only sandbox we have, so we assert that this point is
+   * never reached in a build that had to use that limit.
+   *
+   * Should a future sandbox mechanism handle huge file lists, this
+   * assertion can be removed or made conditional.
+   */
+  tor_assert_nonfatal_unreached();
+#endif /* defined(MUST_UNMAP_TO_UNLINK) */
+  const int rv = storage_dir_register_with_sandbox(cache->dir, cfg);
+  return rv;
+}
+
+/**
+ * Helper: drop every entry from <b>cache</b> (without deleting the files of
+ * any entries that are not marked for removal).
+ *
+ * Entries marked for removal and not otherwise in use are deleted first;
+ * every remaining entry is detached from the cache and the cache's
+ * reference to it is released. Afterwards cache->entries is NULL.
+ */
+static void
+consensus_cache_clear(consensus_cache_t *cache)
+{
+  consensus_cache_delete_pending(cache, 0);
+
+  const int n_entries = smartlist_len(cache->entries);
+  for (int idx = 0; idx < n_entries; ++idx) {
+    consensus_cache_entry_t *entry = smartlist_get(cache->entries, idx);
+    entry->in_cache = NULL;
+    consensus_cache_entry_decref(entry);
+  }
+
+  smartlist_free(cache->entries);
+  cache->entries = NULL;
+}
+
+/**
+ * Release all storage held by <b>cache</b>. A NULL <b>cache</b> is ignored.
+ */
+void
+consensus_cache_free_(consensus_cache_t *cache)
+{
+  if (cache == NULL)
+    return;
+
+  if (cache->entries != NULL)
+    consensus_cache_clear(cache);
+
+  storage_dir_free(cache->dir);
+  tor_free(cache);
+}
+
+/**
+ * Store the <b>datalen</b> bytes at <b>data</b> in <b>cache</b>, tagged
+ * with <b>labels</b>. Returns a newly created consensus_cache_entry_t on
+ * success, and NULL on failure.
+ *
+ * The cache owns the returned entry, and the caller receives a reference
+ * to it: call consensus_cache_entry_decref() once finished with it.
+ *
+ * The keys in <b>labels</b> MUST be distinct: if they are not, this API
+ * makes no promise about which values (if any) for the duplicated keys
+ * will be considered.
+ */
+consensus_cache_entry_t *
+consensus_cache_add(consensus_cache_t *cache,
+                    const config_line_t *labels,
+                    const uint8_t *data,
+                    size_t datalen)
+{
+  char *saved_name = NULL;
+  const int rv = storage_dir_save_labeled_to_file(cache->dir, labels,
+                                                  data, datalen,
+                                                  &saved_name);
+  if (rv < 0 || !saved_name)
+    return NULL;
+
+  consensus_cache_entry_t *entry =
+    tor_malloc_zero(sizeof(consensus_cache_entry_t));
+  entry->magic = CCE_MAGIC;
+  /* Two references from the start: one for the caller, one for the cache. */
+  entry->refcnt = 2;
+  entry->fname = saved_name;
+  entry->labels = config_lines_dup(labels);
+  entry->in_cache = cache;
+  entry->unused_since = TIME_MAX;
+  smartlist_add(cache->entries, entry);
+
+  return entry;
+}
+
+/**
+ * Look in <b>cache</b> for an entry labelled <b>key</b>=<b>value</b> and
+ * return the first one that consensus_cache_find_all() reports, or NULL
+ * if there is none.
+ *
+ * Reference counts are left untouched.
+ */
+consensus_cache_entry_t *
+consensus_cache_find_first(consensus_cache_t *cache,
+                           const char *key,
+                           const char *value)
+{
+  smartlist_t *matches = smartlist_new();
+  consensus_cache_find_all(matches, cache, key, value);
+
+  consensus_cache_entry_t *first =
+    smartlist_len(matches) ? smartlist_get(matches, 0) : NULL;
+
+  smartlist_free(matches);
+  return first;
+}
+
+/**
+ * Append to <b>out</b> every entry of <b>cache</b> whose labels contain
+ * <b>key</b>=<b>value</b>. When <b>key</b> is NULL, every entry is
+ * appended.
+ *
+ * Entries that have been marked for removal are never appended.
+ *
+ * Reference counts are left untouched.
+ */
+void
+consensus_cache_find_all(smartlist_t *out,
+                         consensus_cache_t *cache,
+                         const char *key,
+                         const char *value)
+{
+  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, entry) {
+    /* Entries awaiting deletion are treated as if they were absent. */
+    if (entry->can_remove)
+      continue;
+
+    int wanted = 1;
+    if (key != NULL) {
+      const char *label_val = consensus_cache_entry_get_value(entry, key);
+      wanted = (label_val != NULL && strcmp(value, label_val) == 0);
+    }
+
+    if (wanted) {
+      smartlist_add(out, entry);
+    }
+  } SMARTLIST_FOREACH_END(entry);
+}
+
+/**
+ * Prune <b>lst</b>, a list of consensus_cache_entry_t, so that only the
+ * entries whose labels contain <b>key</b>=<b>value</b> remain. A NULL
+ * <b>key</b> leaves the list unchanged.
+ *
+ * Reference counts are left untouched.
+ */
+void
+consensus_cache_filter_list(smartlist_t *lst,
+                            const char *key,
+                            const char *value)
+{
+  if (BUG(lst == NULL))
+    return; // LCOV_EXCL_LINE
+  if (!key)
+    return;
+
+  SMARTLIST_FOREACH_BEGIN(lst, consensus_cache_entry_t *, entry) {
+    const char *label_val = consensus_cache_entry_get_value(entry, key);
+    const int keep = (label_val != NULL && strcmp(value, label_val) == 0);
+    if (!keep) {
+      SMARTLIST_DEL_CURRENT(lst, entry);
+    }
+  } SMARTLIST_FOREACH_END(entry);
+}
+
+/**
+ * Return the value of the label named <b>key</b> on <b>ent</b>, or NULL
+ * when <b>ent</b> carries no such label.
+ *
+ * The returned string is only guaranteed to stay valid while you hold a
+ * reference to <b>ent</b>.
+ */
+const char *
+consensus_cache_entry_get_value(const consensus_cache_entry_t *ent,
+                                const char *key)
+{
+  const config_line_t *line = config_line_find(ent->labels, key);
+  return line ? line->value : NULL;
+}
+
+/**
+ * Return a pointer to the labels in <b>ent</b>.
+ *
+ * The list is not copied: the returned pointer refers to the entry's own
+ * label list. It is only guaranteed to be valid for as long as you
+ * hold a reference to <b>ent</b>.
+ */
+const config_line_t *
+consensus_cache_entry_get_labels(const consensus_cache_entry_t *ent)
+{
+ return ent->labels;
+}
+
+/**
+ * Take an additional reference to <b>ent</b>.
+ *
+ * Because the entry is in use again, its unused-since time is reset to
+ * TIME_MAX.
+ */
+void
+consensus_cache_entry_incref(consensus_cache_entry_t *ent)
+{
+  if (BUG(ent->magic != CCE_MAGIC))
+    return; // LCOV_EXCL_LINE
+
+  ent->refcnt += 1;
+  ent->unused_since = TIME_MAX;
+}
+
+/**
+ * Release a reference held to <b>ent</b>.
+ *
+ * If it was the last reference, ent will be freed. Therefore, you must not
+ * use <b>ent</b> after calling this function.
+ *
+ * A NULL <b>ent</b> is ignored. If the count drops to one while the entry
+ * still belongs to a cache, the cache holds the only remaining reference:
+ * a mapped entry is then either unmapped at once (when marked for
+ * aggressive release) or timestamped so that consensus_cache_unmap_lazy()
+ * can unmap it later.
+ */
+void
+consensus_cache_entry_decref(consensus_cache_entry_t *ent)
+{
+ if (! ent)
+ return;
+ /* Sanity checks: refuse to touch an entry whose count is already
+ * exhausted or whose magic number is wrong. */
+ if (BUG(ent->refcnt <= 0))
+ return; // LCOV_EXCL_LINE
+ if (BUG(ent->magic != CCE_MAGIC))
+ return; // LCOV_EXCL_LINE
+
+ --ent->refcnt;
+
+ if (ent->refcnt == 1 && ent->in_cache) {
+ /* Only the cache has a reference: we don't need to keep the file
+ * mapped */
+ if (ent->map) {
+ if (ent->release_aggressively) {
+ consensus_cache_entry_unmap(ent);
+ } else {
+ /* Record when the entry became idle; consensus_cache_unmap_lazy()
+ * compares this against its cutoff. */
+ ent->unused_since = approx_time();
+ }
+ }
+ return;
+ }
+
+ if (ent->refcnt > 0)
+ return;
+
+ /* Refcount is zero; we can free it. */
+ if (ent->map) {
+ consensus_cache_entry_unmap(ent);
+ }
+ tor_free(ent->fname);
+ config_free_lines(ent->labels);
+ consensus_cache_entry_handles_clear(ent);
+ /* Wipe the structure (including its magic number) before freeing it. */
+ memwipe(ent, 0, sizeof(consensus_cache_entry_t));
+ tor_free(ent);
+}
+
+/**
+ * Mark <b>ent</b> for deletion from the cache. Deletion will not occur
+ * until the cache is the only place that holds a reference to <b>ent</b>.
+ *
+ * This only sets a flag. The deletion itself is carried out by
+ * consensus_cache_delete_pending(), which (unless called with
+ * <b>force</b> set) skips entries that are still referenced elsewhere.
+ * Once marked, the entry is no longer reported by
+ * consensus_cache_find_all().
+ */
+void
+consensus_cache_entry_mark_for_removal(consensus_cache_entry_t *ent)
+{
+ ent->can_remove = 1;
+}
+
+/**
+ * Mark <b>ent</b> as the kind of entry that we don't need to keep mmap'd for
+ * any longer than we're actually using it.
+ *
+ * This only sets a flag. consensus_cache_entry_decref() checks it: when
+ * the cache becomes the only holder of a mapped entry with this flag set,
+ * the entry is unmapped immediately instead of being left for
+ * consensus_cache_unmap_lazy().
+ */
+void
+consensus_cache_entry_mark_for_aggressive_release(consensus_cache_entry_t *ent)
+{
+ ent->release_aggressively = 1;
+}
+
+/**
+ * Make the body of <b>ent</b> available in memory, mapping it if it is
+ * not mapped already. On success, store the body pointer in
+ * *<b>body_out</b> and its length in *<b>sz_out</b>, and return 0.
+ * Return -1 on failure, which includes an unmapped entry that no longer
+ * belongs to any cache.
+ *
+ * The body pointer is only valid for as long as you hold a reference
+ * to <b>ent</b>.
+ */
+int
+consensus_cache_entry_get_body(const consensus_cache_entry_t *ent,
+                               const uint8_t **body_out,
+                               size_t *sz_out)
+{
+  if (BUG(ent->magic != CCE_MAGIC))
+    return -1; // LCOV_EXCL_LINE
+
+  if (ent->map == NULL) {
+    consensus_cache_t *owner = (consensus_cache_t *)ent->in_cache;
+    if (owner == NULL)
+      return -1;
+
+    /* Mapping mutates the entry, hence the cast away from const. */
+    consensus_cache_entry_map(owner, (consensus_cache_entry_t *)ent);
+    if (ent->map == NULL)
+      return -1;
+  }
+
+  *body_out = ent->body;
+  *sz_out = ent->bodylen;
+  return 0;
+}
+
+/**
+ * Walk <b>cache</b> and unmap each mmap'd entry that nobody but the cache
+ * references and that has been unused since <b>cutoff</b> or earlier.
+ */
+void
+consensus_cache_unmap_lazy(consensus_cache_t *cache, time_t cutoff)
+{
+  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, entry) {
+    tor_assert_nonfatal(entry->in_cache == cache);
+    if (entry->refcnt > 1 || BUG(entry->in_cache == NULL)) {
+      /* Still referenced by somebody other than the cache. */
+      continue;
+    }
+
+    /* Skip entries that became idle only recently, and entries that are
+     * not mapped in the first place. */
+    const int idle_long_enough = (entry->unused_since <= cutoff);
+    if (idle_long_enough && entry->map != NULL) {
+      consensus_cache_entry_unmap(entry);
+    }
+  } SMARTLIST_FOREACH_END(entry);
+}
+
+/**
+ * Return how many more filenames this cache could use: its entry limit
+ * minus the number of files the storage directory currently lists.
+ */
+int
+consensus_cache_get_n_filenames_available(consensus_cache_t *cache)
+{
+  tor_assert(cache);
+  const int limit = cache->max_entries;
+  const int in_use = smartlist_len(storage_dir_list(cache->dir));
+#ifdef MUST_UNMAP_TO_UNLINK
+  /* Here the directory may legitimately hold more files than the limit. */
+  if (in_use > limit)
+    return 0;
+#else
+  tor_assert_nonfatal(limit >= in_use);
+#endif /* defined(MUST_UNMAP_TO_UNLINK) */
+  return limit - in_use;
+}
+
+/**
+ * Delete every element of <b>cache</b> that has been marked with
+ * consensus_cache_entry_mark_for_removal. If <b>force</b> is false,
+ * retain those entries which are in use by something other than the cache.
+ *
+ * Deleting an entry removes it from the cache's list, drops the cache's
+ * reference to it, and removes its file from the storage directory.
+ */
+void
+consensus_cache_delete_pending(consensus_cache_t *cache, int force)
+{
+ SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) {
+ tor_assert_nonfatal(ent->in_cache == cache);
+ /* Per-entry copy of <b>force</b>, so it can be overridden below. */
+ int force_ent = force;
+#ifdef MUST_UNMAP_TO_UNLINK
+ /* We cannot delete anything with an active mmap on win32, so no
+ * force-deletion. */
+ if (ent->map) {
+ force_ent = 0;
+ }
+#endif /* defined(MUST_UNMAP_TO_UNLINK) */
+ if (! force_ent) {
+ if (ent->refcnt > 1 || BUG(ent->in_cache == NULL)) {
+ /* Somebody is using this entry right now */
+ continue;
+ }
+ }
+ if (ent->can_remove == 0) {
+ /* Don't want to delete this. */
+ continue;
+ }
+ if (BUG(ent->refcnt <= 0)) {
+ continue; // LCOV_EXCL_LINE
+ }
+
+ SMARTLIST_DEL_CURRENT(cache->entries, ent);
+ ent->in_cache = NULL;
+ /* The decref below may free ent together with ent->fname, so keep our
+ * own copy of the name for the file removal that follows. */
+ char *fname = tor_strdup(ent->fname); /* save a copy */
+ consensus_cache_entry_decref(ent);
+ storage_dir_remove_file(cache->dir, fname);
+ tor_free(fname);
+ } SMARTLIST_FOREACH_END(ent);
+}
+
+/**
+ * Internal helper: rescan <b>cache</b> and rebuild its list of entries.
+ *
+ * Any existing entry list is cleared first. Each file listed by the
+ * storage directory is then mapped once in order to read its labels, and
+ * an entry holding a single reference (the cache's own) is created for it;
+ * the mapping itself is released again right away. Files that cannot be
+ * mapped are skipped, and those found to be empty or misformatted are
+ * removed from the storage directory as well.
+ */
+static void
+consensus_cache_rescan(consensus_cache_t *cache)
+{
+  if (cache->entries) {
+    consensus_cache_clear(cache);
+  }
+
+  cache->entries = smartlist_new();
+  const smartlist_t *fnames = storage_dir_list(cache->dir);
+  SMARTLIST_FOREACH_BEGIN(fnames, const char *, fname) {
+    tor_mmap_t *map = NULL;
+    config_line_t *labels = NULL;
+    const uint8_t *body;
+    size_t bodylen;
+    map = storage_dir_map_labeled(cache->dir, fname,
+                                  &labels, &body, &bodylen);
+    if (! map) {
+      /* Capture errno before calling anything else: the logging calls
+       * below evaluate escaped() in the same argument list as the errno
+       * checks, and the order in which arguments are evaluated is
+       * unspecified. */
+      const int map_errno = errno;
+
+      /* The ERANGE error might come from tor_mmap_file() -- it means the file
+       * was empty. EINVAL might come from ..map_labeled() -- it means the
+       * file was misformatted. In both cases, we should just delete it.
+       */
+      if (map_errno == ERANGE || map_errno == EINVAL) {
+        log_warn(LD_FS, "Found %s file %s in consensus cache; removing it.",
+                 map_errno == ERANGE ? "empty" : "misformatted",
+                 escaped(fname));
+        storage_dir_remove_file(cache->dir, fname);
+      } else {
+        /* Can't load this; continue */
+        log_warn(LD_FS, "Unable to map file %s from consensus cache: %s",
+                 escaped(fname), strerror(map_errno));
+      }
+      continue;
+    }
+    consensus_cache_entry_t *ent =
+      tor_malloc_zero(sizeof(consensus_cache_entry_t));
+    ent->magic = CCE_MAGIC;
+    ent->fname = tor_strdup(fname);
+    ent->labels = labels;
+    /* The cache itself is the only holder of a freshly scanned entry. */
+    ent->refcnt = 1;
+    ent->in_cache = cache;
+    ent->unused_since = TIME_MAX;
+    smartlist_add(cache->entries, ent);
+    tor_munmap_file(map); /* don't actually need to keep this around */
+  } SMARTLIST_FOREACH_END(fname);
+}
+
+/**
+ * Ensure that <b>ent</b> is mapped into RAM, using the storage directory
+ * of <b>cache</b>. An entry that is already mapped is left alone. The
+ * mapping attempt may fail, in which case ent->map stays NULL.
+ */
+static void
+consensus_cache_entry_map(consensus_cache_t *cache,
+                          consensus_cache_entry_t *ent)
+{
+  if (ent->map == NULL) {
+    ent->map = storage_dir_map_labeled(cache->dir, ent->fname,
+                                       NULL, &ent->body, &ent->bodylen);
+    ent->unused_since = TIME_MAX;
+  }
+}
+
+/**
+ * Remove the mapping of <b>ent</b> from RAM, if it has one, and reset its
+ * unused-since time to TIME_MAX either way.
+ *
+ * Must not be called while anything other than the cache holds a reference
+ * to <b>ent</b>.
+ */
+static void
+consensus_cache_entry_unmap(consensus_cache_entry_t *ent)
+{
+  ent->unused_since = TIME_MAX;
+
+  if (ent->map != NULL) {
+    tor_munmap_file(ent->map);
+    ent->map = NULL;
+    ent->body = NULL;
+    ent->bodylen = 0;
+  }
+}
+
+HANDLE_IMPL(consensus_cache_entry, consensus_cache_entry_t, )
+
+#ifdef TOR_UNIT_TESTS
+/**
+ * Testing only: return 1 if <b>ent</b> is currently mapped into memory and
+ * 0 otherwise, asserting that its body pointer agrees with that state.
+ *
+ * (Normal operation does not expose this information.)
+ */
+int
+consensus_cache_entry_is_mapped(consensus_cache_entry_t *ent)
+{
+  const int mapped = (ent->map != NULL);
+
+  if (mapped) {
+    tor_assert(ent->body);
+  } else {
+    tor_assert(!ent->body);
+  }
+  return mapped;
+}
+#endif /* defined(TOR_UNIT_TESTS) */
diff --git a/src/feature/dircache/conscache.h b/src/feature/dircache/conscache.h
new file mode 100644
index 0000000000..c274a60393
--- /dev/null
+++ b/src/feature/dircache/conscache.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2017-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_CONSCACHE_H
+#define TOR_CONSCACHE_H
+
+#include "lib/container/handles.h"
+
+/* Opaque types: their layouts are defined in conscache.c. */
+typedef struct consensus_cache_entry_t consensus_cache_entry_t;
+typedef struct consensus_cache_t consensus_cache_t;
+
+/* Handle support for cache entries, plus a free wrapper built on
+ * FREE_AND_NULL. */
+HANDLE_DECL(consensus_cache_entry, consensus_cache_entry_t, )
+#define consensus_cache_entry_handle_free(h) \
+ FREE_AND_NULL(consensus_cache_entry_handle_t, \
+ consensus_cache_entry_handle_free_, (h))
+
+/* Whole-cache operations: opening, freeing, sandbox registration, and
+ * maintenance. */
+consensus_cache_t *consensus_cache_open(const char *subdir, int max_entries);
+void consensus_cache_free_(consensus_cache_t *cache);
+#define consensus_cache_free(cache) \
+ FREE_AND_NULL(consensus_cache_t, consensus_cache_free_, (cache))
+/* Forward declaration; only pointers to this type are used here. */
+struct sandbox_cfg_elem;
+int consensus_cache_may_overallocate(consensus_cache_t *cache);
+int consensus_cache_register_with_sandbox(consensus_cache_t *cache,
+ struct sandbox_cfg_elem **cfg);
+void consensus_cache_unmap_lazy(consensus_cache_t *cache, time_t cutoff);
+void consensus_cache_delete_pending(consensus_cache_t *cache,
+ int force);
+int consensus_cache_get_n_filenames_available(consensus_cache_t *cache);
+/* Adding a new item to the cache. */
+consensus_cache_entry_t *consensus_cache_add(consensus_cache_t *cache,
+ const struct config_line_t *labels,
+ const uint8_t *data,
+ size_t datalen);
+
+/* Looking up entries by label. None of these adjust reference counts. */
+consensus_cache_entry_t *consensus_cache_find_first(
+ consensus_cache_t *cache,
+ const char *key,
+ const char *value);
+
+void consensus_cache_find_all(smartlist_t *out,
+ consensus_cache_t *cache,
+ const char *key,
+ const char *value);
+void consensus_cache_filter_list(smartlist_t *lst,
+ const char *key,
+ const char *value);
+
+/* Reading the labels of an entry. */
+const char *consensus_cache_entry_get_value(const consensus_cache_entry_t *ent,
+ const char *key);
+const struct config_line_t *consensus_cache_entry_get_labels(
+ const consensus_cache_entry_t *ent);
+
+/* Reference counting for entries. */
+void consensus_cache_entry_incref(consensus_cache_entry_t *ent);
+void consensus_cache_entry_decref(consensus_cache_entry_t *ent);
+
+/* Per-entry flags, and access to an entry's body. */
+void consensus_cache_entry_mark_for_removal(consensus_cache_entry_t *ent);
+void consensus_cache_entry_mark_for_aggressive_release(
+ consensus_cache_entry_t *ent);
+int consensus_cache_entry_get_body(const consensus_cache_entry_t *ent,
+ const uint8_t **body_out,
+ size_t *sz_out);
+
+/* Only available in unit-test builds. */
+#ifdef TOR_UNIT_TESTS
+int consensus_cache_entry_is_mapped(consensus_cache_entry_t *ent);
+#endif
+
+#endif /* !defined(TOR_CONSCACHE_H) */
diff --git a/src/feature/dircache/consdiffmgr.c b/src/feature/dircache/consdiffmgr.c
new file mode 100644
index 0000000000..e79aad6efb
--- /dev/null
+++ b/src/feature/dircache/consdiffmgr.c
@@ -0,0 +1,1945 @@
+/* Copyright (c) 2017-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file consdiffmgr.c
+ *
+ * \brief consensus diff manager functions
+ *
+ * This module is run by directory authorities and caches in order
+ * to remember a number of past consensus documents, and to generate
+ * and serve the diffs from those documents to the latest consensus.
+ */
+
+#define CONSDIFFMGR_PRIVATE
+
+#include "core/or/or.h"
+#include "app/config/config.h"
+#include "feature/dircache/conscache.h"
+#include "feature/dircommon/consdiff.h"
+#include "feature/dircache/consdiffmgr.h"
+#include "core/mainloop/cpuworker.h"
+#include "feature/nodelist/networkstatus.h"
+#include "feature/dirparse/ns_parse.h"
+#include "lib/evloop/compat_libevent.h"
+#include "lib/evloop/workqueue.h"
+#include "lib/compress/compress.h"
+#include "lib/encoding/confline.h"
+
+#include "feature/nodelist/networkstatus_st.h"
+#include "feature/nodelist/networkstatus_voter_info_st.h"
+
+/**
+ * Labels to apply to items in the conscache object.
+ *
+ * @{
+ */
+/* One of DOCTYPE_CONSENSUS or DOCTYPE_CONSENSUS_DIFF */
+#define LABEL_DOCTYPE "document-type"
+/* The valid-after time for a consensus (or for the target consensus of a
+ * diff), encoded as ISO UTC. */
+#define LABEL_VALID_AFTER "consensus-valid-after"
+/* The fresh-until time for a consensus (or for the target consensus of a
+ * diff), encoded as ISO UTC. */
+#define LABEL_FRESH_UNTIL "consensus-fresh-until"
+/* The valid-until time for a consensus (or for the target consensus of a
+ * diff), encoded as ISO UTC. */
+#define LABEL_VALID_UNTIL "consensus-valid-until"
+/* Comma-separated list of hex-encoded identity digests for the voting
+ * authorities. */
+#define LABEL_SIGNATORIES "consensus-signatories"
+/* A hex encoded SHA3 digest of the object, as compressed (if any) */
+#define LABEL_SHA3_DIGEST "sha3-digest"
+/* A hex encoded SHA3 digest of the object before compression. */
+#define LABEL_SHA3_DIGEST_UNCOMPRESSED "sha3-digest-uncompressed"
+/* A hex encoded SHA3 digest-as-signed of a consensus */
+#define LABEL_SHA3_DIGEST_AS_SIGNED "sha3-digest-as-signed"
+/* The flavor of the consensus or consensus diff */
+#define LABEL_FLAVOR "consensus-flavor"
+/* Diff only: the SHA3 digest-as-signed of the source consensus. */
+#define LABEL_FROM_SHA3_DIGEST "from-sha3-digest"
+/* Diff only: the SHA3 digest-in-full of the target consensus. */
+#define LABEL_TARGET_SHA3_DIGEST "target-sha3-digest"
+/* Diff only: the valid-after date of the source consensus. */
+#define LABEL_FROM_VALID_AFTER "from-valid-after"
+/* What kind of compression was used? */
+#define LABEL_COMPRESSION_TYPE "compression"
+/** @} */
+
+#define DOCTYPE_CONSENSUS "consensus"
+#define DOCTYPE_CONSENSUS_DIFF "consensus-diff"
+
+/**
+ * Underlying directory that stores consensuses and consensus diffs. Don't
+ * use this directly: use cdm_cache_get() instead.
+ */
+static consensus_cache_t *cons_diff_cache = NULL;
+/**
+ * If true, we have learned at least one new consensus since the
+ * consensus cache was last up-to-date.
+ */
+static int cdm_cache_dirty = 0;
+/**
+ * If true, we have scanned the cache to update our hashtable of diffs.
+ */
+static int cdm_cache_loaded = 0;
+
+/**
+ * Possible status values for cdm_diff_t.cdm_diff_status
+ **/
+typedef enum cdm_diff_status_t {
+ /* The diff is available. */
+ CDM_DIFF_PRESENT=1,
+ /* The diff is being computed. */
+ CDM_DIFF_IN_PROGRESS=2,
+ /* The diff could not be computed. */
+ CDM_DIFF_ERROR=3,
+} cdm_diff_status_t;
+
+/** Which methods do we use for precompressing diffs? The LZMA and ZSTD
+ * entries are only present when HAVE_LZMA and HAVE_ZSTD, respectively, are
+ * defined. */
+static const compress_method_t compress_diffs_with[] = {
+ NO_METHOD,
+ GZIP_METHOD,
+#ifdef HAVE_LZMA
+ LZMA_METHOD,
+#endif
+#ifdef HAVE_ZSTD
+ ZSTD_METHOD,
+#endif
+};
+
+/**
+ * Event for rescanning the cache.
+ */
+static mainloop_event_t *consdiffmgr_rescan_ev = NULL;
+
+static void consdiffmgr_rescan_cb(mainloop_event_t *ev, void *arg);
+static void mark_cdm_cache_dirty(void);
+
+/** How many different methods will we try to use for diff compression?
+ * This is the number of elements in compress_diffs_with. */
+STATIC unsigned
+n_diff_compression_methods(void)
+{
+ return ARRAY_LENGTH(compress_diffs_with);
+}
+
+/** Which methods do we use for precompressing consensuses? The LZMA and
+ * ZSTD entries are only present when HAVE_LZMA and HAVE_ZSTD, respectively,
+ * are defined. */
+static const compress_method_t compress_consensus_with[] = {
+ ZLIB_METHOD,
+#ifdef HAVE_LZMA
+ LZMA_METHOD,
+#endif
+#ifdef HAVE_ZSTD
+ ZSTD_METHOD,
+#endif
+};
+
+/** How many different methods will we try to use for consensus compression?
+ * This is the number of elements in compress_consensus_with. */
+STATIC unsigned
+n_consensus_compression_methods(void)
+{
+ return ARRAY_LENGTH(compress_consensus_with);
+}
+
+/** For which compression method do we retain old consensuses? There's no
+ * need to keep all of them, since we won't be serving them. We'll
+ * go with ZLIB_METHOD because it's pretty fast and everyone has it.
+ */
+#define RETAIN_CONSENSUS_COMPRESSED_WITH_METHOD ZLIB_METHOD
+
+/** Handles pointing to the latest consensus entries as compressed and
+ * stored. */
+static consensus_cache_entry_handle_t *
+ latest_consensus[N_CONSENSUS_FLAVORS]
+ [ARRAY_LENGTH(compress_consensus_with)];
+
+/** Hashtable node used to remember the current status of the diff
+ * from a given sha3 digest to the current consensus. */
+typedef struct cdm_diff_t {
+ HT_ENTRY(cdm_diff_t) node;
+
+ /** Consensus flavor for this diff (part of ht key) */
+ consensus_flavor_t flavor;
+ /** SHA3-256 digest of the consensus that this diff is _from_. (part of the
+ * ht key) */
+ uint8_t from_sha3[DIGEST256_LEN];
+ /** Method by which the diff is compressed. (part of the ht key) */
+ compress_method_t compress_method;
+
+ /** One of the CDM_DIFF_* values, depending on whether this diff
+ * is available, in progress, or impossible to compute. */
+ cdm_diff_status_t cdm_diff_status;
+ /** SHA3-256 digest of the consensus that this diff is _to_. Not part of
+ * the ht key. */
+ uint8_t target_sha3[DIGEST256_LEN];
+
+ /** Handle to the cache entry for this diff, if any. We use a handle here
+ * to avoid thinking too hard about cache entry lifetime issues. */
+ consensus_cache_entry_handle_t *entry;
+} cdm_diff_t;
+
+/** Hashtable mapping flavor and source consensus digest to status. */
+static HT_HEAD(cdm_diff_ht, cdm_diff_t) cdm_diff_ht = HT_INITIALIZER();
+
+/**
+ * Configuration for this module
+ */
+static consdiff_cfg_t consdiff_cfg = {
+ // XXXX I'd like to make this number bigger, but it interferes with the
+ // XXXX seccomp2 syscall filter, which tops out at BPF_MAXINS (4096)
+ // XXXX rules.
+ /* .cache_max_num = */ 128
+};
+
+static int consdiffmgr_ensure_space_for_files(int n);
+static int consensus_queue_compression_work(const char *consensus,
+ const networkstatus_t *as_parsed);
+static int consensus_diff_queue_diff_work(consensus_cache_entry_t *diff_from,
+ consensus_cache_entry_t *diff_to);
+static void consdiffmgr_set_cache_flags(void);
+
+/* =====
+ * Hashtable setup
+ * ===== */
+
+/** Helper: compute the hash of the key fields of <b>diff</b>: its source
+ * digest, followed by one byte each for its flavor and its compression
+ * method. */
+static unsigned
+cdm_diff_hash(const cdm_diff_t *diff)
+{
+  uint8_t key[DIGEST256_LEN + 2];
+  uint8_t *cursor = key;
+
+  memcpy(cursor, diff->from_sha3, DIGEST256_LEN);
+  cursor += DIGEST256_LEN;
+  *cursor++ = (uint8_t) diff->flavor;
+  *cursor = (uint8_t) diff->compress_method;
+
+  return (unsigned) siphash24g(key, sizeof(key));
+}
+/** Helper: return true iff <b>diff1</b> and <b>diff2</b> have equal keys,
+ * that is, the same flavor, compression method, and source digest. */
+static int
+cdm_diff_eq(const cdm_diff_t *diff1, const cdm_diff_t *diff2)
+{
+  if (diff1->flavor != diff2->flavor)
+    return 0;
+  if (diff1->compress_method != diff2->compress_method)
+    return 0;
+  return fast_memeq(diff1->from_sha3, diff2->from_sha3, DIGEST256_LEN) != 0;
+}
+
+HT_PROTOTYPE(cdm_diff_ht, cdm_diff_t, node, cdm_diff_hash, cdm_diff_eq)
+HT_GENERATE2(cdm_diff_ht, cdm_diff_t, node, cdm_diff_hash, cdm_diff_eq,
+ 0.6, tor_reallocarray, tor_free_)
+
+#define cdm_diff_free(diff) \
+ FREE_AND_NULL(cdm_diff_t, cdm_diff_free_, (diff))
+
+/** Free <b>diff</b> together with the cache-entry handle it holds. A NULL
+ * <b>diff</b> is ignored. */
+static void
+cdm_diff_free_(cdm_diff_t *diff)
+{
+  if (diff != NULL) {
+    consensus_cache_entry_handle_free(diff->entry);
+    tor_free(diff);
+  }
+}
+
+/** Allocate and return a new cdm_diff_t with the given flavor, digests,
+ * and compression method. All other fields start out zeroed, and the new
+ * object is not inserted into the hashtable. */
+static cdm_diff_t *
+cdm_diff_new(consensus_flavor_t flav,
+             const uint8_t *from_sha3,
+             const uint8_t *target_sha3,
+             compress_method_t method)
+{
+  cdm_diff_t *result = tor_malloc_zero(sizeof(cdm_diff_t));
+
+  result->flavor = flav;
+  result->compress_method = method;
+  memcpy(result->from_sha3, from_sha3, DIGEST256_LEN);
+  memcpy(result->target_sha3, target_sha3, DIGEST256_LEN);
+
+  return result;
+}
+
+/**
+ * Consult the diff hashtable about computing a diff of type <b>flav</b>
+ * between the consensuses with the two provided SHA3-256 digests.
+ *
+ * Each diff compression method is checked in turn. If the table already
+ * has an entry for any of them (a computation is in progress, or was
+ * tried and failed), return 1. Every method without an entry gets one,
+ * marked "in progress", so that the computation is not attempted again
+ * later; if no method had an entry, return 0.
+ */
+static int
+cdm_diff_ht_check_and_note_pending(consensus_flavor_t flav,
+                                   const uint8_t *from_sha3,
+                                   const uint8_t *target_sha3)
+{
+  int already_known = 0;
+  const unsigned n_methods = n_diff_compression_methods();
+
+  for (unsigned idx = 0; idx < n_methods; ++idx) {
+    const compress_method_t method = compress_diffs_with[idx];
+
+    /* Build a lookup key holding only the hashtable key fields. */
+    struct cdm_diff_t key;
+    memset(&key, 0, sizeof(cdm_diff_t));
+    key.flavor = flav;
+    key.compress_method = method;
+    memcpy(key.from_sha3, from_sha3, DIGEST256_LEN);
+
+    struct cdm_diff_t *found = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &key);
+    if (found != NULL) {
+      tor_assert_nonfatal(found->cdm_diff_status != CDM_DIFF_PRESENT);
+      already_known = 1;
+    } else {
+      struct cdm_diff_t *fresh =
+        cdm_diff_new(flav, from_sha3, target_sha3, method);
+      fresh->cdm_diff_status = CDM_DIFF_IN_PROGRESS;
+      HT_INSERT(cdm_diff_ht, &cdm_diff_ht, fresh);
+    }
+  }
+
+  return already_known;
+}
+
+/**
+ * Update the status of the diff of type <b>flav</b> between consensuses with
+ * the two provided SHA3-256 digests, so that its status becomes
+ * <b>status</b>, and its value becomes the <b>handle</b>. If <b>handle</b>
+ * is NULL, then the old handle (if any) is freed, and replaced with NULL.
+ *
+ * The hashtable entry is looked up by flavor, source digest, and
+ * <b>method</b>, and is created if missing. If an existing entry has a
+ * different target digest from <b>to_sha3</b>, nothing is changed and
+ * <b>handle</b> is not stored.
+ */
+static void
+cdm_diff_ht_set_status(consensus_flavor_t flav,
+                       const uint8_t *from_sha3,
+                       const uint8_t *to_sha3,
+                       compress_method_t method,
+                       int status,
+                       consensus_cache_entry_handle_t *handle)
+{
+  if (handle == NULL) {
+    /* A diff cannot be "present" without an entry to point at. */
+    tor_assert_nonfatal(status != CDM_DIFF_PRESENT);
+  }
+
+  /* Build a lookup key holding only the hashtable key fields. */
+  struct cdm_diff_t search, *ent;
+  memset(&search, 0, sizeof(cdm_diff_t));
+  search.flavor = flav;
+  search.compress_method = method;
+  memcpy(search.from_sha3, from_sha3, DIGEST256_LEN);
+  ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search);
+  if (!ent) {
+    ent = cdm_diff_new(flav, from_sha3, to_sha3, method);
+    ent->cdm_diff_status = CDM_DIFF_IN_PROGRESS;
+    HT_INSERT(cdm_diff_ht, &cdm_diff_ht, ent);
+  } else if (fast_memneq(ent->target_sha3, to_sha3, DIGEST256_LEN)) {
+    // This can happen under certain really pathological conditions
+    // if we decide we don't care about a diff before it is actually
+    // done computing.
+    return;
+  }
+
+  tor_assert_nonfatal(ent->cdm_diff_status == CDM_DIFF_IN_PROGRESS);
+
+  ent->cdm_diff_status = status;
+  /* Release the previous handle (if any) before storing the new one. */
+  consensus_cache_entry_handle_free(ent->entry);
+  ent->entry = handle;
+}
+
+/**
+ * Helper: Remove from the hash table every present (actually computed) diff
+ * of type <b>flav</b> whose target digest does not match
+ * <b>unless_target_sha3_matches</b>.
+ *
+ * This function is used for the hash table to throw away references to diffs
+ * that do not lead to the most given consensus of a given flavor.
+ */
+static void
+cdm_diff_ht_purge(consensus_flavor_t flav,
+ const uint8_t *unless_target_sha3_matches)
+{
+ cdm_diff_t **diff, **next;
+ for (diff = HT_START(cdm_diff_ht, &cdm_diff_ht); diff; diff = next) {
+ cdm_diff_t *this = *diff;
+
+ if ((*diff)->cdm_diff_status == CDM_DIFF_PRESENT &&
+ flav == (*diff)->flavor) {
+
+ if (BUG((*diff)->entry == NULL) ||
+ consensus_cache_entry_handle_get((*diff)->entry) == NULL) {
+ /* the underlying entry has gone away; drop this. */
+ next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff);
+ cdm_diff_free(this);
+ continue;
+ }
+
+ if (unless_target_sha3_matches &&
+ fast_memneq(unless_target_sha3_matches, (*diff)->target_sha3,
+ DIGEST256_LEN)) {
+ /* target hash doesn't match; drop this. */
+ next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff);
+ cdm_diff_free(this);
+ continue;
+ }
+ }
+ next = HT_NEXT(cdm_diff_ht, &cdm_diff_ht, diff);
+ }
+}
+
+/**
+ * Helper: initialize <b>cons_diff_cache</b>.
+ */
+static void
+cdm_cache_init(void)
+{
+ unsigned n_entries = consdiff_cfg.cache_max_num * 2;
+
+ tor_assert(cons_diff_cache == NULL);
+ cons_diff_cache = consensus_cache_open("diff-cache", n_entries);
+ if (cons_diff_cache == NULL) {
+ // LCOV_EXCL_START
+ log_err(LD_FS, "Error: Couldn't open storage for consensus diffs.");
+ tor_assert_unreached();
+ // LCOV_EXCL_STOP
+ } else {
+ consdiffmgr_set_cache_flags();
+ }
+ consdiffmgr_rescan_ev =
+ mainloop_event_postloop_new(consdiffmgr_rescan_cb, NULL);
+ mark_cdm_cache_dirty();
+ cdm_cache_loaded = 0;
+}
+
+/**
+ * Helper: return the consensus_cache_t * that backs this manager,
+ * initializing it if needed.
+ */
+STATIC consensus_cache_t *
+cdm_cache_get(void)
+{
+ if (PREDICT_UNLIKELY(cons_diff_cache == NULL)) {
+ cdm_cache_init();
+ }
+ return cons_diff_cache;
+}
+
+/**
+ * Helper: given a list of labels, prepend the hex-encoded SHA3 digest
+ * of the <b>bodylen</b>-byte object at <b>body</b> to those labels,
+ * with <b>label</b> as its label.
+ */
+static void
+cdm_labels_prepend_sha3(config_line_t **labels,
+ const char *label,
+ const uint8_t *body,
+ size_t bodylen)
+{
+ uint8_t sha3_digest[DIGEST256_LEN];
+ char hexdigest[HEX_DIGEST256_LEN+1];
+ crypto_digest256((char *)sha3_digest,
+ (const char *)body, bodylen, DIGEST_SHA3_256);
+ base16_encode(hexdigest, sizeof(hexdigest),
+ (const char *)sha3_digest, sizeof(sha3_digest));
+
+ config_line_prepend(labels, label, hexdigest);
+}
+
+ /** Helper: look up the label <b>label</b> on <b>ent</b> and decode its
+  * value as a hex-encoded sha3-256 digest into <b>digest_out</b>.
+  *
+  * Return 0 on success, -1 if <b>ent</b> is NULL or has no such label, and
+  * -2 if the value does not decode to exactly DIGEST256_LEN bytes. */
+ STATIC int
+ cdm_entry_get_sha3_value(uint8_t *digest_out,
+                          consensus_cache_entry_t *ent,
+                          const char *label)
+ {
+   const char *encoded =
+     ent ? consensus_cache_entry_get_value(ent, label) : NULL;
+   if (!encoded)
+     return -1;
+
+   const int n_decoded = base16_decode((char*)digest_out, DIGEST256_LEN,
+                                       encoded, strlen(encoded));
+   return (n_decoded == DIGEST256_LEN) ? 0 : -2;
+ }
+
+/**
+ * Helper: look for a consensus with the given <b>flavor</b> and
+ * <b>valid_after</b> time in the cache. Return that consensus if it's
+ * present, or NULL if it's missing.
+ */
+STATIC consensus_cache_entry_t *
+cdm_cache_lookup_consensus(consensus_flavor_t flavor, time_t valid_after)
+{
+ char formatted_time[ISO_TIME_LEN+1];
+ format_iso_time_nospace(formatted_time, valid_after);
+ const char *flavname = networkstatus_get_flavor_name(flavor);
+
+ /* We'll filter by valid-after time first, since that should
+ * match the fewest documents. */
+ /* We could add an extra hashtable here, but since we only do this scan
+ * when adding a new consensus, it probably doesn't matter much. */
+ smartlist_t *matches = smartlist_new();
+ consensus_cache_find_all(matches, cdm_cache_get(),
+ LABEL_VALID_AFTER, formatted_time);
+ consensus_cache_filter_list(matches, LABEL_FLAVOR, flavname);
+ consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS);
+
+ consensus_cache_entry_t *result = NULL;
+ if (smartlist_len(matches)) {
+ result = smartlist_get(matches, 0);
+ }
+ smartlist_free(matches);
+
+ return result;
+}
+
+ /** Return the maximum age (in seconds) of consensuses that we should consider
+  * storing.  The available space in the directory may impose additional limits
+  * on how much we store.
+  *
+  * If the MaxConsensusAgeForDiffs option is set, it is used (it is already in
+  * seconds), capped at MAX_MAX_AGE_TO_CACHE hours.  Otherwise the value comes
+  * from the "max-consensus-age-to-cache-for-diff" consensus parameter, which
+  * is expressed in hours. */
+ static int32_t
+ get_max_age_to_cache(void)
+ {
+   /* These three limits are expressed in hours. */
+   const int32_t DEFAULT_MAX_AGE_TO_CACHE = 8192;
+   const int32_t MIN_MAX_AGE_TO_CACHE = 0;
+   const int32_t MAX_MAX_AGE_TO_CACHE = 8192;
+   const char MAX_AGE_TO_CACHE_NAME[] = "max-consensus-age-to-cache-for-diff";
+
+   const or_options_t *options = get_options();
+
+   if (options->MaxConsensusAgeForDiffs) {
+     const int v = options->MaxConsensusAgeForDiffs;
+     /* The option is in seconds and so is our return value, so the cap has
+      * to be converted from hours before it is compared or returned. */
+     if (v >= MAX_MAX_AGE_TO_CACHE * 3600)
+       return MAX_MAX_AGE_TO_CACHE * 3600;
+     else
+       return v;
+   }
+
+   /* The parameter is in hours, so we multiply */
+   return 3600 * networkstatus_get_param(NULL,
+                                         MAX_AGE_TO_CACHE_NAME,
+                                         DEFAULT_MAX_AGE_TO_CACHE,
+                                         MIN_MAX_AGE_TO_CACHE,
+                                         MAX_MAX_AGE_TO_CACHE);
+ }
+
+/**
+ * Given a string containing a networkstatus consensus, and the results of
+ * having parsed that consensus, add that consensus to the cache if it is not
+ * already present and not too old. Create new consensus diffs from or to
+ * that consensus as appropriate.
+ *
+ * Return 0 on success and -1 on failure.
+ */
+int
+consdiffmgr_add_consensus(const char *consensus,
+ const networkstatus_t *as_parsed)
+{
+ if (BUG(consensus == NULL) || BUG(as_parsed == NULL))
+ return -1; // LCOV_EXCL_LINE
+ if (BUG(as_parsed->type != NS_TYPE_CONSENSUS))
+ return -1; // LCOV_EXCL_LINE
+
+ const consensus_flavor_t flavor = as_parsed->flavor;
+ const time_t valid_after = as_parsed->valid_after;
+
+ if (valid_after < approx_time() - get_max_age_to_cache()) {
+ log_info(LD_DIRSERV, "We don't care about this consensus document; it's "
+ "too old.");
+ return -1;
+ }
+
+ /* Do we already have this one? */
+ consensus_cache_entry_t *entry =
+ cdm_cache_lookup_consensus(flavor, valid_after);
+ if (entry) {
+ log_info(LD_DIRSERV, "We already have a copy of that consensus");
+ return -1;
+ }
+
+ /* We don't have it. Add it to the cache. */
+ return consensus_queue_compression_work(consensus, as_parsed);
+}
+
+/**
+ * Helper: used to sort two smartlists of consensus_cache_entry_t by their
+ * LABEL_VALID_AFTER labels.
+ */
+static int
+compare_by_valid_after_(const void **a, const void **b)
+{
+ const consensus_cache_entry_t *e1 = *a;
+ const consensus_cache_entry_t *e2 = *b;
+ /* We're in luck here: sorting UTC iso-encoded values lexically will work
+ * fine (until 9999). */
+ return strcmp_opt(consensus_cache_entry_get_value(e1, LABEL_VALID_AFTER),
+ consensus_cache_entry_get_value(e2, LABEL_VALID_AFTER));
+}
+
+/**
+ * Helper: Sort <b>lst</b> by LABEL_VALID_AFTER and return the most recent
+ * entry.
+ */
+static consensus_cache_entry_t *
+sort_and_find_most_recent(smartlist_t *lst)
+{
+ smartlist_sort(lst, compare_by_valid_after_);
+ if (smartlist_len(lst)) {
+ return smartlist_get(lst, smartlist_len(lst) - 1);
+ } else {
+ return NULL;
+ }
+}
+
+ /** Find the index of <b>method</b> within compress_consensus_with[].
+  * Return that index, or -1 if the method does not appear there. */
+ static int
+ consensus_compression_method_pos(compress_method_t method)
+ {
+   for (unsigned pos = 0; pos < n_consensus_compression_methods(); ++pos) {
+     if (compress_consensus_with[pos] != method)
+       continue;
+     return pos;
+   }
+   return -1;
+ }
+
+/**
+ * If we know a consensus with the flavor <b>flavor</b> compressed with
+ * <b>method</b>, set *<b>entry_out</b> to that value. Return values are as
+ * for consdiffmgr_find_diff_from().
+ */
+consdiff_status_t
+consdiffmgr_find_consensus(struct consensus_cache_entry_t **entry_out,
+ consensus_flavor_t flavor,
+ compress_method_t method)
+{
+ tor_assert(entry_out);
+ tor_assert((int)flavor < N_CONSENSUS_FLAVORS);
+
+ int pos = consensus_compression_method_pos(method);
+ if (pos < 0) {
+ // We don't compress consensuses with this method.
+ return CONSDIFF_NOT_FOUND;
+ }
+ consensus_cache_entry_handle_t *handle = latest_consensus[flavor][pos];
+ if (!handle)
+ return CONSDIFF_NOT_FOUND;
+ *entry_out = consensus_cache_entry_handle_get(handle);
+ if (*entry_out)
+ return CONSDIFF_AVAILABLE;
+ else
+ return CONSDIFF_NOT_FOUND;
+}
+
+/**
+ * Look up consensus_cache_entry_t for the consensus of type <b>flavor</b>,
+ * from the source consensus with the specified digest (which must be SHA3).
+ *
+ * If the diff is present, store it into *<b>entry_out</b> and return
+ * CONSDIFF_AVAILABLE. Otherwise return CONSDIFF_NOT_FOUND or
+ * CONSDIFF_IN_PROGRESS.
+ */
+consdiff_status_t
+consdiffmgr_find_diff_from(consensus_cache_entry_t **entry_out,
+ consensus_flavor_t flavor,
+ int digest_type,
+ const uint8_t *digest,
+ size_t digestlen,
+ compress_method_t method)
+{
+ if (BUG(digest_type != DIGEST_SHA3_256) ||
+ BUG(digestlen != DIGEST256_LEN)) {
+ return CONSDIFF_NOT_FOUND; // LCOV_EXCL_LINE
+ }
+
+ // Try to look up the entry in the hashtable.
+ cdm_diff_t search, *ent;
+ memset(&search, 0, sizeof(search));
+ search.flavor = flavor;
+ search.compress_method = method;
+ memcpy(search.from_sha3, digest, DIGEST256_LEN);
+ ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search);
+
+ if (ent == NULL ||
+ ent->cdm_diff_status == CDM_DIFF_ERROR) {
+ return CONSDIFF_NOT_FOUND;
+ } else if (ent->cdm_diff_status == CDM_DIFF_IN_PROGRESS) {
+ return CONSDIFF_IN_PROGRESS;
+ } else if (BUG(ent->cdm_diff_status != CDM_DIFF_PRESENT)) {
+ return CONSDIFF_IN_PROGRESS;
+ }
+
+ if (BUG(ent->entry == NULL)) {
+ return CONSDIFF_NOT_FOUND;
+ }
+ *entry_out = consensus_cache_entry_handle_get(ent->entry);
+ return (*entry_out) ? CONSDIFF_AVAILABLE : CONSDIFF_NOT_FOUND;
+
+#if 0
+ // XXXX Remove this. I'm keeping it around for now in case we need to
+ // XXXX debug issues in the hashtable.
+ char hex[HEX_DIGEST256_LEN+1];
+ base16_encode(hex, sizeof(hex), (const char *)digest, digestlen);
+ const char *flavname = networkstatus_get_flavor_name(flavor);
+
+ smartlist_t *matches = smartlist_new();
+ consensus_cache_find_all(matches, cdm_cache_get(),
+ LABEL_FROM_SHA3_DIGEST, hex);
+ consensus_cache_filter_list(matches, LABEL_FLAVOR, flavname);
+ consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF);
+
+ *entry_out = sort_and_find_most_recent(matches);
+ consdiff_status_t result =
+ (*entry_out) ? CONSDIFF_AVAILABLE : CONSDIFF_NOT_FOUND;
+ smartlist_free(matches);
+
+ return result;
+#endif /* 0 */
+}
+
+/**
+ * Perform periodic cleanup tasks on the consensus diff cache. Return
+ * the number of objects marked for deletion.
+ */
+int
+consdiffmgr_cleanup(void)
+{
+ smartlist_t *objects = smartlist_new();
+ smartlist_t *consensuses = smartlist_new();
+ smartlist_t *diffs = smartlist_new();
+ int n_to_delete = 0;
+
+ log_debug(LD_DIRSERV, "Looking for consdiffmgr entries to remove");
+
+ // 1. Delete any consensus or diff or anything whose valid_after is too old.
+ const time_t valid_after_cutoff = approx_time() - get_max_age_to_cache();
+
+ consensus_cache_find_all(objects, cdm_cache_get(),
+ NULL, NULL);
+ SMARTLIST_FOREACH_BEGIN(objects, consensus_cache_entry_t *, ent) {
+ const char *lv_valid_after =
+ consensus_cache_entry_get_value(ent, LABEL_VALID_AFTER);
+ if (! lv_valid_after) {
+ log_debug(LD_DIRSERV, "Ignoring entry because it had no %s label",
+ LABEL_VALID_AFTER);
+ continue;
+ }
+ time_t valid_after = 0;
+ if (parse_iso_time_nospace(lv_valid_after, &valid_after) < 0) {
+ log_debug(LD_DIRSERV, "Ignoring entry because its %s value (%s) was "
+ "unparseable", LABEL_VALID_AFTER, escaped(lv_valid_after));
+ continue;
+ }
+ if (valid_after < valid_after_cutoff) {
+ log_debug(LD_DIRSERV, "Deleting entry because its %s value (%s) was "
+ "too old", LABEL_VALID_AFTER, lv_valid_after);
+ consensus_cache_entry_mark_for_removal(ent);
+ ++n_to_delete;
+ }
+ } SMARTLIST_FOREACH_END(ent);
+
+ // 2. Delete all diffs that lead to a consensus whose valid-after is not the
+ // latest.
+ for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) {
+ const char *flavname = networkstatus_get_flavor_name(flav);
+ /* Determine the most recent consensus of this flavor */
+ consensus_cache_find_all(consensuses, cdm_cache_get(),
+ LABEL_DOCTYPE, DOCTYPE_CONSENSUS);
+ consensus_cache_filter_list(consensuses, LABEL_FLAVOR, flavname);
+ consensus_cache_entry_t *most_recent =
+ sort_and_find_most_recent(consensuses);
+ if (most_recent == NULL)
+ continue;
+ const char *most_recent_sha3 =
+ consensus_cache_entry_get_value(most_recent,
+ LABEL_SHA3_DIGEST_UNCOMPRESSED);
+ if (BUG(most_recent_sha3 == NULL))
+ continue; // LCOV_EXCL_LINE
+
+ /* consider all such-flavored diffs, and look to see if they match. */
+ consensus_cache_find_all(diffs, cdm_cache_get(),
+ LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF);
+ consensus_cache_filter_list(diffs, LABEL_FLAVOR, flavname);
+ SMARTLIST_FOREACH_BEGIN(diffs, consensus_cache_entry_t *, diff) {
+ const char *this_diff_target_sha3 =
+ consensus_cache_entry_get_value(diff, LABEL_TARGET_SHA3_DIGEST);
+ if (!this_diff_target_sha3)
+ continue;
+ if (strcmp(this_diff_target_sha3, most_recent_sha3)) {
+ consensus_cache_entry_mark_for_removal(diff);
+ ++n_to_delete;
+ }
+ } SMARTLIST_FOREACH_END(diff);
+ smartlist_clear(consensuses);
+ smartlist_clear(diffs);
+ }
+
+ // 3. Delete all consensuses except the most recent that are compressed with
+ // an un-preferred method.
+ for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) {
+ const char *flavname = networkstatus_get_flavor_name(flav);
+ /* Determine the most recent consensus of this flavor */
+ consensus_cache_find_all(consensuses, cdm_cache_get(),
+ LABEL_DOCTYPE, DOCTYPE_CONSENSUS);
+ consensus_cache_filter_list(consensuses, LABEL_FLAVOR, flavname);
+ consensus_cache_entry_t *most_recent =
+ sort_and_find_most_recent(consensuses);
+ if (most_recent == NULL)
+ continue;
+ const char *most_recent_sha3_uncompressed =
+ consensus_cache_entry_get_value(most_recent,
+ LABEL_SHA3_DIGEST_UNCOMPRESSED);
+ const char *retain_methodname = compression_method_get_name(
+ RETAIN_CONSENSUS_COMPRESSED_WITH_METHOD);
+
+ if (BUG(most_recent_sha3_uncompressed == NULL))
+ continue;
+ SMARTLIST_FOREACH_BEGIN(consensuses, consensus_cache_entry_t *, ent) {
+ const char *lv_sha3_uncompressed =
+ consensus_cache_entry_get_value(ent, LABEL_SHA3_DIGEST_UNCOMPRESSED);
+ if (BUG(! lv_sha3_uncompressed))
+ continue;
+ if (!strcmp(lv_sha3_uncompressed, most_recent_sha3_uncompressed))
+ continue; // This _is_ the most recent.
+ const char *lv_methodname =
+ consensus_cache_entry_get_value(ent, LABEL_COMPRESSION_TYPE);
+ if (! lv_methodname || strcmp(lv_methodname, retain_methodname)) {
+ consensus_cache_entry_mark_for_removal(ent);
+ ++n_to_delete;
+ }
+ } SMARTLIST_FOREACH_END(ent);
+ }
+
+ smartlist_free(objects);
+ smartlist_free(consensuses);
+ smartlist_free(diffs);
+
+ // Actually remove files, if they're not used.
+ consensus_cache_delete_pending(cdm_cache_get(), 0);
+ return n_to_delete;
+}
+
+/**
+ * Initialize the consensus diff manager and its cache, and configure
+ * its parameters based on the latest torrc and networkstatus parameters.
+ */
+void
+consdiffmgr_configure(const consdiff_cfg_t *cfg)
+{
+ if (cfg)
+ memcpy(&consdiff_cfg, cfg, sizeof(consdiff_cfg));
+
+ (void) cdm_cache_get();
+}
+
+/**
+ * Tell the sandbox (if any) configured by <b>cfg</b> to allow the
+ * operations that the consensus diff manager will need.
+ */
+int
+consdiffmgr_register_with_sandbox(struct sandbox_cfg_elem **cfg)
+{
+ return consensus_cache_register_with_sandbox(cdm_cache_get(), cfg);
+}
+
+/**
+ * Scan the consensus diff manager's cache for any grossly malformed entries,
+ * and mark them as deletable. Return 0 if no problems were found; 1
+ * if problems were found and fixed.
+ */
+int
+consdiffmgr_validate(void)
+{
+ /* Right now, we only check for entries that have bad sha3 values */
+ int problems = 0;
+
+ smartlist_t *objects = smartlist_new();
+ consensus_cache_find_all(objects, cdm_cache_get(),
+ NULL, NULL);
+ SMARTLIST_FOREACH_BEGIN(objects, consensus_cache_entry_t *, obj) {
+ uint8_t sha3_expected[DIGEST256_LEN];
+ uint8_t sha3_received[DIGEST256_LEN];
+ int r = cdm_entry_get_sha3_value(sha3_expected, obj, LABEL_SHA3_DIGEST);
+ if (r == -1) {
+ /* digest isn't there; that's allowed */
+ continue;
+ } else if (r == -2) {
+ /* digest is malformed; that's not allowed */
+ problems = 1;
+ consensus_cache_entry_mark_for_removal(obj);
+ continue;
+ }
+ const uint8_t *body;
+ size_t bodylen;
+ consensus_cache_entry_incref(obj);
+ r = consensus_cache_entry_get_body(obj, &body, &bodylen);
+ if (r == 0) {
+ crypto_digest256((char *)sha3_received, (const char *)body, bodylen,
+ DIGEST_SHA3_256);
+ }
+ consensus_cache_entry_decref(obj);
+ if (r < 0)
+ continue;
+
+ // Deconfuse coverity about the possibility of sha3_received being
+ // uninitialized
+ tor_assert(r <= 0);
+
+ if (fast_memneq(sha3_received, sha3_expected, DIGEST256_LEN)) {
+ problems = 1;
+ consensus_cache_entry_mark_for_removal(obj);
+ continue;
+ }
+
+ } SMARTLIST_FOREACH_END(obj);
+ smartlist_free(objects);
+ return problems;
+}
+
+/**
+ * Helper: build new diffs of <b>flavor</b> as needed
+ */
+static void
+consdiffmgr_rescan_flavor_(consensus_flavor_t flavor)
+{
+ smartlist_t *matches = NULL;
+ smartlist_t *diffs = NULL;
+ smartlist_t *compute_diffs_from = NULL;
+ strmap_t *have_diff_from = NULL;
+
+ // look for the most recent consensus, and for all previous in-range
+ // consensuses. Do they all have diffs to it?
+ const char *flavname = networkstatus_get_flavor_name(flavor);
+
+ // 1. find the most recent consensus, and the ones that we might want
+ // to diff to it.
+ const char *methodname = compression_method_get_name(
+ RETAIN_CONSENSUS_COMPRESSED_WITH_METHOD);
+
+ matches = smartlist_new();
+ consensus_cache_find_all(matches, cdm_cache_get(),
+ LABEL_FLAVOR, flavname);
+ consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS);
+ consensus_cache_filter_list(matches, LABEL_COMPRESSION_TYPE, methodname);
+ consensus_cache_entry_t *most_recent = sort_and_find_most_recent(matches);
+ if (!most_recent) {
+ log_info(LD_DIRSERV, "No 'most recent' %s consensus found; "
+ "not making diffs", flavname);
+ goto done;
+ }
+ tor_assert(smartlist_len(matches));
+ smartlist_del(matches, smartlist_len(matches) - 1);
+
+ const char *most_recent_valid_after =
+ consensus_cache_entry_get_value(most_recent, LABEL_VALID_AFTER);
+ if (BUG(most_recent_valid_after == NULL))
+ goto done; //LCOV_EXCL_LINE
+ uint8_t most_recent_sha3[DIGEST256_LEN];
+ if (BUG(cdm_entry_get_sha3_value(most_recent_sha3, most_recent,
+ LABEL_SHA3_DIGEST_UNCOMPRESSED) < 0))
+ goto done; //LCOV_EXCL_LINE
+
+ // 2. Find all the relevant diffs _to_ this consensus. These are ones
+ // that we don't need to compute.
+ diffs = smartlist_new();
+ consensus_cache_find_all(diffs, cdm_cache_get(),
+ LABEL_VALID_AFTER, most_recent_valid_after);
+ consensus_cache_filter_list(diffs, LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF);
+ consensus_cache_filter_list(diffs, LABEL_FLAVOR, flavname);
+ have_diff_from = strmap_new();
+ SMARTLIST_FOREACH_BEGIN(diffs, consensus_cache_entry_t *, diff) {
+ const char *va = consensus_cache_entry_get_value(diff,
+ LABEL_FROM_VALID_AFTER);
+ if (BUG(va == NULL))
+ continue; // LCOV_EXCL_LINE
+ strmap_set(have_diff_from, va, diff);
+ } SMARTLIST_FOREACH_END(diff);
+
+ // 3. See which consensuses in 'matches' don't have diffs yet.
+ smartlist_reverse(matches); // from newest to oldest.
+ compute_diffs_from = smartlist_new();
+ SMARTLIST_FOREACH_BEGIN(matches, consensus_cache_entry_t *, ent) {
+ const char *va = consensus_cache_entry_get_value(ent, LABEL_VALID_AFTER);
+ if (BUG(va == NULL))
+ continue; // LCOV_EXCL_LINE
+ if (strmap_get(have_diff_from, va) != NULL)
+ continue; /* we already have this one. */
+ smartlist_add(compute_diffs_from, ent);
+ /* Since we are not going to serve this as the most recent consensus
+ * any more, we should stop keeping it mmap'd when it's not in use.
+ */
+ consensus_cache_entry_mark_for_aggressive_release(ent);
+ } SMARTLIST_FOREACH_END(ent);
+
+ log_info(LD_DIRSERV,
+ "The most recent %s consensus is valid-after %s. We have diffs to "
+ "this consensus for %d/%d older %s consensuses. Generating diffs "
+ "for the other %d.",
+ flavname,
+ most_recent_valid_after,
+ smartlist_len(matches) - smartlist_len(compute_diffs_from),
+ smartlist_len(matches),
+ flavname,
+ smartlist_len(compute_diffs_from));
+
+ // 4. Update the hashtable; remove entries in this flavor to other
+ // target consensuses.
+ cdm_diff_ht_purge(flavor, most_recent_sha3);
+
+ // 5. Actually launch the requests.
+ SMARTLIST_FOREACH_BEGIN(compute_diffs_from, consensus_cache_entry_t *, c) {
+ if (BUG(c == most_recent))
+ continue; // LCOV_EXCL_LINE
+
+ uint8_t this_sha3[DIGEST256_LEN];
+ if (cdm_entry_get_sha3_value(this_sha3, c,
+ LABEL_SHA3_DIGEST_AS_SIGNED)<0) {
+ // Not actually a bug, since we might be running with a directory
+ // with stale files from before the #22143 fixes.
+ continue;
+ }
+ if (cdm_diff_ht_check_and_note_pending(flavor,
+ this_sha3, most_recent_sha3)) {
+ // This is already pending, or we encountered an error.
+ continue;
+ }
+ consensus_diff_queue_diff_work(c, most_recent);
+ } SMARTLIST_FOREACH_END(c);
+
+ done:
+ smartlist_free(matches);
+ smartlist_free(diffs);
+ smartlist_free(compute_diffs_from);
+ strmap_free(have_diff_from, NULL);
+}
+
+/**
+ * Scan the cache for the latest consensuses and add their handles to
+ * latest_consensus
+ */
+static void
+consdiffmgr_consensus_load(void)
+{
+ smartlist_t *matches = smartlist_new();
+ for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) {
+ const char *flavname = networkstatus_get_flavor_name(flav);
+ smartlist_clear(matches);
+ consensus_cache_find_all(matches, cdm_cache_get(),
+ LABEL_FLAVOR, flavname);
+ consensus_cache_filter_list(matches, LABEL_DOCTYPE, DOCTYPE_CONSENSUS);
+ consensus_cache_entry_t *most_recent = sort_and_find_most_recent(matches);
+ if (! most_recent)
+ continue; // no consensuses.
+ const char *most_recent_sha3 =
+ consensus_cache_entry_get_value(most_recent,
+ LABEL_SHA3_DIGEST_UNCOMPRESSED);
+ if (BUG(most_recent_sha3 == NULL))
+ continue; // LCOV_EXCL_LINE
+ consensus_cache_filter_list(matches, LABEL_SHA3_DIGEST_UNCOMPRESSED,
+ most_recent_sha3);
+
+ // Everything that remains matches the most recent consensus of this
+ // flavor.
+ SMARTLIST_FOREACH_BEGIN(matches, consensus_cache_entry_t *, ent) {
+ const char *lv_compression =
+ consensus_cache_entry_get_value(ent, LABEL_COMPRESSION_TYPE);
+ compress_method_t method =
+ compression_method_get_by_name(lv_compression);
+ int pos = consensus_compression_method_pos(method);
+ if (pos < 0)
+ continue;
+ consensus_cache_entry_handle_free(latest_consensus[flav][pos]);
+ latest_consensus[flav][pos] = consensus_cache_entry_handle_new(ent);
+ } SMARTLIST_FOREACH_END(ent);
+ }
+ smartlist_free(matches);
+}
+
+/**
+ * Scan the cache for diffs, and add them to the hashtable.
+ */
+static void
+consdiffmgr_diffs_load(void)
+{
+ smartlist_t *diffs = smartlist_new();
+ consensus_cache_find_all(diffs, cdm_cache_get(),
+ LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF);
+ SMARTLIST_FOREACH_BEGIN(diffs, consensus_cache_entry_t *, diff) {
+ const char *lv_flavor =
+ consensus_cache_entry_get_value(diff, LABEL_FLAVOR);
+ if (!lv_flavor)
+ continue;
+ int flavor = networkstatus_parse_flavor_name(lv_flavor);
+ if (flavor < 0)
+ continue;
+ const char *lv_compression =
+ consensus_cache_entry_get_value(diff, LABEL_COMPRESSION_TYPE);
+ compress_method_t method = NO_METHOD;
+ if (lv_compression) {
+ method = compression_method_get_by_name(lv_compression);
+ if (method == UNKNOWN_METHOD) {
+ continue;
+ }
+ }
+
+ uint8_t from_sha3[DIGEST256_LEN];
+ uint8_t to_sha3[DIGEST256_LEN];
+ if (cdm_entry_get_sha3_value(from_sha3, diff, LABEL_FROM_SHA3_DIGEST)<0)
+ continue;
+ if (cdm_entry_get_sha3_value(to_sha3, diff, LABEL_TARGET_SHA3_DIGEST)<0)
+ continue;
+
+ cdm_diff_ht_set_status(flavor, from_sha3, to_sha3,
+ method,
+ CDM_DIFF_PRESENT,
+ consensus_cache_entry_handle_new(diff));
+ } SMARTLIST_FOREACH_END(diff);
+ smartlist_free(diffs);
+}
+
+/**
+ * Build new diffs as needed.
+ */
+void
+consdiffmgr_rescan(void)
+{
+ if (cdm_cache_dirty == 0)
+ return;
+
+ // Clean up here to make room for new diffs, and to ensure that older
+ // consensuses do not have any entries.
+ consdiffmgr_cleanup();
+
+ if (cdm_cache_loaded == 0) {
+ consdiffmgr_diffs_load();
+ consdiffmgr_consensus_load();
+ cdm_cache_loaded = 1;
+ }
+
+ for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) {
+ consdiffmgr_rescan_flavor_((consensus_flavor_t) flav);
+ }
+
+ cdm_cache_dirty = 0;
+}
+
+ /** Mainloop-event callback that simply runs consdiffmgr_rescan().  Neither
+  * <b>ev</b> nor <b>arg</b> is used. */
+ static void
+ consdiffmgr_rescan_cb(mainloop_event_t *ev, void *arg)
+ {
+   (void)arg;
+   (void)ev;
+
+   consdiffmgr_rescan();
+ }
+
+ /** Set the cache's dirty flag and activate the rescan event, which must
+  * already have been created. */
+ static void
+ mark_cdm_cache_dirty(void)
+ {
+   cdm_cache_dirty = 1;
+
+   /* The event is created by cdm_cache_init(); it has to exist by now. */
+   tor_assert(consdiffmgr_rescan_ev);
+   mainloop_event_activate(consdiffmgr_rescan_ev);
+ }
+
+/**
+ * Helper: compare two files by their from-valid-after and valid-after labels,
+ * trying to sort in ascending order by from-valid-after (when present) and
+ * valid-after (when not). Place everything that has neither label first in
+ * the list.
+ */
+static int
+compare_by_staleness_(const void **a, const void **b)
+{
+ const consensus_cache_entry_t *e1 = *a;
+ const consensus_cache_entry_t *e2 = *b;
+ const char *va1, *fva1, *va2, *fva2;
+ va1 = consensus_cache_entry_get_value(e1, LABEL_VALID_AFTER);
+ va2 = consensus_cache_entry_get_value(e2, LABEL_VALID_AFTER);
+ fva1 = consensus_cache_entry_get_value(e1, LABEL_FROM_VALID_AFTER);
+ fva2 = consensus_cache_entry_get_value(e2, LABEL_FROM_VALID_AFTER);
+
+ if (fva1)
+ va1 = fva1;
+ if (fva2)
+ va2 = fva2;
+
+ /* See note about iso-encoded values in compare_by_valid_after_. Also note
+ * that missing dates will get placed first. */
+ return strcmp_opt(va1, va2);
+}
+
+ /** Make sure that at least <b>n</b> unused filenames are available in the
+  * cache, deleting progressively more until there are.  (We have to keep
+  * track of the number of filenames because of the way that the seccomp2
+  * cache works.)
+  *
+  * Return 0 on success, -1 on failure.
+  **/
+ static int
+ consdiffmgr_ensure_space_for_files(int n)
+ {
+   consensus_cache_t *cache = cdm_cache_get();
+
+   /* First: perhaps there are already enough unused filenames. */
+   if (consensus_cache_get_n_filenames_available(cache) >= n)
+     return 0;
+
+   /* Second: try a cheap deletion of stuff that's waiting to get deleted. */
+   consensus_cache_delete_pending(cache, 0);
+   if (consensus_cache_get_n_filenames_available(cache) >= n)
+     return 0;
+
+   /* Third: get more assertive.  Clean out unused stuff, and force-remove
+    * the files that we can. */
+   consdiffmgr_cleanup();
+   consensus_cache_delete_pending(cache, 1);
+   const int n_to_remove = n - consensus_cache_get_n_filenames_available(cache);
+   if (n_to_remove <= 0)
+     return 0;
+
+   /* Last resort: throw out objects that will be missed, in the order given
+    * by compare_by_staleness_, until we have marked enough of them. */
+   smartlist_t *candidates = smartlist_new();
+   int n_marked = 0;
+   consensus_cache_find_all(candidates, cache, NULL, NULL);
+   smartlist_sort(candidates, compare_by_staleness_);
+   SMARTLIST_FOREACH_BEGIN(candidates, consensus_cache_entry_t *, ent) {
+     consensus_cache_entry_mark_for_removal(ent);
+     ++n_marked;
+     if (n_marked >= n_to_remove)
+       break;
+   } SMARTLIST_FOREACH_END(ent);
+   smartlist_free(candidates);
+
+   consensus_cache_delete_pending(cache, 1);
+
+   /* If we're allowed to throw extra files into the cache, let's do so
+    * rather than getting upset. */
+   if (consensus_cache_may_overallocate(cache))
+     return 0;
+
+   return BUG(n_marked < n_to_remove) ? -1 : 0;
+ }
+
+/**
+ * Set consensus cache flags on the objects in this consdiffmgr.
+ */
+static void
+consdiffmgr_set_cache_flags(void)
+{
+ /* Right now, we just mark the consensus objects for aggressive release,
+ * so that they get mmapped for as little time as possible. */
+ smartlist_t *objects = smartlist_new();
+ consensus_cache_find_all(objects, cdm_cache_get(), LABEL_DOCTYPE,
+ DOCTYPE_CONSENSUS);
+ SMARTLIST_FOREACH_BEGIN(objects, consensus_cache_entry_t *, ent) {
+ consensus_cache_entry_mark_for_aggressive_release(ent);
+ } SMARTLIST_FOREACH_END(ent);
+ smartlist_free(objects);
+}
+
+/**
+ * Called before shutdown: drop all storage held by the consdiffmgr.c module.
+ */
+void
+consdiffmgr_free_all(void)
+{
+ cdm_diff_t **diff, **next;
+ for (diff = HT_START(cdm_diff_ht, &cdm_diff_ht); diff; diff = next) {
+ cdm_diff_t *this = *diff;
+ next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff);
+ cdm_diff_free(this);
+ }
+ int i;
+ unsigned j;
+ for (i = 0; i < N_CONSENSUS_FLAVORS; ++i) {
+ for (j = 0; j < n_consensus_compression_methods(); ++j) {
+ consensus_cache_entry_handle_free(latest_consensus[i][j]);
+ }
+ }
+ memset(latest_consensus, 0, sizeof(latest_consensus));
+ consensus_cache_free(cons_diff_cache);
+ cons_diff_cache = NULL;
+ mainloop_event_free(consdiffmgr_rescan_ev);
+}
+
+/* =====
+ Thread workers
+ =====*/
+
+ /** One compressed form of an object, as filled in by compress_multiple()
+ * and consumed by store_multiple(). */
+ typedef struct compressed_result_t {
+ /**
+ * Output: labels for this compressed object. compress_multiple() builds
+ * them from a copy of its input labels, with the SHA3 digest of the body
+ * and the compression type prepended.
+ */
+ config_line_t *labels;
+ /**
+ * Output: Body of the object, as compressed.
+ */
+ uint8_t *body;
+ /**
+ * Output: length of <b>body</b>, in bytes.
+ */
+ size_t bodylen;
+ } compressed_result_t;
+
+/**
+ * Compress the bytestring <b>input</b> of length <b>len</b> using the
+ * <n>n_methods</b> compression methods listed in the array <b>methods</b>.
+ *
+ * For each successful compression, set the fields in the <b>results_out</b>
+ * array in the position corresponding to the compression method. Use
+ * <b>labels_in</b> as a basis for the labels of the result.
+ *
+ * Return 0 if all compression succeeded; -1 if any failed.
+ */
+static int
+compress_multiple(compressed_result_t *results_out, int n_methods,
+ const compress_method_t *methods,
+ const uint8_t *input, size_t len,
+ const config_line_t *labels_in)
+{
+ int rv = 0;
+ int i;
+ for (i = 0; i < n_methods; ++i) {
+ compress_method_t method = methods[i];
+ const char *methodname = compression_method_get_name(method);
+ char *result;
+ size_t sz;
+ if (0 == tor_compress(&result, &sz, (const char*)input, len, method)) {
+ results_out[i].body = (uint8_t*)result;
+ results_out[i].bodylen = sz;
+ results_out[i].labels = config_lines_dup(labels_in);
+ cdm_labels_prepend_sha3(&results_out[i].labels, LABEL_SHA3_DIGEST,
+ results_out[i].body,
+ results_out[i].bodylen);
+ config_line_prepend(&results_out[i].labels,
+ LABEL_COMPRESSION_TYPE,
+ methodname);
+ } else {
+ rv = -1;
+ }
+ }
+ return rv;
+}
+
+/**
+ * Take the <b>n</b> compressed_result_t values in <b>results</b> (as filled
+ * in by compress_multiple), add each complete one to the consdiffmgr cache,
+ * and put a handle for every stored entry into the matching slot of
+ * <b>handles_out</b>.  Slots for results that are incomplete or could not
+ * be stored are left as the caller set them.  <b>description</b> is used
+ * only in log messages.
+ *
+ * Return CDM_DIFF_PRESENT if any was stored, and CDM_DIFF_ERROR if none
+ * was stored.
+ */
+static cdm_diff_status_t
+store_multiple(consensus_cache_entry_handle_t **handles_out,
+               int n,
+               const compress_method_t *methods,
+               const compressed_result_t *results,
+               const char *description)
+{
+  cdm_diff_status_t outcome = CDM_DIFF_ERROR;
+  int idx;
+
+  consdiffmgr_ensure_space_for_files(n);
+
+  for (idx = 0; idx < n; ++idx) {
+    const compressed_result_t *res = &results[idx];
+    const char *methodname = compression_method_get_name(methods[idx]);
+
+    /* A result missing its body, length, or labels is not stored. */
+    if (!res->body || !res->bodylen || !res->labels)
+      continue;
+
+    log_info(LD_DIRSERV, "Adding %s, compressed with %s",
+             description, methodname);
+
+    consensus_cache_entry_t *ent =
+      consensus_cache_add(cdm_cache_get(),
+                          res->labels,
+                          res->body,
+                          res->bodylen);
+    if (ent != NULL) {
+      outcome = CDM_DIFF_PRESENT;
+      handles_out[idx] = consensus_cache_entry_handle_new(ent);
+      /* The handle is what we keep; give back our own reference. */
+      consensus_cache_entry_decref(ent);
+    } else {
+      static ratelim_t cant_store_ratelim = RATELIM_INIT(5*60);
+      log_fn_ratelim(&cant_store_ratelim, LOG_WARN, LD_FS,
+                     "Unable to store object %s compressed with %s.",
+                     description, methodname);
+    }
+  }
+  return outcome;
+}
+
+/**
+ * An object passed to a worker thread that will try to produce a consensus
+ * diff.
+ */
+typedef struct consensus_diff_worker_job_t {
+ /**
+ * Input: The consensus to compute the diff from. Holds a reference to the
+ * cache entry, which must not be released until the job is passed back to
+ * the main thread. The body must be mapped into memory in the main thread.
+ */
+ consensus_cache_entry_t *diff_from;
+ /**
+ * Input: The consensus to compute the diff to. Holds a reference to the
+ * cache entry, which must not be released until the job is passed back to
+ * the main thread. The body must be mapped into memory in the main thread.
+ */
+ consensus_cache_entry_t *diff_to;
+
+ /** Output: labels and bodies, one slot per entry of compress_diffs_with.
+ * Slot 0 receives the diff itself; the remaining slots are filled in by
+ * compress_multiple(). A slot left zeroed means nothing was produced. */
+ compressed_result_t out[ARRAY_LENGTH(compress_diffs_with)];
+} consensus_diff_worker_job_t;
+
+/** Produce the uncompressed contents of the cache entry <b>ent</b>.
+ *
+ * If the entry carries a compression-type label, the method named there is
+ * used to uncompress the body; otherwise NO_METHOD is passed to
+ * tor_uncompress(), so the body is simply copied.  The result is placed in
+ * a newly allocated string in *<b>out</b>, with its size in
+ * *<b>outlen</b>.
+ *
+ * Return 0 on success, -1 on failure (including failure to get the body).
+ *
+ * In all cases, the output is nul-terminated. */
+STATIC int
+uncompress_or_copy(char **out, size_t *outlen,
+                   consensus_cache_entry_t *ent)
+{
+  const uint8_t *raw;
+  size_t raw_len;
+  if (consensus_cache_entry_get_body(ent, &raw, &raw_len) < 0)
+    return -1;
+
+  const char *method_label =
+    consensus_cache_entry_get_value(ent, LABEL_COMPRESSION_TYPE);
+  const compress_method_t method =
+    method_label ? compression_method_get_by_name(method_label) : NO_METHOD;
+
+  return tor_uncompress(out, outlen, (const char *)raw, raw_len,
+                        method, 1, LOG_WARN);
+}
+
+/**
+ * Worker function.  This function runs inside a worker thread and receives
+ * a consensus_diff_worker_job_t as its input.
+ *
+ * On success, job->out[0] holds the uncompressed diff together with its
+ * labels, and the remaining slots hold whatever compress_multiple() was
+ * able to produce.  Every failure path returns before job->out is written,
+ * so a failed job reaches the reply function with blank outputs.
+ *
+ * Always returns WQ_RPL_REPLY.
+ */
+static workqueue_reply_t
+consensus_diff_worker_threadfn(void *state_, void *work_)
+{
+  (void)state_;
+  consensus_diff_worker_job_t *job = work_;
+  const uint8_t *diff_from, *diff_to;
+  size_t len_from, len_to;
+  int r;
+  /* We need to have the body already mapped into RAM here.
+   */
+  r = consensus_cache_entry_get_body(job->diff_from, &diff_from, &len_from);
+  if (BUG(r < 0))
+    return WQ_RPL_REPLY; // LCOV_EXCL_LINE
+  r = consensus_cache_entry_get_body(job->diff_to, &diff_to, &len_to);
+  if (BUG(r < 0))
+    return WQ_RPL_REPLY; // LCOV_EXCL_LINE
+
+  const char *lv_to_valid_after =
+    consensus_cache_entry_get_value(job->diff_to, LABEL_VALID_AFTER);
+  const char *lv_to_fresh_until =
+    consensus_cache_entry_get_value(job->diff_to, LABEL_FRESH_UNTIL);
+  const char *lv_to_valid_until =
+    consensus_cache_entry_get_value(job->diff_to, LABEL_VALID_UNTIL);
+  const char *lv_to_signatories =
+    consensus_cache_entry_get_value(job->diff_to, LABEL_SIGNATORIES);
+  const char *lv_from_valid_after =
+    consensus_cache_entry_get_value(job->diff_from, LABEL_VALID_AFTER);
+  const char *lv_from_digest =
+    consensus_cache_entry_get_value(job->diff_from,
+                                    LABEL_SHA3_DIGEST_AS_SIGNED);
+  const char *lv_from_flavor =
+    consensus_cache_entry_get_value(job->diff_from, LABEL_FLAVOR);
+  const char *lv_to_flavor =
+    consensus_cache_entry_get_value(job->diff_to, LABEL_FLAVOR);
+  const char *lv_to_digest =
+    consensus_cache_entry_get_value(job->diff_to,
+                                    LABEL_SHA3_DIGEST_UNCOMPRESSED);
+
+  if (! lv_from_digest) {
+    /* This isn't a bug right now, since it can happen if you're migrating
+     * from an older version of master to a newer one.  The older ones didn't
+     * annotate their stored consensus objects with sha3-digest-as-signed.
+     */
+    return WQ_RPL_REPLY; // LCOV_EXCL_LINE
+  }
+
+  /* All these values are mandatory on the input.  lv_to_digest is included
+   * because it is handed to config_line_prepend() unconditionally below;
+   * the optional labels are each checked where they are used. */
+  if (BUG(!lv_to_valid_after) ||
+      BUG(!lv_from_valid_after) ||
+      BUG(!lv_from_flavor) ||
+      BUG(!lv_to_flavor) ||
+      BUG(!lv_to_digest)) {
+    return WQ_RPL_REPLY; // LCOV_EXCL_LINE
+  }
+  /* The flavors need to match */
+  if (BUG(strcmp(lv_from_flavor, lv_to_flavor))) {
+    return WQ_RPL_REPLY; // LCOV_EXCL_LINE
+  }
+
+  char *consensus_diff;
+  {
+    /* Nul-terminated, uncompressed copies of both inputs; they are needed
+     * only long enough to generate the diff. */
+    char *diff_from_nt = NULL, *diff_to_nt = NULL;
+    size_t diff_from_nt_len, diff_to_nt_len;
+
+    if (uncompress_or_copy(&diff_from_nt, &diff_from_nt_len,
+                           job->diff_from) < 0) {
+      return WQ_RPL_REPLY;
+    }
+    if (uncompress_or_copy(&diff_to_nt, &diff_to_nt_len,
+                           job->diff_to) < 0) {
+      tor_free(diff_from_nt);
+      return WQ_RPL_REPLY;
+    }
+    tor_assert(diff_from_nt);
+    tor_assert(diff_to_nt);
+
+    // XXXX ugh; this is going to calculate the SHA3 of both its
+    // XXXX inputs again, even though we already have that. Maybe it's time
+    // XXXX to change the API here?
+    consensus_diff = consensus_diff_generate(diff_from_nt, diff_to_nt);
+    tor_free(diff_from_nt);
+    tor_free(diff_to_nt);
+  }
+  if (!consensus_diff) {
+    /* Couldn't generate consensus; we'll leave the reply blank. */
+    return WQ_RPL_REPLY;
+  }
+
+  /* Compress the results and send the reply.  Slot 0 is the uncompressed
+   * diff; it takes ownership of consensus_diff. */
+  tor_assert(compress_diffs_with[0] == NO_METHOD);
+  size_t difflen = strlen(consensus_diff);
+  job->out[0].body = (uint8_t *) consensus_diff;
+  job->out[0].bodylen = difflen;
+
+  /* Labels shared by every stored form of this diff. */
+  config_line_t *common_labels = NULL;
+  if (lv_to_valid_until)
+    config_line_prepend(&common_labels, LABEL_VALID_UNTIL, lv_to_valid_until);
+  if (lv_to_fresh_until)
+    config_line_prepend(&common_labels, LABEL_FRESH_UNTIL, lv_to_fresh_until);
+  if (lv_to_signatories)
+    config_line_prepend(&common_labels, LABEL_SIGNATORIES, lv_to_signatories);
+  cdm_labels_prepend_sha3(&common_labels,
+                          LABEL_SHA3_DIGEST_UNCOMPRESSED,
+                          job->out[0].body,
+                          job->out[0].bodylen);
+  config_line_prepend(&common_labels, LABEL_FROM_VALID_AFTER,
+                      lv_from_valid_after);
+  config_line_prepend(&common_labels, LABEL_VALID_AFTER,
+                      lv_to_valid_after);
+  config_line_prepend(&common_labels, LABEL_FLAVOR, lv_from_flavor);
+  config_line_prepend(&common_labels, LABEL_FROM_SHA3_DIGEST,
+                      lv_from_digest);
+  config_line_prepend(&common_labels, LABEL_TARGET_SHA3_DIGEST,
+                      lv_to_digest);
+  config_line_prepend(&common_labels, LABEL_DOCTYPE,
+                      DOCTYPE_CONSENSUS_DIFF);
+
+  job->out[0].labels = config_lines_dup(common_labels);
+  cdm_labels_prepend_sha3(&job->out[0].labels,
+                          LABEL_SHA3_DIGEST,
+                          job->out[0].body,
+                          job->out[0].bodylen);
+
+  /* The remaining slots are the compressed forms.  The return value is
+   * ignored: a method that fails simply leaves its slot blank. */
+  compress_multiple(job->out+1,
+                    n_diff_compression_methods()-1,
+                    compress_diffs_with+1,
+                    (const uint8_t*)consensus_diff, difflen, common_labels);
+
+  config_free_lines(common_labels);
+  return WQ_RPL_REPLY;
+}
+
+#define consensus_diff_worker_job_free(job)               \
+  FREE_AND_NULL(consensus_diff_worker_job_t,              \
+                consensus_diff_worker_job_free_, (job))
+
+/**
+ * Helper: free <b>job</b>, including the labels and body of each output
+ * slot, and drop the references it holds on its two input cache entries.
+ * Does nothing if <b>job</b> is NULL.
+ */
+static void
+consensus_diff_worker_job_free_(consensus_diff_worker_job_t *job)
+{
+  unsigned slot;
+
+  if (job == NULL)
+    return;
+
+  for (slot = 0; slot < n_diff_compression_methods(); ++slot) {
+    compressed_result_t *res = &job->out[slot];
+    config_free_lines(res->labels);
+    tor_free(res->body);
+  }
+  consensus_cache_entry_decref(job->diff_from);
+  consensus_cache_entry_decref(job->diff_to);
+  tor_free(job);
+}
+
+/**
+ * Worker function: This function runs in the main thread, and receives
+ * a consensus_diff_worker_job_t that the worker thread has already
+ * processed.
+ *
+ * Stores whatever outputs the worker produced, records the per-method
+ * result in the diff hashtable (when a key can be built from the job's
+ * labels), and finally frees the job.
+ */
+static void
+consensus_diff_worker_replyfn(void *work_)
+{
+ tor_assert(in_main_thread());
+ tor_assert(work_);
+
+ consensus_diff_worker_job_t *job = work_;
+
+ const char *lv_from_digest =
+ consensus_cache_entry_get_value(job->diff_from,
+ LABEL_SHA3_DIGEST_AS_SIGNED);
+ const char *lv_to_digest =
+ consensus_cache_entry_get_value(job->diff_to,
+ LABEL_SHA3_DIGEST_UNCOMPRESSED);
+ const char *lv_flavor =
+ consensus_cache_entry_get_value(job->diff_to, LABEL_FLAVOR);
+ if (BUG(lv_from_digest == NULL))
+ lv_from_digest = "???"; // LCOV_EXCL_LINE
+ if (BUG(lv_to_digest == NULL))
+ lv_to_digest = "???"; // LCOV_EXCL_LINE
+
+ uint8_t from_sha3[DIGEST256_LEN];
+ uint8_t to_sha3[DIGEST256_LEN];
+ int flav = -1;
+ int cache = 1; /* cleared below if any part of the hashtable key is
+ * unavailable */
+ if (BUG(cdm_entry_get_sha3_value(from_sha3, job->diff_from,
+ LABEL_SHA3_DIGEST_AS_SIGNED) < 0))
+ cache = 0;
+ if (BUG(cdm_entry_get_sha3_value(to_sha3, job->diff_to,
+ LABEL_SHA3_DIGEST_UNCOMPRESSED) < 0))
+ cache = 0;
+ if (BUG(lv_flavor == NULL)) {
+ cache = 0;
+ } else if ((flav = networkstatus_parse_flavor_name(lv_flavor)) < 0) {
+ cache = 0;
+ }
+
+ consensus_cache_entry_handle_t *handles[ARRAY_LENGTH(compress_diffs_with)];
+ memset(handles, 0, sizeof(handles)); /* NULL: nothing stored for that
+ * method */
+
+ char description[128];
+ tor_snprintf(description, sizeof(description),
+ "consensus diff from %s to %s",
+ lv_from_digest, lv_to_digest);
+
+ int status = store_multiple(handles,
+ n_diff_compression_methods(),
+ compress_diffs_with,
+ job->out,
+ description);
+
+ if (status != CDM_DIFF_PRESENT) {
+ /* Failure! Nothing to do but complain */
+ log_warn(LD_DIRSERV,
+ "Worker was unable to compute consensus diff "
+ "from %s to %s", lv_from_digest, lv_to_digest);
+ /* Cache this error so we don't try to compute this one again. */
+ status = CDM_DIFF_ERROR;
+ }
+
+ unsigned u;
+ for (u = 0; u < ARRAY_LENGTH(handles); ++u) {
+ compress_method_t method = compress_diffs_with[u];
+ if (cache) {
+ consensus_cache_entry_handle_t *h = handles[u];
+ int this_status = status;
+ if (h == NULL) { /* this method produced nothing, even if others did */
+ this_status = CDM_DIFF_ERROR;
+ }
+ tor_assert_nonfatal(h != NULL || this_status == CDM_DIFF_ERROR);
+ cdm_diff_ht_set_status(flav, from_sha3, to_sha3, method, this_status, h);
+ } else {
+ consensus_cache_entry_handle_free(handles[u]); /* not recorded anywhere,
+ * so release it here */
+ }
+ }
+
+ consensus_diff_worker_job_free(job); /* also drops the job's references on
+ * diff_from and diff_to */
+}
+
+/**
+ * Hand the computation of the diff from <b>diff_from</b> to <b>diff_to</b>
+ * to a worker thread.  The job takes a reference on each entry for as long
+ * as it exists.
+ *
+ * Return 0 if the job was queued; return -1 (after freeing the job and
+ * releasing its references) if either body could not be fetched or the
+ * work could not be queued.
+ */
+static int
+consensus_diff_queue_diff_work(consensus_cache_entry_t *diff_from,
+                               consensus_cache_entry_t *diff_to)
+{
+  tor_assert(in_main_thread());
+
+  consensus_diff_worker_job_t *job = tor_malloc_zero(sizeof(*job));
+  consensus_cache_entry_incref(diff_from);
+  consensus_cache_entry_incref(diff_to);
+  job->diff_from = diff_from;
+  job->diff_to = diff_to;
+
+  /* Make sure body is mapped.  Both entries are always touched; only the
+   * return codes matter here. */
+  const uint8_t *ignored_body;
+  size_t ignored_len;
+  const int from_rc =
+    consensus_cache_entry_get_body(diff_from, &ignored_body, &ignored_len);
+  const int to_rc =
+    consensus_cache_entry_get_body(diff_to, &ignored_body, &ignored_len);
+
+  if (from_rc >= 0 && to_rc >= 0 &&
+      cpuworker_queue_work(WQ_PRI_LOW,
+                           consensus_diff_worker_threadfn,
+                           consensus_diff_worker_replyfn,
+                           job) != NULL) {
+    return 0;
+  }
+
+  consensus_diff_worker_job_free(job); // includes decrefs.
+  return -1;
+}
+
+/**
+ * Holds requests and replies for consensus_compress_workers.
+ *
+ * The first four fields are inputs set up by
+ * consensus_queue_compression_work(); <b>out</b> is filled in by the
+ * worker function.
+ */
+typedef struct consensus_compress_worker_job_t {
+ char *consensus; /**< Input: private copy of the consensus text. */
+ size_t consensus_len; /**< Input: strlen() of <b>consensus</b>. */
+ consensus_flavor_t flavor; /**< Input: flavor of the parsed consensus. */
+ config_line_t *labels_in; /**< Input: labels derived from the parsed
+ * consensus (validity times, signatories). */
+ compressed_result_t out[ARRAY_LENGTH(compress_consensus_with)]; /**< Output:
+ * one slot per entry of compress_consensus_with. */
+} consensus_compress_worker_job_t;
+
+#define consensus_compress_worker_job_free(job)               \
+  FREE_AND_NULL(consensus_compress_worker_job_t,              \
+                consensus_compress_worker_job_free_, (job))
+
+/**
+ * Release <b>job</b> and everything it owns: the consensus text, the input
+ * labels, and the labels and body of each output slot.  Does nothing if
+ * <b>job</b> is NULL.
+ */
+static void
+consensus_compress_worker_job_free_(consensus_compress_worker_job_t *job)
+{
+  unsigned slot;
+
+  if (job == NULL)
+    return;
+
+  tor_free(job->consensus);
+  config_free_lines(job->labels_in);
+  for (slot = 0; slot < n_consensus_compression_methods(); ++slot) {
+    compressed_result_t *res = &job->out[slot];
+    config_free_lines(res->labels);
+    tor_free(res->body);
+  }
+  tor_free(job);
+}
+/**
+ * Worker function.  This function runs inside a worker thread and receives
+ * a consensus_compress_worker_job_t as its input.
+ *
+ * It extends a copy of the job's input labels with two SHA3 digests (of the
+ * whole text, and of its signed portion), the flavor name and the document
+ * type, then compresses the consensus with every method in
+ * compress_consensus_with, writing the results into job->out.
+ *
+ * Always returns WQ_RPL_REPLY.
+ */
+static workqueue_reply_t
+consensus_compress_worker_threadfn(void *state_, void *work_)
+{
+  (void)state_;
+  consensus_compress_worker_job_t *job = work_;
+  const char *text = job->consensus;
+  const size_t text_len = job->consensus_len;
+
+  config_line_t *labels = config_lines_dup(job->labels_in);
+  const char *flavname = networkstatus_get_flavor_name(job->flavor);
+
+  cdm_labels_prepend_sha3(&labels, LABEL_SHA3_DIGEST_UNCOMPRESSED,
+                          (const uint8_t *)text, text_len);
+
+  /* Digest of the signed portion; if its boundaries cannot be found, fall
+   * back to digesting the entire text. */
+  const char *signed_start, *signed_end;
+  if (router_get_networkstatus_v3_signed_boundaries(text,
+                                       &signed_start, &signed_end) < 0) {
+    signed_start = text;
+    signed_end = text + text_len;
+  }
+  cdm_labels_prepend_sha3(&labels, LABEL_SHA3_DIGEST_AS_SIGNED,
+                          (const uint8_t *)signed_start,
+                          signed_end - signed_start);
+
+  config_line_prepend(&labels, LABEL_FLAVOR, flavname);
+  config_line_prepend(&labels, LABEL_DOCTYPE, DOCTYPE_CONSENSUS);
+
+  /* The return value is ignored: a method that fails leaves its slot
+   * blank. */
+  compress_multiple(job->out,
+                    n_consensus_compression_methods(),
+                    compress_consensus_with,
+                    (const uint8_t*)text, text_len, labels);
+  config_free_lines(labels);
+  return WQ_RPL_REPLY;
+}
+
+/**
+ * Worker function: This function runs in the main thread, and receives
+ * a consensus_compress_worker_job_t that the worker thread has already
+ * processed.
+ *
+ * Stores the compressed results, marks the cache dirty, replaces the
+ * latest_consensus handle for each method that was stored, and frees the
+ * job.
+ */
+static void
+consensus_compress_worker_replyfn(void *work_)
+{
+  consensus_compress_worker_job_t *job = work_;
+
+  consensus_cache_entry_handle_t *stored[
+                               ARRAY_LENGTH(compress_consensus_with)];
+  memset(stored, 0, sizeof(stored));
+
+  store_multiple(stored,
+                 n_consensus_compression_methods(),
+                 compress_consensus_with,
+                 job->out,
+                 "consensus");
+  mark_cdm_cache_dirty();
+
+  const consensus_flavor_t f = job->flavor;
+  tor_assert((int)f < N_CONSENSUS_FLAVORS);
+
+  unsigned meth;
+  for (meth = 0; meth < ARRAY_LENGTH(stored); ++meth) {
+    /* Methods with nothing stored keep their previous handle. */
+    if (stored[meth] != NULL) {
+      consensus_cache_entry_handle_free(latest_consensus[f][meth]);
+      latest_consensus[f][meth] = stored[meth];
+    }
+  }
+
+  consensus_compress_worker_job_free(job);
+}
+
+/**
+ * If true, we compress in worker threads. Otherwise
+ * consensus_queue_compression_work() runs the worker and reply functions
+ * directly in the calling thread. Set by
+ * consdiffmgr_enable_background_compression().
+ */
+static int background_compression = 0;
+
+/**
+ * Build a job to compress <b>consensus</b> and store its compressed text in
+ * the cache.  The job gets a private copy of the text and labels taken from
+ * <b>as_parsed</b>: the valid-after, fresh-until and valid-until times and,
+ * when a voter list is present, the hex identity digests of the voters that
+ * signed.
+ *
+ * When background compression is enabled the job is queued for a worker
+ * thread; otherwise the worker and reply functions run right here.
+ *
+ * Return 0 on success, -1 if the job could not be queued.
+ */
+static int
+consensus_queue_compression_work(const char *consensus,
+                                 const networkstatus_t *as_parsed)
+{
+  tor_assert(consensus);
+  tor_assert(as_parsed);
+
+  consensus_compress_worker_job_t *job = tor_malloc_zero(sizeof(*job));
+  job->consensus = tor_strdup(consensus);
+  job->consensus_len = strlen(consensus);
+  job->flavor = as_parsed->flavor;
+
+  /* Validity times, appended in a fixed order. */
+  char timebuf[ISO_TIME_LEN+1];
+  format_iso_time_nospace(timebuf, as_parsed->valid_after);
+  config_line_append(&job->labels_in, LABEL_VALID_AFTER, timebuf);
+  format_iso_time_nospace(timebuf, as_parsed->fresh_until);
+  config_line_append(&job->labels_in, LABEL_FRESH_UNTIL, timebuf);
+  format_iso_time_nospace(timebuf, as_parsed->valid_until);
+  config_line_append(&job->labels_in, LABEL_VALID_UNTIL, timebuf);
+
+  if (as_parsed->voters) {
+    /* Comma-separated hex digests of every voter with a signature. */
+    smartlist_t *hex_ids = smartlist_new();
+    SMARTLIST_FOREACH_BEGIN(as_parsed->voters,
+                            networkstatus_voter_info_t *, vi) {
+      if (smartlist_len(vi->sigs) != 0) {
+        char hex[HEX_DIGEST_LEN+1];
+        base16_encode(hex, sizeof(hex), vi->identity_digest, DIGEST_LEN);
+        smartlist_add_strdup(hex_ids, hex);
+      }
+    } SMARTLIST_FOREACH_END(vi);
+    char *joined = smartlist_join_strings(hex_ids, ",", 0, NULL);
+    config_line_prepend(&job->labels_in, LABEL_SIGNATORIES, joined);
+    tor_free(joined);
+    SMARTLIST_FOREACH(hex_ids, char *, cp, tor_free(cp));
+    smartlist_free(hex_ids);
+  }
+
+  if (!background_compression) {
+    /* Run both halves inline; the reply function frees the job. */
+    consensus_compress_worker_threadfn(NULL, job);
+    consensus_compress_worker_replyfn(job);
+    return 0;
+  }
+
+  if (cpuworker_queue_work(WQ_PRI_LOW,
+                           consensus_compress_worker_threadfn,
+                           consensus_compress_worker_replyfn,
+                           job) == NULL) {
+    consensus_compress_worker_job_free(job);
+    return -1;
+  }
+  return 0;
+}
+
+/**
+ * Tell the consdiffmgr backend to compress consensuses in worker threads,
+ * by setting the background_compression flag. Nothing in this function
+ * turns the flag back off.
+ */
+void
+consdiffmgr_enable_background_compression(void)
+{
+ // This isn't the default behavior because it would break unit tests.
+ background_compression = 1;
+}
+
+/** Split the signatories label of the cached object <b>ent</b> on commas
+ * and add the pieces (hex-encoded voter digests) to <b>out</b> as newly
+ * allocated strings.  Return 0 on success, or -1, leaving <b>out</b>
+ * unchanged, if no signatories were recorded. */
+int
+consensus_cache_entry_get_voter_id_digests(const consensus_cache_entry_t *ent,
+                                           smartlist_t *out)
+{
+  tor_assert(ent);
+  tor_assert(out);
+
+  const char *signatories =
+    consensus_cache_entry_get_value(ent, LABEL_SIGNATORIES);
+  if (!signatories)
+    return -1;
+
+  smartlist_split_string(out, signatories, ",",
+                         SPLIT_SKIP_SPACE|SPLIT_STRIP_SPACE, 0);
+  return 0;
+}
+
+/** Parse the fresh-until label of cached object <b>ent</b> into
+ * *<b>out</b> and return 0.  Return -1 if the label is absent or cannot be
+ * parsed as a no-space ISO time. */
+int
+consensus_cache_entry_get_fresh_until(const consensus_cache_entry_t *ent,
+                                      time_t *out)
+{
+  tor_assert(ent);
+  tor_assert(out);
+
+  const char *stamp = consensus_cache_entry_get_value(ent, LABEL_FRESH_UNTIL);
+  if (stamp == NULL)
+    return -1;
+  return (parse_iso_time_nospace(stamp, out) < 0) ? -1 : 0;
+}
+
+/** Parse the valid-until label of cached object <b>ent</b> into
+ * *<b>out</b> and return 0.  Return -1 if the label is absent or cannot be
+ * parsed as a no-space ISO time. */
+int
+consensus_cache_entry_get_valid_until(const consensus_cache_entry_t *ent,
+                                      time_t *out)
+{
+  tor_assert(ent);
+  tor_assert(out);
+
+  const char *stamp = consensus_cache_entry_get_value(ent, LABEL_VALID_UNTIL);
+  if (stamp == NULL)
+    return -1;
+  return (parse_iso_time_nospace(stamp, out) < 0) ? -1 : 0;
+}
+
+/** Parse the valid-after label of cached object <b>ent</b> into
+ * *<b>out</b> and return 0.  Return -1 if the label is absent or cannot be
+ * parsed as a no-space ISO time. */
+int
+consensus_cache_entry_get_valid_after(const consensus_cache_entry_t *ent,
+                                      time_t *out)
+{
+  tor_assert(ent);
+  tor_assert(out);
+
+  const char *stamp = consensus_cache_entry_get_value(ent, LABEL_VALID_AFTER);
+  if (stamp == NULL)
+    return -1;
+  return (parse_iso_time_nospace(stamp, out) < 0) ? -1 : 0;
+}
diff --git a/src/feature/dircache/consdiffmgr.h b/src/feature/dircache/consdiffmgr.h
new file mode 100644
index 0000000000..66c3d65002
--- /dev/null
+++ b/src/feature/dircache/consdiffmgr.h
@@ -0,0 +1,75 @@
+/* Copyright (c) 2017-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_CONSDIFFMGR_H
+#define TOR_CONSDIFFMGR_H
+
+enum compress_method_t;
+
+/**
+ * Possible outcomes from trying to look up a given consensus or consensus
+ * diff; this is the return type of consdiffmgr_find_consensus() and
+ * consdiffmgr_find_diff_from().
+ */
+typedef enum consdiff_status_t {
+ CONSDIFF_AVAILABLE, /* presumably: the object was found -- TODO confirm */
+ CONSDIFF_NOT_FOUND, /* presumably: no such object is cached -- TODO confirm */
+ CONSDIFF_IN_PROGRESS, /* presumably: still being computed -- TODO confirm */
+} consdiff_status_t;
+
+typedef struct consdiff_cfg_t { /* Settings taken by consdiffmgr_configure(). */
+ int32_t cache_max_num; /* NOTE(review): looks like a cap on the number of
+ * cached objects -- confirm against consdiffmgr.c */
+} consdiff_cfg_t;
+
+struct consensus_cache_entry_t; // from conscache.h
+
+int consdiffmgr_add_consensus(const char *consensus,
+ const networkstatus_t *as_parsed);
+
+consdiff_status_t consdiffmgr_find_consensus(
+ struct consensus_cache_entry_t **entry_out,
+ consensus_flavor_t flavor,
+ enum compress_method_t method);
+
+consdiff_status_t consdiffmgr_find_diff_from(
+ struct consensus_cache_entry_t **entry_out,
+ consensus_flavor_t flavor,
+ int digest_type,
+ const uint8_t *digest,
+ size_t digestlen,
+ enum compress_method_t method);
+
+int consensus_cache_entry_get_voter_id_digests(
+ const struct consensus_cache_entry_t *ent,
+ smartlist_t *out);
+int consensus_cache_entry_get_fresh_until(
+ const struct consensus_cache_entry_t *ent,
+ time_t *out);
+int consensus_cache_entry_get_valid_until(
+ const struct consensus_cache_entry_t *ent,
+ time_t *out);
+int consensus_cache_entry_get_valid_after(
+ const struct consensus_cache_entry_t *ent,
+ time_t *out);
+
+void consdiffmgr_rescan(void);
+int consdiffmgr_cleanup(void);
+void consdiffmgr_enable_background_compression(void);
+void consdiffmgr_configure(const consdiff_cfg_t *cfg);
+struct sandbox_cfg_elem;
+int consdiffmgr_register_with_sandbox(struct sandbox_cfg_elem **cfg);
+void consdiffmgr_free_all(void);
+int consdiffmgr_validate(void);
+
+#ifdef CONSDIFFMGR_PRIVATE
+STATIC unsigned n_diff_compression_methods(void);
+STATIC unsigned n_consensus_compression_methods(void);
+STATIC consensus_cache_t *cdm_cache_get(void);
+STATIC consensus_cache_entry_t *cdm_cache_lookup_consensus(
+ consensus_flavor_t flavor, time_t valid_after);
+STATIC int cdm_entry_get_sha3_value(uint8_t *digest_out,
+ consensus_cache_entry_t *ent,
+ const char *label);
+STATIC int uncompress_or_copy(char **out, size_t *outlen,
+ consensus_cache_entry_t *ent);
+#endif /* defined(CONSDIFFMGR_PRIVATE) */
+
+#endif /* !defined(TOR_CONSDIFFMGR_H) */
diff --git a/src/feature/dircache/dircache.c b/src/feature/dircache/dircache.c
new file mode 100644
index 0000000000..872a88018f
--- /dev/null
+++ b/src/feature/dircache/dircache.c
@@ -0,0 +1,1740 @@
+/* Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#define DIRCACHE_PRIVATE
+
+#include "core/or/or.h"
+
+#include "app/config/config.h"
+#include "core/mainloop/connection.h"
+#include "core/or/relay.h"
+#include "feature/dirauth/dirvote.h"
+#include "feature/dirauth/authmode.h"
+#include "feature/dirauth/process_descs.h"
+#include "feature/dircache/conscache.h"
+#include "feature/dircache/consdiffmgr.h"
+#include "feature/dircache/dircache.h"
+#include "feature/dircache/dirserv.h"
+#include "feature/dircommon/directory.h"
+#include "feature/dircommon/fp_pair.h"
+#include "feature/hs/hs_cache.h"
+#include "feature/nodelist/authcert.h"
+#include "feature/nodelist/networkstatus.h"
+#include "feature/nodelist/routerlist.h"
+#include "feature/relay/routermode.h"
+#include "feature/rend/rendcache.h"
+#include "feature/stats/geoip_stats.h"
+#include "feature/stats/rephist.h"
+#include "lib/compress/compress.h"
+
+#include "feature/dircache/cached_dir_st.h"
+#include "feature/dircommon/dir_connection_st.h"
+#include "feature/nodelist/authority_cert_st.h"
+#include "feature/nodelist/networkstatus_st.h"
+#include "feature/nodelist/routerinfo_st.h"
+
+/** Maximum size, in bytes, for any directory object that we're accepting
+ * as an upload. */
+#define MAX_DIR_UL_SIZE ((1<<24)-1) /* 16MB-1 */
+
+/** HTTP cache control: how long do we tell proxies they can cache each
+ * kind of document we serve? */
+#define FULL_DIR_CACHE_LIFETIME (60*60)
+#define RUNNINGROUTERS_CACHE_LIFETIME (20*60)
+#define DIRPORTFRONTPAGE_CACHE_LIFETIME (20*60)
+#define NETWORKSTATUS_CACHE_LIFETIME (5*60)
+#define ROUTERDESC_CACHE_LIFETIME (30*60)
+#define ROUTERDESC_BY_DIGEST_CACHE_LIFETIME (48*60*60)
+#define ROBOTS_CACHE_LIFETIME (24*60*60)
+#define MICRODESC_CACHE_LIFETIME (48*60*60)
+
+/** Parse an HTTP request string <b>headers</b> of the form
+ * \verbatim
+ * "\%s [http[s]://]\%s HTTP/1..."
+ * \endverbatim
+ * If it's well-formed, store a newly allocated, nul-terminated copy of the
+ * second \%s in *<b>url</b>.  If that URL doesn't start with "/tor/",
+ * replace it with one that does, by prepending "/tor" (plus a "/" when the
+ * URL has no leading slash).  Return 0.
+ * Otherwise, return -1.
+ */
+STATIC int
+parse_http_url(const char *headers, char **url)
+{
+  char *command = NULL;
+  if (parse_http_command(headers, &command, url) < 0)
+    return -1;
+  /* Only the URL is wanted here; the command is discarded. */
+  tor_free(command);
+
+  if (strcmpstart(*url, "/tor/") != 0) {
+    const char *separator = ((*url)[0] == '/') ? "" : "/";
+    char *prefixed = NULL;
+    tor_asprintf(&prefixed, "/tor%s%s", separator, *url);
+    tor_free(*url);
+    *url = prefixed;
+  }
+  return 0;
+}
+
+/** Send a body-less HTTP/1.0 response on <b>conn</b>, with status code
+ * <b>status</b> and reason <b>reason_phrase</b>.  A NULL reason is
+ * replaced by "unspecified".  A Date: header is included only when we are
+ * running in server mode.
+ */
+static void
+write_short_http_response(dir_connection_t *conn, int status,
+                          const char *reason_phrase)
+{
+  char *date_header = NULL;
+  char *response = NULL;
+
+  IF_BUG_ONCE(!reason_phrase) { /* bullet-proofing */
+    reason_phrase = "unspecified";
+  }
+
+  if (server_mode(get_options())) {
+    /* include the Date: header, but only if we're a relay or bridge */
+    char stamp[RFC1123_TIME_LEN+1];
+    format_rfc1123_time(stamp, time(NULL));
+    tor_asprintf(&date_header, "Date: %s\r\n", stamp);
+  }
+
+  tor_asprintf(&response, "HTTP/1.0 %d %s\r\n%s\r\n",
+               status, reason_phrase, date_header ? date_header : "");
+
+  log_debug(LD_DIRSERV,"Wrote status 'HTTP/1.0 %d %s'", status, reason_phrase);
+  connection_buf_add(response, strlen(response), TO_CONN(conn));
+
+  tor_free(response);
+  tor_free(date_header);
+}
+
+/** Write the header for an HTTP/1.0 "200 OK" response onto
+ * <b>conn</b>-\>outbuf.
+ *
+ * If <b>type</b> is provided, it is the Content-Type.
+ * If <b>encoding</b> is provided, it is the Content-Encoding.
+ * If <b>length</b> is nonnegative, it is the Content-Length.
+ * If <b>extra_headers</b> is provided, it is added verbatim after the other
+ * headers.
+ * If <b>cache_lifetime</b> is greater than 0, an Expires header that far in
+ * the future is sent; if it is exactly 0, "Pragma: no-cache" is sent; if it
+ * is negative, no caching header is sent at all.
+ *
+ * The address header is left out when the peer address is local. */
+static void
+write_http_response_header_impl(dir_connection_t *conn, ssize_t length,
+                                const char *type, const char *encoding,
+                                const char *extra_headers,
+                                long cache_lifetime)
+{
+  tor_assert(conn);
+
+  const time_t now = time(NULL);
+  char datebuf[RFC1123_TIME_LEN+1];
+  format_rfc1123_time(datebuf, now);
+
+  buf_t *hdr = buf_new_with_capacity(1024);
+  buf_add_printf(hdr, "HTTP/1.0 200 OK\r\nDate: %s\r\n", datebuf);
+
+  if (type)
+    buf_add_printf(hdr, "Content-Type: %s\r\n", type);
+
+  if (!is_local_addr(&conn->base_.addr)) {
+    /* Don't report the source address for a nearby/private connection.
+     * Otherwise we tend to mis-report in cases where incoming ports are
+     * being forwarded to a Tor server running behind the firewall. */
+    buf_add_printf(hdr, X_ADDRESS_HEADER "%s\r\n", conn->base_.address);
+  }
+
+  if (encoding)
+    buf_add_printf(hdr, "Content-Encoding: %s\r\n", encoding);
+
+  if (length >= 0)
+    buf_add_printf(hdr, "Content-Length: %ld\r\n", (long)length);
+
+  if (cache_lifetime > 0) {
+    char expires[RFC1123_TIME_LEN+1];
+    format_rfc1123_time(expires, (time_t)(now + cache_lifetime));
+    /* We could say 'Cache-control: max-age=%d' here if we start doing
+     * http/1.1 */
+    buf_add_printf(hdr, "Expires: %s\r\n", expires);
+  } else if (cache_lifetime == 0) {
+    /* We could say 'Cache-control: no-cache' here if we start doing
+     * http/1.1 */
+    buf_add_string(hdr, "Pragma: no-cache\r\n");
+  }
+
+  if (extra_headers)
+    buf_add_string(hdr, extra_headers);
+
+  /* Blank line: end of headers. */
+  buf_add_string(hdr, "\r\n");
+
+  connection_buf_add_buf(TO_CONN(conn), hdr);
+  buf_free(hdr);
+}
+
+/** As write_http_response_header_impl, but derives the encoding and
+ * content-type from the compression <b>method</b>: the encoding is the
+ * method's name, and the content-type is "text/plain" for NO_METHOD and
+ * "application/octet-stream" for anything else. */
+static void
+write_http_response_headers(dir_connection_t *conn, ssize_t length,
+                            compress_method_t method,
+                            const char *extra_headers, long cache_lifetime)
+{
+  const char *encoding_name = compression_method_get_name(method);
+  const char *content_type =
+    (method == NO_METHOD) ? "text/plain" : "application/octet-stream";
+
+  write_http_response_header_impl(conn, length,
+                                  content_type,
+                                  encoding_name,
+                                  extra_headers,
+                                  cache_lifetime);
+}
+
+/** As write_http_response_headers, but assumes extra_headers is NULL: a
+ * convenience wrapper that forwards <b>conn</b>, <b>length</b>,
+ * <b>method</b> and <b>cache_lifetime</b> unchanged. */
+static void
+write_http_response_header(dir_connection_t *conn, ssize_t length,
+ compress_method_t method,
+ long cache_lifetime)
+{
+ write_http_response_headers(conn, length, method, NULL, cache_lifetime);
+}
+
+/** Array of compression methods to use (if supported) for serving
+ * precompressed data, ordered from best to worst. NO_METHOD comes last,
+ * making it the least-preferred choice. */
+static compress_method_t srv_meth_pref_precompressed[] = {
+ LZMA_METHOD,
+ ZSTD_METHOD,
+ ZLIB_METHOD,
+ GZIP_METHOD,
+ NO_METHOD
+};
+
+/** Array of compression methods to use (if supported) for serving
+ * streamed data, ordered from best to worst. Unlike the precompressed
+ * list, this one does not include LZMA_METHOD. */
+static compress_method_t srv_meth_pref_streaming_compression[] = {
+ ZSTD_METHOD,
+ ZLIB_METHOD,
+ GZIP_METHOD,
+ NO_METHOD
+};
+
+/** Parse the comma-separated compression methods in the Accept-Encoding
+ * header value <b>h</b> into a bitfield, where compression method x is
+ * supported if and only if 1 &lt;&lt; x is set.  Names that are not
+ * recognized are skipped, and the bit for NO_METHOD is always set. */
+STATIC unsigned
+parse_accept_encoding_header(const char *h)
+{
+  unsigned supported = (1u << NO_METHOD);
+  smartlist_t *tokens = smartlist_new();
+
+  smartlist_split_string(tokens, h, ",",
+             SPLIT_SKIP_SPACE|SPLIT_STRIP_SPACE|SPLIT_IGNORE_BLANK, 0);
+
+  SMARTLIST_FOREACH_BEGIN(tokens, const char *, tok) {
+    const compress_method_t method = compression_method_get_by_name(tok);
+    if (method == UNKNOWN_METHOD)
+      continue;
+    /* The method number must be usable as a shift count. */
+    tor_assert(((unsigned)method) < 8*sizeof(unsigned));
+    supported |= (1u << method);
+  } SMARTLIST_FOREACH_END(tok);
+
+  SMARTLIST_FOREACH(tokens, char *, tok, tor_free(tok));
+  smartlist_free(tokens);
+  return supported;
+}
+
+/** Decide whether a client would accept the consensus we have.
+ *
+ * Clients can say they only want a consensus if it's signed by more
+ * than half the authorities in a list.  They pass this list in
+ * the url as "...consensus/<b>fpr</b>+<b>fpr</b>+<b>fpr</b>".
+ *
+ * <b>fpr</b> may be an abbreviated fingerprint, i.e. only a left substring
+ * of the full authority identity digest.  (Only strings of even length,
+ * i.e. encodings of full bytes, are handled correctly.  In the case
+ * of an odd number of hex digits the last one is silently ignored.)
+ *
+ * Returns 1 if more than half of the requested authorities signed the
+ * consensus, or if the signers of <b>ent</b> are not known; 0 otherwise.
+ */
+static int
+client_likes_consensus(const struct consensus_cache_entry_t *ent,
+                       const char *want_url)
+{
+  smartlist_t *signers = smartlist_new();
+  if (consensus_cache_entry_get_voter_id_digests(ent, signers) != 0) {
+    /* We don't know the voters; assume the client won't mind. */
+    smartlist_free(signers);
+    return 1;
+  }
+
+  smartlist_t *wanted = smartlist_new();
+  dir_split_resource_into_fingerprints(want_url, wanted, NULL, 0);
+  const int threshold = smartlist_len(wanted)/2+1;
+  int matched = 0;
+
+  SMARTLIST_FOREACH_BEGIN(wanted, const char *, want_digest) {
+    /* Count this authority once if any signer's digest starts with the
+     * requested (possibly abbreviated) fingerprint. */
+    SMARTLIST_FOREACH_BEGIN(signers, const char *, digest) {
+      if (strcasecmpstart(digest, want_digest) == 0) {
+        ++matched;
+        break;
+      }
+    } SMARTLIST_FOREACH_END(digest);
+
+    /* early exit, if we already have enough */
+    if (matched >= threshold)
+      break;
+  } SMARTLIST_FOREACH_END(want_digest);
+
+  const int satisfied = (matched >= threshold);
+
+  SMARTLIST_FOREACH(wanted, char *, d, tor_free(d));
+  smartlist_free(wanted);
+  SMARTLIST_FOREACH(signers, char *, cp, tor_free(cp));
+  smartlist_free(signers);
+  return satisfied;
+}
+
+/** Return the compression level we should use for sending a compressed
+ * response of size <b>n_bytes</b>; a negative <b>n_bytes</b> means the size
+ * is unknown.  The size is only taken into account when we have been under
+ * memory pressure. */
+STATIC compression_level_t
+choose_compression_level(ssize_t n_bytes)
+{
+  if (! have_been_under_memory_pressure())
+    return HIGH_COMPRESSION; /* we have plenty of RAM. */
+
+  if (n_bytes < 0)
+    return HIGH_COMPRESSION; /* unknown; might be big. */
+
+  if (n_bytes < 1024)
+    return LOW_COMPRESSION;
+
+  if (n_bytes < 2048)
+    return MEDIUM_COMPRESSION;
+
+  return HIGH_COMPRESSION;
+}
+
+/** Information passed to handle a GET request. Filled in by
+ * directory_handle_command_get() and handed, read-only, to the matching
+ * entry of the URL table. */
+typedef struct get_handler_args_t {
+ /** Bitmask of compression methods that the client said (or implied) it
+ * supported. Bit (1u << method) is set for each usable method. */
+ unsigned compression_supported;
+ /** If nonzero, the request included an If-Modified-Since header with this
+ * value. */
+ time_t if_modified_since;
+ /** String containing the requested URL or resource, with any trailing
+ * ".z" already removed. */
+ const char *url;
+ /** String containing the HTTP headers */
+ const char *headers;
+} get_handler_args_t;
+
+/** Entry for handling an HTTP GET request.
+ *
+ * This entry matches a request if "string" is equal to the requested
+ * resource, or if "is_prefix" is true and "string" is a prefix of the
+ * requested resource.
+ *
+ * The 'handler' function is called to handle the request. It receives
+ * an arguments structure, and must return 0 on success or -1 if we should
+ * close the connection.
+ **/
+typedef struct url_table_ent_s {
+ /** URL (or URL prefix) that this entry handles. */
+ const char *string;
+ /** If true, "string" only needs to be a prefix of the requested URL. */
+ int is_prefix;
+ /** Function to call when this entry matches. */
+ int (*handler)(dir_connection_t *conn, const get_handler_args_t *args);
+} url_table_ent_t;
+
+/* Forward declarations of the GET handlers referenced by the URL table
+ * below. NOTE(review): handle_get_hs_descriptor_v3 is also used by the
+ * table but is not declared in this group; presumably it is declared
+ * elsewhere -- confirm. */
+static int handle_get_frontpage(dir_connection_t *conn,
+ const get_handler_args_t *args);
+static int handle_get_current_consensus(dir_connection_t *conn,
+ const get_handler_args_t *args);
+static int handle_get_status_vote(dir_connection_t *conn,
+ const get_handler_args_t *args);
+static int handle_get_microdesc(dir_connection_t *conn,
+ const get_handler_args_t *args);
+static int handle_get_descriptor(dir_connection_t *conn,
+ const get_handler_args_t *args);
+static int handle_get_keys(dir_connection_t *conn,
+ const get_handler_args_t *args);
+static int handle_get_hs_descriptor_v2(dir_connection_t *conn,
+ const get_handler_args_t *args);
+static int handle_get_robots(dir_connection_t *conn,
+ const get_handler_args_t *args);
+static int handle_get_networkstatus_bridges(dir_connection_t *conn,
+ const get_handler_args_t *args);
+
+/** Table for handling GET requests.
+ *
+ * Entries are tried in order and the first match wins, so a more specific
+ * prefix (such as ".../current/consensus") must come before a more general
+ * one (such as ".../current/"). The table ends with an all-NULL sentinel
+ * entry. */
+static const url_table_ent_t url_table[] = {
+ { "/tor/", 0, handle_get_frontpage },
+ { "/tor/status-vote/current/consensus", 1, handle_get_current_consensus },
+ { "/tor/status-vote/current/", 1, handle_get_status_vote },
+ { "/tor/status-vote/next/", 1, handle_get_status_vote },
+ { "/tor/micro/d/", 1, handle_get_microdesc },
+ { "/tor/server/", 1, handle_get_descriptor },
+ { "/tor/extra/", 1, handle_get_descriptor },
+ { "/tor/keys/", 1, handle_get_keys },
+ { "/tor/rendezvous2/", 1, handle_get_hs_descriptor_v2 },
+ { "/tor/hs/3/", 1, handle_get_hs_descriptor_v3 },
+ { "/tor/robots.txt", 0, handle_get_robots },
+ { "/tor/networkstatus-bridges", 0, handle_get_networkstatus_bridges },
+ { NULL, 0, NULL },
+};
+
+/** Helper function: called when a dirserver gets a complete HTTP GET
+ * request. Look for a request for a directory or for a rendezvous
+ * service descriptor. On finding one, write a response into
+ * conn-\>outbuf. If the request is unrecognized, send a 404.
+ * Return 0 if we handled this successfully, or -1 if we need to close
+ * the connection.
+ *
+ * <b>headers</b> holds the raw HTTP request headers. <b>req_body</b> and
+ * <b>req_body_len</b> are ignored. The return value is whatever the matching
+ * URL-table handler returns; a malformed request line (400) and an unknown
+ * URL (404) both count as handled and return 0. */
+MOCK_IMPL(STATIC int,
+directory_handle_command_get,(dir_connection_t *conn, const char *headers,
+ const char *req_body, size_t req_body_len))
+{
+ char *url, *url_mem, *header;
+ time_t if_modified_since = 0;
+ int zlib_compressed_in_url;
+ unsigned compression_methods_supported;
+
+ /* We ignore the body of a GET request. */
+ (void)req_body;
+ (void)req_body_len;
+
+ log_debug(LD_DIRSERV,"Received GET command.");
+
+ conn->base_.state = DIR_CONN_STATE_SERVER_WRITING;
+
+ if (parse_http_url(headers, &url) < 0) {
+ write_short_http_response(conn, 400, "Bad request");
+ return 0;
+ }
+ if ((header = http_get_header(headers, "If-Modified-Since: "))) {
+ struct tm tm;
+ if (parse_http_time(header, &tm) == 0) {
+ if (tor_timegm(&tm, &if_modified_since)<0) {
+ if_modified_since = 0;
+ } else {
+ log_debug(LD_DIRSERV, "If-Modified-Since is '%s'.", escaped(header));
+ }
+ }
+ /* The correct behavior on a malformed If-Modified-Since header is to
+ * act as if no If-Modified-Since header had been given. */
+ tor_free(header);
+ }
+ log_debug(LD_DIRSERV,"rewritten url as '%s'.", escaped(url));
+
+ /* Keep a handle on the allocation so that it is released at "done". */
+ url_mem = url;
+ {
+ size_t url_len = strlen(url);
+
+ /* A trailing ".z" is a request for zlib compression: strip it in place
+ * and remember that we saw it. */
+ zlib_compressed_in_url = url_len > 2 && !strcmp(url+url_len-2, ".z");
+ if (zlib_compressed_in_url) {
+ url[url_len-2] = '\0';
+ }
+ }
+
+ if ((header = http_get_header(headers, "Accept-Encoding: "))) {
+ compression_methods_supported = parse_accept_encoding_header(header);
+ tor_free(header);
+ } else {
+ /* No Accept-Encoding header: only uncompressed output is acceptable. */
+ compression_methods_supported = (1u << NO_METHOD);
+ }
+ if (zlib_compressed_in_url) {
+ compression_methods_supported |= (1u << ZLIB_METHOD);
+ }
+
+ /* Remove all methods that we don't both support. */
+ compression_methods_supported &= tor_compress_get_supported_method_bitmask();
+
+ get_handler_args_t args;
+ args.url = url;
+ args.headers = headers;
+ args.if_modified_since = if_modified_since;
+ args.compression_supported = compression_methods_supported;
+
+ /* Dispatch to the first URL-table entry that matches, either exactly or
+ * as a prefix depending on the entry. */
+ int i, result = -1;
+ for (i = 0; url_table[i].string; ++i) {
+ int match;
+ if (url_table[i].is_prefix) {
+ match = !strcmpstart(url, url_table[i].string);
+ } else {
+ match = !strcmp(url, url_table[i].string);
+ }
+ if (match) {
+ result = url_table[i].handler(conn, &args);
+ goto done;
+ }
+ }
+
+ /* we didn't recognize the url */
+ write_short_http_response(conn, 404, "Not found");
+ result = 0;
+
+ done:
+ tor_free(url_mem);
+ return result;
+}
+
+/** Helper function for GET / or GET /tor/
+ *
+ * Send the configured DirPort front page if there is one; otherwise answer
+ * with a 404. <b>args</b> is unused. Always returns 0.
+ */
+static int
+handle_get_frontpage(dir_connection_t *conn, const get_handler_args_t *args)
+{
+  (void) args; /* unused */
+  const char *page = get_dirportfrontpage();
+
+  if (!page) {
+    write_short_http_response(conn, 404, "Not found");
+    return 0;
+  }
+
+  /* Let's return a disclaimer page (users shouldn't use V1 anymore,
+     and caches don't fetch '/', so this is safe). */
+
+  /* [We don't check for write_bucket_low here, since we want to serve
+   * this page no matter what.] */
+  const size_t page_len = strlen(page);
+  write_http_response_header_impl(conn, page_len, "text/html", "identity",
+                                  NULL, DIRPORTFRONTPAGE_CACHE_LIFETIME);
+  connection_buf_add(page, page_len, TO_CONN(conn));
+  return 0;
+}
+
+/** Log a warning that the cached consensus <b>consensus</b> of type
+ * <b>flavor</b> (which may be NULL) is too old to be served to clients.
+ * The warning is rate-limited, using <b>now</b> as the current time, so
+ * that we do not log an entry for every request. Nothing is logged if the
+ * valid-until time of <b>consensus</b> cannot be read.
+ */
+static void
+warn_consensus_is_too_old(const struct consensus_cache_entry_t *consensus,
+                          const char *flavor, time_t now)
+{
+#define TOO_OLD_WARNING_INTERVAL (60*60)
+  static ratelim_t warned = RATELIM_INIT(TOO_OLD_WARNING_INTERVAL);
+  time_t valid_until;
+
+  if (consensus_cache_entry_get_valid_until(consensus, &valid_until) != 0)
+    return;
+
+  /* A NULL result means the rate limiter wants us to stay quiet. */
+  char *suppressed = rate_limit_log(&warned, now);
+  if (!suppressed)
+    return;
+
+  char timestamp[ISO_TIME_LEN+1];
+  format_local_iso_time(timestamp, valid_until);
+
+  const char *flavor_name = flavor ? flavor : "";
+  const char *separator = flavor ? " " : "";
+  log_warn(LD_DIRSERV, "Our %s%sconsensus is too old, so we will not "
+           "serve it to clients. It was valid until %s local time and we "
+           "continued to serve it for up to 24 hours after it expired.%s",
+           flavor_name, separator, timestamp, suppressed);
+  tor_free(suppressed);
+}
+
+/**
+ * Decode one hex-encoded sha3-256 digest from <b>hex</b> into the
+ * DIGEST256_LEN-byte buffer <b>digest</b>. Return 0 on success. On failure,
+ * log (at protocol-warning severity) that the bad hash came from
+ * <b>location</b> and that we are taking <b>action</b> with it, and return
+ * -1.
+ */
+static int
+parse_one_diff_hash(uint8_t *digest, const char *hex, const char *location,
+                    const char *action)
+{
+  const int n_decoded = base16_decode((char*)digest, DIGEST256_LEN,
+                                      hex, strlen(hex));
+
+  if (n_decoded != DIGEST256_LEN) {
+    log_fn(LOG_PROTOCOL_WARN, LD_DIR,
+           "%s contained bogus digest %s; %s.",
+           location, escaped(hex), action);
+    return -1;
+  }
+  return 0;
+}
+
+/** If there is an X-Or-Diff-From-Consensus header included in
+ * <b>headers</b>, set <b>digests_out</b> to a new smartlist containing every
+ * 256-bit hex-encoded digest listed in that header and return 0. Entries
+ * that fail to parse are logged and skipped, so the list may be empty.
+ * Otherwise (no such header) leave <b>digests_out</b> untouched and return
+ * -1. */
+static int
+parse_or_diff_from_header(smartlist_t **digests_out, const char *headers)
+{
+  char *value = http_get_header(headers, X_OR_DIFF_FROM_CONSENSUS_HEADER);
+  if (!value)
+    return -1;
+
+  smartlist_t *tokens = smartlist_new();
+  *digests_out = smartlist_new();
+  smartlist_split_string(tokens, value, " ",
+                         SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, -1);
+
+  SMARTLIST_FOREACH_BEGIN(tokens, const char *, token) {
+    uint8_t raw[DIGEST256_LEN];
+    const int bad = parse_one_diff_hash(raw, token,
+                                        "X-Or-Diff-From-Consensus header",
+                                        "ignoring");
+    if (bad)
+      continue;
+    smartlist_add(*digests_out, tor_memdup(raw, sizeof(raw)));
+  } SMARTLIST_FOREACH_END(token);
+
+  SMARTLIST_FOREACH(tokens, char *, cp, tor_free(cp));
+  smartlist_free(tokens);
+  tor_free(value);
+  return 0;
+}
+
+/** Fallback compression method. The fallback compression method is used in
+ * case a client requests a non-compressed document. We only store compressed
+ * documents, so we use this compression method to fetch the document and let
+ * the spooling system do the streaming decompression.
+ *
+ * Used by find_best_diff() and find_best_consensus() when none of the
+ * methods requested by the client yields a stored document.
+ */
+#define FALLBACK_COMPRESS_METHOD ZLIB_METHOD
+
+/**
+ * Look for the best consensus diff with which to answer a client that asked
+ * for a diff from one of the consensuses in <b>digests</b> to the current
+ * consensus of flavor <b>flav</b>. <b>compression_methods</b> is the bitfield
+ * of compression methods the client supports.
+ *
+ * On success, return the cache entry and store the method it is compressed
+ * with in <b>compression_used_out</b>. If no method from the bitfield
+ * yields a diff, try FALLBACK_COMPRESS_METHOD. Return NULL, leaving
+ * <b>compression_used_out</b> untouched, if no diff is available.
+ */
+static struct consensus_cache_entry_t *
+find_best_diff(const smartlist_t *digests, int flav,
+               unsigned compression_methods,
+               compress_method_t *compression_used_out)
+{
+  struct consensus_cache_entry_t *entry = NULL;
+  unsigned idx;
+
+  /* First pass: for each base digest in turn, walk our preference list and
+   * take the first method that the client accepts and we have stored. */
+  SMARTLIST_FOREACH_BEGIN(digests, const uint8_t *, base_digest) {
+    for (idx = 0; idx < ARRAY_LENGTH(srv_meth_pref_precompressed); ++idx) {
+      const compress_method_t candidate = srv_meth_pref_precompressed[idx];
+      const int client_accepts =
+        (compression_methods & (1u<<candidate)) != 0;
+
+      if (client_accepts &&
+          consdiffmgr_find_diff_from(&entry, flav, DIGEST_SHA3_256,
+                                     base_digest, DIGEST256_LEN,
+                                     candidate) == CONSDIFF_AVAILABLE) {
+        tor_assert_nonfatal(entry);
+        *compression_used_out = candidate;
+        return entry;
+      }
+    }
+  } SMARTLIST_FOREACH_END(base_digest);
+
+  /* Second pass: settle for the fallback method. */
+  SMARTLIST_FOREACH_BEGIN(digests, const uint8_t *, base_digest) {
+    const consdiff_status_t status =
+      consdiffmgr_find_diff_from(&entry, flav, DIGEST_SHA3_256, base_digest,
+                                 DIGEST256_LEN, FALLBACK_COMPRESS_METHOD);
+    if (status == CONSDIFF_AVAILABLE) {
+      tor_assert_nonfatal(entry);
+      *compression_used_out = FALLBACK_COMPRESS_METHOD;
+      return entry;
+    }
+  } SMARTLIST_FOREACH_END(base_digest);
+
+  return NULL;
+}
+
+/** Look up the cached consensus document of flavor <b>flav</b>.
+ * <b>compression_methods</b> is the bitfield of compression methods the
+ * client supports; they are tried in our order of preference, followed by
+ * FALLBACK_COMPRESS_METHOD. On success, return the cache entry and store
+ * the method it is compressed with in <b>compression_used_out</b>. Return
+ * NULL, leaving <b>compression_used_out</b> untouched, if nothing is
+ * available. */
+static struct consensus_cache_entry_t *
+find_best_consensus(int flav,
+                    unsigned compression_methods,
+                    compress_method_t *compression_used_out)
+{
+  struct consensus_cache_entry_t *entry = NULL;
+  unsigned idx;
+
+  for (idx = 0; idx < ARRAY_LENGTH(srv_meth_pref_precompressed); ++idx) {
+    const compress_method_t candidate = srv_meth_pref_precompressed[idx];
+    const int client_accepts = (compression_methods & (1u<<candidate)) != 0;
+
+    if (client_accepts &&
+        consdiffmgr_find_consensus(&entry, flav,
+                                   candidate) == CONSDIFF_AVAILABLE) {
+      tor_assert_nonfatal(entry);
+      *compression_used_out = candidate;
+      return entry;
+    }
+  }
+
+  /* Nothing the client asked for is stored: try the fallback method. */
+  if (consdiffmgr_find_consensus(&entry, flav,
+                           FALLBACK_COMPRESS_METHOD) != CONSDIFF_AVAILABLE)
+    return NULL;
+
+  tor_assert_nonfatal(entry);
+  *compression_used_out = FALLBACK_COMPRESS_METHOD;
+  return entry;
+}
+
+/** Return our most-preferred compression method among those set in the
+ * <b>compression_methods</b> bitfield. If <b>stream</b> is true, use our
+ * preference order for streaming compression; otherwise use the order for
+ * precompressed documents. Return NO_METHOD if no mutually supported
+ * compression method could be found. */
+static compress_method_t
+find_best_compression_method(unsigned compression_methods, int stream)
+{
+  compress_method_t *prefs = stream ? srv_meth_pref_streaming_compression
+                                    : srv_meth_pref_precompressed;
+  const size_t n_prefs = stream
+    ? ARRAY_LENGTH(srv_meth_pref_streaming_compression)
+    : ARRAY_LENGTH(srv_meth_pref_precompressed);
+  size_t idx;
+
+  for (idx = 0; idx < n_prefs; ++idx) {
+    if ((compression_methods & (1u<<prefs[idx])) != 0)
+      return prefs[idx];
+  }
+
+  return NO_METHOD;
+}
+
+/** Return 1 if any of the sha3-256 digests in <b>digests</b> equals the
+ * digest-as-signed of the latest consensus of flavor <b>flavor</b> that we
+ * have available. Return 0 otherwise, including when <b>digests</b> is NULL
+ * or when we have no such consensus. */
+static int
+digest_list_contains_best_consensus(consensus_flavor_t flavor,
+                                    const smartlist_t *digests)
+{
+  if (!digests)
+    return 0;
+
+  const networkstatus_t *latest =
+    networkstatus_get_latest_consensus_by_flavor(flavor);
+  if (!latest)
+    return 0;
+
+  int found = 0;
+  SMARTLIST_FOREACH_BEGIN(digests, const uint8_t *, candidate) {
+    if (tor_memeq(latest->digest_sha3_as_signed, candidate, DIGEST256_LEN)) {
+      found = 1;
+      break;
+    }
+  } SMARTLIST_FOREACH_END(candidate);
+
+  return found;
+}
+
+/** Encodes the results of parsing a consensus request to figure out what
+ * consensus, and possibly what diffs, the user asked for. Filled in by
+ * parse_consensus_request() and released with
+ * parsed_consensus_request_clear(). */
+typedef struct {
+ /** name of the flavor to retrieve; heap-allocated, or NULL if the URL
+ * named no flavor. */
+ char *flavor;
+ /** flavor to retrieve, as enum. */
+ consensus_flavor_t flav;
+ /** plus-separated list of authority fingerprints; see
+ * client_likes_consensus(). Aliases the URL in the request passed to
+ * parse_consensus_request(). */
+ const char *want_fps;
+ /** Optionally, a smartlist of sha3 digests-as-signed of the consensuses
+ * to return a diff from. */
+ smartlist_t *diff_from_digests;
+ /** If true, never send a full consensus. If there is no diff, send
+ * a 404 instead. */
+ int diff_only;
+} parsed_consensus_request_t;
+
+/** Release everything that <b>req</b> owns and reset it to all-zeros. The
+ * structure itself is not freed, since it is stack-allocated. Does nothing
+ * if <b>req</b> is NULL. */
+static void
+parsed_consensus_request_clear(parsed_consensus_request_t *req)
+{
+  if (req == NULL)
+    return;
+
+  if (req->diff_from_digests != NULL) {
+    SMARTLIST_FOREACH(req->diff_from_digests, uint8_t *, d, tor_free(d));
+    smartlist_free(req->diff_from_digests);
+  }
+  tor_free(req->flavor);
+
+  /* want_fps only aliases the request URL, so there is nothing to free. */
+  memset(req, 0, sizeof(*req));
+}
+
+/**
+ * Parse the URL and relevant headers of <b>args</b> for a current-consensus
+ * request, filling in <b>out</b> with the flavor of consensus wanted, the
+ * keys it must be signed with, and the diffs we would accept (or demand)
+ * instead. Return 0 on success and -1 on failure.
+ *
+ * <b>out</b> is zeroed first and may hold allocations even on failure, so
+ * the caller should pass it to parsed_consensus_request_clear() either way.
+ */
+static int
+parse_consensus_request(parsed_consensus_request_t *out,
+                        const get_handler_args_t *args)
+{
+  static const char plain_prefix[] = "/tor/status-vote/current/consensus/";
+  static const char flavored_prefix[] =
+    "/tor/status-vote/current/consensus-";
+  static const char diff_component[] = "diff/";
+
+  const char *url = args->url;
+  /* Whatever follows the (optional) flavor and its slash, if anything. */
+  const char *rest = NULL;
+  char *hash_hex = NULL;
+
+  memset(out, 0, sizeof(*out));
+  out->flav = FLAV_NS;
+
+  /* Figure out the flavor, if any, and where the rest of the URL starts. */
+  if (strcmpstart(url, flavored_prefix) == 0) {
+    const char *name = url + strlen(flavored_prefix);
+    const char *slash = strchr(name, '/');
+    if (slash) {
+      rest = slash + 1;
+      out->flavor = tor_strndup(name, slash - name);
+    } else {
+      out->flavor = tor_strdup(name);
+    }
+    /* An unrecognized flavor name falls back to the "ns" flavor. */
+    const int parsed = networkstatus_parse_flavor_name(out->flavor);
+    out->flav = (parsed < 0) ? FLAV_NS : parsed;
+  } else if (strcmpstart(url, plain_prefix) == 0) {
+    rest = url + strlen(plain_prefix);
+  }
+
+  /* See whether we've been asked explicitly for a diff from an older
+   * consensus. (The user might also have said that a diff would be okay,
+   * via X-Or-Diff-From-Consensus.) Whatever is left names the authorities
+   * that must have signed. */
+  if (rest && strcmpstart(rest, diff_component) == 0) {
+    rest += strlen(diff_component);
+    const char *slash = strchr(rest, '/');
+    if (slash) {
+      hash_hex = tor_strndup(rest, slash - rest);
+      out->want_fps = slash + 1;
+    } else {
+      hash_hex = tor_strdup(rest);
+      out->want_fps = NULL;
+    }
+  } else {
+    out->want_fps = rest;
+  }
+
+  if (!hash_hex) {
+    /* No diff in the URL; a header may still say that a diff is okay. */
+    parse_or_diff_from_header(&out->diff_from_digests, args->headers);
+    return 0;
+  }
+
+  /* A diff named in the URL is mandatory: never fall back to a full
+   * consensus. */
+  uint8_t base_digest[DIGEST256_LEN];
+  out->diff_from_digests = smartlist_new();
+  out->diff_only = 1;
+  const int bad = parse_one_diff_hash(base_digest, hash_hex, "URL",
+                                      "rejecting");
+  tor_free(hash_hex);
+  if (bad)
+    return -1;
+
+  smartlist_add(out->diff_from_digests,
+                tor_memdup(base_digest, DIGEST256_LEN));
+  return 0;
+}
+
+/** Helper function for GET /tor/status-vote/current/consensus
+ *
+ * Parse the request in <b>args</b>, pick a cached consensus or consensus
+ * diff to answer it with, and either start spooling that document to
+ * <b>conn</b> or write a short error/status response (304, 404 or 503).
+ * Always returns 0.
+ */
+static int
+handle_get_current_consensus(dir_connection_t *conn,
+ const get_handler_args_t *args)
+{
+ const compress_method_t compress_method =
+ find_best_compression_method(args->compression_supported, 0);
+ const time_t if_modified_since = args->if_modified_since;
+ /* Set while conn->spool holds entries that must be discarded if we bail
+ * out before the response is started. */
+ int clear_spool = 0;
+
+ /* v3 network status fetch. */
+ long lifetime = NETWORKSTATUS_CACHE_LIFETIME;
+
+ time_t now = time(NULL);
+ parsed_consensus_request_t req;
+
+ if (parse_consensus_request(&req, args) < 0) {
+ write_short_http_response(conn, 404, "Couldn't parse request");
+ goto done;
+ }
+
+ /* The client already has our latest consensus: nothing to send. */
+ if (digest_list_contains_best_consensus(req.flav,
+ req.diff_from_digests)) {
+ write_short_http_response(conn, 304, "Not modified");
+ geoip_note_ns_response(GEOIP_REJECT_NOT_MODIFIED);
+ goto done;
+ }
+
+ struct consensus_cache_entry_t *cached_consensus = NULL;
+
+ /* Prefer a diff if the client named any consensus it could diff from. */
+ compress_method_t compression_used = NO_METHOD;
+ if (req.diff_from_digests) {
+ cached_consensus = find_best_diff(req.diff_from_digests, req.flav,
+ args->compression_supported,
+ &compression_used);
+ }
+
+ if (req.diff_only && !cached_consensus) {
+ write_short_http_response(conn, 404, "No such diff available");
+ // XXXX warn_consensus_is_too_old(v, req.flavor, now);
+ geoip_note_ns_response(GEOIP_REJECT_NOT_FOUND);
+ goto done;
+ }
+
+ /* No diff found (or none requested): fall back to the full consensus. */
+ if (! cached_consensus) {
+ cached_consensus = find_best_consensus(req.flav,
+ args->compression_supported,
+ &compression_used);
+ }
+
+ /* fresh_until and valid_until are only meaningful when the matching
+ * have_* flag is set. */
+ time_t fresh_until, valid_until;
+ int have_fresh_until = 0, have_valid_until = 0;
+ if (cached_consensus) {
+ have_fresh_until =
+ !consensus_cache_entry_get_fresh_until(cached_consensus, &fresh_until);
+ have_valid_until =
+ !consensus_cache_entry_get_valid_until(cached_consensus, &valid_until);
+ }
+
+ if (cached_consensus && have_valid_until &&
+ !networkstatus_valid_until_is_reasonably_live(valid_until, now)) {
+ write_short_http_response(conn, 404, "Consensus is too old");
+ warn_consensus_is_too_old(cached_consensus, req.flavor, now);
+ geoip_note_ns_response(GEOIP_REJECT_NOT_FOUND);
+ goto done;
+ }
+
+ if (cached_consensus && req.want_fps &&
+ !client_likes_consensus(cached_consensus, req.want_fps)) {
+ write_short_http_response(conn, 404, "Consensus not signed by sufficient "
+ "number of requested authorities");
+ geoip_note_ns_response(GEOIP_REJECT_NOT_ENOUGH_SIGS);
+ goto done;
+ }
+
+ /* Build the spool. It stays empty if we found nothing to serve; that
+ * case is turned into a 404 below. */
+ conn->spool = smartlist_new();
+ clear_spool = 1;
+ {
+ spooled_resource_t *spooled;
+ if (cached_consensus) {
+ spooled = spooled_resource_new_from_cache_entry(cached_consensus);
+ smartlist_add(conn->spool, spooled);
+ }
+ }
+
+ /* Cache lifetime: the time left until the document stops being fresh,
+ * or 0 if that is unknown or already past. */
+ lifetime = (have_fresh_until && fresh_until > now) ? fresh_until - now : 0;
+
+ size_t size_guess = 0;
+ int n_expired = 0;
+ dirserv_spool_remove_missing_and_guess_size(conn, if_modified_since,
+ compress_method != NO_METHOD,
+ &size_guess,
+ &n_expired);
+
+ /* An empty spool means 404 if nothing was expired, and 304 otherwise. */
+ if (!smartlist_len(conn->spool) && !n_expired) {
+ write_short_http_response(conn, 404, "Not found");
+ geoip_note_ns_response(GEOIP_REJECT_NOT_FOUND);
+ goto done;
+ } else if (!smartlist_len(conn->spool)) {
+ write_short_http_response(conn, 304, "Not modified");
+ geoip_note_ns_response(GEOIP_REJECT_NOT_MODIFIED);
+ goto done;
+ }
+
+ if (global_write_bucket_low(TO_CONN(conn), size_guess, 2)) {
+ log_debug(LD_DIRSERV,
+ "Client asked for network status lists, but we've been "
+ "writing too many bytes lately. Sending 503 Dir busy.");
+ write_short_http_response(conn, 503, "Directory busy, try again later");
+ geoip_note_ns_response(GEOIP_REJECT_BUSY);
+ goto done;
+ }
+
+ /* Statistics are only recorded when the peer address parses. */
+ tor_addr_t addr;
+ if (tor_addr_parse(&addr, (TO_CONN(conn))->address) >= 0) {
+ geoip_note_client_seen(GEOIP_CLIENT_NETWORKSTATUS,
+ &addr, NULL,
+ time(NULL));
+ geoip_note_ns_response(GEOIP_SUCCESS);
+ /* Note that a request for a network status has started, so that we
+ * can measure the download time later on. */
+ if (conn->dirreq_id)
+ geoip_start_dirreq(conn->dirreq_id, size_guess, DIRREQ_TUNNELED);
+ else
+ geoip_start_dirreq(TO_CONN(conn)->global_identifier, size_guess,
+ DIRREQ_DIRECT);
+ }
+
+ /* Use this header to tell caches that the response depends on the
+ * X-Or-Diff-From-Consensus header (or lack thereof). */
+ const char vary_header[] = "Vary: X-Or-Diff-From-Consensus\r\n";
+
+ /* From here on we are committed to sending the spool: keep it. */
+ clear_spool = 0;
+
+ // The compress_method might have been NO_METHOD, but we store the data
+ // compressed. Decompress them using `compression_used`. See fallback code in
+ // find_best_consensus() and find_best_diff().
+ write_http_response_headers(conn, -1,
+ compress_method == NO_METHOD ?
+ NO_METHOD : compression_used,
+ vary_header,
+ smartlist_len(conn->spool) == 1 ? lifetime : 0);
+
+ /* First argument 0 requests a decompressor rather than a compressor. */
+ if (compress_method == NO_METHOD && smartlist_len(conn->spool))
+ conn->compress_state = tor_compress_new(0, compression_used,
+ HIGH_COMPRESSION);
+
+ /* Prime the connection with some data. */
+ const int initial_flush_result = connection_dirserv_flushed_some(conn);
+ tor_assert_nonfatal(initial_flush_result == 0);
+ goto done;
+
+ done:
+ parsed_consensus_request_clear(&req);
+ if (clear_spool) {
+ dir_conn_clear_spool(conn);
+ }
+ return 0;
+}
+
+/** Helper function for GET /tor/status-vote/{current,next}/...
+ *
+ * Ask dirvote_dirreq_get_status_vote() for the documents matching the URL
+ * in <b>args</b> and write them to <b>conn</b>, compressed if the client
+ * allows it. Answer 404 if nothing matches and 503 if we have been writing
+ * too much lately. Always returns 0.
+ */
+static int
+handle_get_status_vote(dir_connection_t *conn, const get_handler_args_t *args)
+{
+ const char *url = args->url;
+ {
+ /* body_len is the exact number of bytes we will send when that is known
+ * in advance; estimated_len is a guess used for rate limiting and for
+ * choosing the compression level. */
+ ssize_t body_len = 0;
+ ssize_t estimated_len = 0;
+ int lifetime = 60; /* XXXX?? should actually use vote intervals. */
+ /* This smartlist holds strings that we can compress on the fly. */
+ smartlist_t *items = smartlist_new();
+ /* This smartlist holds cached_dir_t objects that have a precompressed
+ * deflated version. */
+ smartlist_t *dir_items = smartlist_new();
+ dirvote_dirreq_get_status_vote(url, items, dir_items);
+ if (!smartlist_len(dir_items) && !smartlist_len(items)) {
+ write_short_http_response(conn, 404, "Not found");
+ goto vote_done;
+ }
+
+ /* We're sending items from at most one kind of source */
+ tor_assert_nonfatal(smartlist_len(items) == 0 ||
+ smartlist_len(dir_items) == 0);
+
+ int streaming;
+ unsigned mask;
+ if (smartlist_len(items)) {
+ /* We're taking strings and compressing them on the fly. */
+ streaming = 1;
+ mask = ~0u;
+ } else {
+ /* We're taking cached_dir_t objects. We only have them uncompressed
+ * or deflated. */
+ streaming = 0;
+ mask = (1u<<NO_METHOD) | (1u<<ZLIB_METHOD);
+ }
+ const compress_method_t compress_method = find_best_compression_method(
+ args->compression_supported&mask, streaming);
+
+ /* Precompressed objects have an exact size in either form. */
+ SMARTLIST_FOREACH(dir_items, cached_dir_t *, d,
+ body_len += compress_method != NO_METHOD ?
+ d->dir_compressed_len : d->dir_len);
+ estimated_len += body_len;
+ /* For strings compressed on the fly, guess that compression halves the
+ * size; the exact body length is then unknown. */
+ SMARTLIST_FOREACH(items, const char *, item, {
+ size_t ln = strlen(item);
+ if (compress_method != NO_METHOD) {
+ estimated_len += ln/2;
+ } else {
+ body_len += ln; estimated_len += ln;
+ }
+ });
+
+ if (global_write_bucket_low(TO_CONN(conn), estimated_len, 2)) {
+ write_short_http_response(conn, 503, "Directory busy, try again later");
+ goto vote_done;
+ }
+ /* A body_len of 0 here means "not known in advance", so -1 is passed
+ * instead. */
+ write_http_response_header(conn, body_len ? body_len : -1,
+ compress_method,
+ lifetime);
+
+ if (smartlist_len(items)) {
+ if (compress_method != NO_METHOD) {
+ conn->compress_state = tor_compress_new(1, compress_method,
+ choose_compression_level(estimated_len));
+ SMARTLIST_FOREACH(items, const char *, c,
+ connection_buf_add_compress(c, strlen(c), conn, 0));
+ /* An empty write with the final argument set ends the compressed
+ * stream. */
+ connection_buf_add_compress("", 0, conn, 1);
+ } else {
+ SMARTLIST_FOREACH(items, const char *, c,
+ connection_buf_add(c, strlen(c), TO_CONN(conn)));
+ }
+ } else {
+ SMARTLIST_FOREACH(dir_items, cached_dir_t *, d,
+ connection_buf_add(compress_method != NO_METHOD ?
+ d->dir_compressed : d->dir,
+ compress_method != NO_METHOD ?
+ d->dir_compressed_len : d->dir_len,
+ TO_CONN(conn)));
+ }
+ vote_done:
+ /* Only the lists are freed here, not the elements they point to. */
+ smartlist_free(items);
+ smartlist_free(dir_items);
+ goto done;
+ }
+ done:
+ return 0;
+}
+
+/** Helper function for GET /tor/micro/d/...
+ *
+ * Spool the microdescriptors whose base64-encoded sha256 digests are listed
+ * in the URL of <b>args</b> to <b>conn</b>. Answer 404 if we have none of
+ * them and 503 if we have been writing too much lately. Always returns 0.
+ */
+static int
+handle_get_microdesc(dir_connection_t *conn, const get_handler_args_t *args)
+{
+  const char *digest_list = args->url + strlen("/tor/micro/d/");
+  const compress_method_t compress_method =
+    find_best_compression_method(args->compression_supported, 1);
+  const int compressing = (compress_method != NO_METHOD);
+  size_t size_guess = 0;
+
+  conn->spool = smartlist_new();
+  dir_split_resource_into_spoolable(digest_list,
+                                    DIR_SPOOL_MICRODESC,
+                                    conn->spool, NULL,
+                                    DSR_DIGEST256|DSR_BASE64|DSR_SORT_UNIQ);
+
+  dirserv_spool_remove_missing_and_guess_size(conn, 0, compressing,
+                                              &size_guess, NULL);
+
+  if (smartlist_len(conn->spool) == 0) {
+    write_short_http_response(conn, 404, "Not found");
+    dir_conn_clear_spool(conn);
+    return 0;
+  }
+
+  if (global_write_bucket_low(TO_CONN(conn), size_guess, 2)) {
+    log_info(LD_DIRSERV,
+             "Client asked for server descriptors, but we've been "
+             "writing too many bytes lately. Sending 503 Dir busy.");
+    write_short_http_response(conn, 503, "Directory busy, try again later");
+    dir_conn_clear_spool(conn);
+    return 0;
+  }
+
+  /* We are committed to answering now, so the spool is kept. */
+  write_http_response_header(conn, -1,
+                             compress_method,
+                             MICRODESC_CACHE_LIFETIME);
+
+  if (compressing) {
+    conn->compress_state = tor_compress_new(1, compress_method,
+                                   choose_compression_level(size_guess));
+  }
+
+  /* Prime the connection with some data. */
+  const int initial_flush_result = connection_dirserv_flushed_some(conn);
+  tor_assert_nonfatal(initial_flush_result == 0);
+  return 0;
+}
+
+/** Helper function for GET /tor/{server,extra}/...
+ *
+ * Spool the router descriptors (under /tor/server/) or extra-info documents
+ * (under /tor/extra/) named by the URL in <b>args</b> to <b>conn</b>.
+ * Extra-info documents are not served when we are configured as a bridge
+ * or as a bridge authority; such requests get a 404, as do requests that
+ * match nothing. Answer 503 if we have been writing too much lately.
+ * Always returns 0.
+ */
+static int
+handle_get_descriptor(dir_connection_t *conn, const get_handler_args_t *args)
+{
+  const char *url = args->url;
+  const compress_method_t compress_method =
+    find_best_compression_method(args->compression_supported, 1);
+  const or_options_t *options = get_options();
+  /* The spool is discarded at "done" unless we start sending it. */
+  int clear_spool = 1;
+  if (!strcmpstart(url,"/tor/server/") ||
+      (!options->BridgeAuthoritativeDir &&
+       !options->BridgeRelay && !strcmpstart(url,"/tor/extra/"))) {
+    int res;
+    const char *msg = NULL;
+    int cache_lifetime = 0;
+    int is_extra = !strcmpstart(url,"/tor/extra/");
+    url += is_extra ? strlen("/tor/extra/") : strlen("/tor/server/");
+    dir_spool_source_t source;
+    time_t publish_cutoff = 0;
+    if (!strcmpstart(url, "d/")) {
+      source =
+        is_extra ? DIR_SPOOL_EXTRA_BY_DIGEST : DIR_SPOOL_SERVER_BY_DIGEST;
+    } else {
+      source =
+        is_extra ? DIR_SPOOL_EXTRA_BY_FP : DIR_SPOOL_SERVER_BY_FP;
+      /* We only want to apply a publish cutoff when we're requesting
+       * resources by fingerprint. */
+      publish_cutoff = time(NULL) - ROUTER_MAX_AGE_TO_PUBLISH;
+    }
+
+    conn->spool = smartlist_new();
+    res = dirserv_get_routerdesc_spool(conn->spool, url,
+                                       source,
+                                       connection_dir_is_encrypted(conn),
+                                       &msg);
+
+    if (!strcmpstart(url, "all")) {
+      cache_lifetime = FULL_DIR_CACHE_LIFETIME;
+    } else if (smartlist_len(conn->spool) == 1) {
+      cache_lifetime = ROUTERDESC_BY_DIGEST_CACHE_LIFETIME;
+    }
+
+    size_t size_guess = 0;
+    int n_expired = 0;
+    dirserv_spool_remove_missing_and_guess_size(conn, publish_cutoff,
+                                                compress_method != NO_METHOD,
+                                                &size_guess, &n_expired);
+
+    /* If we are the bridge authority and the descriptor is a bridge
+     * descriptor, remember that we served this descriptor for desc stats. */
+    /* XXXX it's a bit of a kludge to have this here. */
+    if (options->BridgeAuthoritativeDir &&
+        source == DIR_SPOOL_SERVER_BY_FP) {
+      SMARTLIST_FOREACH_BEGIN(conn->spool, spooled_resource_t *, spooled) {
+        const routerinfo_t *router =
+          router_get_by_id_digest((const char *)spooled->digest);
+        /* router can be NULL here when the bridge auth is asked for its own
+         * descriptor. */
+        if (router && router->purpose == ROUTER_PURPOSE_BRIDGE)
+          rep_hist_note_desc_served(router->cache_info.identity_digest);
+      } SMARTLIST_FOREACH_END(spooled);
+    }
+
+    if (res < 0 || size_guess == 0 || smartlist_len(conn->spool) == 0) {
+      if (msg == NULL)
+        msg = "Not found";
+      write_short_http_response(conn, 404, msg);
+    } else {
+      if (global_write_bucket_low(TO_CONN(conn), size_guess, 2)) {
+        log_info(LD_DIRSERV,
+                 "Client asked for server descriptors, but we've been "
+                 "writing too many bytes lately. Sending 503 Dir busy.");
+        write_short_http_response(conn, 503,
+                                  "Directory busy, try again later");
+        /* clear_spool is still set, so "done" discards the spool. */
+        goto done;
+      }
+      write_http_response_header(conn, -1, compress_method, cache_lifetime);
+      if (compress_method != NO_METHOD)
+        conn->compress_state = tor_compress_new(1, compress_method,
+                                       choose_compression_level(size_guess));
+      clear_spool = 0;
+      /* Prime the connection with some data. */
+      int initial_flush_result = connection_dirserv_flushed_some(conn);
+      tor_assert_nonfatal(initial_flush_result == 0);
+    }
+  } else {
+    /* An extra-info request that we refuse to serve because we are a
+     * bridge or a bridge authority. Every request must get a response, so
+     * treat it like any other URL we don't recognize instead of returning
+     * without writing anything. */
+    write_short_http_response(conn, 404, "Not found");
+  }
+
+ done:
+  if (clear_spool)
+    dir_conn_clear_spool(conn);
+  return 0;
+}
+
+/** Helper function for GET /tor/keys/...
+ *
+ * Collect the authority certificates selected by the URL in <b>args</b>
+ * ("all", "authority", "fp/...", "sk/..." or "fp-sk/...") and write them
+ * to <b>conn</b>, compressed if the client allows it. Answer 400 for any
+ * other URL, 404 if no certificate matches, 304 if every match was
+ * published before the client's If-Modified-Since time, and 503 if we have
+ * been writing too much lately. Always returns 0.
+ */
+static int
+handle_get_keys(dir_connection_t *conn, const get_handler_args_t *args)
+{
+ const char *url = args->url;
+ const compress_method_t compress_method =
+ find_best_compression_method(args->compression_supported, 1);
+ const time_t if_modified_since = args->if_modified_since;
+ {
+ /* Certificates to send. Only the list itself is freed at keys_done, not
+ * the certificates it points to. */
+ smartlist_t *certs = smartlist_new();
+ ssize_t len = -1;
+ if (!strcmp(url, "/tor/keys/all")) {
+ authority_cert_get_all(certs);
+ } else if (!strcmp(url, "/tor/keys/authority")) {
+ authority_cert_t *cert = get_my_v3_authority_cert();
+ if (cert)
+ smartlist_add(certs, cert);
+ } else if (!strcmpstart(url, "/tor/keys/fp/")) {
+ /* Look up the newest certificate for each listed identity. */
+ smartlist_t *fps = smartlist_new();
+ dir_split_resource_into_fingerprints(url+strlen("/tor/keys/fp/"),
+ fps, NULL,
+ DSR_HEX|DSR_SORT_UNIQ);
+ SMARTLIST_FOREACH(fps, char *, d, {
+ authority_cert_t *c = authority_cert_get_newest_by_id(d);
+ if (c) smartlist_add(certs, c);
+ tor_free(d);
+ });
+ smartlist_free(fps);
+ } else if (!strcmpstart(url, "/tor/keys/sk/")) {
+ /* Look up certificates by signing-key digest. */
+ smartlist_t *fps = smartlist_new();
+ dir_split_resource_into_fingerprints(url+strlen("/tor/keys/sk/"),
+ fps, NULL,
+ DSR_HEX|DSR_SORT_UNIQ);
+ SMARTLIST_FOREACH(fps, char *, d, {
+ authority_cert_t *c = authority_cert_get_by_sk_digest(d);
+ if (c) smartlist_add(certs, c);
+ tor_free(d);
+ });
+ smartlist_free(fps);
+ } else if (!strcmpstart(url, "/tor/keys/fp-sk/")) {
+ /* Look up certificates by a pair of digests. */
+ smartlist_t *fp_sks = smartlist_new();
+ dir_split_resource_into_fingerprint_pairs(url+strlen("/tor/keys/fp-sk/"),
+ fp_sks);
+ SMARTLIST_FOREACH(fp_sks, fp_pair_t *, pair, {
+ authority_cert_t *c = authority_cert_get_by_digests(pair->first,
+ pair->second);
+ if (c) smartlist_add(certs, c);
+ tor_free(pair);
+ });
+ smartlist_free(fp_sks);
+ } else {
+ write_short_http_response(conn, 400, "Bad request");
+ goto keys_done;
+ }
+ if (!smartlist_len(certs)) {
+ write_short_http_response(conn, 404, "Not found");
+ goto keys_done;
+ }
+ /* Drop certificates published before the client's If-Modified-Since
+ * time. */
+ SMARTLIST_FOREACH(certs, authority_cert_t *, c,
+ if (c->cache_info.published_on < if_modified_since)
+ SMARTLIST_DEL_CURRENT(certs, c));
+ if (!smartlist_len(certs)) {
+ write_short_http_response(conn, 304, "Not modified");
+ goto keys_done;
+ }
+ /* Total uncompressed size of what is left. */
+ len = 0;
+ SMARTLIST_FOREACH(certs, authority_cert_t *, c,
+ len += c->cache_info.signed_descriptor_len);
+
+ /* When compressing, guess that the output is half the input size. */
+ if (global_write_bucket_low(TO_CONN(conn),
+ compress_method != NO_METHOD ? len/2 : len,
+ 2)) {
+ write_short_http_response(conn, 503, "Directory busy, try again later");
+ goto keys_done;
+ }
+
+ /* The exact body length is only passed when sending uncompressed. */
+ write_http_response_header(conn,
+ compress_method != NO_METHOD ? -1 : len,
+ compress_method,
+ 60*60);
+ if (compress_method != NO_METHOD) {
+ conn->compress_state = tor_compress_new(1, compress_method,
+ choose_compression_level(len));
+ SMARTLIST_FOREACH(certs, authority_cert_t *, c,
+ connection_buf_add_compress(
+ c->cache_info.signed_descriptor_body,
+ c->cache_info.signed_descriptor_len,
+ conn, 0));
+ /* An empty write with the final argument set ends the compressed
+ * stream. */
+ connection_buf_add_compress("", 0, conn, 1);
+ } else {
+ SMARTLIST_FOREACH(certs, authority_cert_t *, c,
+ connection_buf_add(c->cache_info.signed_descriptor_body,
+ c->cache_info.signed_descriptor_len,
+ TO_CONN(conn)));
+ }
+ keys_done:
+ smartlist_free(certs);
+ goto done;
+ }
+ done:
+ return 0;
+}
+
+/** Helper function for GET /tor/rendezvous2/
+ *
+ * Unencrypted connections are always answered with a 404. On an encrypted
+ * connection, the part of the URL after the prefix is taken as a v2
+ * descriptor ID: a malformed ID gets a 400, a well-formed ID we have no
+ * descriptor for gets a 404, and a descriptor we do have is sent
+ * uncompressed. Always return 0.
+ */
+static int
+handle_get_hs_descriptor_v2(dir_connection_t *conn,
+                            const get_handler_args_t *args)
+{
+  const char *desc_body = NULL;
+  const char *desc_id;
+
+  if (!connection_dir_is_encrypted(conn)) {
+    /* Not encrypted! */
+    write_short_http_response(conn, 404, "Not found");
+    return 0;
+  }
+
+  /* Handle v2 rendezvous descriptor fetch request: everything after the
+   * URL prefix is the requested descriptor ID. */
+  desc_id = args->url + strlen("/tor/rendezvous2/");
+  if (!rend_valid_descriptor_id(desc_id)) {
+    /* not well-formed */
+    write_short_http_response(conn, 400, "Bad request");
+    return 0;
+  }
+
+  log_info(LD_REND, "Got a v2 rendezvous descriptor request for ID '%s'",
+           safe_str(escaped(desc_id)));
+  switch (rend_cache_lookup_v2_desc_as_dir(desc_id, &desc_body)) {
+    case -1: /* not well-formed */
+      write_short_http_response(conn, 400, "Bad request");
+      break;
+    case 0: /* well-formed but not present */
+      write_short_http_response(conn, 404, "Not found");
+      break;
+    case 1: { /* valid */
+      const size_t body_len = strlen(desc_body);
+      write_http_response_header(conn, body_len, NO_METHOD, 0);
+      connection_buf_add(desc_body, body_len, TO_CONN(conn));
+      break;
+    }
+  }
+  return 0;
+}
+
+/** Helper function for GET /tor/hs/3/<z>. Only for version 3.
+ *
+ * The request is refused with a 404 on unencrypted connections and when
+ * the cache lookup for <z> yields nothing; otherwise the descriptor is sent
+ * uncompressed. Always return 0.
+ */
+STATIC int
+handle_get_hs_descriptor_v3(dir_connection_t *conn,
+                            const get_handler_args_t *args)
+{
+  const char *url = args->url;
+  const char *blinded_key = NULL;
+  const char *descriptor = NULL;
+  int found;
+
+  /* Reject unencrypted dir connections */
+  if (!connection_dir_is_encrypted(conn)) {
+    write_short_http_response(conn, 404, "Not found");
+    return 0;
+  }
+
+  /* What follows the path prefix is the base64 encoded blinded pubkey; it
+   * is the key under which the descriptor is looked up in the cache. */
+  tor_assert(!strcmpstart(url, "/tor/hs/3/"));
+  blinded_key = url + strlen("/tor/hs/3/");
+  found = hs_cache_lookup_as_dir(HS_VERSION_THREE, blinded_key, &descriptor);
+
+  if (found > 0 && descriptor != NULL) {
+    /* Found requested descriptor! Pass it to this nice client. */
+    const size_t descriptor_len = strlen(descriptor);
+    write_http_response_header(conn, descriptor_len, NO_METHOD, 0);
+    connection_buf_add(descriptor, descriptor_len, TO_CONN(conn));
+  } else {
+    write_short_http_response(conn, 404, "Not found");
+  }
+
+  return 0;
+}
+
+/** Helper function for GET /tor/networkstatus-bridges
+ *
+ * Serve the "bridge" networkstatus document, but only when all of the
+ * following hold: options->BridgeAuthoritativeDir is set,
+ * options->BridgePassword_AuthDigest_ is set, <b>conn</b> is encrypted, and
+ * the request carries an "Authorization: Basic " header whose SHA256 digest
+ * equals options->BridgePassword_AuthDigest_. Every other request is
+ * answered with a 404. Always return 0.
+ */
+static int
+handle_get_networkstatus_bridges(dir_connection_t *conn,
+                                 const get_handler_args_t *args)
+{
+  const char *headers = args->headers;
+  const or_options_t *options = get_options();
+  char digest[DIGEST256_LEN];
+  char *header = NULL;
+  char *status = NULL;
+  size_t dlen;
+
+  if (!options->BridgeAuthoritativeDir ||
+      !options->BridgePassword_AuthDigest_ ||
+      !connection_dir_is_encrypted(conn)) {
+    /* We cannot serve this document here. Say so explicitly: returning
+     * without writing anything would leave the client without any HTTP
+     * response at all. */
+    write_short_http_response(conn, 404, "Not found");
+    goto done;
+  }
+
+  header = http_get_header(headers, "Authorization: Basic ");
+  if (header)
+    crypto_digest256(digest, header, strlen(header), DIGEST_SHA256);
+
+  /* now make sure the password is there and right */
+  if (!header ||
+      tor_memneq(digest,
+                 options->BridgePassword_AuthDigest_, DIGEST256_LEN)) {
+    write_short_http_response(conn, 404, "Not found");
+    goto done;
+  }
+
+  /* all happy now. send an answer. */
+  status = networkstatus_getinfo_by_purpose("bridge", time(NULL));
+  dlen = strlen(status);
+  write_http_response_header(conn, dlen, NO_METHOD, 0);
+  connection_buf_add(status, dlen, TO_CONN(conn));
+
+ done:
+  /* Both pointers are NULL on the paths that never allocated them. */
+  tor_free(header);
+  tor_free(status);
+  return 0;
+}
+
+/** Helper function for GET robots.txt or /tor/robots.txt
+ *
+ * Send a fixed robots.txt body that disallows everything for every user
+ * agent, cacheable for ROBOTS_CACHE_LIFETIME. <b>args</b> is unused.
+ * Always return 0. */
+static int
+handle_get_robots(dir_connection_t *conn, const get_handler_args_t *args)
+{
+  static const char robots_txt[] = "User-agent: *\r\nDisallow: /\r\n";
+  /* Length of the body, not counting the terminating NUL. */
+  const size_t robots_len = sizeof(robots_txt) - 1;
+
+  (void)args;
+
+  write_http_response_header(conn, robots_len, NO_METHOD,
+                             ROBOTS_CACHE_LIFETIME);
+  connection_buf_add(robots_txt, robots_len, TO_CONN(conn));
+  return 0;
+}
+
+/* Given the <b>url</b> from a POST request, try to extract the version
+ * number that follows the provided <b>prefix</b>. For instance, with the
+ * prefix "/tor/hs/":
+ *    /tor/hs/3/publish
+ * yields version 3.
+ *
+ * The version is parsed as a base-10 number in the range [0, INT_MAX].
+ *
+ * On success, <b>end_pos</b> points to the position right after the version
+ * that was found. On error (<b>url</b> does not start with <b>prefix</b>, or
+ * no acceptable number follows it), it is set to NULL.
+ *
+ * Return version on success else negative value. */
+STATIC int
+parse_hs_version_from_post(const char *url, const char *prefix,
+                           const char **end_pos)
+{
+  int ok = 0;
+  char *after_version = NULL;
+
+  tor_assert(url);
+  tor_assert(prefix);
+  tor_assert(end_pos);
+
+  /* Only look for a version when the prefix really starts the url. */
+  if (strcmpstart(url, prefix) == 0) {
+    /* The candidate version begins right where the prefix ends. */
+    const char *version_str = url + strlen(prefix);
+    unsigned long version =
+      tor_parse_long(version_str, 10, 0, INT_MAX, &ok, &after_version);
+    if (ok) {
+      *end_pos = after_version;
+      return (int) version;
+    }
+  }
+
+  *end_pos = NULL;
+  return -1;
+}
+
+/* Handle the POST request for a hidden service descriptor. The request is
+ * in <b>url</b>, the body of the request is in <b>body</b>.
+ *
+ * The URL must be of the form "/tor/hs/<version>/publish..." and the only
+ * version accepted is HS_VERSION_THREE, whose body is handed to
+ * hs_cache_store_as_dir().
+ *
+ * Return 200 on success else return 400 indicating a bad request. */
+STATIC int
+handle_post_hs_descriptor(const char *url, const char *body)
+{
+  const char *after_version;
+  int version;
+
+  tor_assert(url);
+  tor_assert(body);
+
+  version = parse_hs_version_from_post(url, "/tor/hs/", &after_version);
+  if (version < 0) {
+    return 400;
+  }
+
+  /* The version number is valid; what comes right after it has to be the
+   * publish command. */
+  if (strcmpstart(after_version, "/publish") != 0) {
+    return 400;
+  }
+
+  /* Unsupported version, return a bad request. */
+  if (version != HS_VERSION_THREE) {
+    return 400;
+  }
+
+  if (hs_cache_store_as_dir(body) < 0) {
+    return 400;
+  }
+
+  log_info(LD_REND, "Publish request for HS descriptor handled "
+                    "successfully.");
+  return 200;
+}
+
+/** Helper function: called when a dirserver gets a complete HTTP POST
+ * request. Look for an uploaded server descriptor, hidden service
+ * descriptor, vote, or set of consensus signatures. On finding one, process
+ * it and write a response into conn-\>outbuf.
+ *
+ * The checks below run in this order, and the first one that matches
+ * decides the response:
+ * - if we are not in public server mode, answer 503;
+ * - if no URL can be parsed out of <b>headers</b>, answer 400;
+ * - on an encrypted connection, a URL starting with
+ * "/tor/rendezvous2/publish" stores a v2 rendezvous descriptor, and a URL
+ * starting with "/tor/hs/" is handed to handle_post_hs_descriptor();
+ * - if we are not an authority, answer 400;
+ * - "/tor/" takes server descriptors; "/tor/post/vote" and
+ * "/tor/post/consensus-signature" take a v3 vote and consensus signatures
+ * respectively, both only when authdir_mode_v3() is true;
+ * - any other URL is answered with a 404.
+ *
+ * Always return 0. */
+MOCK_IMPL(STATIC int,
+directory_handle_command_post,(dir_connection_t *conn, const char *headers,
+ const char *body, size_t body_len))
+{
+ char *url = NULL;
+ const or_options_t *options = get_options();
+
+ log_debug(LD_DIRSERV,"Received POST command.");
+
+ conn->base_.state = DIR_CONN_STATE_SERVER_WRITING;
+
+ if (!public_server_mode(options)) {
+ log_info(LD_DIR, "Rejected dir post request from %s "
+ "since we're not a public relay.", conn->base_.address);
+ write_short_http_response(conn, 503, "Not acting as a public relay");
+ goto done;
+ }
+
+ if (parse_http_url(headers, &url) < 0) {
+ write_short_http_response(conn, 400, "Bad request");
+ return 0;
+ }
+ log_debug(LD_DIRSERV,"rewritten url as '%s'.", escaped(url));
+
+ /* Handle v2 rendezvous service publish request. Only accepted over an
+ * encrypted connection. */
+ if (connection_dir_is_encrypted(conn) &&
+ !strcmpstart(url,"/tor/rendezvous2/publish")) {
+ if (rend_cache_store_v2_desc_as_dir(body) < 0) {
+ log_warn(LD_REND, "Rejected v2 rend descriptor (body size %d) from %s.",
+ (int)body_len, conn->base_.address);
+ write_short_http_response(conn, 400,
+ "Invalid v2 service descriptor rejected");
+ } else {
+ write_short_http_response(conn, 200, "Service descriptor (v2) stored");
+ log_info(LD_REND, "Handled v2 rendezvous descriptor post: accepted");
+ }
+ goto done;
+ }
+
+ /* Handle HS descriptor publish request. Only accepted over an encrypted
+ * connection. */
+ /* XXX: This should be disabled with a consensus param until we want
+ * prop224 to be deployed and thus used. */
+ if (connection_dir_is_encrypted(conn) && !strcmpstart(url, "/tor/hs/")) {
+ const char *msg = "HS descriptor stored successfully.";
+
+ /* We most probably have a publish request for an HS descriptor. The
+ * helper returns the HTTP status code to send: 200 or 400. */
+ int code = handle_post_hs_descriptor(url, body);
+ if (code != 200) {
+ msg = "Invalid HS descriptor. Rejected.";
+ }
+ write_short_http_response(conn, code, msg);
+ goto done;
+ }
+
+ if (!authdir_mode(options)) {
+ /* we just provide cached directories; we don't want to
+ * receive anything. Everything below this point is for authorities. */
+ write_short_http_response(conn, 400, "Nonauthoritative directory does not "
+ "accept posted server descriptors");
+ goto done;
+ }
+
+ if (authdir_mode(options) &&
+ !strcmp(url,"/tor/")) { /* server descriptor post */
+ const char *msg = "[None]";
+ uint8_t purpose = authdir_mode_bridge(options) ?
+ ROUTER_PURPOSE_BRIDGE : ROUTER_PURPOSE_GENERAL;
+ was_router_added_t r = dirserv_add_multiple_descriptors(body, purpose,
+ conn->base_.address, &msg);
+ tor_assert(msg);
+
+ if (r == ROUTER_ADDED_SUCCESSFULLY) {
+ write_short_http_response(conn, 200, msg);
+ } else if (WRA_WAS_OUTDATED(r)) {
+ write_http_response_header_impl(conn, -1, NULL, NULL,
+ "X-Descriptor-Not-New: Yes\r\n", -1);
+ } else {
+ log_info(LD_DIRSERV,
+ "Rejected router descriptor or extra-info from %s "
+ "(\"%s\").",
+ conn->base_.address, msg);
+ write_short_http_response(conn, 400, msg);
+ }
+ goto done;
+ }
+
+ if (authdir_mode_v3(options) &&
+ !strcmp(url,"/tor/post/vote")) { /* v3 networkstatus vote */
+ const char *msg = "OK";
+ int status;
+ if (dirvote_add_vote(body, &msg, &status)) {
+ write_short_http_response(conn, status, "Vote stored");
+ } else {
+ tor_assert(msg);
+ log_warn(LD_DIRSERV, "Rejected vote from %s (\"%s\").",
+ conn->base_.address, msg);
+ write_short_http_response(conn, status, msg);
+ }
+ goto done;
+ }
+
+ if (authdir_mode_v3(options) &&
+ !strcmp(url,"/tor/post/consensus-signature")) { /* sigs on consensus. */
+ const char *msg = NULL;
+ if (dirvote_add_signatures(body, conn->base_.address, &msg)>=0) {
+ write_short_http_response(conn, 200, msg?msg:"Signatures stored");
+ } else {
+ log_warn(LD_DIR, "Unable to store signatures posted by %s: %s",
+ conn->base_.address, msg?msg:"???");
+ write_short_http_response(conn, 400,
+ msg?msg:"Unable to store signatures");
+ }
+ goto done;
+ }
+
+ /* we didn't recognize the url */
+ write_short_http_response(conn, 404, "Not found");
+
+ done:
+ tor_free(url);
+ return 0;
+}
+
+/** If <b>headers</b> indicates that a proxy was involved, then rewrite
+ * <b>conn</b>-\>address to describe our best guess of the address that
+ * originated this HTTP request.
+ *
+ * The "Forwarded-For: " header is preferred, with "X-Forwarded-For: " as a
+ * fallback. A value that does not parse as an address, or that parses to an
+ * internal address, is ignored and <b>conn</b>-\>address is left alone. */
+static void
+http_set_address_origin(const char *headers, connection_t *conn)
+{
+  char *origin = http_get_header(headers, "Forwarded-For: ");
+  if (origin == NULL)
+    origin = http_get_header(headers, "X-Forwarded-For: ");
+  if (origin == NULL)
+    return; /* No proxy header at all: nothing to rewrite. */
+
+  tor_addr_t parsed;
+  if (tor_addr_parse(&parsed, origin) != -1 &&
+      !tor_addr_is_internal(&parsed, 0)) {
+    tor_free(conn->address);
+    conn->address = tor_strdup(origin);
+  } else {
+    log_debug(LD_DIR, "Ignoring local/internal IP %s", escaped(origin));
+  }
+
+  tor_free(origin);
+}
+
+/** Called when a dirserver receives data on a directory connection;
+ * looks for an HTTP request. If the request is complete, remove it
+ * from the inbuf and try to process it; otherwise, leave it on the
+ * buffer.
+ *
+ * Return -1 if the request is too large or uses a command other than GET
+ * or POST, 0 if the request is not complete yet, and otherwise whatever the
+ * GET or POST handler returns.
+ */
+int
+directory_handle_command(dir_connection_t *conn)
+{
+  char *headers = NULL;
+  char *body = NULL;
+  size_t body_len = 0;
+  int fetched;
+  int result;
+
+  tor_assert(conn);
+  tor_assert(conn->base_.type == CONN_TYPE_DIR);
+
+  fetched = connection_fetch_from_buf_http(TO_CONN(conn),
+                                           &headers, MAX_HEADERS_SIZE,
+                                           &body, &body_len,
+                                           MAX_DIR_UL_SIZE, 0);
+  if (fetched == -1) { /* overflow */
+    log_warn(LD_DIRSERV,
+             "Request too large from address '%s' to DirPort. Closing.",
+             safe_str(conn->base_.address));
+    return -1;
+  }
+  if (fetched == 0) {
+    log_debug(LD_DIRSERV,"command not all here yet.");
+    return 0;
+  }
+  /* Any other value: a complete request is now in headers/body. */
+
+  http_set_address_origin(headers, TO_CONN(conn));
+  /* The headers and body are deliberately not logged here: both would need
+   * escaping, and escaped() cannot be called twice in one statement because
+   * it uses the same buffer each time. */
+
+  if (strncasecmp(headers, "GET", 3) == 0) {
+    result = directory_handle_command_get(conn, headers, body, body_len);
+  } else if (strncasecmp(headers, "POST", 4) == 0) {
+    result = directory_handle_command_post(conn, headers, body, body_len);
+  } else {
+    log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL,
+           "Got headers %s with unknown command. Closing.",
+           escaped(headers));
+    result = -1;
+  }
+
+  tor_free(headers);
+  tor_free(body);
+  return result;
+}
diff --git a/src/feature/dircache/dircache.h b/src/feature/dircache/dircache.h
new file mode 100644
index 0000000000..f05780375a
--- /dev/null
+++ b/src/feature/dircache/dircache.h
@@ -0,0 +1,43 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file dircache.h
+ * \brief Header file for dircache.c.
+ **/
+
+#ifndef TOR_DIRCACHE_H
+#define TOR_DIRCACHE_H
+/** Look for an HTTP request on the directory connection <b>conn</b> and
+ * process it if it is complete. Returns 0 on success, or -1 on error. */
+int directory_handle_command(dir_connection_t *conn);
+/* The declarations below are only visible to files that define
+ * DIRCACHE_PRIVATE before including this header. */
+#ifdef DIRCACHE_PRIVATE
+MOCK_DECL(STATIC int, directory_handle_command_get,(dir_connection_t *conn,
+ const char *headers,
+ const char *req_body,
+ size_t req_body_len));
+MOCK_DECL(STATIC int, directory_handle_command_post,(dir_connection_t *conn,
+ const char *headers,
+ const char *body,
+ size_t body_len));
+/* Handler for a posted hidden service descriptor; its return value is the
+ * HTTP status code to send (200 on success, 400 on a bad request). */
+STATIC int handle_post_hs_descriptor(const char *url, const char *body);
+enum compression_level_t; /* Forward declaration for the prototype below. */
+STATIC enum compression_level_t choose_compression_level(ssize_t n_bytes);
+
+struct get_handler_args_t; /* Forward declaration for the prototype below. */
+STATIC int handle_get_hs_descriptor_v3(dir_connection_t *conn,
+ const struct get_handler_args_t *args);
+
+STATIC int parse_http_url(const char *headers, char **url);
+/* Extracts the version number that follows <b>prefix</b> in <b>url</b>;
+ * returns a negative value and sets *<b>end_pos</b> to NULL on failure. */
+STATIC int parse_hs_version_from_post(const char *url, const char *prefix,
+ const char **end_pos);
+
+STATIC unsigned parse_accept_encoding_header(const char *h);
+#endif /* defined(DIRCACHE_PRIVATE) */
+
+#endif /* !defined(TOR_DIRCACHE_H) */
diff --git a/src/feature/dircache/dirserv.c b/src/feature/dircache/dirserv.c
new file mode 100644
index 0000000000..57178cd506
--- /dev/null
+++ b/src/feature/dircache/dirserv.c
@@ -0,0 +1,918 @@
+/* Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#define DIRSERV_PRIVATE
+#include "core/or/or.h"
+
+#include "app/config/config.h"
+#include "core/mainloop/connection.h"
+#include "feature/dircache/conscache.h"
+#include "feature/dircache/consdiffmgr.h"
+#include "feature/dircommon/directory.h"
+#include "feature/dircache/dirserv.h"
+#include "feature/nodelist/microdesc.h"
+#include "feature/nodelist/routerlist.h"
+#include "feature/relay/router.h"
+#include "feature/relay/routermode.h"
+#include "feature/stats/predict_ports.h"
+
+#include "feature/dircache/cached_dir_st.h"
+#include "feature/dircommon/dir_connection_st.h"
+#include "feature/nodelist/extrainfo_st.h"
+#include "feature/nodelist/microdesc_st.h"
+#include "feature/nodelist/routerinfo_st.h"
+#include "feature/nodelist/routerlist_st.h"
+
+#include "lib/compress/compress.h"
+
+/**
+ * \file dirserv.c
+ * \brief Directory server core implementation. Manages directory
+ * contents and generates directory documents.
+ *
+ * This module implements most of directory cache functionality, and some of
+ * the directory authority functionality. The directory.c module delegates
+ * here in order to handle incoming requests from clients, via
+ * connection_dirserv_flushed_some() and its kin. In order to save RAM, this
+ * module is responsible for spooling directory objects (in whole or in part)
+ * onto buf_t instances, and then closing the dir_connection_t once the
+ * objects are totally flushed.
+ *
+ * The directory.c module also delegates here for handling descriptor uploads
+ * via dirserv_add_multiple_descriptors().
+ *
+ * Additionally, this module handles some aspects of voting, including:
+ * deciding how to vote on individual flags (based on decisions reached in
+ * rephist.c), of formatting routerstatus lines, and deciding what relays to
+ * include in an authority's vote. (TODO: Those functions could profitably be
+ * split off. They only live in this file because historically they were
+ * shared among the v1, v2, and v3 directory code.)
+ */
+
+static void clear_cached_dir(cached_dir_t *d);
+static const signed_descriptor_t *get_signed_descriptor_by_fp(
+ const uint8_t *fp,
+ int extrainfo);
+
+static int spooled_resource_lookup_body(const spooled_resource_t *spooled,
+ int conn_is_encrypted,
+ const uint8_t **body_out,
+ size_t *size_out,
+ time_t *published_out);
+static cached_dir_t *spooled_resource_lookup_cached_dir(
+ const spooled_resource_t *spooled,
+ time_t *published_out);
+static cached_dir_t *lookup_cached_dir_by_fp(const uint8_t *fp);
+
+/********************************************************************/
+
+/* A set of functions to answer questions about how we'd like to behave
+ * as a directory mirror/client. */
+
+/** Return 1 if we fetch our directory material directly from the
+ * authorities, rather than from a mirror; return 0 otherwise.
+ *
+ * The checks are made in a fixed order: FetchDirInfoEarly forces 1, being a
+ * bridge relay forces 0, and a server that cannot work out its own
+ * published address asks an authority. After that we answer 1 only for an
+ * advertised server that either serves tunnelled directory requests or
+ * needs to refuse unknown exits. */
+int
+directory_fetches_from_authorities(const or_options_t *options)
+{
+  const routerinfo_t *my_ri;
+  uint32_t published_addr;
+  int refuse_unknown;
+
+  if (options->FetchDirInfoEarly) {
+    return 1;
+  }
+  if (options->BridgeRelay == 1) {
+    return 0;
+  }
+  if (server_mode(options) &&
+      router_pick_published_address(options, &published_addr, 1) < 0) {
+    /* we don't know our IP address; ask an authority. */
+    return 1;
+  }
+
+  refuse_unknown = !router_my_exit_policy_is_reject_star() &&
+                   should_refuse_unknown_exits(options);
+  if (!(dir_server_mode(options) || refuse_unknown)) {
+    return 0;
+  }
+  if (!(server_mode(options) && advertised_server_mode())) {
+    return 0;
+  }
+
+  /* if we don't service directory requests, return 0 too */
+  my_ri = router_get_my_routerinfo();
+  return my_ri != NULL &&
+         (my_ri->supports_tunnelled_dir_requests || refuse_unknown);
+}
+
+/** Return 1 if we should fetch new networkstatuses, descriptors, etc
+ * on the "mirror" schedule rather than the "client" schedule.
+ *
+ * This is exactly the answer of directory_fetches_from_authorities().
+ */
+int
+directory_fetches_dir_info_early(const or_options_t *options)
+{
+  const int uses_authorities = directory_fetches_from_authorities(options);
+  return uses_authorities;
+}
+
+/** Return 1 if we should fetch new networkstatuses, descriptors, etc
+ * on a very passive schedule -- waiting long enough for ordinary clients
+ * to probably have the info we want. These would include bridge users,
+ * and maybe others in the future e.g. if a Tor client uses another Tor
+ * client as a directory guard.
+ *
+ * Currently the answer is 1 exactly when options->UseBridges is nonzero.
+ */
+int
+directory_fetches_dir_info_later(const or_options_t *options)
+{
+  if (options->UseBridges) {
+    return 1;
+  }
+  return 0;
+}
+
+/** Return true iff we want to serve certificates for authorities
+ * that we don't acknowledge as authorities ourself: that is, iff we are in
+ * directory server mode or are a bridge relay.
+ * Use we_want_to_fetch_unknown_auth_certs to check if we want to fetch
+ * and keep these certificates.
+ */
+int
+directory_caches_unknown_auth_certs(const or_options_t *options)
+{
+  if (dir_server_mode(options)) {
+    return 1;
+  }
+  return options->BridgeRelay ? 1 : 0;
+}
+
+/** Return 1 if we want to fetch and serve descriptors, networkstatuses, etc
+ * Else return 0.
+ * Check options->DirPort_set and directory_permits_begindir_requests()
+ * to see if we are willing to serve these directory documents to others via
+ * the DirPort and begindir-over-ORPort, respectively.
+ *
+ * To check if we should fetch documents, use we_want_to_fetch_flavor and
+ * we_want_to_fetch_unknown_auth_certs instead of this function.
+ */
+int
+directory_caches_dir_info(const or_options_t *options)
+{
+  int caches = 0;
+
+  if (options->BridgeRelay || dir_server_mode(options)) {
+    caches = 1;
+  } else if (server_mode(options) && advertised_server_mode()) {
+    /* An advertised server needs an up-to-date view of network info if it
+     * is going to try to block exit attempts from unknown relays. */
+    caches = !router_my_exit_policy_is_reject_star() &&
+             should_refuse_unknown_exits(options);
+  }
+
+  return caches;
+}
+
+/** Return 1 if we want to allow remote clients to ask us directory
+ * requests via the "begin_dir" interface, which doesn't require
+ * having any separate port open. That is the case for bridge relays and
+ * whenever we are in directory server mode. */
+int
+directory_permits_begindir_requests(const or_options_t *options)
+{
+  if (options->BridgeRelay != 0) {
+    return 1;
+  }
+  return dir_server_mode(options) ? 1 : 0;
+}
+
+/** Return 1 if we have no need to fetch new descriptors. This generally
+ * happens when we're not a dir cache and we haven't built any circuits
+ * lately. Setting FetchUselessDescriptors always makes the answer 0.
+ */
+int
+directory_too_idle_to_fetch_descriptors(const or_options_t *options,
+                                        time_t now)
+{
+  if (directory_caches_dir_info(options)) {
+    return 0;
+  }
+  if (options->FetchUselessDescriptors) {
+    return 0;
+  }
+  return rep_hist_circbuilding_dormant(now) ? 1 : 0;
+}
+
+/********************************************************************/
+
+/** Map from flavor name to the cached_dir_t for the v3 consensuses that we're
+ * currently serving. Stays NULL until
+ * dirserv_set_cached_consensus_networkstatus() stores the first entry, so
+ * readers must check for NULL before using it. */
+static strmap_t *cached_consensuses = NULL;
+
+/** Decrement the reference count on <b>d</b>, and free it if it no longer has
+ * any references. Does nothing when <b>d</b> is NULL. */
+void
+cached_dir_decref(cached_dir_t *d)
+{
+  if (d == NULL) {
+    return;
+  }
+
+  d->refcnt--;
+  if (d->refcnt > 0) {
+    /* Somebody else is still holding on to it. */
+    return;
+  }
+
+  clear_cached_dir(d);
+  tor_free(d);
+}
+
+/** Allocate and return a new cached_dir_t containing the string <b>s</b>,
+ * published at <b>published</b>.
+ *
+ * The new object stores the pointer <b>s</b> itself (no copy is made) and
+ * starts with a reference count of 1. A ZLIB-compressed form of <b>s</b> is
+ * computed as well; if compression fails, a warning is logged and the
+ * object is returned anyway. */
+cached_dir_t *
+new_cached_dir(char *s, time_t published)
+{
+  cached_dir_t *cached = tor_malloc_zero(sizeof(*cached));
+  const size_t len = strlen(s);
+
+  cached->dir = s;
+  cached->dir_len = len;
+  cached->published = published;
+  cached->refcnt = 1;
+
+  if (tor_compress(&cached->dir_compressed, &cached->dir_compressed_len,
+                   cached->dir, cached->dir_len, ZLIB_METHOD) != 0) {
+    log_warn(LD_BUG, "Error compressing directory");
+  }
+
+  return cached;
+}
+
+/** Remove all storage held in <b>d</b>, but do not free <b>d</b> itself.
+ * Every field of <b>d</b> is zeroed afterwards. */
+static void
+clear_cached_dir(cached_dir_t *d)
+{
+  /* Release the compressed and the plain contents... */
+  tor_free(d->dir_compressed);
+  tor_free(d->dir);
+  /* ...then wipe the lengths, digests, timestamp and reference count. */
+  memset(d, 0, sizeof(*d));
+}
+
+/** Drop one reference to the cached_dir_t in <b>_d</b>, which frees its
+ * storage once no references remain. The argument is a void pointer; a
+ * NULL argument is ignored. */
+static void
+free_cached_dir_(void *_d)
+{
+  /* cached_dir_decref() already returns early on NULL. */
+  cached_dir_decref((cached_dir_t *)_d);
+}
+
+/** Replace the v3 consensus networkstatus of type <b>flavor_name</b> that
+ * we're serving with <b>networkstatus</b>, published at <b>published</b>. No
+ * validation is performed.
+ *
+ * <b>networkstatus</b> is copied. <b>digests</b> and the DIGEST256_LEN
+ * bytes at <b>sha3_as_signed</b> are copied into the new entry. The entry
+ * previously stored for this flavor, if any, loses one reference. */
+void
+dirserv_set_cached_consensus_networkstatus(const char *networkstatus,
+                                           const char *flavor_name,
+                                           const common_digests_t *digests,
+                                           const uint8_t *sha3_as_signed,
+                                           time_t published)
+{
+  cached_dir_t *fresh;
+  cached_dir_t *replaced;
+
+  /* The map is created on first use. */
+  if (cached_consensuses == NULL) {
+    cached_consensuses = strmap_new();
+  }
+
+  fresh = new_cached_dir(tor_strdup(networkstatus), published);
+  memcpy(&fresh->digests, digests, sizeof(fresh->digests));
+  memcpy(fresh->digest_sha3_as_signed, sha3_as_signed, DIGEST256_LEN);
+
+  replaced = strmap_set(cached_consensuses, flavor_name, fresh);
+  if (replaced != NULL) {
+    cached_dir_decref(replaced);
+  }
+}
+
+/** Return the latest downloaded consensus networkstatus of flavor
+ * <b>flavor_name</b> in encoded, signed, optionally compressed format,
+ * suitable for sending to clients. Return NULL if no consensus has been
+ * stored yet, or none for this flavor. */
+cached_dir_t *
+dirserv_get_consensus(const char *flavor_name)
+{
+  return cached_consensuses
+    ? strmap_get(cached_consensuses, flavor_name)
+    : NULL;
+}
+
+/** As dirserv_get_routerdescs(), but instead of getting signed_descriptor_t
+ * pointers, adds spooled_resource_t objects holding copies of digests to
+ * <b>spool_out</b>, and doesn't use the /tor/server/ prefix. For a d/
+ * request, adds descriptor digests; for other requests, adds identity
+ * digests.
+ *
+ * <b>key</b> is one of "all", "authority", "d/..." or "fp/...". When the
+ * connection is not encrypted (an "all" request that finds any router is
+ * treated that way too), entries whose body cannot be looked up for an
+ * unencrypted connection are removed from <b>spool_out</b> and freed.
+ *
+ * Return 0 on success. Return -1 and set *<b>msg_out</b> if <b>key</b> is
+ * not recognized or if nothing is left to send.
+ */
+int
+dirserv_get_routerdesc_spool(smartlist_t *spool_out,
+                             const char *key,
+                             dir_spool_source_t source,
+                             int conn_is_encrypted,
+                             const char **msg_out)
+{
+  /* Set for "d/" and "fp/" requests: the list of hex digests to split. */
+  const char *digest_list = NULL;
+
+  *msg_out = NULL;
+
+  if (!strcmp(key, "all")) {
+    const routerlist_t *rl = router_get_routerlist();
+    SMARTLIST_FOREACH_BEGIN(rl->routers, const routerinfo_t *, ri) {
+      const uint8_t *id = (const uint8_t *)ri->cache_info.identity_digest;
+      smartlist_add(spool_out, spooled_resource_new(source, id, DIGEST_LEN));
+      /* Treat "all" requests as if they were unencrypted */
+      conn_is_encrypted = 0;
+    } SMARTLIST_FOREACH_END(ri);
+  } else if (!strcmp(key, "authority")) {
+    const routerinfo_t *me = router_get_my_routerinfo();
+    if (me) {
+      const uint8_t *id = (const uint8_t *)me->cache_info.identity_digest;
+      smartlist_add(spool_out, spooled_resource_new(source, id, DIGEST_LEN));
+    }
+  } else if (!strcmpstart(key, "d/")) {
+    digest_list = key + strlen("d/");
+  } else if (!strcmpstart(key, "fp/")) {
+    digest_list = key + strlen("fp/");
+  } else {
+    *msg_out = "Not found";
+    return -1;
+  }
+
+  if (digest_list != NULL) {
+    dir_split_resource_into_spoolable(digest_list, source, spool_out, NULL,
+                                      DSR_HEX|DSR_SORT_UNIQ);
+  }
+
+  if (!conn_is_encrypted) {
+    /* Remove anything that insists it not be sent unencrypted. */
+    SMARTLIST_FOREACH_BEGIN(spool_out, spooled_resource_t *, item) {
+      const uint8_t *item_body = NULL;
+      size_t item_len = 0;
+      int found = spooled_resource_lookup_body(item, conn_is_encrypted,
+                                               &item_body, &item_len, NULL);
+      if (found < 0 || item_body == NULL || item_len == 0) {
+        SMARTLIST_DEL_CURRENT(spool_out, item);
+        spooled_resource_free(item);
+      }
+    } SMARTLIST_FOREACH_END(item);
+  }
+
+  if (smartlist_len(spool_out) == 0) {
+    *msg_out = "Servers unavailable";
+    return -1;
+  }
+  return 0;
+}
+
+/** Add a signed_descriptor_t to <b>descs_out</b> for each router matching
+ * <b>key</b>. The key should be either
+ * - "/tor/server/authority" for our own routerinfo;
+ * - "/tor/server/all" for all the routerinfos we have, concatenated;
+ * - "/tor/server/fp/FP" where FP is a plus-separated sequence of
+ * hex identity digests; or
+ * - "/tor/server/d/D" where D is a plus-separated sequence
+ * of server descriptor digests, in hex.
+ *
+ * For "fp" requests, descriptors of other routers are only added if they
+ * were published less than ROUTER_MAX_AGE_TO_PUBLISH ago.
+ *
+ * Return 0 if we found some matching descriptors, or -1 if we do not
+ * have any descriptors, no matching descriptors, or if we did not
+ * recognize the key (URL).
+ * If -1 is returned *<b>msg</b> will be set to an appropriate error
+ * message.
+ *
+ * XXXX rename this function. It's only called from the controller.
+ * XXXX in fact, refactor this function, merging as much as possible.
+ */
+int
+dirserv_get_routerdescs(smartlist_t *descs_out, const char *key,
+                        const char **msg)
+{
+  *msg = NULL;
+
+  if (!strcmp(key, "/tor/server/authority")) {
+    const routerinfo_t *me = router_get_my_routerinfo();
+    if (me)
+      smartlist_add(descs_out, (void*) &(me->cache_info));
+  } else if (!strcmp(key, "/tor/server/all")) {
+    routerlist_t *rl = router_get_routerlist();
+    SMARTLIST_FOREACH_BEGIN(rl->routers, routerinfo_t *, ri) {
+      smartlist_add(descs_out, &(ri->cache_info));
+    } SMARTLIST_FOREACH_END(ri);
+  } else if (!strcmpstart(key, "/tor/server/d/")) {
+    smartlist_t *wanted = smartlist_new();
+    dir_split_resource_into_fingerprints(key + strlen("/tor/server/d/"),
+                                         wanted, NULL,
+                                         DSR_HEX|DSR_SORT_UNIQ);
+    /* Look each digest up, then release it right away. */
+    SMARTLIST_FOREACH_BEGIN(wanted, char *, desc_digest) {
+      signed_descriptor_t *sd = router_get_by_descriptor_digest(desc_digest);
+      if (sd)
+        smartlist_add(descs_out, sd);
+      tor_free(desc_digest);
+    } SMARTLIST_FOREACH_END(desc_digest);
+    smartlist_free(wanted);
+  } else if (!strcmpstart(key, "/tor/server/fp/")) {
+    const time_t cutoff = time(NULL) - ROUTER_MAX_AGE_TO_PUBLISH;
+    smartlist_t *wanted = smartlist_new();
+    dir_split_resource_into_fingerprints(key + strlen("/tor/server/fp/"),
+                                         wanted, NULL,
+                                         DSR_HEX|DSR_SORT_UNIQ);
+    SMARTLIST_FOREACH_BEGIN(wanted, char *, id_digest) {
+      const routerinfo_t *ri;
+      if (router_digest_is_me(id_digest)) {
+        /* calling router_get_my_routerinfo() to make sure it exists */
+        ri = router_get_my_routerinfo();
+        if (ri)
+          smartlist_add(descs_out, (void*) &(ri->cache_info));
+      } else {
+        ri = router_get_by_id_digest(id_digest);
+        /* Don't actually serve a descriptor that everyone will think is
+         * expired. This is an (ugly) workaround to keep buggy 0.1.1.10
+         * Tors from downloading descriptors that they will throw away.
+         */
+        if (ri && ri->cache_info.published_on > cutoff)
+          smartlist_add(descs_out, (void*) &(ri->cache_info));
+      }
+      tor_free(id_digest);
+    } SMARTLIST_FOREACH_END(id_digest);
+    smartlist_free(wanted);
+  } else {
+    *msg = "Key not recognized";
+    return -1;
+  }
+
+  if (smartlist_len(descs_out) == 0) {
+    *msg = "Servers unavailable";
+    return -1;
+  }
+  return 0;
+}
+
+/* ==========
+ * Spooling code.
+ * ========== */
+
+/** Allocate and return a new spooled_resource_t of type <b>source</b>.
+ *
+ * DIR_SPOOL_NETWORKSTATUS resources are not spooled eagerly; every other
+ * accepted source is. DIR_SPOOL_CONSENSUS_CACHE_ENTRY must not be passed
+ * here. If <b>digest</b> is non-NULL, its first <b>digestlen</b> bytes are
+ * copied into the new object; <b>digestlen</b> must not exceed the size of
+ * the object's digest field. */
+spooled_resource_t *
+spooled_resource_new(dir_spool_source_t source,
+                     const uint8_t *digest, size_t digestlen)
+{
+  spooled_resource_t *res = tor_malloc_zero(sizeof(*res));
+
+  res->spool_source = source;
+  switch (source) {
+    case DIR_SPOOL_CONSENSUS_CACHE_ENTRY:
+      tor_assert_unreached();
+      break;
+    case DIR_SPOOL_NETWORKSTATUS:
+      res->spool_eagerly = 0;
+      break;
+    case DIR_SPOOL_SERVER_BY_DIGEST:
+    case DIR_SPOOL_SERVER_BY_FP:
+    case DIR_SPOOL_EXTRA_BY_DIGEST:
+    case DIR_SPOOL_EXTRA_BY_FP:
+    case DIR_SPOOL_MICRODESC:
+    default:
+      res->spool_eagerly = 1;
+      break;
+  }
+
+  tor_assert(digestlen <= sizeof(spooled->digest));
+  if (digest != NULL) {
+    memcpy(res->digest, digest, digestlen);
+  }
+  return res;
+}
+
+/**
+ * Create a new spooled_resource_t to spool the contents of <b>entry</b> to
+ * the user. Return the spooled object on success, or NULL on failure (which
+ * is probably caused by a failure to map the body of the item from disk).
+ *
+ * Adds a reference to entry's reference counter; on failure the new object
+ * is freed again before NULL is returned.
+ */
+spooled_resource_t *
+spooled_resource_new_from_cache_entry(consensus_cache_entry_t *entry)
+{
+  spooled_resource_t *res = tor_malloc_zero(sizeof(*res));
+
+  res->spool_source = DIR_SPOOL_CONSENSUS_CACHE_ENTRY;
+  res->spool_eagerly = 0;
+  consensus_cache_entry_incref(entry);
+  res->consensus_cache_entry = entry;
+
+  if (consensus_cache_entry_get_body(entry,
+                                     &res->cce_body,
+                                     &res->cce_len) != 0) {
+    spooled_resource_free(res);
+    return NULL;
+  }
+
+  return res;
+}
+
+/** Release all storage held by <b>spooled</b>, dropping the references it
+ * holds on a cached_dir_t and on a consensus cache entry, if any. Does
+ * nothing when <b>spooled</b> is NULL. */
+void
+spooled_resource_free_(spooled_resource_t *spooled)
+{
+  if (!spooled) {
+    return;
+  }
+
+  /* cached_dir_decref() ignores NULL, so no check is needed here. */
+  cached_dir_decref(spooled->cached_dir_ref);
+
+  if (spooled->consensus_cache_entry != NULL) {
+    consensus_cache_entry_decref(spooled->consensus_cache_entry);
+  }
+
+  tor_free(spooled);
+}
+
+/** When spooling data from a cached_dir_t object, we always add
+ * at least this much. */
+#define DIRSERV_CACHED_DIR_CHUNK_SIZE 8192
+
+/** Return an estimated compression ratio (compressed size divided by
+ * original size) for compressing objects from <b>source</b>. For now the
+ * estimate is the same for every source.
+ */
+static double
+estimate_compression_ratio(dir_spool_source_t source)
+{
+  /* We should put in better estimates here, depending on the number of
+     objects and their type */
+  const double assumed_ratio = 0.5;
+
+  (void) source;
+  return assumed_ratio;
+}
+
+/** Return an estimated number of bytes needed for transmitting the
+ * resource in <b>spooled</b> on <b>conn</b>. If <b>compressed</b> is true,
+ * estimate the compressed size. Return 0 if the resource cannot be found.
+ *
+ * As a convenient side-effect, set *<b>published_out</b> to the resource's
+ * publication time.
+ */
+static size_t
+spooled_resource_estimate_size(const spooled_resource_t *spooled,
+                               dir_connection_t *conn,
+                               int compressed,
+                               time_t *published_out)
+{
+  if (!spooled->spool_eagerly) {
+    cached_dir_t *cached;
+
+    /* A consensus cache entry knows its own length; that length is used
+     * whether or not <b>compressed</b> is set. */
+    if (spooled->consensus_cache_entry) {
+      if (published_out) {
+        consensus_cache_entry_get_valid_after(
+                            spooled->consensus_cache_entry, published_out);
+      }
+      return spooled->cce_len;
+    }
+
+    /* Otherwise use the cached_dir_t we already hold, or look one up. */
+    cached = spooled->cached_dir_ref
+      ? spooled->cached_dir_ref
+      : spooled_resource_lookup_cached_dir(spooled, published_out);
+    if (!cached) {
+      return 0;
+    }
+    return compressed ? cached->dir_compressed_len : cached->dir_len;
+  }
+
+  /* Eagerly spooled resource: find the body and measure it. */
+  const uint8_t *found_body = NULL;
+  size_t estimate = 0;
+  int lookup = spooled_resource_lookup_body(spooled,
+                                            connection_dir_is_encrypted(conn),
+                                            &found_body, &estimate,
+                                            published_out);
+  if (lookup == -1 || found_body == NULL || estimate == 0) {
+    return 0;
+  }
+  if (compressed) {
+    const double ratio = estimate_compression_ratio(spooled->spool_source);
+    estimate = (size_t)(estimate * ratio);
+  }
+  return estimate;
+}
+
+/** Return code for spooled_resource_flush_some(): tells the caller whether
+ * the resource failed, still has bytes pending, or is finished. */
+typedef enum {
+ SRFS_ERR = -1, /**< Flushing failed. */
+ SRFS_MORE = 0, /**< More bytes of this resource remain to be flushed. */
+ SRFS_DONE /**< Nothing left to flush (also used for absent objects). */
+} spooled_resource_flush_status_t;
+
+/** Flush some or all of the bytes from <b>spooled</b> onto <b>conn</b>.
+ * Return SRFS_ERR on error, SRFS_MORE if there are more bytes to flush from
+ * this spooled resource, or SRFS_DONE if we are done flushing this spooled
+ * resource.
+ *
+ * Eagerly spooled resources are looked up and appended in a single call.
+ * Other resources (a cached_dir_t or a consensus cache entry body) are
+ * appended at most DIRSERV_CACHED_DIR_CHUNK_SIZE bytes per call, with
+ * spooled->cached_dir_offset tracking how much has been sent so far.
+ *
+ * In both cases the bytes go through connection_buf_add_compress() when
+ * conn->compress_state is set, and through connection_buf_add() otherwise.
+ */
+static spooled_resource_flush_status_t
+spooled_resource_flush_some(spooled_resource_t *spooled,
+ dir_connection_t *conn)
+{
+ if (spooled->spool_eagerly) {
+ /* Spool_eagerly resources are sent all-at-once. */
+ const uint8_t *body = NULL;
+ size_t bodylen = 0;
+ int r = spooled_resource_lookup_body(spooled,
+ connection_dir_is_encrypted(conn),
+ &body, &bodylen, NULL);
+ if (r == -1 || body == NULL || bodylen == 0) {
+ /* Absent objects count as "done": there is nothing to send, so the
+ * caller can move on to the next resource. */
+ return SRFS_DONE;
+ }
+ if (conn->compress_state) {
+ connection_buf_add_compress((const char*)body, bodylen, conn, 0);
+ } else {
+ connection_buf_add((const char*)body, bodylen, TO_CONN(conn));
+ }
+ return SRFS_DONE;
+ } else {
+ cached_dir_t *cached = spooled->cached_dir_ref;
+ consensus_cache_entry_t *cce = spooled->consensus_cache_entry;
+ if (cached == NULL && cce == NULL) {
+ /* The cached_dir_t hasn't been materialized yet. So let's look it up,
+ * and remember it in spooled->cached_dir_ref for later calls. */
+ cached = spooled->cached_dir_ref =
+ spooled_resource_lookup_cached_dir(spooled, NULL);
+ if (!cached) {
+ /* Absent objects count as done. */
+ return SRFS_DONE;
+ }
+ ++cached->refcnt; /* The lookup does not take a reference; take ours. */
+ tor_assert_nonfatal(spooled->cached_dir_offset == 0);
+ }
+
+ if (BUG(!cached && !cce))
+ return SRFS_DONE;
+
+ int64_t total_len;
+ const char *ptr;
+ if (cached) {
+ total_len = cached->dir_compressed_len; /* compressed form is spooled */
+ ptr = cached->dir_compressed;
+ } else {
+ total_len = spooled->cce_len;
+ ptr = (const char *)spooled->cce_body;
+ }
+ /* How many bytes left to flush?  A negative answer means the offset
+ * has run past the end of the object, which is reported as an error. */
+ int64_t remaining;
+ remaining = total_len - spooled->cached_dir_offset;
+ if (BUG(remaining < 0))
+ return SRFS_ERR;
+ ssize_t bytes = (ssize_t) MIN(DIRSERV_CACHED_DIR_CHUNK_SIZE, remaining);
+ if (conn->compress_state) {
+ connection_buf_add_compress(
+ ptr + spooled->cached_dir_offset,
+ bytes, conn, 0);
+ } else {
+ connection_buf_add(ptr + spooled->cached_dir_offset,
+ bytes, TO_CONN(conn));
+ }
+ spooled->cached_dir_offset += bytes; /* advance past what was queued */
+ if (spooled->cached_dir_offset >= (off_t)total_len) {
+ return SRFS_DONE;
+ } else {
+ return SRFS_MORE;
+ }
+ }
+}
+
+/** Helper: find the cached_dir_t that <b>spooled</b> refers to, using its
+ * digest field as the lookup key.  <b>spooled</b> must not be an eagerly
+ * spooled resource.
+ *
+ * If an object is found and <b>published_out</b> is non-NULL, store the
+ * object's published time there.  Return the object, or NULL if there is
+ * none.
+ *
+ * DOES NOT increase the reference count on the result.  Callers must do
+ * that themselves if they mean to hang on to it.
+ */
+static cached_dir_t *
+spooled_resource_lookup_cached_dir(const spooled_resource_t *spooled,
+                                   time_t *published_out)
+{
+  tor_assert(spooled->spool_eagerly == 0);
+
+  cached_dir_t *found = lookup_cached_dir_by_fp(spooled->digest);
+  if (found != NULL && published_out != NULL)
+    *published_out = found->published;
+
+  return found;
+}
+
+/** Helper: find the body of an eagerly served spooled_resource.
+ *
+ * The lookup method is chosen by spooled->spool_source, keyed on
+ * spooled->digest.  On success, point *<b>body_out</b> at the body, set
+ * *<b>size_out</b> to its length, set *<b>published_out</b> (when
+ * non-NULL) to its publication time, and return 0.  Microdescriptors
+ * report TIME_MAX as their publication time.
+ *
+ * Return -1 if the object is absent, if the source is not one that can be
+ * spooled eagerly, or if <b>conn_is_encrypted</b> is false and the
+ * descriptor is marked as not to be sent unencrypted. */
+static int
+spooled_resource_lookup_body(const spooled_resource_t *spooled,
+                             int conn_is_encrypted,
+                             const uint8_t **body_out,
+                             size_t *size_out,
+                             time_t *published_out)
+{
+  tor_assert(spooled->spool_eagerly == 1);
+
+  const signed_descriptor_t *desc = NULL;
+  const char *digest_str = (const char *)spooled->digest;
+
+  switch (spooled->spool_source) {
+    case DIR_SPOOL_EXTRA_BY_FP:
+      desc = get_signed_descriptor_by_fp(spooled->digest, 1);
+      break;
+    case DIR_SPOOL_SERVER_BY_FP:
+      desc = get_signed_descriptor_by_fp(spooled->digest, 0);
+      break;
+    case DIR_SPOOL_SERVER_BY_DIGEST:
+      desc = router_get_by_descriptor_digest(digest_str);
+      break;
+    case DIR_SPOOL_EXTRA_BY_DIGEST:
+      desc = extrainfo_get_by_descriptor_digest(digest_str);
+      break;
+    case DIR_SPOOL_MICRODESC: {
+      /* Microdescriptors are not signed_descriptor_t objects, so they
+       * are answered right here. */
+      microdesc_t *md = microdesc_cache_lookup_by_digest256(
+                                              get_microdesc_cache(),
+                                              digest_str);
+      if (md == NULL || md->body == NULL)
+        return -1;
+      *body_out = (const uint8_t *)md->body;
+      *size_out = md->bodylen;
+      if (published_out)
+        *published_out = TIME_MAX;
+      return 0;
+    }
+    case DIR_SPOOL_NETWORKSTATUS:
+    case DIR_SPOOL_CONSENSUS_CACHE_ENTRY:
+    default:
+      /* LCOV_EXCL_START */
+      tor_assert_nonfatal_unreached();
+      return -1;
+      /* LCOV_EXCL_STOP */
+  }
+
+  /* Every case that reaches this point tried to fill in "desc". */
+  if (desc == NULL)
+    return -1;
+
+  if (! conn_is_encrypted && desc->send_unencrypted == 0) {
+    /* This was already checked once (for an accurate size estimate, and
+     * possibly to send a 404 when only bridges were requested), but a
+     * previously unknown bridge descriptor may have arrived since then,
+     * so it has to be checked again. */
+    return -1;
+  }
+
+  *body_out = (const uint8_t *) signed_descriptor_get_body(desc);
+  *size_out = desc->signed_descriptor_len;
+  if (published_out)
+    *published_out = desc->published_on;
+  return 0;
+}
+
+/** Look up a cached consensus using the DIGEST_LEN-byte key <b>fp</b>.
+ *
+ * If <b>fp</b> is all zeroes, return the entry stored under "ns".
+ * Otherwise, if <b>fp</b> contains a NUL byte within its first DIGEST_LEN
+ * bytes, treat it as a NUL-terminated flavor name and return the entry
+ * stored under that name.  Return NULL in every other case, including
+ * when no consensuses are cached at all. */
+static cached_dir_t *
+lookup_cached_dir_by_fp(const uint8_t *fp)
+{
+  const char *key = (const char *)fp;
+
+  if (!cached_consensuses)
+    return NULL;
+
+  if (tor_digest_is_zero(key))
+    return strmap_get(cached_consensuses, "ns");
+
+  if (memchr(fp, '\0', DIGEST_LEN)) {
+    /* This interface is a nasty hack: a flavor name has been shoved into
+     * a digest field. */
+    return strmap_get(cached_consensuses, key);
+  }
+
+  return NULL;
+}
+
+/** Guess how many bytes will be needed to send everything on <b>conn</b>'s
+ * outgoing spool, pruning the spool along the way.
+ *
+ * Every spooled element whose publication time is earlier than
+ * <b>cutoff</b> is removed and counted as expired; every element whose
+ * estimated size is 0 (an absent object) is removed without being
+ * counted.  <b>compression</b> is passed on to the per-element size
+ * estimate.
+ *
+ * If non-NULL, *<b>size_out</b> receives the total estimate (clamped to
+ * SIZE_MAX) and *<b>n_expired_out</b> the number of expired elements.  A
+ * connection with no spool yields 0 for both.  If <b>conn</b> is NULL,
+ * neither output is written. */
+void
+dirserv_spool_remove_missing_and_guess_size(dir_connection_t *conn,
+                                            time_t cutoff,
+                                            int compression,
+                                            size_t *size_out,
+                                            int *n_expired_out)
+{
+  if (BUG(!conn))
+    return;
+
+  smartlist_t *spool = conn->spool;
+  int expired_count = 0;
+  uint64_t total_bytes = 0;
+
+  if (spool) {
+    SMARTLIST_FOREACH_BEGIN(spool, spooled_resource_t *, item) {
+      time_t published = TIME_MAX;
+      size_t sz = spooled_resource_estimate_size(item, conn,
+                                                 compression, &published);
+      const int too_old = (published < cutoff);
+      if (too_old || sz == 0) {
+        if (too_old)
+          ++expired_count;
+        SMARTLIST_DEL_CURRENT(spool, item);
+        spooled_resource_free(item);
+      } else {
+        total_bytes += sz;
+      }
+    } SMARTLIST_FOREACH_END(item);
+  }
+
+  if (size_out)
+    *size_out = (total_bytes > SIZE_MAX) ? SIZE_MAX : (size_t)total_bytes;
+  if (n_expired_out)
+    *n_expired_out = expired_count;
+}
+
+/** Helper for sorting a connection's spool: order two spooled_resource_t
+ * elements by comparing the full contents of their digest fields. */
+static int
+dirserv_spool_sort_comparison_(const void **a_, const void **b_)
+{
+  const spooled_resource_t *lhs = *a_;
+  const spooled_resource_t *rhs = *b_;
+
+  return fast_memcmp(lhs->digest, rhs->digest, sizeof(lhs->digest));
+}
+
+/** Sort the entries of <b>conn</b>'s spool by digest.  Does nothing if
+ * the connection has no spool. */
+void
+dirserv_spool_sort(dir_connection_t *conn)
+{
+  if (conn->spool != NULL)
+    smartlist_sort(conn->spool, dirserv_spool_sort_comparison_);
+}
+
+/** Return the cache-info of the router whose identity fingerprint is
+ * <b>fp</b>, or of its extra-info document if <b>extrainfo</b> is true.
+ *
+ * If <b>fp</b> is our own identity, our own routerinfo or extrainfo is
+ * used.  Otherwise the router is looked up by identity digest, and for
+ * extra-info the document named by that router's extra_info_digest is
+ * looked up.
+ *
+ * Return NULL if nothing suitable is found.
+ */
+static const signed_descriptor_t *
+get_signed_descriptor_by_fp(const uint8_t *fp, int extrainfo)
+{
+  const char *id_digest = (const char *)fp;
+
+  if (router_digest_is_me(id_digest)) {
+    /* Check the lookup results before taking the address of a member:
+     * forming &ptr->cache_info from a NULL ptr is undefined behavior,
+     * even when cache_info happens to sit at offset 0.
+     * NOTE(review): presumably either getter can return NULL when no
+     * descriptor of our own is available -- confirm against their
+     * definitions. */
+    if (extrainfo) {
+      const extrainfo_t *my_ei = router_get_my_extrainfo();
+      return my_ei ? &my_ei->cache_info : NULL;
+    } else {
+      const routerinfo_t *my_ri = router_get_my_routerinfo();
+      return my_ri ? &my_ri->cache_info : NULL;
+    }
+  }
+
+  const routerinfo_t *ri = router_get_by_id_digest(id_digest);
+  if (!ri)
+    return NULL;
+
+  if (extrainfo)
+    return extrainfo_get_by_descriptor_digest(
+                                     ri->cache_info.extra_info_digest);
+  return &ri->cache_info;
+}
+
+/** When we're spooling data onto our outbuf, add more whenever we dip
+ * below this threshold. */
+#define DIRSERV_BUFFER_MIN 16384
+
+/**
+ * Called whenever we have flushed some directory data in state
+ * SERVER_WRITING, or whenever we want to fill the buffer with initial
+ * directory data (so that subsequent writes will occur, and trigger this
+ * function again.)
+ *
+ * While the outbuf holds fewer than DIRSERV_BUFFER_MIN bytes and the spool
+ * is non-empty, flush from the last element of the spool.  An element that
+ * reports SRFS_DONE is removed from the spool and freed.  An element that
+ * reports SRFS_MORE ends this call; it stays on the spool.  Once the spool
+ * is empty it is freed, and any compression state on <b>conn</b> is
+ * finished and released.
+ *
+ * Return 0 on success, and -1 on failure.
+ */
+int
+connection_dirserv_flushed_some(dir_connection_t *conn)
+{
+  tor_assert(conn->base_.state == DIR_CONN_STATE_SERVER_WRITING);
+  if (conn->spool == NULL)
+    return 0;
+
+  while (connection_get_outbuf_len(TO_CONN(conn)) < DIRSERV_BUFFER_MIN &&
+         smartlist_len(conn->spool)) {
+    spooled_resource_t *spooled =
+      smartlist_get(conn->spool, smartlist_len(conn->spool)-1);
+    spooled_resource_flush_status_t status;
+    status = spooled_resource_flush_some(spooled, conn);
+    if (status == SRFS_ERR) {
+      return -1;
+    } else if (status == SRFS_MORE) {
+      return 0;
+    }
+    tor_assert(status == SRFS_DONE);
+
+    /* If we're here, we're done flushing this resource.  Pop it outside
+     * the assertion, so that removing it from the spool never depends on
+     * how the assertion macro evaluates its argument. */
+    spooled_resource_t *popped = smartlist_pop_last(conn->spool);
+    tor_assert(popped == spooled);
+    spooled_resource_free(spooled);
+  }
+
+  if (smartlist_len(conn->spool) > 0) {
+    /* We're still spooling something. */
+    return 0;
+  }
+
+  /* If we get here, we're done. */
+  smartlist_free(conn->spool);
+  conn->spool = NULL;
+  if (conn->compress_state) {
+    /* Flush the compression state: there could be more bytes pending in
+     * there, and we don't want to omit bytes. */
+    connection_buf_add_compress("", 0, conn, 1);
+    tor_compress_free(conn->compress_state);
+    conn->compress_state = NULL;
+  }
+  return 0;
+}
+
+/** Free every element on <b>conn</b>'s outgoing spool, then free the spool
+ * itself and clear conn->spool.  Does nothing if <b>conn</b> is NULL or
+ * has no spool. */
+void
+dir_conn_clear_spool(dir_connection_t *conn)
+{
+  if (conn == NULL || conn->spool == NULL)
+    return;
+
+  SMARTLIST_FOREACH_BEGIN(conn->spool, spooled_resource_t *, item) {
+    spooled_resource_free(item);
+  } SMARTLIST_FOREACH_END(item);
+
+  smartlist_free(conn->spool);
+  conn->spool = NULL;
+}
+
+/** Release all storage used by the directory server.
+ *
+ * Frees the cached_consensuses map, handing free_cached_dir_ to
+ * strmap_free() (presumably as the function used to release each stored
+ * value), and then resets the pointer to NULL. */
+void
+dirserv_free_all(void)
+{
+ strmap_free(cached_consensuses, free_cached_dir_);
+ cached_consensuses = NULL;
+}
diff --git a/src/feature/dircache/dirserv.h b/src/feature/dircache/dirserv.h
new file mode 100644
index 0000000000..41e1376688
--- /dev/null
+++ b/src/feature/dircache/dirserv.h
@@ -0,0 +1,119 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file dirserv.h
+ * \brief Header file for dirserv.c.
+ **/
+
+#ifndef TOR_DIRSERV_H
+#define TOR_DIRSERV_H
+
+struct ed25519_public_key_t;
+
+#include "lib/testsupport/testsupport.h"
+
+/** Ways to convert a spooled_resource_t to a bunch of bytes: each value
+ * names the kind of object being spooled and how it is looked up.
+ * Numbering starts at 1. */
+typedef enum dir_spool_source_t {
+ DIR_SPOOL_SERVER_BY_DIGEST=1, DIR_SPOOL_SERVER_BY_FP,
+ DIR_SPOOL_EXTRA_BY_DIGEST, DIR_SPOOL_EXTRA_BY_FP,
+ DIR_SPOOL_MICRODESC,
+ DIR_SPOOL_NETWORKSTATUS,
+ DIR_SPOOL_CONSENSUS_CACHE_ENTRY,
+} dir_spool_source_t;
+#define dir_spool_source_bitfield_t ENUM_BF(dir_spool_source_t)
+
+/** Object to remember the identity of an object that we are spooling,
+ * or about to spool, in response to a directory request.
+ *
+ * (Why do we spool? Because some directory responses are very large,
+ * and we don't want to just shove the complete answer into the output
+ * buffer: that would take a ridiculous amount of RAM.)
+ *
+ * If the spooled resource is relatively small (like microdescriptors,
+ * descriptors, etc), we look them up by ID as needed, and add the whole
+ * thing onto the output buffer at once. If the spooled resource is
+ * big (like networkstatus documents), we reference-count it, and add it
+ * a few K at a time.
+ */
+typedef struct spooled_resource_t {
+ /**
+ * If true, we add the entire object to the outbuf. If false,
+ * we spool the object a few K at a time.
+ */
+ unsigned spool_eagerly : 1;
+ /**
+ * Tells us what kind of object to get, and how to look it up.
+ */
+ dir_spool_source_bitfield_t spool_source : 7;
+ /**
+ * Tells us the specific object to spool. The buffer is DIGEST256_LEN
+ * bytes long.
+ */
+ uint8_t digest[DIGEST256_LEN];
+ /**
+ * A large object that we're spooling. Holds a reference count. Only
+ * used when spool_eagerly is false.
+ */
+ struct cached_dir_t *cached_dir_ref;
+ /**
+ * A different kind of large object that we might be spooling. Also
+ * reference-counted. Also only used when spool_eagerly is false.
+ */
+ struct consensus_cache_entry_t *consensus_cache_entry;
+ const uint8_t *cce_body; /**< Body of consensus_cache_entry, if any. */
+ size_t cce_len; /**< Length of cce_body, in bytes. */
+ /**
+ * The current offset into cached_dir or cce_body. Only used when
+ * spool_eagerly is false */
+ off_t cached_dir_offset;
+} spooled_resource_t;
+
+int connection_dirserv_flushed_some(dir_connection_t *conn);
+
+int directory_fetches_from_authorities(const or_options_t *options);
+int directory_fetches_dir_info_early(const or_options_t *options);
+int directory_fetches_dir_info_later(const or_options_t *options);
+int directory_caches_unknown_auth_certs(const or_options_t *options);
+int directory_caches_dir_info(const or_options_t *options);
+int directory_permits_begindir_requests(const or_options_t *options);
+int directory_too_idle_to_fetch_descriptors(const or_options_t *options,
+ time_t now);
+
+cached_dir_t *dirserv_get_consensus(const char *flavor_name);
+void dirserv_set_cached_consensus_networkstatus(const char *consensus,
+ const char *flavor_name,
+ const common_digests_t *digests,
+ const uint8_t *sha3_as_signed,
+ time_t published);
+void dirserv_clear_old_networkstatuses(time_t cutoff);
+int dirserv_get_routerdesc_spool(smartlist_t *spools_out, const char *key,
+ dir_spool_source_t source,
+ int conn_is_encrypted,
+ const char **msg_out);
+int dirserv_get_routerdescs(smartlist_t *descs_out, const char *key,
+ const char **msg);
+
+void dirserv_free_all(void);
+void cached_dir_decref(cached_dir_t *d);
+cached_dir_t *new_cached_dir(char *s, time_t published);
+
+spooled_resource_t *spooled_resource_new(dir_spool_source_t source,
+ const uint8_t *digest,
+ size_t digestlen);
+spooled_resource_t *spooled_resource_new_from_cache_entry(
+ struct consensus_cache_entry_t *entry);
+void spooled_resource_free_(spooled_resource_t *spooled);
+#define spooled_resource_free(sp) \
+ FREE_AND_NULL(spooled_resource_t, spooled_resource_free_, (sp))
+void dirserv_spool_remove_missing_and_guess_size(dir_connection_t *conn,
+ time_t cutoff,
+ int compression,
+ size_t *size_out,
+ int *n_expired_out);
+void dirserv_spool_sort(dir_connection_t *conn);
+void dir_conn_clear_spool(dir_connection_t *conn);
+
+#endif /* !defined(TOR_DIRSERV_H) */