aboutsummaryrefslogtreecommitdiff
path: root/src/common/address_set.c
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2018-02-07 09:49:35 -0500
committerDavid Goulet <dgoulet@torproject.org>2018-02-08 14:38:11 -0500
commit46bd2aed915f17d520f9ff237262d1510fe25e12 (patch)
tree8a20c6425bee4fb1216b9fe639c347de29e7eb57 /src/common/address_set.c
parenta2aaf9509ba578f4e7705b506ee9a0f764d24ff2 (diff)
downloadtor-46bd2aed915f17d520f9ff237262d1510fe25e12.tar.gz
tor-46bd2aed915f17d520f9ff237262d1510fe25e12.zip
Add an address-set backend using a bloom filter.
We're going to need this to make our anti-DoS code (see 24902) more robust.
Diffstat (limited to 'src/common/address_set.c')
-rw-r--r--src/common/address_set.c120
1 files changed, 120 insertions, 0 deletions
diff --git a/src/common/address_set.c b/src/common/address_set.c
new file mode 100644
index 0000000000..df7022174c
--- /dev/null
+++ b/src/common/address_set.c
@@ -0,0 +1,120 @@
+/* Copyright (c) 2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file address_set.c
+ * \brief Implementation for a set of addresses.
+ *
+ * This module was first written on a semi-emergency basis to improve the
+ * robustness of the anti-DoS module. As such, it's written in a pretty
+ * conservative way, and should be susceptible to improvement later on.
+ **/
+
+#include "orconfig.h"
+#include "address_set.h"
+#include "address.h"
+#include "compat.h"
+#include "container.h"
+#include "crypto.h"
+#include "util.h"
+#include "siphash.h"
+
+/** How many 64-bit siphash values to extract per address */
+#define N_HASHES 2
+/** How many bloom-filter bits we set per address. This is twice the N_HASHES
+ * value, since we split the siphash outcome two 32-bit values. */
+#define N_BITS_PER_ITEM (N_HASHES * 2)
+
+/* XXXX This code is largely duplicated with digestset_t. We should merge
+ * them together into a common bloom-filter implementation. I'm keeping
+ * them separate for now, though, since this module needs to be backported
+ * all the way to 0.2.9.
+ *
+ * The main difference between digestset_t and this code is that we use
+ * independent siphashes rather than messing around with bit-shifts. The
+ * approach here is probably more sound, and we should prefer it if&when we
+ * unify the implementations.
+ **/
+
+struct address_set_t {
+ /** siphash keys to make N_HASHES independent hashes for each address. */
+ struct sipkey key[N_HASHES];
+ int mask; /**< One less than the number of bits in <b>ba</b>; always one less
+ * than a power of two. */
+ bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
+};
+
+/**
+ * Allocate and return an address_set, suitable for holding up to
+ * <b>max_address_guess</b> distinct values.
+ */
+address_set_t *
+address_set_new(int max_addresses_guess)
+{
+ /* See digestset_new() for rationale on this equation. */
+ int n_bits = 1u << (tor_log2(max_addresses_guess)+5);
+
+ address_set_t *set = tor_malloc_zero(sizeof(address_set_t));
+ set->mask = n_bits - 1;
+ set->ba = bitarray_init_zero(n_bits);
+ crypto_rand((char*) set->key, sizeof(set->key));
+
+ return set;
+}
+
+/**
+ * Release all storage associated with <b>set</b>
+ */
+void
+address_set_free(address_set_t *set)
+{
+ if (! set)
+ return;
+
+ bitarray_free(set->ba);
+ tor_free(set);
+}
+
+/** Yield the bit index corresponding to 'val' for set. */
+#define BIT(set, val) ((val) & (set)->mask)
+
+/**
+ * Add <b>addr</b> to <b>set</b>.
+ *
+ * All future queries for <b>addr</b> in set will return true. Removing
+ * items is not possible.
+ */
+void
+address_set_add(address_set_t *set, const struct tor_addr_t *addr)
+{
+ int i;
+ for (i = 0; i < N_HASHES; ++i) {
+ uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
+ uint32_t high_bits = (uint32_t)(h >> 32);
+ uint32_t low_bits = (uint32_t)(h);
+ bitarray_set(set->ba, BIT(set, high_bits));
+ bitarray_set(set->ba, BIT(set, low_bits));
+ }
+}
+
+/**
+ * Return true if <b>addr</b> if a member of <b>set</b>. (And probably,
+ * return false if <b>addr</b> is not a member of set.)
+ */
+int
+address_set_probably_contains(address_set_t *set,
+ const struct tor_addr_t *addr)
+{
+ int i, matches = 0;
+ for (i = 0; i < N_HASHES; ++i) {
+ uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
+ uint32_t high_bits = (uint32_t)(h >> 32);
+ uint32_t low_bits = (uint32_t)(h);
+ // Note that !! is necessary here, since bitarray_is_set does not
+ // necessarily return 1 on true.
+ matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
+ matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
+ }
+ return matches == N_BITS_PER_ITEM;
+}
+