summaryrefslogtreecommitdiff
path: root/src/common
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2018-06-26 13:18:23 -0400
committerNick Mathewson <nickm@torproject.org>2018-06-26 13:27:23 -0400
commitbf89278c799e718a3b6c1ea139a6069db054ac09 (patch)
treeaddcdf08f01a3aff7645ea42606d2cc7ed5e1a25 /src/common
parent860b9a991879c5be2b32cf98766adf5fdd349d41 (diff)
downloadtor-bf89278c799e718a3b6c1ea139a6069db054ac09.tar.gz
tor-bf89278c799e718a3b6c1ea139a6069db054ac09.zip
Refactor bloom filter logic not to be digest-specific.
Now the address-set code and the digest-set code share the same backend. Closes ticket 26510
Diffstat (limited to 'src/common')
-rw-r--r--src/common/address_set.c85
-rw-r--r--src/common/address_set.h12
2 files changed, 19 insertions, 78 deletions
diff --git a/src/common/address_set.c b/src/common/address_set.c
index f4c5f581c8..369d33e9a6 100644
--- a/src/common/address_set.c
+++ b/src/common/address_set.c
@@ -14,35 +14,19 @@
#include "common/address_set.h"
#include "common/address.h"
#include "common/compat.h"
-#include "lib/container/bitarray.h"
+#include "lib/container/bloomfilt.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "common/util.h"
#include "siphash.h"
-/** How many 64-bit siphash values to extract per address */
-#define N_HASHES 2
-/** How many bloom-filter bits we set per address. This is twice the N_HASHES
- * value, since we split the siphash output into two 32-bit values. */
-#define N_BITS_PER_ITEM (N_HASHES * 2)
-
-/* XXXX This code is largely duplicated with digestset_t. We should merge
- * them together into a common bloom-filter implementation. I'm keeping
- * them separate for now, though, since this module needs to be backported
- * all the way to 0.2.9.
- *
- * The main difference between digestset_t and this code is that we use
- * independent siphashes rather than messing around with bit-shifts. The
- * approach here is probably more sound, and we should prefer it if&when we
- * unify the implementations.
- */
-
-struct address_set_t {
- /** siphash keys to make N_HASHES independent hashes for each address. */
- struct sipkey key[N_HASHES];
- int mask; /**< One less than the number of bits in <b>ba</b>; always one less
- * than a power of two. */
- bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
-};
+/* Wrap our hash function to have the signature that the bloom filter
+ * needs. */
+static uint64_t
+bloomfilt_addr_hash(const struct sipkey *key,
+ const void *item)
+{
+ return tor_addr_keyed_hash(key, item);
+}
/**
* Allocate and return an address_set, suitable for holding up to
@@ -51,33 +35,11 @@ struct address_set_t {
address_set_t *
address_set_new(int max_addresses_guess)
{
- /* See digestset_new() for rationale on this equation. */
- int n_bits = 1u << (tor_log2(max_addresses_guess)+5);
-
- address_set_t *set = tor_malloc_zero(sizeof(address_set_t));
- set->mask = n_bits - 1;
- set->ba = bitarray_init_zero(n_bits);
- crypto_rand((char*) set->key, sizeof(set->key));
-
- return set;
-}
-
-/**
- * Release all storage associated with <b>set</b>.
- */
-void
-address_set_free(address_set_t *set)
-{
- if (! set)
- return;
-
- bitarray_free(set->ba);
- tor_free(set);
+ uint8_t k[BLOOMFILT_KEY_LEN];
+ crypto_rand((void*)k, sizeof(k));
+ return bloomfilt_new(max_addresses_guess, bloomfilt_addr_hash, k);
}
-/** Yield the bit index corresponding to 'val' for set. */
-#define BIT(set, val) ((val) & (set)->mask)
-
/**
* Add <b>addr</b> to <b>set</b>.
*
@@ -87,14 +49,7 @@ address_set_free(address_set_t *set)
void
address_set_add(address_set_t *set, const struct tor_addr_t *addr)
{
- int i;
- for (i = 0; i < N_HASHES; ++i) {
- uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
- uint32_t high_bits = (uint32_t)(h >> 32);
- uint32_t low_bits = (uint32_t)(h);
- bitarray_set(set->ba, BIT(set, high_bits));
- bitarray_set(set->ba, BIT(set, low_bits));
- }
+ bloomfilt_add(set, addr);
}
/** As address_set_add(), but take an ipv4 address in host order. */
@@ -111,18 +66,8 @@ address_set_add_ipv4h(address_set_t *set, uint32_t addr)
* return false if <b>addr</b> is not a member of set.)
*/
int
-address_set_probably_contains(address_set_t *set,
+address_set_probably_contains(const address_set_t *set,
const struct tor_addr_t *addr)
{
- int i, matches = 0;
- for (i = 0; i < N_HASHES; ++i) {
- uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
- uint32_t high_bits = (uint32_t)(h >> 32);
- uint32_t low_bits = (uint32_t)(h);
- // Note that !! is necessary here, since bitarray_is_set does not
- // necessarily return 1 on true.
- matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
- matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
- }
- return matches == N_BITS_PER_ITEM;
+ return bloomfilt_probably_contains(set, addr);
}
diff --git a/src/common/address_set.h b/src/common/address_set.h
index cfee89cfb8..2efa1cb03b 100644
--- a/src/common/address_set.h
+++ b/src/common/address_set.h
@@ -4,10 +4,6 @@
/**
* \file address_set.h
* \brief Types to handle sets of addresses.
- *
- * This module was first written on a semi-emergency basis to improve the
- * robustness of the anti-DoS module. As such, it's written in a pretty
- * conservative way, and should be susceptible to improvement later on.
**/
#ifndef TOR_ADDRESS_SET_H
@@ -15,21 +11,21 @@
#include "orconfig.h"
#include "lib/cc/torint.h"
+#include "lib/container/bloomfilt.h"
/**
* An address_set_t represents a set of tor_addr_t values. The implementation
* is probabilistic: false negatives cannot occur but false positives are
* possible.
*/
-typedef struct address_set_t address_set_t;
+typedef struct bloomfilt_t address_set_t;
struct tor_addr_t;
address_set_t *address_set_new(int max_addresses_guess);
-void address_set_free(address_set_t *set);
+#define address_set_free(set) bloomfilt_free(set)
void address_set_add(address_set_t *set, const struct tor_addr_t *addr);
void address_set_add_ipv4h(address_set_t *set, uint32_t addr);
-int address_set_probably_contains(address_set_t *set,
+int address_set_probably_contains(const address_set_t *set,
const struct tor_addr_t *addr);
#endif
-