diff options
author | Nick Mathewson <nickm@torproject.org> | 2018-06-26 13:18:23 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2018-06-26 13:27:23 -0400 |
commit | bf89278c799e718a3b6c1ea139a6069db054ac09 (patch) | |
tree | addcdf08f01a3aff7645ea42606d2cc7ed5e1a25 /src/common | |
parent | 860b9a991879c5be2b32cf98766adf5fdd349d41 (diff) | |
download | tor-bf89278c799e718a3b6c1ea139a6069db054ac09.tar.gz tor-bf89278c799e718a3b6c1ea139a6069db054ac09.zip |
Refactor bloom filter logic not to be digest-specific.
Now the address-set code and the digest-set code share the same
backend.
Closes ticket 26510
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/address_set.c | 85 | ||||
-rw-r--r-- | src/common/address_set.h | 12 |
2 files changed, 19 insertions, 78 deletions
diff --git a/src/common/address_set.c b/src/common/address_set.c index f4c5f581c8..369d33e9a6 100644 --- a/src/common/address_set.c +++ b/src/common/address_set.c @@ -14,35 +14,19 @@ #include "common/address_set.h" #include "common/address.h" #include "common/compat.h" -#include "lib/container/bitarray.h" +#include "lib/container/bloomfilt.h" #include "lib/crypt_ops/crypto_rand.h" #include "common/util.h" #include "siphash.h" -/** How many 64-bit siphash values to extract per address */ -#define N_HASHES 2 -/** How many bloom-filter bits we set per address. This is twice the N_HASHES - * value, since we split the siphash output into two 32-bit values. */ -#define N_BITS_PER_ITEM (N_HASHES * 2) - -/* XXXX This code is largely duplicated with digestset_t. We should merge - * them together into a common bloom-filter implementation. I'm keeping - * them separate for now, though, since this module needs to be backported - * all the way to 0.2.9. - * - * The main difference between digestset_t and this code is that we use - * independent siphashes rather than messing around with bit-shifts. The - * approach here is probably more sound, and we should prefer it if&when we - * unify the implementations. - */ - -struct address_set_t { - /** siphash keys to make N_HASHES independent hashes for each address. */ - struct sipkey key[N_HASHES]; - int mask; /**< One less than the number of bits in <b>ba</b>; always one less - * than a power of two. */ - bitarray_t *ba; /**< A bit array to implement the Bloom filter. */ -}; +/* Wrap our hash function to have the signature that the bloom filter + * needs. */ +static uint64_t +bloomfilt_addr_hash(const struct sipkey *key, + const void *item) +{ + return tor_addr_keyed_hash(key, item); +} /** * Allocate and return an address_set, suitable for holding up to @@ -51,33 +35,11 @@ struct address_set_t { address_set_t * address_set_new(int max_addresses_guess) { - /* See digestset_new() for rationale on this equation. */ - int n_bits = 1u << (tor_log2(max_addresses_guess)+5); - - address_set_t *set = tor_malloc_zero(sizeof(address_set_t)); - set->mask = n_bits - 1; - set->ba = bitarray_init_zero(n_bits); - crypto_rand((char*) set->key, sizeof(set->key)); - - return set; -} - -/** - * Release all storage associated with <b>set</b>. - */ -void -address_set_free(address_set_t *set) -{ - if (! set) - return; - - bitarray_free(set->ba); - tor_free(set); + uint8_t k[BLOOMFILT_KEY_LEN]; + crypto_rand((void*)k, sizeof(k)); + return bloomfilt_new(max_addresses_guess, bloomfilt_addr_hash, k); } -/** Yield the bit index corresponding to 'val' for set. */ -#define BIT(set, val) ((val) & (set)->mask) - /** * Add <b>addr</b> to <b>set</b>. * @@ -87,14 +49,7 @@ address_set_free(address_set_t *set) void address_set_add(address_set_t *set, const struct tor_addr_t *addr) { - int i; - for (i = 0; i < N_HASHES; ++i) { - uint64_t h = tor_addr_keyed_hash(&set->key[i], addr); - uint32_t high_bits = (uint32_t)(h >> 32); - uint32_t low_bits = (uint32_t)(h); - bitarray_set(set->ba, BIT(set, high_bits)); - bitarray_set(set->ba, BIT(set, low_bits)); - } + bloomfilt_add(set, addr); } /** As address_set_add(), but take an ipv4 address in host order. */ @@ -111,18 +66,8 @@ address_set_add_ipv4h(address_set_t *set, uint32_t addr) * return false if <b>addr</b> is not a member of set.) */ int -address_set_probably_contains(address_set_t *set, +address_set_probably_contains(const address_set_t *set, const struct tor_addr_t *addr) { - int i, matches = 0; - for (i = 0; i < N_HASHES; ++i) { - uint64_t h = tor_addr_keyed_hash(&set->key[i], addr); - uint32_t high_bits = (uint32_t)(h >> 32); - uint32_t low_bits = (uint32_t)(h); - // Note that !! is necessary here, since bitarray_is_set does not - // necessarily return 1 on true. - matches += !! bitarray_is_set(set->ba, BIT(set, high_bits)); - matches += !! bitarray_is_set(set->ba, BIT(set, low_bits)); - } - return matches == N_BITS_PER_ITEM; + return bloomfilt_probably_contains(set, addr); } diff --git a/src/common/address_set.h b/src/common/address_set.h index cfee89cfb8..2efa1cb03b 100644 --- a/src/common/address_set.h +++ b/src/common/address_set.h @@ -4,10 +4,6 @@ /** * \file address_set.h * \brief Types to handle sets of addresses. - * - * This module was first written on a semi-emergency basis to improve the - * robustness of the anti-DoS module. As such, it's written in a pretty - * conservative way, and should be susceptible to improvement later on. **/ #ifndef TOR_ADDRESS_SET_H @@ -15,21 +11,21 @@ #include "orconfig.h" #include "lib/cc/torint.h" +#include "lib/container/bloomfilt.h" /** * An address_set_t represents a set of tor_addr_t values. The implementation * is probabilistic: false negatives cannot occur but false positives are * possible. */ -typedef struct address_set_t address_set_t; +typedef struct bloomfilt_t address_set_t; struct tor_addr_t; address_set_t *address_set_new(int max_addresses_guess); -void address_set_free(address_set_t *set); +#define address_set_free(set) bloomfilt_free(set) void address_set_add(address_set_t *set, const struct tor_addr_t *addr); void address_set_add_ipv4h(address_set_t *set, uint32_t addr); -int address_set_probably_contains(address_set_t *set, +int address_set_probably_contains(const address_set_t *set, const struct tor_addr_t *addr); #endif - |