diff options
author | Nick Mathewson <nickm@torproject.org> | 2018-06-26 13:18:23 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2018-06-26 13:27:23 -0400 |
commit | bf89278c799e718a3b6c1ea139a6069db054ac09 (patch) | |
tree | addcdf08f01a3aff7645ea42606d2cc7ed5e1a25 /src/lib | |
parent | 860b9a991879c5be2b32cf98766adf5fdd349d41 (diff) | |
download | tor-bf89278c799e718a3b6c1ea139a6069db054ac09.tar.gz tor-bf89278c799e718a3b6c1ea139a6069db054ac09.zip |
Refactor bloom filter logic not to be digest-specific.
Now the address-set code and the digest-set code share the same
backend.
Closes ticket 26510
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/container/bloomfilt.c | 79 | ||||
-rw-r--r-- | src/lib/container/bloomfilt.h | 71 | ||||
-rw-r--r-- | src/lib/crypt_ops/digestset.c | 58 | ||||
-rw-r--r-- | src/lib/crypt_ops/digestset.h | 32 | ||||
-rw-r--r-- | src/lib/crypt_ops/include.am | 6 |
5 files changed, 191 insertions, 55 deletions
diff --git a/src/lib/container/bloomfilt.c b/src/lib/container/bloomfilt.c index 2133c280a5..1cab817e18 100644 --- a/src/lib/container/bloomfilt.c +++ b/src/lib/container/bloomfilt.c @@ -9,14 +9,75 @@ **/ #include <stdlib.h> + #include "lib/malloc/util_malloc.h" #include "lib/container/bloomfilt.h" #include "lib/intmath/bits.h" +#include "lib/log/util_bug.h" +#include "siphash.h" + +/** How many bloom-filter bits we set per address. This is twice the + * BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit + * values. */ +#define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2) + +struct bloomfilt_t { + /** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each + * items. */ + struct sipkey key[BLOOMFILT_N_HASHES]; + bloomfilt_hash_fn hashfn; /**< Function used to generate hashes */ + uint32_t mask; /**< One less than the number of bits in <b>ba</b>; always + * one less than a power of two. */ + bitarray_t *ba; /**< A bit array to implement the Bloom filter. */ +}; + +#define BIT(set, n) ((n) & (set)->mask) + +/** Add the element <b>item</b> to <b>set</b>. */ +void +bloomfilt_add(bloomfilt_t *set, + const void *item) +{ + int i; + for (i = 0; i < BLOOMFILT_N_HASHES; ++i) { + uint64_t h = set->hashfn(&set->key[i], item); + uint32_t high_bits = (uint32_t)(h >> 32); + uint32_t low_bits = (uint32_t)(h); + bitarray_set(set->ba, BIT(set, high_bits)); + bitarray_set(set->ba, BIT(set, low_bits)); + } +} -/** Return a newly allocated digestset_t, optimized to hold a total of - * <b>max_elements</b> digests with a reasonably low false positive weight. */ -digestset_t * -digestset_new(int max_elements) +/** If <b>item</b> is in <b>set</b>, return nonzero. Otherwise, + * <em>probably</em> return zero. */ +int +bloomfilt_probably_contains(const bloomfilt_t *set, + const void *item) +{ + int i, matches = 0; + for (i = 0; i < BLOOMFILT_N_HASHES; ++i) { + uint64_t h = set->hashfn(&set->key[i], item); + uint32_t high_bits = (uint32_t)(h >> 32); + uint32_t low_bits = (uint32_t)(h); + // Note that !! is necessary here, since bitarray_is_set does not + // necessarily return 1 on true. + matches += !! bitarray_is_set(set->ba, BIT(set, high_bits)); + matches += !! bitarray_is_set(set->ba, BIT(set, low_bits)); + } + return matches == N_BITS_PER_ITEM; +} + +/** Return a newly allocated bloomfilt_t, optimized to hold a total of + * <b>max_elements</b> elements with a reasonably low false positive weight. + * + * Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide + * functions of the items, and the key material <b>random_key</b> to + * key the hash. There must be BLOOMFILT_KEY_LEN bytes in the supplied key. + **/ +bloomfilt_t * +bloomfilt_new(int max_elements, + bloomfilt_hash_fn hashfn, + const uint8_t *random_key) { /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k * is the number of hash functions per entry, m is the bits in the array, @@ -29,15 +90,21 @@ digestset_new(int max_elements) * conserve CPU, and k==13 is pretty big. */ int n_bits = 1u << (tor_log2(max_elements)+5); - digestset_t *r = tor_malloc(sizeof(digestset_t)); + bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t)); r->mask = n_bits - 1; r->ba = bitarray_init_zero(n_bits); + + tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN); + memcpy(r->key, random_key, sizeof(r->key)); + + r->hashfn = hashfn; + return r; } /** Free all storage held in <b>set</b>. */ void -digestset_free_(digestset_t *set) +bloomfilt_free_(bloomfilt_t *set) { if (!set) return; diff --git a/src/lib/container/bloomfilt.h b/src/lib/container/bloomfilt.h index adcdb10fc3..577acf5e48 100644 --- a/src/lib/container/bloomfilt.h +++ b/src/lib/container/bloomfilt.h @@ -9,50 +9,27 @@ #include "orconfig.h" #include "lib/cc/torint.h" #include "lib/container/bitarray.h" -#include "siphash.h" - -/** A set of digests, implemented as a Bloom filter. */ -typedef struct { - int mask; /**< One less than the number of bits in <b>ba</b>; always one less - * than a power of two. */ - bitarray_t *ba; /**< A bit array to implement the Bloom filter. */ -} digestset_t; - -#define BIT(n) ((n) & set->mask) -/** Add the digest <b>digest</b> to <b>set</b>. */ -static inline void -digestset_add(digestset_t *set, const char *digest) -{ - const uint64_t x = siphash24g(digest, 20); - const uint32_t d1 = (uint32_t) x; - const uint32_t d2 = (uint32_t)( (x>>16) + x); - const uint32_t d3 = (uint32_t)( (x>>32) + x); - const uint32_t d4 = (uint32_t)( (x>>48) + x); - bitarray_set(set->ba, BIT(d1)); - bitarray_set(set->ba, BIT(d2)); - bitarray_set(set->ba, BIT(d3)); - bitarray_set(set->ba, BIT(d4)); -} - -/** If <b>digest</b> is in <b>set</b>, return nonzero. Otherwise, - * <em>probably</em> return zero. */ -static inline int -digestset_contains(const digestset_t *set, const char *digest) -{ - const uint64_t x = siphash24g(digest, 20); - const uint32_t d1 = (uint32_t) x; - const uint32_t d2 = (uint32_t)( (x>>16) + x); - const uint32_t d3 = (uint32_t)( (x>>32) + x); - const uint32_t d4 = (uint32_t)( (x>>48) + x); - return bitarray_is_set(set->ba, BIT(d1)) && - bitarray_is_set(set->ba, BIT(d2)) && - bitarray_is_set(set->ba, BIT(d3)) && - bitarray_is_set(set->ba, BIT(d4)); -} -#undef BIT - -digestset_t *digestset_new(int max_elements); -void digestset_free_(digestset_t* set); -#define digestset_free(set) FREE_AND_NULL(digestset_t, digestset_free_, (set)) - -#endif /* !defined(TOR_CONTAINER_H) */ + +/** A set of elements, implemented as a Bloom filter. */ +typedef struct bloomfilt_t bloomfilt_t; + +/** How many 64-bit siphash values to extract per item. */ +#define BLOOMFILT_N_HASHES 2 + +/** How much key material do we need to randomize hashes? */ +#define BLOOMFILT_KEY_LEN (BLOOMFILT_N_HASHES * 16) + +struct sipkey; +typedef uint64_t (*bloomfilt_hash_fn)(const struct sipkey *key, + const void *item); + +void bloomfilt_add(bloomfilt_t *set, const void *item); +int bloomfilt_probably_contains(const bloomfilt_t *set, const void *item); + +bloomfilt_t *bloomfilt_new(int max_elements, + bloomfilt_hash_fn hashfn, + const uint8_t *random_key); +void bloomfilt_free_(bloomfilt_t* set); +#define bloomfilt_free(set) FREE_AND_NULL(bloomfilt_t, bloomfilt_free_, (set)) + +#endif /* !defined(TOR_BLOOMFILT_H) */ diff --git a/src/lib/crypt_ops/digestset.c b/src/lib/crypt_ops/digestset.c new file mode 100644 index 0000000000..89dd377a9c --- /dev/null +++ b/src/lib/crypt_ops/digestset.c @@ -0,0 +1,58 @@ +/* Copyright (c) 2018-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file digestset.c + * \brief Implementation for a set of digests + **/ + +#include "orconfig.h" +#include "lib/container/bloomfilt.h" +#include "lib/crypt_ops/crypto_rand.h" +#include "lib/defs/digest_sizes.h" +#include "lib/crypt_ops/digestset.h" +#include "siphash.h" + +/* Wrap our hash function to have the signature that the bloom filter + * needs. */ +static uint64_t +bloomfilt_digest_hash(const struct sipkey *key, + const void *item) +{ + return siphash24(item, DIGEST_LEN, key); +} + +/** + * Allocate and return an digestset, suitable for holding up to + * <b>max_guess</b> distinct values. + */ +digestset_t * +digestset_new(int max_guess) +{ + uint8_t k[BLOOMFILT_KEY_LEN]; + crypto_rand((void*)k, sizeof(k)); + return bloomfilt_new(max_guess, bloomfilt_digest_hash, k); +} + +/** + * Add <b>digest</b> to <b>set</b>. + * + * All future queries for <b>digest</b> in set will return true. Removing + * items is not possible. + */ +void +digestset_add(digestset_t *set, const char *digest) +{ + bloomfilt_add(set, digest); +} + +/** + * Return true if <b>digest</b> is a member of <b>set</b>. (And probably, + * return false if <b>digest</b> is not a member of set.) + */ +int +digestset_probably_contains(const digestset_t *set, + const char *digest) +{ + return bloomfilt_probably_contains(set, digest); +} diff --git a/src/lib/crypt_ops/digestset.h b/src/lib/crypt_ops/digestset.h new file mode 100644 index 0000000000..3b5d62c310 --- /dev/null +++ b/src/lib/crypt_ops/digestset.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file digestset.h + * \brief Types to handle sets of digests, based on bloom filters. + **/ + +#ifndef TOR_DIGESTSET_H +#define TOR_DIGESTSET_H + +#include "orconfig.h" +#include "lib/cc/torint.h" +#include "lib/container/bloomfilt.h" + +/** + * An digestset_t represents a set of 20-byte digest values. The + * implementation is probabilistic: false negatives cannot occur but false + * positives are possible. + */ +typedef struct bloomfilt_t digestset_t; + +digestset_t *digestset_new(int max_addresses_guess); +#define digestset_free(set) bloomfilt_free(set) +void digestset_add(digestset_t *set, const char *addr); +int digestset_probably_contains(const digestset_t *set, + const char *addr); + +// XXXX to remove. +#define digestset_contains digestset_probably_contains + +#endif diff --git a/src/lib/crypt_ops/include.am b/src/lib/crypt_ops/include.am index b881c689d8..1b88b880d0 100644 --- a/src/lib/crypt_ops/include.am +++ b/src/lib/crypt_ops/include.am @@ -19,7 +19,8 @@ src_lib_libtor_crypt_ops_a_SOURCES = \ src/lib/crypt_ops/crypto_rand.c \ src/lib/crypt_ops/crypto_rsa.c \ src/lib/crypt_ops/crypto_s2k.c \ - src/lib/crypt_ops/crypto_util.c + src/lib/crypt_ops/crypto_util.c \ + src/lib/crypt_ops/digestset.c src_lib_libtor_crypt_ops_testing_a_SOURCES = \ $(src_lib_libtor_crypt_ops_a_SOURCES) @@ -41,4 +42,5 @@ noinst_HEADERS += \ src/lib/crypt_ops/crypto_rand.h \ src/lib/crypt_ops/crypto_rsa.h \ src/lib/crypt_ops/crypto_s2k.h \ - src/lib/crypt_ops/crypto_util.h + src/lib/crypt_ops/crypto_util.h \ + src/lib/crypt_ops/digestset.h |