diff options
author | Nick Mathewson <nickm@torproject.org> | 2018-02-07 09:49:35 -0500 |
---|---|---|
committer | David Goulet <dgoulet@torproject.org> | 2018-02-08 14:38:11 -0500 |
commit | 46bd2aed915f17d520f9ff237262d1510fe25e12 (patch) | |
tree | 8a20c6425bee4fb1216b9fe639c347de29e7eb57 /src/common/address_set.c | |
parent | a2aaf9509ba578f4e7705b506ee9a0f764d24ff2 (diff) | |
download | tor-46bd2aed915f17d520f9ff237262d1510fe25e12.tar.gz tor-46bd2aed915f17d520f9ff237262d1510fe25e12.zip |
Add an address-set backend using a bloom filter.
We're going to need this to make our anti-DoS code (see 24902) more
robust.
Diffstat (limited to 'src/common/address_set.c')
-rw-r--r-- | src/common/address_set.c | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/src/common/address_set.c b/src/common/address_set.c new file mode 100644 index 0000000000..df7022174c --- /dev/null +++ b/src/common/address_set.c @@ -0,0 +1,120 @@ +/* Copyright (c) 2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file address_set.c + * \brief Implementation for a set of addresses. + * + * This module was first written on a semi-emergency basis to improve the + * robustness of the anti-DoS module. As such, it's written in a pretty + * conservative way, and should be susceptible to improvement later on. + **/ + +#include "orconfig.h" +#include "address_set.h" +#include "address.h" +#include "compat.h" +#include "container.h" +#include "crypto.h" +#include "util.h" +#include "siphash.h" + +/** How many 64-bit siphash values to extract per address */ +#define N_HASHES 2 +/** How many bloom-filter bits we set per address. This is twice the N_HASHES + * value, since we split the siphash outcome two 32-bit values. */ +#define N_BITS_PER_ITEM (N_HASHES * 2) + +/* XXXX This code is largely duplicated with digestset_t. We should merge + * them together into a common bloom-filter implementation. I'm keeping + * them separate for now, though, since this module needs to be backported + * all the way to 0.2.9. + * + * The main difference between digestset_t and this code is that we use + * independent siphashes rather than messing around with bit-shifts. The + * approach here is probably more sound, and we should prefer it if&when we + * unify the implementations. + **/ + +struct address_set_t { + /** siphash keys to make N_HASHES independent hashes for each address. */ + struct sipkey key[N_HASHES]; + int mask; /**< One less than the number of bits in <b>ba</b>; always one less + * than a power of two. */ + bitarray_t *ba; /**< A bit array to implement the Bloom filter. */ +}; + +/** + * Allocate and return an address_set, suitable for holding up to + * <b>max_address_guess</b> distinct values. + */ +address_set_t * +address_set_new(int max_addresses_guess) +{ + /* See digestset_new() for rationale on this equation. */ + int n_bits = 1u << (tor_log2(max_addresses_guess)+5); + + address_set_t *set = tor_malloc_zero(sizeof(address_set_t)); + set->mask = n_bits - 1; + set->ba = bitarray_init_zero(n_bits); + crypto_rand((char*) set->key, sizeof(set->key)); + + return set; +} + +/** + * Release all storage associated with <b>set</b> + */ +void +address_set_free(address_set_t *set) +{ + if (! set) + return; + + bitarray_free(set->ba); + tor_free(set); +} + +/** Yield the bit index corresponding to 'val' for set. */ +#define BIT(set, val) ((val) & (set)->mask) + +/** + * Add <b>addr</b> to <b>set</b>. + * + * All future queries for <b>addr</b> in set will return true. Removing + * items is not possible. + */ +void +address_set_add(address_set_t *set, const struct tor_addr_t *addr) +{ + int i; + for (i = 0; i < N_HASHES; ++i) { + uint64_t h = tor_addr_keyed_hash(&set->key[i], addr); + uint32_t high_bits = (uint32_t)(h >> 32); + uint32_t low_bits = (uint32_t)(h); + bitarray_set(set->ba, BIT(set, high_bits)); + bitarray_set(set->ba, BIT(set, low_bits)); + } +} + +/** + * Return true if <b>addr</b> if a member of <b>set</b>. (And probably, + * return false if <b>addr</b> is not a member of set.) + */ +int +address_set_probably_contains(address_set_t *set, + const struct tor_addr_t *addr) +{ + int i, matches = 0; + for (i = 0; i < N_HASHES; ++i) { + uint64_t h = tor_addr_keyed_hash(&set->key[i], addr); + uint32_t high_bits = (uint32_t)(h >> 32); + uint32_t low_bits = (uint32_t)(h); + // Note that !! is necessary here, since bitarray_is_set does not + // necessarily return 1 on true. + matches += !! bitarray_is_set(set->ba, BIT(set, high_bits)); + matches += !! bitarray_is_set(set->ba, BIT(set, low_bits)); + } + return matches == N_BITS_PER_ITEM; +} + |