diff options
author | David Goulet <dgoulet@torproject.org> | 2018-02-08 16:56:21 -0500 |
---|---|---|
committer | David Goulet <dgoulet@torproject.org> | 2018-02-08 16:56:21 -0500 |
commit | 112638921b479f09bb473952c1870b27a0867971 (patch) | |
tree | 3d457ec6311ddbbc16f0c55cf5f68b9a78eeac53 | |
parent | 22a5d3dd2ab793336e911d3aceb8cacd278e0f48 (diff) | |
parent | a445327b80478c72093d8f1b0e205a279318f651 (diff) | |
download | tor-112638921b479f09bb473952c1870b27a0867971.tar.gz tor-112638921b479f09bb473952c1870b27a0867971.zip |
Merge branch 'ticket25183_029_01' into ticket24902_029_05
-rw-r--r-- | src/common/address.c | 22 | ||||
-rw-r--r-- | src/common/address.h | 2 | ||||
-rw-r--r-- | src/common/address_set.c | 129 | ||||
-rw-r--r-- | src/common/address_set.h | 35 | ||||
-rw-r--r-- | src/common/include.am | 2 | ||||
-rw-r--r-- | src/or/nodelist.c | 78 | ||||
-rw-r--r-- | src/or/nodelist.h | 3 | ||||
-rw-r--r-- | src/test/include.am | 1 | ||||
-rw-r--r-- | src/test/test.c | 1 | ||||
-rw-r--r-- | src/test/test.h | 1 | ||||
-rw-r--r-- | src/test/test_address_set.c | 174 |
11 files changed, 448 insertions, 0 deletions
diff --git a/src/common/address.c b/src/common/address.c index 773e688554..1bd52d24b6 100644 --- a/src/common/address.c +++ b/src/common/address.c @@ -1200,6 +1200,28 @@ tor_addr_hash(const tor_addr_t *addr) } } +/** As tor_addr_hash, but use a particular siphash key. */ +uint64_t +tor_addr_keyed_hash(const struct sipkey *key, const tor_addr_t *addr) +{ + /* This is duplicate code with tor_addr_hash, since this function needs to + * be backportable all the way to 0.2.9. */ + + switch (tor_addr_family(addr)) { + case AF_INET: + return siphash24(&addr->addr.in_addr.s_addr, 4, key); + case AF_UNSPEC: + return 0x4e4d5342; + case AF_INET6: + return siphash24(&addr->addr.in6_addr.s6_addr, 16, key); + default: + /* LCOV_EXCL_START */ + tor_fragile_assert(); + return 0; + /* LCOV_EXCL_END */ + } +} + /** Return a newly allocated string with a representation of <b>addr</b>. */ char * tor_addr_to_str_dup(const tor_addr_t *addr) diff --git a/src/common/address.h b/src/common/address.h index 51db42c315..d57abd0d9e 100644 --- a/src/common/address.h +++ b/src/common/address.h @@ -228,6 +228,8 @@ int tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2, #define tor_addr_eq(a,b) (0==tor_addr_compare((a),(b),CMP_EXACT)) uint64_t tor_addr_hash(const tor_addr_t *addr); +struct sipkey; +uint64_t tor_addr_keyed_hash(const struct sipkey *key, const tor_addr_t *addr); int tor_addr_is_v4(const tor_addr_t *addr); int tor_addr_is_internal_(const tor_addr_t *ip, int for_listening, const char *filename, int lineno); diff --git a/src/common/address_set.c b/src/common/address_set.c new file mode 100644 index 0000000000..6fa942b0dc --- /dev/null +++ b/src/common/address_set.c @@ -0,0 +1,129 @@ +/* Copyright (c) 2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file address_set.c + * \brief Implementation for a set of addresses. + * + * This module was first written on a semi-emergency basis to improve the + * robustness of the anti-DoS module. As such, it's written in a pretty + * conservative way, and should be susceptible to improvement later on. + **/ + +#include "orconfig.h" +#include "address_set.h" +#include "address.h" +#include "compat.h" +#include "container.h" +#include "crypto.h" +#include "util.h" +#include "siphash.h" + +/** How many 64-bit siphash values to extract per address */ +#define N_HASHES 2 +/** How many bloom-filter bits we set per address. This is twice the N_HASHES + * value, since we split the siphash outcome two 32-bit values. */ +#define N_BITS_PER_ITEM (N_HASHES * 2) + +/* XXXX This code is largely duplicated with digestset_t. We should merge + * them together into a common bloom-filter implementation. I'm keeping + * them separate for now, though, since this module needs to be backported + * all the way to 0.2.9. + * + * The main difference between digestset_t and this code is that we use + * independent siphashes rather than messing around with bit-shifts. The + * approach here is probably more sound, and we should prefer it if&when we + * unify the implementations. + **/ + +struct address_set_t { + /** siphash keys to make N_HASHES independent hashes for each address. */ + struct sipkey key[N_HASHES]; + int mask; /**< One less than the number of bits in <b>ba</b>; always one less + * than a power of two. */ + bitarray_t *ba; /**< A bit array to implement the Bloom filter. */ +}; + +/** + * Allocate and return an address_set, suitable for holding up to + * <b>max_address_guess</b> distinct values. + */ +address_set_t * +address_set_new(int max_addresses_guess) +{ + /* See digestset_new() for rationale on this equation. */ + int n_bits = 1u << (tor_log2(max_addresses_guess)+5); + + address_set_t *set = tor_malloc_zero(sizeof(address_set_t)); + set->mask = n_bits - 1; + set->ba = bitarray_init_zero(n_bits); + crypto_rand((char*) set->key, sizeof(set->key)); + + return set; +} + +/** + * Release all storage associated with <b>set</b> + */ +void +address_set_free(address_set_t *set) +{ + if (! set) + return; + + bitarray_free(set->ba); + tor_free(set); +} + +/** Yield the bit index corresponding to 'val' for set. */ +#define BIT(set, val) ((val) & (set)->mask) + +/** + * Add <b>addr</b> to <b>set</b>. + * + * All future queries for <b>addr</b> in set will return true. Removing + * items is not possible. + */ +void +address_set_add(address_set_t *set, const struct tor_addr_t *addr) +{ + int i; + for (i = 0; i < N_HASHES; ++i) { + uint64_t h = tor_addr_keyed_hash(&set->key[i], addr); + uint32_t high_bits = (uint32_t)(h >> 32); + uint32_t low_bits = (uint32_t)(h); + bitarray_set(set->ba, BIT(set, high_bits)); + bitarray_set(set->ba, BIT(set, low_bits)); + } +} + +/** As address_set_add(), but take an ipv4 address in host order. */ +void +address_set_add_ipv4h(address_set_t *set, uint32_t addr) +{ + tor_addr_t a; + tor_addr_from_ipv4h(&a, addr); + address_set_add(set, &a); +} + +/** + * Return true if <b>addr</b> if a member of <b>set</b>. (And probably, + * return false if <b>addr</b> is not a member of set.) + */ +int +address_set_probably_contains(address_set_t *set, + const struct tor_addr_t *addr) +{ + int i, matches = 0; + for (i = 0; i < N_HASHES; ++i) { + uint64_t h = tor_addr_keyed_hash(&set->key[i], addr); + uint32_t high_bits = (uint32_t)(h >> 32); + uint32_t low_bits = (uint32_t)(h); + // Note that !! is necessary here, since bitarray_is_set does not + // necessarily return 1 on true. + matches += !! bitarray_is_set(set->ba, BIT(set, high_bits)); + matches += !! bitarray_is_set(set->ba, BIT(set, low_bits)); + } + return matches == N_BITS_PER_ITEM; +} + diff --git a/src/common/address_set.h b/src/common/address_set.h new file mode 100644 index 0000000000..aedf17fc66 --- /dev/null +++ b/src/common/address_set.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file addressset.h + * \brief Types to handle sets of addresses. + * + * This module was first written on a semi-emergency basis to improve the + * robustness of the anti-DoS module. As such, it's written in a pretty + * conservative way, and should be susceptible to improvement later on. + **/ + +#ifndef TOR_ADDRESS_SET_H +#define TOR_ADDRESS_SET_H + +#include "orconfig.h" +#include "torint.h" + +/** + * An address_set_t represents a set of tor_addr_t values. The implementation + * is probabilistic: false negatives cannot occur but false positives are + * possible. + */ +typedef struct address_set_t address_set_t; +struct tor_addr_t; + +address_set_t *address_set_new(int max_addresses_guess); +void address_set_free(address_set_t *set); +void address_set_add(address_set_t *set, const struct tor_addr_t *addr); +void address_set_add_ipv4h(address_set_t *set, uint32_t addr); +int address_set_probably_contains(address_set_t *set, + const struct tor_addr_t *addr); + +#endif + diff --git a/src/common/include.am b/src/common/include.am index 40c463c9d9..cb307e9d5f 100644 --- a/src/common/include.am +++ b/src/common/include.am @@ -80,6 +80,7 @@ src_common_libor_ctime_testing_a_CFLAGS = @CFLAGS_CONSTTIME@ $(TEST_CFLAGS) LIBOR_A_SRC = \ src/common/address.c \ + src/common/address_set.c \ src/common/backtrace.c \ src/common/compat.c \ src/common/compat_threads.c \ @@ -135,6 +136,7 @@ src_common_libor_event_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS) COMMONHEADERS = \ src/common/address.h \ + src/common/address_set.h \ src/common/backtrace.h \ src/common/aes.h \ src/common/ciphers.inc \ diff --git a/src/or/nodelist.c b/src/or/nodelist.c index 0e9a651818..5a02648c5c 100644 --- a/src/or/nodelist.c +++ b/src/or/nodelist.c @@ -14,6 +14,7 @@ #include "or.h" #include "address.h" +#include "address_set.h" #include "config.h" #include "control.h" #include "dirserv.h" @@ -52,6 +53,7 @@ static void count_usable_descriptors(int *num_present, static void update_router_have_minimum_dir_info(void); static double get_frac_paths_needed_for_circs(const or_options_t *options, const networkstatus_t *ns); +static void node_add_to_address_set(const node_t *node); /** A nodelist_t holds a node_t object for every router we're "willing to use * for something". Specifically, it should hold a node_t for every node that @@ -63,6 +65,8 @@ typedef struct nodelist_t { /* Hash table to map from node ID digest to node. */ HT_HEAD(nodelist_map, node_t) nodes_by_id; + /* Set of addresses that belong to nodes we believe in. */ + address_set_t *node_addrs; } nodelist_t; static inline unsigned int @@ -150,6 +154,50 @@ node_addrs_changed(node_t *node) node->country = -1; } +/** Add all address information about <b>node</b> to the current address + * set (if there is one). + */ +static void +node_add_to_address_set(const node_t *node) +{ + if (!the_nodelist || !the_nodelist->node_addrs) + return; + + /* These various address sources can be redundant, but it's likely faster + * to add them all than to compare them all for equality. */ + + if (node->rs) { + if (node->rs->addr) + address_set_add_ipv4h(the_nodelist->node_addrs, node->rs->addr); + if (!tor_addr_is_null(&node->rs->ipv6_addr)) + address_set_add(the_nodelist->node_addrs, &node->rs->ipv6_addr); + } + if (node->ri) { + if (node->ri->addr) + address_set_add_ipv4h(the_nodelist->node_addrs, node->ri->addr); + if (!tor_addr_is_null(&node->ri->ipv6_addr)) + address_set_add(the_nodelist->node_addrs, &node->ri->ipv6_addr); + } + if (node->md) { + if (!tor_addr_is_null(&node->md->ipv6_addr)) + address_set_add(the_nodelist->node_addrs, &node->md->ipv6_addr); + } +} + +/** Return true if <b>addr</b> is the address of some node in the nodelist. + * If not, probably return false. */ +int +nodelist_probably_contains_address(const tor_addr_t *addr) +{ + if (BUG(!addr)) + return 0; + + if (!the_nodelist || !the_nodelist->node_addrs) + return 0; + + return address_set_probably_contains(the_nodelist->node_addrs, addr); +} + /** Add <b>ri</b> to an appropriate node in the nodelist. If we replace an * old routerinfo, and <b>ri_old_out</b> is not NULL, set *<b>ri_old_out</b> * to the previous routerinfo. @@ -188,6 +236,8 @@ nodelist_set_routerinfo(routerinfo_t *ri, routerinfo_t **ri_old_out) dirserv_set_node_flags_from_authoritative_status(node, status); } + node_add_to_address_set(node); + return node; } @@ -219,9 +269,23 @@ nodelist_add_microdesc(microdesc_t *md) node->md = md; md->held_by_nodes++; } + + node_add_to_address_set(node); + return node; } +/* Default value. */ +#define ESTIMATED_ADDRESS_PER_NODE 2 + +/* Return the estimated number of address per node_t. This is used for the + * size of the bloom filter in the nodelist (node_addrs). */ +MOCK_IMPL(int, +get_estimated_address_per_node, (void)) +{ + return ESTIMATED_ADDRESS_PER_NODE; +} + /** Tell the nodelist that the current usable consensus is <b>ns</b>. * This makes the nodelist change all of the routerstatus entries for * the nodes, drop nodes that no longer have enough info to get used, @@ -240,6 +304,12 @@ nodelist_set_consensus(networkstatus_t *ns) SMARTLIST_FOREACH(the_nodelist->nodes, node_t *, node, node->rs = NULL); + /* Conservatively estimate that every node will have 2 addresses. */ + const int estimated_addresses = smartlist_len(ns->routerstatus_list) * + get_estimated_address_per_node(); + address_set_free(the_nodelist->node_addrs); + the_nodelist->node_addrs = address_set_new(estimated_addresses); + SMARTLIST_FOREACH_BEGIN(ns->routerstatus_list, routerstatus_t *, rs) { node_t *node = node_get_or_create(rs->identity_digest); node->rs = rs; @@ -278,6 +348,11 @@ nodelist_set_consensus(networkstatus_t *ns) nodelist_purge(); + /* Now add all the nodes we have to the address set. */ + SMARTLIST_FOREACH_BEGIN(the_nodelist->nodes, node_t *, node) { + node_add_to_address_set(node); + } SMARTLIST_FOREACH_END(node); + if (! authdir) { SMARTLIST_FOREACH_BEGIN(the_nodelist->nodes, node_t *, node) { /* We have no routerstatus for this router. Clear flags so we can skip @@ -430,6 +505,9 @@ nodelist_free_all(void) smartlist_free(the_nodelist->nodes); + address_set_free(the_nodelist->node_addrs); + the_nodelist->node_addrs = NULL; + tor_free(the_nodelist); } diff --git a/src/or/nodelist.h b/src/or/nodelist.h index 71a91e107f..098f1d1555 100644 --- a/src/or/nodelist.h +++ b/src/or/nodelist.h @@ -22,6 +22,7 @@ const node_t *node_get_by_hex_id(const char *identity_digest); node_t *nodelist_set_routerinfo(routerinfo_t *ri, routerinfo_t **ri_old_out); node_t *nodelist_add_microdesc(microdesc_t *md); void nodelist_set_consensus(networkstatus_t *ns); +int nodelist_probably_contains_address(const tor_addr_t *addr); void nodelist_remove_microdesc(const char *identity_digest, microdesc_t *md); void nodelist_remove_routerinfo(routerinfo_t *ri); @@ -124,5 +125,7 @@ void router_dir_info_changed(void); const char *get_dir_info_status_string(void); int count_loading_descriptors_progress(void); +MOCK_DECL(int, get_estimated_address_per_node, (void)); + #endif diff --git a/src/test/include.am b/src/test/include.am index 91b0a59101..0ee3d1169f 100644 --- a/src/test/include.am +++ b/src/test/include.am @@ -72,6 +72,7 @@ src_test_test_SOURCES = \ src/test/test_accounting.c \ src/test/test_addr.c \ src/test/test_address.c \ + src/test/test_address_set.c \ src/test/test_buffers.c \ src/test/test_cell_formats.c \ src/test/test_cell_queue.c \ diff --git a/src/test/test.c b/src/test/test.c index 70258addfd..0fef697909 100644 --- a/src/test/test.c +++ b/src/test/test.c @@ -1200,6 +1200,7 @@ struct testgroup_t testgroups[] = { { "accounting/", accounting_tests }, { "addr/", addr_tests }, { "address/", address_tests }, + { "address_set/", address_set_tests }, { "buffer/", buffer_tests }, { "cellfmt/", cell_format_tests }, { "cellqueue/", cell_queue_tests }, diff --git a/src/test/test.h b/src/test/test.h index 41df6b1340..028082386e 100644 --- a/src/test/test.h +++ b/src/test/test.h @@ -175,6 +175,7 @@ extern const struct testcase_setup_t ed25519_test_setup; extern struct testcase_t accounting_tests[]; extern struct testcase_t addr_tests[]; extern struct testcase_t address_tests[]; +extern struct testcase_t address_set_tests[]; extern struct testcase_t buffer_tests[]; extern struct testcase_t cell_format_tests[]; extern struct testcase_t cell_queue_tests[]; diff --git a/src/test/test_address_set.c b/src/test/test_address_set.c new file mode 100644 index 0000000000..df022f539a --- /dev/null +++ b/src/test/test_address_set.c @@ -0,0 +1,174 @@ +/* Copyright (c) 2017, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "or.h" +#include "address_set.h" +#include "microdesc.h" +#include "networkstatus.h" +#include "nodelist.h" +#include "routerlist.h" +#include "torcert.h" + +#include "test.h" + +static networkstatus_t *dummy_ns = NULL; +static networkstatus_t * +mock_networkstatus_get_latest_consensus(void) +{ + return dummy_ns; +} + +static networkstatus_t * +mock_networkstatus_get_latest_consensus_by_flavor(consensus_flavor_t f) +{ + tor_assert(f == FLAV_MICRODESC); + return dummy_ns; +} + +/* Number of address a single node_t can have. Default to the production + * value. This is to control the size of the bloom filter. */ +static int addr_per_node = 2; +static int +mock_get_estimated_address_per_node(void) +{ + return addr_per_node; +} + +static void +test_contains(void *arg) +{ + int ret; + address_set_t *set = NULL; + + (void) arg; + + /* Setup an IPv4 and IPv6 addresses. */ + tor_addr_t addr_v6; + tor_addr_parse(&addr_v6, "1:2:3:4::"); + tor_addr_t addr_v4; + tor_addr_parse(&addr_v4, "42.42.42.42"); + uint32_t ipv4h = tor_addr_to_ipv4h(&addr_v4); + + /* Make it very big so the chance of failing the contain test will be + * extremely rare. */ + set = address_set_new(1024); + tt_assert(set); + + /* Add and lookup IPv6. */ + address_set_add(set, &addr_v6); + ret = address_set_probably_contains(set, &addr_v6); + tt_int_op(ret, OP_EQ, 1); + + /* Add and lookup IPv4. */ + address_set_add_ipv4h(set, ipv4h); + ret = address_set_probably_contains(set, &addr_v4); + tt_int_op(ret, OP_EQ, 1); + + /* Try a lookup of rubbish. */ + tor_addr_t dummy_addr; + memset(&dummy_addr, 'A', sizeof(dummy_addr)); + dummy_addr.family = AF_INET; + ret = address_set_probably_contains(set, &dummy_addr); + tt_int_op(ret, OP_EQ, 0); + dummy_addr.family = AF_INET6; + ret = address_set_probably_contains(set, &dummy_addr); + tt_int_op(ret, OP_EQ, 0); + + done: + address_set_free(set); +} + +static void +test_nodelist(void *arg) +{ + int ret; + routerstatus_t *rs = NULL; microdesc_t *md = NULL; routerinfo_t *ri = NULL; + + (void) arg; + + MOCK(networkstatus_get_latest_consensus, + mock_networkstatus_get_latest_consensus); + MOCK(networkstatus_get_latest_consensus_by_flavor, + mock_networkstatus_get_latest_consensus_by_flavor); + MOCK(get_estimated_address_per_node, + mock_get_estimated_address_per_node); + + dummy_ns = tor_malloc_zero(sizeof(*dummy_ns)); + dummy_ns->flavor = FLAV_MICRODESC; + dummy_ns->routerstatus_list = smartlist_new(); + + tor_addr_t addr_v4, addr_v6, dummy_addr; + tor_addr_parse(&addr_v4, "42.42.42.42"); + uint32_t ipv4h = tor_addr_to_ipv4h(&addr_v4); + tor_addr_parse(&addr_v6, "1:2:3:4::"); + memset(&dummy_addr, 'A', sizeof(dummy_addr)); + + /* This will make the nodelist bloom filter very large + * (the_nodelist->node_addrs) so we will fail the contain test rarely. */ + addr_per_node = 1024; + + /* No node no nothing. The lookups should be empty. */ + nodelist_set_consensus(dummy_ns); + + /* The address set should be empty. */ + ret = nodelist_probably_contains_address(&addr_v4); + tt_int_op(ret, OP_EQ, 0); + ret = nodelist_probably_contains_address(&addr_v6); + tt_int_op(ret, OP_EQ, 0); + dummy_addr.family = AF_INET; + ret = nodelist_probably_contains_address(&dummy_addr); + tt_int_op(ret, OP_EQ, 0); + dummy_addr.family = AF_INET6; + ret = nodelist_probably_contains_address(&dummy_addr); + tt_int_op(ret, OP_EQ, 0); + + md = tor_malloc_zero(sizeof(*md)); + ri = tor_malloc_zero(sizeof(*ri)); + rs = tor_malloc_zero(sizeof(*rs)); + crypto_rand(rs->identity_digest, sizeof(rs->identity_digest)); + crypto_rand(md->digest, sizeof(md->digest)); + memcpy(rs->descriptor_digest, md->digest, DIGEST256_LEN); + + /* Setup the rs, ri and md addresses. */ + rs->addr = ipv4h; + tor_addr_parse(&rs->ipv6_addr, "1:2:3:4::"); + ri->addr = ipv4h; + tor_addr_parse(&ri->ipv6_addr, "1:2:3:4::"); + tor_addr_parse(&md->ipv6_addr, "1:2:3:4::"); + + /* Add the rs to the consensus becoming a node_t. */ + smartlist_add(dummy_ns->routerstatus_list, rs); + nodelist_set_consensus(dummy_ns); + + /* At this point, the address set should be initialized in the nodelist and + * we should be able to lookup. */ + ret = nodelist_probably_contains_address(&addr_v4); + tt_int_op(ret, OP_EQ, 1); + ret = nodelist_probably_contains_address(&addr_v6); + tt_int_op(ret, OP_EQ, 1); + /* Lookup unknown address. */ + dummy_addr.family = AF_INET; + ret = nodelist_probably_contains_address(&dummy_addr); + tt_int_op(ret, OP_EQ, 0); + dummy_addr.family = AF_INET6; + ret = nodelist_probably_contains_address(&dummy_addr); + tt_int_op(ret, OP_EQ, 0); + + done: + routerstatus_free(rs); routerinfo_free(ri); microdesc_free(md); + smartlist_clear(dummy_ns->routerstatus_list); + networkstatus_vote_free(dummy_ns); + UNMOCK(networkstatus_get_latest_consensus); + UNMOCK(networkstatus_get_latest_consensus_by_flavor); + UNMOCK(get_estimated_address_per_node); +} + +struct testcase_t address_set_tests[] = { + { "contains", test_contains, TT_FORK, + NULL, NULL }, + { "nodelist", test_nodelist, TT_FORK, + NULL, NULL }, + + END_OF_TESTCASES +}; + |