diff options
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | src/common/container.c | 20 | ||||
-rw-r--r-- | src/common/container.h | 43 | ||||
-rw-r--r-- | src/or/routerlist.c | 51 | ||||
-rw-r--r-- | src/or/test.c | 103 |
5 files changed, 201 insertions, 20 deletions
@@ -51,6 +51,10 @@ Changes in version 0.2.1.1-alpha - 2008-??-?? - New --hush command-line option similar to --quiet. While --quiet disables all logging to the console on startup, --hush limits the output to messages of warning and error severity. + - Use a Bloom filter rather than a digest-based set to track which + descriptors we need to keep around when we're cleaning out old + router descriptors. This speeds up the computation significantly, and + may reduce fragmentation. o Code simplifications and refactoring: - Refactor code using connection_ap_handshake_attach_circuit() to diff --git a/src/common/container.c b/src/common/container.c index a33a70116e..6d3a8fd5f9 100644 --- a/src/common/container.c +++ b/src/common/container.c @@ -1192,3 +1192,23 @@ IMPLEMENT_ORDER_FUNC(find_nth_double, double) IMPLEMENT_ORDER_FUNC(find_nth_uint32, uint32_t) IMPLEMENT_ORDER_FUNC(find_nth_long, long) +/** Return a newly allocated digestset_t, optimized to hold a total of + * <b>max_elements</b> digests with a reasonably low false positive weight. */ +digestset_t * +digestset_new(int max_elements) +{ + int n_bits = 1u << (tor_log2(max_elements)+5); + digestset_t *r = tor_malloc(sizeof(digestset_t)); + r->mask = n_bits - 1; + r->ba = bitarray_init_zero(n_bits); + return r; +} + +/** Free all storage held in <b>set</b>. */ +void +digestset_free(digestset_t *set) +{ + bitarray_free(set->ba); + tor_free(set); +} + diff --git a/src/common/container.h b/src/common/container.h index 866cb1b8a9..3746c30567 100644 --- a/src/common/container.h +++ b/src/common/container.h @@ -564,6 +564,49 @@ bitarray_is_set(bitarray_t *b, int bit) return b[bit >> BITARRAY_SHIFT] & (1u << (bit & BITARRAY_MASK)); } +/** A set of digests, implemented as a Bloom filter. */ +typedef struct { + int mask; /* One less than the number of bits in <b>ba</b>; always one less + * than a power of two. */ + bitarray_t *ba; /* A bit array to implement the Bloom filter. */ +} digestset_t; + +#define BIT(n) ((n) & set->mask) +/** Add the digest <b>digest</b> to <b>set</b>. */ +static INLINE void +digestset_add(digestset_t *set, const char *digest) +{ + const uint32_t *p = (const uint32_t *)digest; + const uint32_t d1 = p[0] + (p[1]>>16); + const uint32_t d2 = p[1] + (p[2]>>16); + const uint32_t d3 = p[2] + (p[3]>>16); + const uint32_t d4 = p[3] + (p[0]>>16); + bitarray_set(set->ba, BIT(d1)); + bitarray_set(set->ba, BIT(d2)); + bitarray_set(set->ba, BIT(d3)); + bitarray_set(set->ba, BIT(d4)); +} + +/** If <b>digest</b> is in <b>set</b>, return nonzero. Otherwise, + * <em>probably</em> return zero. */ +static INLINE int +digestset_isin(const digestset_t *set, const char *digest) +{ + const uint32_t *p = (const uint32_t *)digest; + const uint32_t d1 = p[0] + (p[1]>>16); + const uint32_t d2 = p[1] + (p[2]>>16); + const uint32_t d3 = p[2] + (p[3]>>16); + const uint32_t d4 = p[3] + (p[0]>>16); + return bitarray_is_set(set->ba, BIT(d1)) && + bitarray_is_set(set->ba, BIT(d2)) && + bitarray_is_set(set->ba, BIT(d3)) && + bitarray_is_set(set->ba, BIT(d4)); +} +#undef BIT + +digestset_t *digestset_new(int max_elements); +void digestset_free(digestset_t* set); + /* These functions, given an <b>array</b> of <b>n_elements</b>, return the * <b>nth</b> lowest element. <b>nth</b>=0 gives the lowest element; * <b>n_elements</b>-1 gives the highest; and (<b>n_elements</b>-1) / 2 gives diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 46248fb67b..60427bb586 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -2957,7 +2957,7 @@ _compare_duration_idx(const void *_d1, const void *_d2) static void routerlist_remove_old_cached_routers_with_id(time_t now, time_t cutoff, int lo, int hi, - digestmap_t *retain) + digestset_t *retain) { int i, n = hi-lo+1; unsigned n_extra, n_rmv = 0; @@ -2974,10 +2974,9 @@ routerlist_remove_old_cached_routers_with_id(time_t now, tor_assert(!memcmp(ident, r->identity_digest, DIGEST_LEN)); } #endif - /* Check whether we need to do anything at all. */ { - int mdpr = directory_caches_dir_info(get_options()) ? 5 : 2; + int mdpr = directory_caches_dir_info(get_options()) ? 2 : 1; if (n <= mdpr) return; n_extra = n - mdpr; @@ -2993,7 +2992,7 @@ routerlist_remove_old_cached_routers_with_id(time_t now, signed_descriptor_t *r_next; lifespans[i-lo].idx = i; if (r->last_listed_as_valid_until >= now || - (retain && digestmap_get(retain, r->signed_descriptor_digest))) { + (retain && digestset_isin(retain, r->signed_descriptor_digest))) { must_keep[i-lo] = 1; } if (i < hi) { @@ -3049,10 +3048,11 @@ routerlist_remove_old_routers(void) time_t cutoff; routerinfo_t *router; signed_descriptor_t *sd; - digestmap_t *retain; + digestset_t *retain; int caches = directory_caches_dir_info(get_options()); const networkstatus_t *consensus = networkstatus_get_latest_consensus(); const smartlist_t *networkstatus_v2_list = networkstatus_get_v2_list(); + int n_expected_retain = 0; trusted_dirs_remove_old_certs(); @@ -3061,7 +3061,18 @@ routerlist_remove_old_routers(void) // routerlist_assert_ok(routerlist); - retain = digestmap_new(); + n_expected_retain = smartlist_len(consensus->routerstatus_list); + if (caches && + networkstatus_v2_list && smartlist_len(networkstatus_v2_list)) { + SMARTLIST_FOREACH(networkstatus_v2_list, networkstatus_v2_t *, ns, + n_expected_retain += smartlist_len(ns->entries)); + /*XXXX021 too much magic. */ + n_expected_retain /= (smartlist_len(networkstatus_v2_list)/2+1); + } + //log_notice(LD_DIR,"n_expected_retain=%d",n_expected_retain); + + retain = digestset_new(n_expected_retain); + cutoff = now - OLD_ROUTER_DESC_MAX_AGE; /* Build a list of all the descriptors that _anybody_ lists. */ if (caches) { @@ -3077,7 +3088,7 @@ routerlist_remove_old_routers(void) * system will obsolete this whole thing in 0.2.0.x. */ SMARTLIST_FOREACH(ns->entries, routerstatus_t *, rs, if (rs->published_on >= cutoff) - digestmap_set(retain, rs->descriptor_digest, (void*)1)); + digestset_add(retain, rs->descriptor_digest)); }); } @@ -3085,13 +3096,13 @@ routerlist_remove_old_routers(void) if (consensus) { SMARTLIST_FOREACH(consensus->routerstatus_list, routerstatus_t *, rs, if (rs->published_on >= cutoff) - digestmap_set(retain, rs->descriptor_digest, (void*)1)); + digestset_add(retain, rs->descriptor_digest)); } - /* If we have a bunch of networkstatuses, we should consider pruning current - * routers that are too old and that nobody recommends. (If we don't have - * enough networkstatuses, then we should get more before we decide to kill - * routers.) */ + /* If we have nearly as many networkstatuses as we want, we should consider + * pruning current routers that are too old and that nobody recommends. (If + * we don't have enough networkstatuses, then we should get more before we + * decide to kill routers.) */ if (!caches || smartlist_len(networkstatus_v2_list) > get_n_v2_authorities() / 2) { cutoff = now - ROUTER_MAX_AGE; @@ -3100,7 +3111,8 @@ routerlist_remove_old_routers(void) router = smartlist_get(routerlist->routers, i); if (router->cache_info.published_on <= cutoff && router->cache_info.last_listed_as_valid_until < now && - !digestmap_get(retain,router->cache_info.signed_descriptor_digest)) { + !digestset_isin(retain, + router->cache_info.signed_descriptor_digest)) { /* Too old: remove it. (If we're a cache, just move it into * old_routers.) */ log_info(LD_DIR, @@ -3120,7 +3132,7 @@ routerlist_remove_old_routers(void) sd = smartlist_get(routerlist->old_routers, i); if (sd->published_on <= cutoff && sd->last_listed_as_valid_until < now && - !digestmap_get(retain, sd->signed_descriptor_digest)) { + !digestset_isin(retain, sd->signed_descriptor_digest)) { /* Too old. Remove it. */ routerlist_remove_old(routerlist, sd, i--); } @@ -3128,11 +3140,9 @@ routerlist_remove_old_routers(void) //routerlist_assert_ok(routerlist); - log_info(LD_DIR, "We have %d live routers and %d old router descriptors. " - "At most %d must be retained because of networkstatuses.", + log_info(LD_DIR, "We have %d live routers and %d old router descriptors.", smartlist_len(routerlist->routers), - smartlist_len(routerlist->old_routers), - digestmap_size(retain)); + smartlist_len(routerlist->old_routers)); /* Now we might have to look at routerlist->old_routers for extraneous * members. (We'd keep all the members if we could, but we need to save @@ -3141,9 +3151,10 @@ routerlist_remove_old_routers(void) * total number doesn't approach max_descriptors_per_router()*len(router). */ if (smartlist_len(routerlist->old_routers) < - smartlist_len(routerlist->routers) * (caches?4:2)) + smartlist_len(routerlist->routers)) goto done; + /* Sort by identity, then fix indices. */ smartlist_sort(routerlist->old_routers, _compare_old_routers_by_identity); /* Fix indices. */ for (i = 0; i < smartlist_len(routerlist->old_routers); ++i) { @@ -3171,7 +3182,7 @@ routerlist_remove_old_routers(void) //routerlist_assert_ok(routerlist); done: - digestmap_free(retain, NULL); + digestset_free(retain); } /** We just added a new set of descriptors. Take whatever extra steps diff --git a/src/or/test.c b/src/or/test.c index c421523a3f..41d1d354aa 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -1922,6 +1922,40 @@ test_util_bitarray(void) bitarray_free(ba); } +static void +test_util_digestset(void) +{ + smartlist_t *included = smartlist_create(); + char d[DIGEST_LEN]; + int i; + int ok = 1; + int false_positives = 0; + digestset_t *set; + + for (i = 0; i < 1000; ++i) { + crypto_rand(d, DIGEST_LEN); + smartlist_add(included, tor_memdup(d, DIGEST_LEN)); + } + set = digestset_new(1000); + SMARTLIST_FOREACH(included, const char *, cp, + if (digestset_isin(set, cp)) + ok = 0); + test_assert(ok); + SMARTLIST_FOREACH(included, const char *, cp, + digestset_add(set, cp)); + SMARTLIST_FOREACH(included, const char *, cp, + if (!digestset_isin(set, cp)) + ok = 0); + test_assert(ok); + for (i = 0; i < 1000; ++i) { + crypto_rand(d, DIGEST_LEN); + if (digestset_isin(set, d)) + ++false_positives; + } + test_assert(false_positives < 50); /* Should be far lower. */ + digestset_free(set); +} + /* stop threads running at once. */ static tor_mutex_t *_thread_test_mutex = NULL; /* make sure that threads have to run at the same time. */ @@ -3362,6 +3396,69 @@ bench_aes(void) } static void +bench_dmap(void) +{ + smartlist_t *sl = smartlist_create(); + smartlist_t *sl2 = smartlist_create(); + struct timeval start, end, pt2, pt3, pt4; + const int iters = 10000; + const int elts = 4000; + const int fpostests = 1000000; + char d[20]; + int i,n=0, fp = 0; + digestmap_t *dm = digestmap_new(); + digestset_t *ds = digestset_new(elts); + + for (i = 0; i < elts; ++i) { + crypto_rand(d, 20); + smartlist_add(sl, tor_memdup(d, 20)); + } + for (i = 0; i < elts; ++i) { + crypto_rand(d, 20); + smartlist_add(sl2, tor_memdup(d, 20)); + } + printf("nbits=%d\n", ds->mask+1); + + tor_gettimeofday(&start); + for (i = 0; i < iters; ++i) { + SMARTLIST_FOREACH(sl, const char *, cp, digestmap_set(dm, cp, (void*)1)); + } + tor_gettimeofday(&pt2); + for (i = 0; i < iters; ++i) { + SMARTLIST_FOREACH(sl, const char *, cp, digestmap_get(dm, cp)); + SMARTLIST_FOREACH(sl2, const char *, cp, digestmap_get(dm, cp)); + } + tor_gettimeofday(&pt3); + for (i = 0; i < iters; ++i) { + SMARTLIST_FOREACH(sl, const char *, cp, digestset_add(ds, cp)); + } + tor_gettimeofday(&pt4); + for (i = 0; i < iters; ++i) { + SMARTLIST_FOREACH(sl, const char *, cp, n += digestset_isin(ds, cp)); + SMARTLIST_FOREACH(sl2, const char *, cp, n += digestset_isin(ds, cp)); + } + tor_gettimeofday(&end); + + for (i = 0; i < fpostests; ++i) { + crypto_rand(d, 20); + if (digestset_isin(ds, d)) ++fp; + } + + printf("%ld\n",(unsigned long)tv_udiff(&start, &pt2)); + printf("%ld\n",(unsigned long)tv_udiff(&pt2, &pt3)); + printf("%ld\n",(unsigned long)tv_udiff(&pt3, &pt4)); + printf("%ld\n",(unsigned long)tv_udiff(&pt4, &end)); + printf("-- %d\n", n); + printf("++ %f\n", fp/(double)fpostests); + digestmap_free(dm, NULL); + digestset_free(ds); + SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(sl2, char *, cp, tor_free(cp)); + smartlist_free(sl); + smartlist_free(sl2); +} + +static void test_util_mempool(void) { mp_pool_t *pool; @@ -3850,6 +3947,7 @@ static struct { SUBENT(util, datadir), SUBENT(util, smartlist), SUBENT(util, bitarray), + SUBENT(util, digestset), SUBENT(util, mempool), SUBENT(util, memarea), SUBENT(util, strmap), @@ -3960,6 +4058,11 @@ main(int c, char**v) return 0; } + if (0) { + bench_dmap(); + return 0; + } + atexit(remove_directory); printf("Running Tor unit tests on %s\n", get_uname()); |