summaryrefslogtreecommitdiff
path: root/src/lib/container/bloomfilt.c
blob: 2133c280a5a59c249fe02965e14da58cf6233b41 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/* Copyright (c) 2003-2004, Roger Dingledine
 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
 * Copyright (c) 2007-2018, The Tor Project, Inc. */
/* See LICENSE for licensing information */

/**
 * \file bloomfilt.c
 * \brief Uses bitarray_t to implement a bloom filter.
 **/

#include <stdlib.h>
#include "lib/malloc/util_malloc.h"
#include "lib/container/bloomfilt.h"
#include "lib/intmath/bits.h"

/** Return a newly allocated digestset_t, optimized to hold a total of
 * <b>max_elements</b> digests with a reasonably low false positive weight. */
digestset_t *
digestset_new(int max_elements)
{
  /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k
   * is the number of hash functions per entry, m is the bits in the array,
   * and n is the number of elements inserted.  For us, k==4, n<=max_elements,
   * and m==n_bits= approximately max_elements*32.  This gives
   *   P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019
   *
   * It would be more optimal in space vs false positives to get this false
   * positive rate by going for k==13, and m==18.5n, but we also want to
   * conserve CPU, and k==13 is pretty big.
   */
  int n_bits = 1u << (tor_log2(max_elements)+5);
  digestset_t *r = tor_malloc(sizeof(digestset_t));
  r->mask = n_bits - 1;
  r->ba = bitarray_init_zero(n_bits);
  return r;
}

/** Free all storage held in <b>set</b>. */
void
digestset_free_(digestset_t *set)
{
  if (!set)
    return;
  bitarray_free(set->ba);
  tor_free(set);
}