diff options
Diffstat (limited to 'src/lib/container')
-rw-r--r-- | src/lib/container/.may_include | 18 | ||||
-rw-r--r-- | src/lib/container/bitarray.h | 86 | ||||
-rw-r--r-- | src/lib/container/bloomfilt.c | 113 | ||||
-rw-r--r-- | src/lib/container/bloomfilt.h | 41 | ||||
-rw-r--r-- | src/lib/container/buffers.c | 932 | ||||
-rw-r--r-- | src/lib/container/buffers.h | 122 | ||||
-rw-r--r-- | src/lib/container/handles.h | 153 | ||||
-rw-r--r-- | src/lib/container/include.am | 27 | ||||
-rw-r--r-- | src/lib/container/map.c | 413 | ||||
-rw-r--r-- | src/lib/container/map.h | 261 | ||||
-rw-r--r-- | src/lib/container/order.c | 48 | ||||
-rw-r--r-- | src/lib/container/order.h | 60 | ||||
-rw-r--r-- | src/lib/container/smartlist.c | 866 | ||||
-rw-r--r-- | src/lib/container/smartlist.h | 168 |
14 files changed, 3308 insertions, 0 deletions
diff --git a/src/lib/container/.may_include b/src/lib/container/.may_include new file mode 100644 index 0000000000..90de5eda40 --- /dev/null +++ b/src/lib/container/.may_include @@ -0,0 +1,18 @@ +orconfig.h +lib/cc/*.h +lib/container/*.h +lib/ctime/*.h +lib/defs/*.h +lib/malloc/*.h +lib/err/*.h +lib/smartlist_core/*.h +lib/string/*.h +lib/testsupport/testsupport.h +lib/intmath/*.h +lib/log/*.h + +# XXXX I am unsure about this one. It's only here for buffers.c +lib/time/*.h + +ht.h +siphash.h diff --git a/src/lib/container/bitarray.h b/src/lib/container/bitarray.h new file mode 100644 index 0000000000..910d5fea65 --- /dev/null +++ b/src/lib/container/bitarray.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_BITARRAY_H +#define TOR_BITARRAY_H + +/** + * \file bitarray.h + * + * \brief Implements a variable-sized (but non-resizeable) bit-array. + **/ + +#include "orconfig.h" +#include <string.h> +#include "lib/cc/torint.h" +#include "lib/malloc/malloc.h" + +#if SIZEOF_INT == 4 +#define BITARRAY_SHIFT 5 +#elif SIZEOF_INT == 8 +#define BITARRAY_SHIFT 6 +#else +#error "int is neither 4 nor 8 bytes. I can't deal with that." +#endif /* SIZEOF_INT == 4 || ... */ +#define BITARRAY_MASK ((1u<<BITARRAY_SHIFT)-1) + +/** A random-access array of one-bit-wide elements. */ +typedef unsigned int bitarray_t; +/** Create a new bit array that can hold <b>n_bits</b> bits. */ +static inline bitarray_t * +bitarray_init_zero(unsigned int n_bits) +{ + /* round up to the next int. */ + size_t sz = (n_bits+BITARRAY_MASK) >> BITARRAY_SHIFT; + return tor_calloc(sz, sizeof(unsigned int)); +} +/** Expand <b>ba</b> from holding <b>n_bits_old</b> to <b>n_bits_new</b>, + * clearing all new bits. Returns a possibly changed pointer to the + * bitarray. */ +static inline bitarray_t * +bitarray_expand(bitarray_t *ba, + unsigned int n_bits_old, unsigned int n_bits_new) +{ + size_t sz_old = (n_bits_old+BITARRAY_MASK) >> BITARRAY_SHIFT; + size_t sz_new = (n_bits_new+BITARRAY_MASK) >> BITARRAY_SHIFT; + char *ptr; + if (sz_new <= sz_old) + return ba; + ptr = tor_reallocarray(ba, sz_new, sizeof(unsigned int)); + /* This memset does nothing to the older excess bytes. But they were + * already set to 0 by bitarry_init_zero. */ + memset(ptr+sz_old*sizeof(unsigned int), 0, + (sz_new-sz_old)*sizeof(unsigned int)); + return (bitarray_t*) ptr; +} +/** Free the bit array <b>ba</b>. */ +static inline void +bitarray_free_(bitarray_t *ba) +{ + tor_free(ba); +} +#define bitarray_free(ba) FREE_AND_NULL(bitarray_t, bitarray_free_, (ba)) + +/** Set the <b>bit</b>th bit in <b>b</b> to 1. */ +static inline void +bitarray_set(bitarray_t *b, int bit) +{ + b[bit >> BITARRAY_SHIFT] |= (1u << (bit & BITARRAY_MASK)); +} +/** Set the <b>bit</b>th bit in <b>b</b> to 0. */ +static inline void +bitarray_clear(bitarray_t *b, int bit) +{ + b[bit >> BITARRAY_SHIFT] &= ~ (1u << (bit & BITARRAY_MASK)); +} +/** Return true iff <b>bit</b>th bit in <b>b</b> is nonzero. NOTE: does + * not necessarily return 1 on true. */ +static inline unsigned int +bitarray_is_set(bitarray_t *b, int bit) +{ + return b[bit >> BITARRAY_SHIFT] & (1u << (bit & BITARRAY_MASK)); +} + +#endif /* !defined(TOR_CONTAINER_H) */ diff --git a/src/lib/container/bloomfilt.c b/src/lib/container/bloomfilt.c new file mode 100644 index 0000000000..9aa9b1ee56 --- /dev/null +++ b/src/lib/container/bloomfilt.c @@ -0,0 +1,113 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file bloomfilt.c + * \brief Uses bitarray_t to implement a bloom filter. + **/ + +#include <stdlib.h> + +#include "lib/malloc/malloc.h" +#include "lib/container/bloomfilt.h" +#include "lib/intmath/bits.h" +#include "lib/log/util_bug.h" +#include "siphash.h" + +/** How many bloom-filter bits we set per address. This is twice the + * BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit + * values. */ +#define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2) + +struct bloomfilt_t { + /** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each + * items. */ + struct sipkey key[BLOOMFILT_N_HASHES]; + bloomfilt_hash_fn hashfn; /**< Function used to generate hashes */ + uint32_t mask; /**< One less than the number of bits in <b>ba</b>; always + * one less than a power of two. */ + bitarray_t *ba; /**< A bit array to implement the Bloom filter. */ +}; + +#define BIT(set, n) ((n) & (set)->mask) + +/** Add the element <b>item</b> to <b>set</b>. */ +void +bloomfilt_add(bloomfilt_t *set, + const void *item) +{ + int i; + for (i = 0; i < BLOOMFILT_N_HASHES; ++i) { + uint64_t h = set->hashfn(&set->key[i], item); + uint32_t high_bits = (uint32_t)(h >> 32); + uint32_t low_bits = (uint32_t)(h); + bitarray_set(set->ba, BIT(set, high_bits)); + bitarray_set(set->ba, BIT(set, low_bits)); + } +} + +/** If <b>item</b> is in <b>set</b>, return nonzero. Otherwise, + * <em>probably</em> return zero. */ +int +bloomfilt_probably_contains(const bloomfilt_t *set, + const void *item) +{ + int i, matches = 0; + for (i = 0; i < BLOOMFILT_N_HASHES; ++i) { + uint64_t h = set->hashfn(&set->key[i], item); + uint32_t high_bits = (uint32_t)(h >> 32); + uint32_t low_bits = (uint32_t)(h); + // Note that !! is necessary here, since bitarray_is_set does not + // necessarily return 1 on true. + matches += !! bitarray_is_set(set->ba, BIT(set, high_bits)); + matches += !! bitarray_is_set(set->ba, BIT(set, low_bits)); + } + return matches == N_BITS_PER_ITEM; +} + +/** Return a newly allocated bloomfilt_t, optimized to hold a total of + * <b>max_elements</b> elements with a reasonably low false positive weight. + * + * Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide + * functions of the items, and the key material <b>random_key</b> to + * key the hash. There must be BLOOMFILT_KEY_LEN bytes in the supplied key. + **/ +bloomfilt_t * +bloomfilt_new(int max_elements, + bloomfilt_hash_fn hashfn, + const uint8_t *random_key) +{ + /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k + * is the number of hash functions per entry, m is the bits in the array, + * and n is the number of elements inserted. For us, k==4, n<=max_elements, + * and m==n_bits= approximately max_elements*32. This gives + * P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019 + * + * It would be more optimal in space vs false positives to get this false + * positive rate by going for k==13, and m==18.5n, but we also want to + * conserve CPU, and k==13 is pretty big. + */ + int n_bits = 1u << (tor_log2(max_elements)+5); + bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t)); + r->mask = n_bits - 1; + r->ba = bitarray_init_zero(n_bits); + + tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN); + memcpy(r->key, random_key, sizeof(r->key)); + + r->hashfn = hashfn; + + return r; +} + +/** Free all storage held in <b>set</b>. */ +void +bloomfilt_free_(bloomfilt_t *set) +{ + if (!set) + return; + bitarray_free(set->ba); + tor_free(set); +} diff --git a/src/lib/container/bloomfilt.h b/src/lib/container/bloomfilt.h new file mode 100644 index 0000000000..0ce18bd3ec --- /dev/null +++ b/src/lib/container/bloomfilt.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_BLOOMFILT_H +#define TOR_BLOOMFILT_H + +/** + * \file bloomfilt.h + * + * \brief Header for bloomfilt.c + **/ + +#include "orconfig.h" +#include "lib/cc/torint.h" +#include "lib/container/bitarray.h" + +/** A set of elements, implemented as a Bloom filter. */ +typedef struct bloomfilt_t bloomfilt_t; + +/** How many 64-bit siphash values to extract per item. */ +#define BLOOMFILT_N_HASHES 2 + +/** How much key material do we need to randomize hashes? */ +#define BLOOMFILT_KEY_LEN (BLOOMFILT_N_HASHES * 16) + +struct sipkey; +typedef uint64_t (*bloomfilt_hash_fn)(const struct sipkey *key, + const void *item); + +void bloomfilt_add(bloomfilt_t *set, const void *item); +int bloomfilt_probably_contains(const bloomfilt_t *set, const void *item); + +bloomfilt_t *bloomfilt_new(int max_elements, + bloomfilt_hash_fn hashfn, + const uint8_t *random_key); +void bloomfilt_free_(bloomfilt_t* set); +#define bloomfilt_free(set) FREE_AND_NULL(bloomfilt_t, bloomfilt_free_, (set)) + +#endif /* !defined(TOR_BLOOMFILT_H) */ diff --git a/src/lib/container/buffers.c b/src/lib/container/buffers.c new file mode 100644 index 0000000000..67887f2f30 --- /dev/null +++ b/src/lib/container/buffers.c @@ -0,0 +1,932 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file buffers.c + * \brief Implements a generic buffer interface. + * + * A buf_t is a (fairly) opaque byte-oriented FIFO that can read to or flush + * from memory, sockets, file descriptors, TLS connections, or another buf_t. + * Buffers are implemented as linked lists of memory chunks. + * + * All socket-backed and TLS-based connection_t objects have a pair of + * buffers: one for incoming data, and one for outcoming data. These are fed + * and drained from functions in connection.c, trigged by events that are + * monitored in main.c. + * + * This module only handles the buffer implementation itself. To use a buffer + * with the network, a compressor, or a TLS connection, see the other buffer_* + * modules. + **/ + +#define BUFFERS_PRIVATE +#include "orconfig.h" +#include <stddef.h> +#include "lib/container/buffers.h" +#include "lib/cc/torint.h" +#include "lib/log/log.h" +#include "lib/log/util_bug.h" +#include "lib/ctime/di_ops.h" +#include "lib/malloc/malloc.h" +#include "lib/string/printf.h" +#include "lib/time/compat_time.h" + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#include <stdlib.h> +#include <string.h> + +//#define PARANOIA + +#ifdef PARANOIA +/** Helper: If PARANOIA is defined, assert that the buffer in local variable + * <b>buf</b> is well-formed. */ +#define check() STMT_BEGIN buf_assert_ok(buf); STMT_END +#else +#define check() STMT_NIL +#endif /* defined(PARANOIA) */ + +/* Implementation notes: + * + * After flirting with memmove, and dallying with ring-buffers, we're finally + * getting up to speed with the 1970s and implementing buffers as a linked + * list of small chunks. Each buffer has such a list; data is removed from + * the head of the list, and added at the tail. The list is singly linked, + * and the buffer keeps a pointer to the head and the tail. + * + * Every chunk, except the tail, contains at least one byte of data. Data in + * each chunk is contiguous. + * + * When you need to treat the first N characters on a buffer as a contiguous + * string, use the buf_pullup function to make them so. Don't do this more + * than necessary. + * + * The major free Unix kernels have handled buffers like this since, like, + * forever. + */ + +/* Chunk manipulation functions */ + +#define CHUNK_HEADER_LEN offsetof(chunk_t, mem[0]) + +/* We leave this many NUL bytes at the end of the buffer. */ +#ifdef DISABLE_MEMORY_SENTINELS +#define SENTINEL_LEN 0 +#else +#define SENTINEL_LEN 4 +#endif + +/* Header size plus NUL bytes at the end */ +#define CHUNK_OVERHEAD (CHUNK_HEADER_LEN + SENTINEL_LEN) + +/** Return the number of bytes needed to allocate a chunk to hold + * <b>memlen</b> bytes. */ +#define CHUNK_ALLOC_SIZE(memlen) (CHUNK_OVERHEAD + (memlen)) +/** Return the number of usable bytes in a chunk allocated with + * malloc(<b>memlen</b>). */ +#define CHUNK_SIZE_WITH_ALLOC(memlen) ((memlen) - CHUNK_OVERHEAD) + +#define DEBUG_SENTINEL + +#if defined(DEBUG_SENTINEL) && !defined(DISABLE_MEMORY_SENTINELS) +#define DBG_S(s) s +#else +#define DBG_S(s) (void)0 +#endif + +#ifdef DISABLE_MEMORY_SENTINELS +#define CHUNK_SET_SENTINEL(chunk, alloclen) STMT_NIL +#else +#define CHUNK_SET_SENTINEL(chunk, alloclen) do { \ + uint8_t *a = (uint8_t*) &(chunk)->mem[(chunk)->memlen]; \ + DBG_S(uint8_t *b = &((uint8_t*)(chunk))[(alloclen)-SENTINEL_LEN]); \ + DBG_S(tor_assert(a == b)); \ + memset(a,0,SENTINEL_LEN); \ + } while (0) +#endif /* defined(DISABLE_MEMORY_SENTINELS) */ + +/** Move all bytes stored in <b>chunk</b> to the front of <b>chunk</b>->mem, + * to free up space at the end. */ +static inline void +chunk_repack(chunk_t *chunk) +{ + if (chunk->datalen && chunk->data != &chunk->mem[0]) { + memmove(chunk->mem, chunk->data, chunk->datalen); + } + chunk->data = &chunk->mem[0]; +} + +/** Keep track of total size of allocated chunks for consistency asserts */ +static size_t total_bytes_allocated_in_chunks = 0; +static void +buf_chunk_free_unchecked(chunk_t *chunk) +{ + if (!chunk) + return; +#ifdef DEBUG_CHUNK_ALLOC + tor_assert(CHUNK_ALLOC_SIZE(chunk->memlen) == chunk->DBG_alloc); +#endif + tor_assert(total_bytes_allocated_in_chunks >= + CHUNK_ALLOC_SIZE(chunk->memlen)); + total_bytes_allocated_in_chunks -= CHUNK_ALLOC_SIZE(chunk->memlen); + tor_free(chunk); +} +static inline chunk_t * +chunk_new_with_alloc_size(size_t alloc) +{ + chunk_t *ch; + ch = tor_malloc(alloc); + ch->next = NULL; + ch->datalen = 0; +#ifdef DEBUG_CHUNK_ALLOC + ch->DBG_alloc = alloc; +#endif + ch->memlen = CHUNK_SIZE_WITH_ALLOC(alloc); + total_bytes_allocated_in_chunks += alloc; + ch->data = &ch->mem[0]; + CHUNK_SET_SENTINEL(ch, alloc); + return ch; +} + +/** Expand <b>chunk</b> until it can hold <b>sz</b> bytes, and return a + * new pointer to <b>chunk</b>. Old pointers are no longer valid. */ +static inline chunk_t * +chunk_grow(chunk_t *chunk, size_t sz) +{ + off_t offset; + const size_t memlen_orig = chunk->memlen; + const size_t orig_alloc = CHUNK_ALLOC_SIZE(memlen_orig); + const size_t new_alloc = CHUNK_ALLOC_SIZE(sz); + tor_assert(sz > chunk->memlen); + offset = chunk->data - chunk->mem; + chunk = tor_realloc(chunk, new_alloc); + chunk->memlen = sz; + chunk->data = chunk->mem + offset; +#ifdef DEBUG_CHUNK_ALLOC + tor_assert(chunk->DBG_alloc == orig_alloc); + chunk->DBG_alloc = new_alloc; +#endif + total_bytes_allocated_in_chunks += new_alloc - orig_alloc; + CHUNK_SET_SENTINEL(chunk, new_alloc); + return chunk; +} + +/** Every chunk should take up at least this many bytes. */ +#define MIN_CHUNK_ALLOC 256 +/** No chunk should take up more than this many bytes. */ +#define MAX_CHUNK_ALLOC 65536 + +/** Return the allocation size we'd like to use to hold <b>target</b> + * bytes. */ +size_t +buf_preferred_chunk_size(size_t target) +{ + tor_assert(target <= SIZE_T_CEILING - CHUNK_OVERHEAD); + if (CHUNK_ALLOC_SIZE(target) >= MAX_CHUNK_ALLOC) + return CHUNK_ALLOC_SIZE(target); + size_t sz = MIN_CHUNK_ALLOC; + while (CHUNK_SIZE_WITH_ALLOC(sz) < target) { + sz <<= 1; + } + return sz; +} + +/** Collapse data from the first N chunks from <b>buf</b> into buf->head, + * growing it as necessary, until buf->head has the first <b>bytes</b> bytes + * of data from the buffer, or until buf->head has all the data in <b>buf</b>. + * + * Set *<b>head_out</b> to point to the first byte of available data, and + * *<b>len_out</b> to the number of bytes of data available at + * *<b>head_out</b>. Note that *<b>len_out</b> may be more or less than + * <b>bytes</b>, depending on the number of bytes available. + */ +void +buf_pullup(buf_t *buf, size_t bytes, const char **head_out, size_t *len_out) +{ + chunk_t *dest, *src; + size_t capacity; + if (!buf->head) { + *head_out = NULL; + *len_out = 0; + return; + } + + check(); + if (buf->datalen < bytes) + bytes = buf->datalen; + + capacity = bytes; + if (buf->head->datalen >= bytes) { + *head_out = buf->head->data; + *len_out = buf->head->datalen; + return; + } + + if (buf->head->memlen >= capacity) { + /* We don't need to grow the first chunk, but we might need to repack it.*/ + size_t needed = capacity - buf->head->datalen; + if (CHUNK_REMAINING_CAPACITY(buf->head) < needed) + chunk_repack(buf->head); + tor_assert(CHUNK_REMAINING_CAPACITY(buf->head) >= needed); + } else { + chunk_t *newhead; + size_t newsize; + /* We need to grow the chunk. */ + chunk_repack(buf->head); + newsize = CHUNK_SIZE_WITH_ALLOC(buf_preferred_chunk_size(capacity)); + newhead = chunk_grow(buf->head, newsize); + tor_assert(newhead->memlen >= capacity); + if (newhead != buf->head) { + if (buf->tail == buf->head) + buf->tail = newhead; + buf->head = newhead; + } + } + + dest = buf->head; + while (dest->datalen < bytes) { + size_t n = bytes - dest->datalen; + src = dest->next; + tor_assert(src); + if (n >= src->datalen) { + memcpy(CHUNK_WRITE_PTR(dest), src->data, src->datalen); + dest->datalen += src->datalen; + dest->next = src->next; + if (buf->tail == src) + buf->tail = dest; + buf_chunk_free_unchecked(src); + } else { + memcpy(CHUNK_WRITE_PTR(dest), src->data, n); + dest->datalen += n; + src->data += n; + src->datalen -= n; + tor_assert(dest->datalen == bytes); + } + } + + check(); + *head_out = buf->head->data; + *len_out = buf->head->datalen; +} + +#ifdef TOR_UNIT_TESTS +/* Write sz bytes from cp into a newly allocated buffer buf. + * Returns NULL when passed a NULL cp or zero sz. + * Asserts on failure: only for use in unit tests. + * buf must be freed using buf_free(). */ +buf_t * +buf_new_with_data(const char *cp, size_t sz) +{ + /* Validate arguments */ + if (!cp || sz <= 0 || sz >= INT_MAX) { + return NULL; + } + + tor_assert(sz < SSIZE_T_CEILING); + + /* Allocate a buffer */ + buf_t *buf = buf_new_with_capacity(sz); + tor_assert(buf); + buf_assert_ok(buf); + tor_assert(!buf->head); + + /* Allocate a chunk that is sz bytes long */ + buf->head = chunk_new_with_alloc_size(CHUNK_ALLOC_SIZE(sz)); + buf->tail = buf->head; + tor_assert(buf->head); + buf_assert_ok(buf); + tor_assert(buf_allocation(buf) >= sz); + + /* Copy the data and size the buffers */ + tor_assert(sz <= buf_slack(buf)); + tor_assert(sz <= CHUNK_REMAINING_CAPACITY(buf->head)); + memcpy(&buf->head->mem[0], cp, sz); + buf->datalen = sz; + buf->head->datalen = sz; + buf->head->data = &buf->head->mem[0]; + buf_assert_ok(buf); + + /* Make sure everything is large enough */ + tor_assert(buf_allocation(buf) >= sz); + tor_assert(buf_allocation(buf) >= buf_datalen(buf) + buf_slack(buf)); + /* Does the buffer implementation allocate more than the requested size? + * (for example, by rounding up). If so, these checks will fail. */ + tor_assert(buf_datalen(buf) == sz); + tor_assert(buf_slack(buf) == 0); + + return buf; +} +#endif /* defined(TOR_UNIT_TESTS) */ + +/** Remove the first <b>n</b> bytes from buf. */ +void +buf_drain(buf_t *buf, size_t n) +{ + tor_assert(buf->datalen >= n); + while (n) { + tor_assert(buf->head); + if (buf->head->datalen > n) { + buf->head->datalen -= n; + buf->head->data += n; + buf->datalen -= n; + return; + } else { + chunk_t *victim = buf->head; + n -= victim->datalen; + buf->datalen -= victim->datalen; + buf->head = victim->next; + if (buf->tail == victim) + buf->tail = NULL; + buf_chunk_free_unchecked(victim); + } + } + check(); +} + +/** Create and return a new buf with default chunk capacity <b>size</b>. + */ +buf_t * +buf_new_with_capacity(size_t size) +{ + buf_t *b = buf_new(); + b->default_chunk_size = buf_preferred_chunk_size(size); + return b; +} + +/** Allocate and return a new buffer with default capacity. */ +buf_t * +buf_new(void) +{ + buf_t *buf = tor_malloc_zero(sizeof(buf_t)); + buf->magic = BUFFER_MAGIC; + buf->default_chunk_size = 4096; + return buf; +} + +size_t +buf_get_default_chunk_size(const buf_t *buf) +{ + return buf->default_chunk_size; +} + +/** Remove all data from <b>buf</b>. */ +void +buf_clear(buf_t *buf) +{ + chunk_t *chunk, *next; + buf->datalen = 0; + for (chunk = buf->head; chunk; chunk = next) { + next = chunk->next; + buf_chunk_free_unchecked(chunk); + } + buf->head = buf->tail = NULL; +} + +/** Return the number of bytes stored in <b>buf</b> */ +MOCK_IMPL(size_t, +buf_datalen, (const buf_t *buf)) +{ + return buf->datalen; +} + +/** Return the total length of all chunks used in <b>buf</b>. */ +size_t +buf_allocation(const buf_t *buf) +{ + size_t total = 0; + const chunk_t *chunk; + for (chunk = buf->head; chunk; chunk = chunk->next) { + total += CHUNK_ALLOC_SIZE(chunk->memlen); + } + return total; +} + +/** Return the number of bytes that can be added to <b>buf</b> without + * performing any additional allocation. */ +size_t +buf_slack(const buf_t *buf) +{ + if (!buf->tail) + return 0; + else + return CHUNK_REMAINING_CAPACITY(buf->tail); +} + +/** Release storage held by <b>buf</b>. */ +void +buf_free_(buf_t *buf) +{ + if (!buf) + return; + + buf_clear(buf); + buf->magic = 0xdeadbeef; + tor_free(buf); +} + +/** Return a new copy of <b>in_chunk</b> */ +static chunk_t * +chunk_copy(const chunk_t *in_chunk) +{ + chunk_t *newch = tor_memdup(in_chunk, CHUNK_ALLOC_SIZE(in_chunk->memlen)); + total_bytes_allocated_in_chunks += CHUNK_ALLOC_SIZE(in_chunk->memlen); +#ifdef DEBUG_CHUNK_ALLOC + newch->DBG_alloc = CHUNK_ALLOC_SIZE(in_chunk->memlen); +#endif + newch->next = NULL; + if (in_chunk->data) { + off_t offset = in_chunk->data - in_chunk->mem; + newch->data = newch->mem + offset; + } + return newch; +} + +/** Return a new copy of <b>buf</b> */ +buf_t * +buf_copy(const buf_t *buf) +{ + chunk_t *ch; + buf_t *out = buf_new(); + out->default_chunk_size = buf->default_chunk_size; + for (ch = buf->head; ch; ch = ch->next) { + chunk_t *newch = chunk_copy(ch); + if (out->tail) { + out->tail->next = newch; + out->tail = newch; + } else { + out->head = out->tail = newch; + } + } + out->datalen = buf->datalen; + return out; +} + +/** Append a new chunk with enough capacity to hold <b>capacity</b> bytes to + * the tail of <b>buf</b>. If <b>capped</b>, don't allocate a chunk bigger + * than MAX_CHUNK_ALLOC. */ +chunk_t * +buf_add_chunk_with_capacity(buf_t *buf, size_t capacity, int capped) +{ + chunk_t *chunk; + + if (CHUNK_ALLOC_SIZE(capacity) < buf->default_chunk_size) { + chunk = chunk_new_with_alloc_size(buf->default_chunk_size); + } else if (capped && CHUNK_ALLOC_SIZE(capacity) > MAX_CHUNK_ALLOC) { + chunk = chunk_new_with_alloc_size(MAX_CHUNK_ALLOC); + } else { + chunk = chunk_new_with_alloc_size(buf_preferred_chunk_size(capacity)); + } + + chunk->inserted_time = monotime_coarse_get_stamp(); + + if (buf->tail) { + tor_assert(buf->head); + buf->tail->next = chunk; + buf->tail = chunk; + } else { + tor_assert(!buf->head); + buf->head = buf->tail = chunk; + } + check(); + return chunk; +} + +/** Return the age of the oldest chunk in the buffer <b>buf</b>, in + * timestamp units. Requires the current monotonic timestamp as its + * input <b>now</b>. + */ +uint32_t +buf_get_oldest_chunk_timestamp(const buf_t *buf, uint32_t now) +{ + if (buf->head) { + return now - buf->head->inserted_time; + } else { + return 0; + } +} + +size_t +buf_get_total_allocation(void) +{ + return total_bytes_allocated_in_chunks; +} + +/** Append <b>string_len</b> bytes from <b>string</b> to the end of + * <b>buf</b>. + * + * Return the new length of the buffer on success, -1 on failure. + */ +int +buf_add(buf_t *buf, const char *string, size_t string_len) +{ + if (!string_len) + return (int)buf->datalen; + check(); + + if (BUG(buf->datalen >= INT_MAX)) + return -1; + if (BUG(buf->datalen >= INT_MAX - string_len)) + return -1; + + while (string_len) { + size_t copy; + if (!buf->tail || !CHUNK_REMAINING_CAPACITY(buf->tail)) + buf_add_chunk_with_capacity(buf, string_len, 1); + + copy = CHUNK_REMAINING_CAPACITY(buf->tail); + if (copy > string_len) + copy = string_len; + memcpy(CHUNK_WRITE_PTR(buf->tail), string, copy); + string_len -= copy; + string += copy; + buf->datalen += copy; + buf->tail->datalen += copy; + } + + check(); + tor_assert(buf->datalen < INT_MAX); + return (int)buf->datalen; +} + +/** Add a nul-terminated <b>string</b> to <b>buf</b>, not including the + * terminating NUL. */ +void +buf_add_string(buf_t *buf, const char *string) +{ + buf_add(buf, string, strlen(string)); +} + +/** As tor_snprintf, but write the results into a buf_t */ +void +buf_add_printf(buf_t *buf, const char *format, ...) +{ + va_list ap; + va_start(ap,format); + buf_add_vprintf(buf, format, ap); + va_end(ap); +} + +/** As tor_vsnprintf, but write the results into a buf_t. */ +void +buf_add_vprintf(buf_t *buf, const char *format, va_list args) +{ + /* XXXX Faster implementations are easy enough, but let's optimize later */ + char *tmp; + tor_vasprintf(&tmp, format, args); + buf_add(buf, tmp, strlen(tmp)); + tor_free(tmp); +} + +/** Return a heap-allocated string containing the contents of <b>buf</b>, plus + * a NUL byte. If <b>sz_out</b> is provided, set *<b>sz_out</b> to the length + * of the returned string, not including the terminating NUL. */ +char * +buf_extract(buf_t *buf, size_t *sz_out) +{ + tor_assert(buf); + + size_t sz = buf_datalen(buf); + char *result; + result = tor_malloc(sz+1); + buf_peek(buf, result, sz); + result[sz] = 0; + if (sz_out) + *sz_out = sz; + return result; +} + +/** Helper: copy the first <b>string_len</b> bytes from <b>buf</b> + * onto <b>string</b>. + */ +void +buf_peek(const buf_t *buf, char *string, size_t string_len) +{ + chunk_t *chunk; + + tor_assert(string); + /* make sure we don't ask for too much */ + tor_assert(string_len <= buf->datalen); + /* buf_assert_ok(buf); */ + + chunk = buf->head; + while (string_len) { + size_t copy = string_len; + tor_assert(chunk); + if (chunk->datalen < copy) + copy = chunk->datalen; + memcpy(string, chunk->data, copy); + string_len -= copy; + string += copy; + chunk = chunk->next; + } +} + +/** Remove <b>string_len</b> bytes from the front of <b>buf</b>, and store + * them into <b>string</b>. Return the new buffer size. <b>string_len</b> + * must be \<= the number of bytes on the buffer. + */ +int +buf_get_bytes(buf_t *buf, char *string, size_t string_len) +{ + /* There must be string_len bytes in buf; write them onto string, + * then memmove buf back (that is, remove them from buf). + * + * Return the number of bytes still on the buffer. */ + + check(); + buf_peek(buf, string, string_len); + buf_drain(buf, string_len); + check(); + tor_assert(buf->datalen < INT_MAX); + return (int)buf->datalen; +} + +/** Move up to *<b>buf_flushlen</b> bytes from <b>buf_in</b> to + * <b>buf_out</b>, and modify *<b>buf_flushlen</b> appropriately. + * Return the number of bytes actually copied. + */ +int +buf_move_to_buf(buf_t *buf_out, buf_t *buf_in, size_t *buf_flushlen) +{ + /* We can do way better here, but this doesn't turn up in any profiles. */ + char b[4096]; + size_t cp, len; + + if (BUG(buf_out->datalen >= INT_MAX || *buf_flushlen >= INT_MAX)) + return -1; + if (BUG(buf_out->datalen >= INT_MAX - *buf_flushlen)) + return -1; + + len = *buf_flushlen; + if (len > buf_in->datalen) + len = buf_in->datalen; + + cp = len; /* Remember the number of bytes we intend to copy. */ + tor_assert(cp < INT_MAX); + while (len) { + /* This isn't the most efficient implementation one could imagine, since + * it does two copies instead of 1, but I kinda doubt that this will be + * critical path. */ + size_t n = len > sizeof(b) ? sizeof(b) : len; + buf_get_bytes(buf_in, b, n); + buf_add(buf_out, b, n); + len -= n; + } + *buf_flushlen -= cp; + return (int)cp; +} + +/** Moves all data from <b>buf_in</b> to <b>buf_out</b>, without copying. + */ +void +buf_move_all(buf_t *buf_out, buf_t *buf_in) +{ + tor_assert(buf_out); + if (!buf_in) + return; + if (BUG(buf_out->datalen >= INT_MAX || buf_in->datalen >= INT_MAX)) + return; + if (BUG(buf_out->datalen >= INT_MAX - buf_in->datalen)) + return; + + if (buf_out->head == NULL) { + buf_out->head = buf_in->head; + buf_out->tail = buf_in->tail; + } else { + buf_out->tail->next = buf_in->head; + buf_out->tail = buf_in->tail; + } + + buf_out->datalen += buf_in->datalen; + buf_in->head = buf_in->tail = NULL; + buf_in->datalen = 0; +} + +/** Internal structure: represents a position in a buffer. */ +typedef struct buf_pos_t { + const chunk_t *chunk; /**< Which chunk are we pointing to? */ + int pos;/**< Which character inside the chunk's data are we pointing to? */ + size_t chunk_pos; /**< Total length of all previous chunks. */ +} buf_pos_t; + +/** Initialize <b>out</b> to point to the first character of <b>buf</b>.*/ +static void +buf_pos_init(const buf_t *buf, buf_pos_t *out) +{ + out->chunk = buf->head; + out->pos = 0; + out->chunk_pos = 0; +} + +/** Advance <b>out</b> to the first appearance of <b>ch</b> at the current + * position of <b>out</b>, or later. Return -1 if no instances are found; + * otherwise returns the absolute position of the character. */ +static off_t +buf_find_pos_of_char(char ch, buf_pos_t *out) +{ + const chunk_t *chunk; + int pos; + tor_assert(out); + if (out->chunk) { + if (out->chunk->datalen) { + tor_assert(out->pos < (off_t)out->chunk->datalen); + } else { + tor_assert(out->pos == 0); + } + } + pos = out->pos; + for (chunk = out->chunk; chunk; chunk = chunk->next) { + char *cp = memchr(chunk->data+pos, ch, chunk->datalen - pos); + if (cp) { + out->chunk = chunk; + tor_assert(cp - chunk->data < INT_MAX); + out->pos = (int)(cp - chunk->data); + return out->chunk_pos + out->pos; + } else { + out->chunk_pos += chunk->datalen; + pos = 0; + } + } + return -1; +} + +/** Advance <b>pos</b> by a single character, if there are any more characters + * in the buffer. Returns 0 on success, -1 on failure. */ +static inline int +buf_pos_inc(buf_pos_t *pos) +{ + tor_assert(pos->pos < INT_MAX - 1); + ++pos->pos; + if (pos->pos == (off_t)pos->chunk->datalen) { + if (!pos->chunk->next) + return -1; + pos->chunk_pos += pos->chunk->datalen; + pos->chunk = pos->chunk->next; + pos->pos = 0; + } + return 0; +} + +/** Return true iff the <b>n</b>-character string in <b>s</b> appears + * (verbatim) at <b>pos</b>. */ +static int +buf_matches_at_pos(const buf_pos_t *pos, const char *s, size_t n) +{ + buf_pos_t p; + if (!n) + return 1; + + memcpy(&p, pos, sizeof(p)); + + while (1) { + char ch = p.chunk->data[p.pos]; + if (ch != *s) + return 0; + ++s; + /* If we're out of characters that don't match, we match. Check this + * _before_ we test incrementing pos, in case we're at the end of the + * string. */ + if (--n == 0) + return 1; + if (buf_pos_inc(&p)<0) + return 0; + } +} + +/** Return the first position in <b>buf</b> at which the <b>n</b>-character + * string <b>s</b> occurs, or -1 if it does not occur. */ +int +buf_find_string_offset(const buf_t *buf, const char *s, size_t n) +{ + buf_pos_t pos; + buf_pos_init(buf, &pos); + while (buf_find_pos_of_char(*s, &pos) >= 0) { + if (buf_matches_at_pos(&pos, s, n)) { + tor_assert(pos.chunk_pos + pos.pos < INT_MAX); + return (int)(pos.chunk_pos + pos.pos); + } else { + if (buf_pos_inc(&pos)<0) + return -1; + } + } + return -1; +} + +/** Return 1 iff <b>buf</b> starts with <b>cmd</b>. <b>cmd</b> must be a null + * terminated string, of no more than PEEK_BUF_STARTSWITH_MAX bytes. */ +int +buf_peek_startswith(const buf_t *buf, const char *cmd) +{ + char tmp[PEEK_BUF_STARTSWITH_MAX]; + size_t clen = strlen(cmd); + if (clen == 0) + return 1; + if (BUG(clen > sizeof(tmp))) + return 0; + if (buf->datalen < clen) + return 0; + buf_peek(buf, tmp, clen); + return fast_memeq(tmp, cmd, clen); +} + +/** Return the index within <b>buf</b> at which <b>ch</b> first appears, + * or -1 if <b>ch</b> does not appear on buf. */ +static off_t +buf_find_offset_of_char(buf_t *buf, char ch) +{ + chunk_t *chunk; + off_t offset = 0; + tor_assert(buf->datalen < INT_MAX); + for (chunk = buf->head; chunk; chunk = chunk->next) { + char *cp = memchr(chunk->data, ch, chunk->datalen); + if (cp) + return offset + (cp - chunk->data); + else + offset += chunk->datalen; + } + return -1; +} + +/** Try to read a single LF-terminated line from <b>buf</b>, and write it + * (including the LF), NUL-terminated, into the *<b>data_len</b> byte buffer + * at <b>data_out</b>. Set *<b>data_len</b> to the number of bytes in the + * line, not counting the terminating NUL. Return 1 if we read a whole line, + * return 0 if we don't have a whole line yet, and return -1 if the line + * length exceeds *<b>data_len</b>. + */ +int +buf_get_line(buf_t *buf, char *data_out, size_t *data_len) +{ + size_t sz; + off_t offset; + + if (!buf->head) + return 0; + + offset = buf_find_offset_of_char(buf, '\n'); + if (offset < 0) + return 0; + sz = (size_t) offset; + if (sz+2 > *data_len) { + *data_len = sz + 2; + return -1; + } + buf_get_bytes(buf, data_out, sz+1); + data_out[sz+1] = '\0'; + *data_len = sz+1; + return 1; +} + +/** Set *<b>output</b> to contain a copy of the data in *<b>input</b> */ +int +buf_set_to_copy(buf_t **output, + const buf_t *input) +{ + if (*output) + buf_free(*output); + *output = buf_copy(input); + return 0; +} + +/** Log an error and exit if <b>buf</b> is corrupted. + */ +void +buf_assert_ok(buf_t *buf) +{ + tor_assert(buf); + tor_assert(buf->magic == BUFFER_MAGIC); + + if (! buf->head) { + tor_assert(!buf->tail); + tor_assert(buf->datalen == 0); + } else { + chunk_t *ch; + size_t total = 0; + tor_assert(buf->tail); + for (ch = buf->head; ch; ch = ch->next) { + total += ch->datalen; + tor_assert(ch->datalen <= ch->memlen); + tor_assert(ch->datalen < INT_MAX); + tor_assert(ch->data >= &ch->mem[0]); + tor_assert(ch->data <= &ch->mem[0]+ch->memlen); + if (ch->data == &ch->mem[0]+ch->memlen) { + /* LCOV_EXCL_START */ + static int warned = 0; + if (! warned) { + log_warn(LD_BUG, "Invariant violation in buf.c related to #15083"); + warned = 1; + } + /* LCOV_EXCL_STOP */ + } + tor_assert(ch->data+ch->datalen <= &ch->mem[0] + ch->memlen); + if (!ch->next) + tor_assert(ch == buf->tail); + } + tor_assert(buf->datalen == total); + } +} diff --git a/src/lib/container/buffers.h b/src/lib/container/buffers.h new file mode 100644 index 0000000000..c103b93a82 --- /dev/null +++ b/src/lib/container/buffers.h @@ -0,0 +1,122 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file buffers.h + * + * \brief Header file for buffers.c. + **/ + +#ifndef TOR_BUFFERS_H +#define TOR_BUFFERS_H + +#include "lib/cc/compat_compiler.h" +#include "lib/cc/torint.h" +#include "lib/testsupport/testsupport.h" + +#include <stdarg.h> + +typedef struct buf_t buf_t; + +buf_t *buf_new(void); +buf_t *buf_new_with_capacity(size_t size); +size_t buf_get_default_chunk_size(const buf_t *buf); +void buf_free_(buf_t *buf); +#define buf_free(b) FREE_AND_NULL(buf_t, buf_free_, (b)) +void buf_clear(buf_t *buf); +buf_t *buf_copy(const buf_t *buf); + +MOCK_DECL(size_t, buf_datalen, (const buf_t *buf)); +size_t buf_allocation(const buf_t *buf); +size_t buf_slack(const buf_t *buf); + +uint32_t buf_get_oldest_chunk_timestamp(const buf_t *buf, uint32_t now); +size_t buf_get_total_allocation(void); + +int buf_add(buf_t *buf, const char *string, size_t string_len); +void buf_add_string(buf_t *buf, const char *string); +void buf_add_printf(buf_t *buf, const char *format, ...) + CHECK_PRINTF(2, 3); +void buf_add_vprintf(buf_t *buf, const char *format, va_list args) + CHECK_PRINTF(2, 0); +int buf_move_to_buf(buf_t *buf_out, buf_t *buf_in, size_t *buf_flushlen); +void buf_move_all(buf_t *buf_out, buf_t *buf_in); +void buf_peek(const buf_t *buf, char *string, size_t string_len); +void buf_drain(buf_t *buf, size_t n); +int buf_get_bytes(buf_t *buf, char *string, size_t string_len); +int buf_get_line(buf_t *buf, char *data_out, size_t *data_len); + +#define PEEK_BUF_STARTSWITH_MAX 16 +int buf_peek_startswith(const buf_t *buf, const char *cmd); + +int buf_set_to_copy(buf_t **output, + const buf_t *input); + +void buf_assert_ok(buf_t *buf); + +int buf_find_string_offset(const buf_t *buf, const char *s, size_t n); +void buf_pullup(buf_t *buf, size_t bytes, + const char **head_out, size_t *len_out); +char *buf_extract(buf_t *buf, size_t *sz_out); + +#ifdef BUFFERS_PRIVATE +#ifdef TOR_UNIT_TESTS +buf_t *buf_new_with_data(const char *cp, size_t sz); +#endif +size_t buf_preferred_chunk_size(size_t target); + +#define DEBUG_CHUNK_ALLOC +/** A single chunk on a buffer. */ +typedef struct chunk_t { + struct chunk_t *next; /**< The next chunk on the buffer. */ + size_t datalen; /**< The number of bytes stored in this chunk */ + size_t memlen; /**< The number of usable bytes of storage in <b>mem</b>. */ +#ifdef DEBUG_CHUNK_ALLOC + size_t DBG_alloc; +#endif + char *data; /**< A pointer to the first byte of data stored in <b>mem</b>. */ + uint32_t inserted_time; /**< Timestamp when this chunk was inserted. */ + char mem[FLEXIBLE_ARRAY_MEMBER]; /**< The actual memory used for storage in + * this chunk. */ +} chunk_t; + +/** Magic value for buf_t.magic, to catch pointer errors. */ +#define BUFFER_MAGIC 0xB0FFF312u +/** A resizeable buffer, optimized for reading and writing. */ +struct buf_t { + uint32_t magic; /**< Magic cookie for debugging: Must be set to + * BUFFER_MAGIC. */ + size_t datalen; /**< How many bytes is this buffer holding right now? */ + size_t default_chunk_size; /**< Don't allocate any chunks smaller than + * this for this buffer. */ + chunk_t *head; /**< First chunk in the list, or NULL for none. */ + chunk_t *tail; /**< Last chunk in the list, or NULL for none. */ +}; + +chunk_t *buf_add_chunk_with_capacity(buf_t *buf, size_t capacity, int capped); +/** If a read onto the end of a chunk would be smaller than this number, then + * just start a new chunk. */ +#define MIN_READ_LEN 8 + +/** Return the number of bytes that can be written onto <b>chunk</b> without + * running out of space. */ +static inline size_t +CHUNK_REMAINING_CAPACITY(const chunk_t *chunk) +{ + return (chunk->mem + chunk->memlen) - (chunk->data + chunk->datalen); +} + +/** Return the next character in <b>chunk</b> onto which data can be appended. + * If the chunk is full, this might be off the end of chunk->mem. */ +static inline char * +CHUNK_WRITE_PTR(chunk_t *chunk) +{ + return chunk->data + chunk->datalen; +} + +#endif /* defined(BUFFERS_PRIVATE) */ + +#endif /* !defined(TOR_BUFFERS_H) */ diff --git a/src/lib/container/handles.h b/src/lib/container/handles.h new file mode 100644 index 0000000000..ca7c94559e --- /dev/null +++ b/src/lib/container/handles.h @@ -0,0 +1,153 @@ +/* Copyright (c) 2016-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file handles.h + * \brief Macros for C weak-handle implementation. + * + * A 'handle' is a pointer to an object that is allowed to go away while + * the handle stays alive. When you dereference the handle, you might get + * the object, or you might get "NULL". + * + * Use this pattern when an object has a single obvious lifespan, so you don't + * want to use reference counting, but when other objects might need to refer + * to the first object without caring about its lifetime. + * + * To enable a type to have handles, add a HANDLE_ENTRY() field in its + * definition, as in: + * + * struct walrus { + * HANDLE_ENTRY(wlr, walrus); + * // ... + * }; + * + * And invoke HANDLE_DECL(wlr, walrus, [static]) to declare the handle + * manipulation functions (typically in a header): + * + * // opaque handle to walrus. + * typedef struct wlr_handle_t wlr_handle_t; + * + * // make a new handle + * struct wlr_handle_t *wlr_handle_new(struct walrus *); + * + * // release a handle + * void wlr_handle_free(wlr_handle_t *); + * + * // return the pointed-to walrus, or NULL. + * struct walrus *wlr_handle_get(wlr_handle_t *). + * + * // call this function when you're about to free the walrus; + * // it invalidates all handles. (IF YOU DON'T, YOU WILL HAVE + * // DANGLING REFERENCES) + * void wlr_handles_clear(struct walrus *); + * + * Finally, use HANDLE_IMPL() to define the above functions in some + * appropriate C file: HANDLE_IMPL(wlr, walrus, [static]) + * + **/ + +#ifndef TOR_HANDLE_H +#define TOR_HANDLE_H + +#include "orconfig.h" + +#include "lib/log/util_bug.h" +#include "lib/malloc/malloc.h" + +#define HANDLE_ENTRY(name, structname) \ + struct name ## _handle_head_t *handle_head + +#define HANDLE_DECL(name, structname, linkage) \ + typedef struct name ## _handle_t name ## _handle_t; \ + linkage name ## _handle_t *name ## _handle_new(struct structname *object); \ + linkage void name ## _handle_free_(name ## _handle_t *); \ + linkage struct structname *name ## _handle_get(name ## _handle_t *); \ + linkage void name ## _handles_clear(struct structname *object); + +/* + * Implementation notes: there are lots of possible implementations here. We + * could keep a linked list of handles, each with a backpointer to the object, + * and set all of their backpointers to NULL when the object is freed. Or we + * could have the clear function invalidate the object, but not actually let + * the object get freed until the all the handles went away. We could even + * have a hash-table mapping unique identifiers to objects, and have each + * handle be a copy of the unique identifier. (We'll want to build that last + * one eventually if we want cross-process handles.) + * + * But instead we're opting for a single independent 'head' that knows how + * many handles there are, and where the object is (or isn't). This makes + * all of our functions O(1), and most as fast as a single pointer access. + * + * The handles themselves are opaque structures holding a pointer to the head. + * We could instead have each foo_handle_t* be identical to foo_handle_head_t + * *, and save some allocations ... but doing so would make handle leaks + * harder to debug. As it stands, every handle leak is a memory leak, and + * existing memory debugging tools should help with those. We can revisit + * this decision if handles are too slow. + */ + +#define HANDLE_IMPL(name, structname, linkage) \ + /* The 'head' object for a handle-accessible type. This object */ \ + /* persists for as long as the object, or any handles, exist. */ \ + typedef struct name ## _handle_head_t { \ + struct structname *object; /* pointed-to object, or NULL */ \ + unsigned int references; /* number of existing handles */ \ + } name ## _handle_head_t; \ + \ + struct name ## _handle_t { \ + struct name ## _handle_head_t *head; /* reference to the 'head'. */ \ + }; \ + \ + linkage struct name ## _handle_t * \ + name ## _handle_new(struct structname *object) \ + { \ + tor_assert(object); \ + name ## _handle_head_t *head = object->handle_head; \ + if (PREDICT_UNLIKELY(head == NULL)) { \ + head = object->handle_head = tor_malloc_zero(sizeof(*head)); \ + head->object = object; \ + } \ + name ## _handle_t *new_ref = tor_malloc_zero(sizeof(*new_ref)); \ + new_ref->head = head; \ + ++head->references; \ + return new_ref; \ + } \ + \ + linkage void \ + name ## _handle_free_(struct name ## _handle_t *ref) \ + { \ + if (! ref) return; \ + name ## _handle_head_t *head = ref->head; \ + tor_assert(head); \ + --head->references; \ + tor_free(ref); \ + if (head->object == NULL && head->references == 0) { \ + tor_free(head); \ + return; \ + } \ + } \ + \ + linkage struct structname * \ + name ## _handle_get(struct name ## _handle_t *ref) \ + { \ + tor_assert(ref); \ + name ## _handle_head_t *head = ref->head; \ + tor_assert(head); \ + return head->object; \ + } \ + \ + linkage void \ + name ## _handles_clear(struct structname *object) \ + { \ + tor_assert(object); \ + name ## _handle_head_t *head = object->handle_head; \ + if (! head) \ + return; \ + object->handle_head = NULL; \ + head->object = NULL; \ + if (head->references == 0) { \ + tor_free(head); \ + } \ + } + +#endif /* !defined(TOR_HANDLE_H) */ diff --git a/src/lib/container/include.am b/src/lib/container/include.am new file mode 100644 index 0000000000..e6492098b5 --- /dev/null +++ b/src/lib/container/include.am @@ -0,0 +1,27 @@ + +noinst_LIBRARIES += src/lib/libtor-container.a + +if UNITTESTS_ENABLED +noinst_LIBRARIES += src/lib/libtor-container-testing.a +endif + +src_lib_libtor_container_a_SOURCES = \ + src/lib/container/bloomfilt.c \ + src/lib/container/buffers.c \ + src/lib/container/map.c \ + src/lib/container/order.c \ + src/lib/container/smartlist.c + +src_lib_libtor_container_testing_a_SOURCES = \ + $(src_lib_libtor_container_a_SOURCES) +src_lib_libtor_container_testing_a_CPPFLAGS = $(AM_CPPFLAGS) $(TEST_CPPFLAGS) +src_lib_libtor_container_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS) + +noinst_HEADERS += \ + src/lib/container/bitarray.h \ + src/lib/container/bloomfilt.h \ + src/lib/container/buffers.h \ + src/lib/container/handles.h \ + src/lib/container/map.h \ + src/lib/container/order.h \ + src/lib/container/smartlist.h diff --git a/src/lib/container/map.c b/src/lib/container/map.c new file mode 100644 index 0000000000..d213ad50bf --- /dev/null +++ b/src/lib/container/map.c @@ -0,0 +1,413 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file map.c + * + * \brief Hash-table implementations of a string-to-void* map, and of + * a digest-to-void* map. + **/ + +#include "lib/container/map.h" +#include "lib/ctime/di_ops.h" +#include "lib/defs/digest_sizes.h" +#include "lib/string/util_string.h" +#include "lib/malloc/malloc.h" + +#include "lib/log/util_bug.h" + +#include <stdlib.h> +#include <string.h> + +#include "ht.h" + +/** Helper: Declare an entry type and a map type to implement a mapping using + * ht.h. The map type will be called <b>maptype</b>. The key part of each + * entry is declared using the C declaration <b>keydecl</b>. All functions + * and types associated with the map get prefixed with <b>prefix</b> */ +#define DEFINE_MAP_STRUCTS(maptype, keydecl, prefix) \ + typedef struct prefix ## entry_t { \ + HT_ENTRY(prefix ## entry_t) node; \ + void *val; \ + keydecl; \ + } prefix ## entry_t; \ + struct maptype { \ + HT_HEAD(prefix ## impl, prefix ## entry_t) head; \ + } + +DEFINE_MAP_STRUCTS(strmap_t, char *key, strmap_); +DEFINE_MAP_STRUCTS(digestmap_t, char key[DIGEST_LEN], digestmap_); +DEFINE_MAP_STRUCTS(digest256map_t, uint8_t key[DIGEST256_LEN], digest256map_); + +/** Helper: compare strmap_entry_t objects by key value. */ +static inline int +strmap_entries_eq(const strmap_entry_t *a, const strmap_entry_t *b) +{ + return !strcmp(a->key, b->key); +} + +/** Helper: return a hash value for a strmap_entry_t. */ +static inline unsigned int +strmap_entry_hash(const strmap_entry_t *a) +{ + return (unsigned) siphash24g(a->key, strlen(a->key)); +} + +/** Helper: compare digestmap_entry_t objects by key value. */ +static inline int +digestmap_entries_eq(const digestmap_entry_t *a, const digestmap_entry_t *b) +{ + return tor_memeq(a->key, b->key, DIGEST_LEN); +} + +/** Helper: return a hash value for a digest_map_t. */ +static inline unsigned int +digestmap_entry_hash(const digestmap_entry_t *a) +{ + return (unsigned) siphash24g(a->key, DIGEST_LEN); +} + +/** Helper: compare digestmap_entry_t objects by key value. */ +static inline int +digest256map_entries_eq(const digest256map_entry_t *a, + const digest256map_entry_t *b) +{ + return tor_memeq(a->key, b->key, DIGEST256_LEN); +} + +/** Helper: return a hash value for a digest_map_t. */ +static inline unsigned int +digest256map_entry_hash(const digest256map_entry_t *a) +{ + return (unsigned) siphash24g(a->key, DIGEST256_LEN); +} + +HT_PROTOTYPE(strmap_impl, strmap_entry_t, node, strmap_entry_hash, + strmap_entries_eq) +HT_GENERATE2(strmap_impl, strmap_entry_t, node, strmap_entry_hash, + strmap_entries_eq, 0.6, tor_reallocarray_, tor_free_) + +HT_PROTOTYPE(digestmap_impl, digestmap_entry_t, node, digestmap_entry_hash, + digestmap_entries_eq) +HT_GENERATE2(digestmap_impl, digestmap_entry_t, node, digestmap_entry_hash, + digestmap_entries_eq, 0.6, tor_reallocarray_, tor_free_) + +HT_PROTOTYPE(digest256map_impl, digest256map_entry_t, node, + digest256map_entry_hash, + digest256map_entries_eq) +HT_GENERATE2(digest256map_impl, digest256map_entry_t, node, + digest256map_entry_hash, + digest256map_entries_eq, 0.6, tor_reallocarray_, tor_free_) + +#define strmap_entry_free(ent) \ + FREE_AND_NULL(strmap_entry_t, strmap_entry_free_, (ent)) +#define digestmap_entry_free(ent) \ + FREE_AND_NULL(digestmap_entry_t, digestmap_entry_free_, (ent)) +#define digest256map_entry_free(ent) \ + FREE_AND_NULL(digest256map_entry_t, digest256map_entry_free_, (ent)) + +static inline void +strmap_entry_free_(strmap_entry_t *ent) +{ + tor_free(ent->key); + tor_free(ent); +} +static inline void +digestmap_entry_free_(digestmap_entry_t *ent) +{ + tor_free(ent); +} +static inline void +digest256map_entry_free_(digest256map_entry_t *ent) +{ + tor_free(ent); +} + +static inline void +strmap_assign_tmp_key(strmap_entry_t *ent, const char *key) +{ + ent->key = (char*)key; +} +static inline void +digestmap_assign_tmp_key(digestmap_entry_t *ent, const char *key) +{ + memcpy(ent->key, key, DIGEST_LEN); +} +static inline void +digest256map_assign_tmp_key(digest256map_entry_t *ent, const uint8_t *key) +{ + memcpy(ent->key, key, DIGEST256_LEN); +} +static inline void +strmap_assign_key(strmap_entry_t *ent, const char *key) +{ + ent->key = tor_strdup(key); +} +static inline void +digestmap_assign_key(digestmap_entry_t *ent, const char *key) +{ + memcpy(ent->key, key, DIGEST_LEN); +} +static inline void +digest256map_assign_key(digest256map_entry_t *ent, const uint8_t *key) +{ + memcpy(ent->key, key, DIGEST256_LEN); +} + +/** + * Macro: implement all the functions for a map that are declared in + * map.h by the DECLARE_MAP_FNS() macro. You must additionally define a + * prefix_entry_free_() function to free entries (and their keys), a + * prefix_assign_tmp_key() function to temporarily set a stack-allocated + * entry to hold a key, and a prefix_assign_key() function to set a + * heap-allocated entry to hold a key. + */ +#define IMPLEMENT_MAP_FNS(maptype, keytype, prefix) \ + /** Create and return a new empty map. */ \ + MOCK_IMPL(maptype *, \ + prefix##_new,(void)) \ + { \ + maptype *result; \ + result = tor_malloc(sizeof(maptype)); \ + HT_INIT(prefix##_impl, &result->head); \ + return result; \ + } \ + \ + /** Return the item from <b>map</b> whose key matches <b>key</b>, or \ + * NULL if no such value exists. */ \ + void * \ + prefix##_get(const maptype *map, const keytype key) \ + { \ + prefix ##_entry_t *resolve; \ + prefix ##_entry_t search; \ + tor_assert(map); \ + tor_assert(key); \ + prefix ##_assign_tmp_key(&search, key); \ + resolve = HT_FIND(prefix ##_impl, &map->head, &search); \ + if (resolve) { \ + return resolve->val; \ + } else { \ + return NULL; \ + } \ + } \ + \ + /** Add an entry to <b>map</b> mapping <b>key</b> to <b>val</b>; \ + * return the previous value, or NULL if no such value existed. */ \ + void * \ + prefix##_set(maptype *map, const keytype key, void *val) \ + { \ + prefix##_entry_t search; \ + void *oldval; \ + tor_assert(map); \ + tor_assert(key); \ + tor_assert(val); \ + prefix##_assign_tmp_key(&search, key); \ + /* We a lot of our time in this function, so the code below is */ \ + /* meant to optimize the check/alloc/set cycle by avoiding the two */\ + /* trips to the hash table that we would do in the unoptimized */ \ + /* version of this code. (Each of HT_INSERT and HT_FIND calls */ \ + /* HT_SET_HASH and HT_FIND_P.) */ \ + HT_FIND_OR_INSERT_(prefix##_impl, node, prefix##_entry_hash, \ + &(map->head), \ + prefix##_entry_t, &search, ptr, \ + { \ + /* we found an entry. */ \ + oldval = (*ptr)->val; \ + (*ptr)->val = val; \ + return oldval; \ + }, \ + { \ + /* We didn't find the entry. */ \ + prefix##_entry_t *newent = \ + tor_malloc_zero(sizeof(prefix##_entry_t)); \ + prefix##_assign_key(newent, key); \ + newent->val = val; \ + HT_FOI_INSERT_(node, &(map->head), \ + &search, newent, ptr); \ + return NULL; \ + }); \ + } \ + \ + /** Remove the value currently associated with <b>key</b> from the map. \ + * Return the value if one was set, or NULL if there was no entry for \ + * <b>key</b>. \ + * \ + * Note: you must free any storage associated with the returned value. \ + */ \ + void * \ + prefix##_remove(maptype *map, const keytype key) \ + { \ + prefix##_entry_t *resolve; \ + prefix##_entry_t search; \ + void *oldval; \ + tor_assert(map); \ + tor_assert(key); \ + prefix##_assign_tmp_key(&search, key); \ + resolve = HT_REMOVE(prefix##_impl, &map->head, &search); \ + if (resolve) { \ + oldval = resolve->val; \ + prefix##_entry_free(resolve); \ + return oldval; \ + } else { \ + return NULL; \ + } \ + } \ + \ + /** Return the number of elements in <b>map</b>. */ \ + int \ + prefix##_size(const maptype *map) \ + { \ + return HT_SIZE(&map->head); \ + } \ + \ + /** Return true iff <b>map</b> has no entries. */ \ + int \ + prefix##_isempty(const maptype *map) \ + { \ + return HT_EMPTY(&map->head); \ + } \ + \ + /** Assert that <b>map</b> is not corrupt. */ \ + void \ + prefix##_assert_ok(const maptype *map) \ + { \ + tor_assert(!prefix##_impl_HT_REP_IS_BAD_(&map->head)); \ + } \ + \ + /** Remove all entries from <b>map</b>, and deallocate storage for \ + * those entries. If free_val is provided, invoked it every value in \ + * <b>map</b>. */ \ + MOCK_IMPL(void, \ + prefix##_free_, (maptype *map, void (*free_val)(void*))) \ + { \ + prefix##_entry_t **ent, **next, *this; \ + if (!map) \ + return; \ + for (ent = HT_START(prefix##_impl, &map->head); ent != NULL; \ + ent = next) { \ + this = *ent; \ + next = HT_NEXT_RMV(prefix##_impl, &map->head, ent); \ + if (free_val) \ + free_val(this->val); \ + prefix##_entry_free(this); \ + } \ + tor_assert(HT_EMPTY(&map->head)); \ + HT_CLEAR(prefix##_impl, &map->head); \ + tor_free(map); \ + } \ + \ + /** return an <b>iterator</b> pointer to the front of a map. \ + * \ + * Iterator example: \ + * \ + * \code \ + * // uppercase values in "map", removing empty values. \ + * \ + * strmap_iter_t *iter; \ + * const char *key; \ + * void *val; \ + * char *cp; \ + * \ + * for (iter = strmap_iter_init(map); !strmap_iter_done(iter); ) { \ + * strmap_iter_get(iter, &key, &val); \ + * cp = (char*)val; \ + * if (!*cp) { \ + * iter = strmap_iter_next_rmv(map,iter); \ + * free(val); \ + * } else { \ + * for (;*cp;cp++) *cp = TOR_TOUPPER(*cp); \ + */ \ + prefix##_iter_t * \ + prefix##_iter_init(maptype *map) \ + { \ + tor_assert(map); \ + return HT_START(prefix##_impl, &map->head); \ + } \ + \ + /** Advance <b>iter</b> a single step to the next entry, and return \ + * its new value. */ \ + prefix##_iter_t * \ + prefix##_iter_next(maptype *map, prefix##_iter_t *iter) \ + { \ + tor_assert(map); \ + tor_assert(iter); \ + return HT_NEXT(prefix##_impl, &map->head, iter); \ + } \ + /** Advance <b>iter</b> a single step to the next entry, removing the \ + * current entry, and return its new value. */ \ + prefix##_iter_t * \ + prefix##_iter_next_rmv(maptype *map, prefix##_iter_t *iter) \ + { \ + prefix##_entry_t *rmv; \ + tor_assert(map); \ + tor_assert(iter); \ + tor_assert(*iter); \ + rmv = *iter; \ + iter = HT_NEXT_RMV(prefix##_impl, &map->head, iter); \ + prefix##_entry_free(rmv); \ + return iter; \ + } \ + /** Set *<b>keyp</b> and *<b>valp</b> to the current entry pointed \ + * to by iter. */ \ + void \ + prefix##_iter_get(prefix##_iter_t *iter, const keytype *keyp, \ + void **valp) \ + { \ + tor_assert(iter); \ + tor_assert(*iter); \ + tor_assert(keyp); \ + tor_assert(valp); \ + *keyp = (*iter)->key; \ + *valp = (*iter)->val; \ + } \ + /** Return true iff <b>iter</b> has advanced past the last entry of \ + * <b>map</b>. */ \ + int \ + prefix##_iter_done(prefix##_iter_t *iter) \ + { \ + return iter == NULL; \ + } + +IMPLEMENT_MAP_FNS(strmap_t, char *, strmap) +IMPLEMENT_MAP_FNS(digestmap_t, char *, digestmap) +IMPLEMENT_MAP_FNS(digest256map_t, uint8_t *, digest256map) + +/** Same as strmap_set, but first converts <b>key</b> to lowercase. */ +void * +strmap_set_lc(strmap_t *map, const char *key, void *val) +{ + /* We could be a little faster by using strcasecmp instead, and a separate + * type, but I don't think it matters. */ + void *v; + char *lc_key = tor_strdup(key); + tor_strlower(lc_key); + v = strmap_set(map,lc_key,val); + tor_free(lc_key); + return v; +} + +/** Same as strmap_get, but first converts <b>key</b> to lowercase. */ +void * +strmap_get_lc(const strmap_t *map, const char *key) +{ + void *v; + char *lc_key = tor_strdup(key); + tor_strlower(lc_key); + v = strmap_get(map,lc_key); + tor_free(lc_key); + return v; +} + +/** Same as strmap_remove, but first converts <b>key</b> to lowercase */ +void * +strmap_remove_lc(strmap_t *map, const char *key) +{ + void *v; + char *lc_key = tor_strdup(key); + tor_strlower(lc_key); + v = strmap_remove(map,lc_key); + tor_free(lc_key); + return v; +} diff --git a/src/lib/container/map.h b/src/lib/container/map.h new file mode 100644 index 0000000000..a2d1b01d12 --- /dev/null +++ b/src/lib/container/map.h @@ -0,0 +1,261 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_MAP_H +#define TOR_MAP_H + +/** + * \file map.h + * + * \brief Headers for map.c. + **/ + +#include "lib/testsupport/testsupport.h" +#include "lib/cc/torint.h" + +#include "siphash.h" + +#define DECLARE_MAP_FNS(maptype, keytype, prefix) \ + typedef struct maptype maptype; \ + typedef struct prefix##entry_t *prefix##iter_t; \ + MOCK_DECL(maptype*, prefix##new, (void)); \ + void* prefix##set(maptype *map, keytype key, void *val); \ + void* prefix##get(const maptype *map, keytype key); \ + void* prefix##remove(maptype *map, keytype key); \ + MOCK_DECL(void, prefix##free_, (maptype *map, void (*free_val)(void*))); \ + int prefix##isempty(const maptype *map); \ + int prefix##size(const maptype *map); \ + prefix##iter_t *prefix##iter_init(maptype *map); \ + prefix##iter_t *prefix##iter_next(maptype *map, prefix##iter_t *iter); \ + prefix##iter_t *prefix##iter_next_rmv(maptype *map, prefix##iter_t *iter); \ + void prefix##iter_get(prefix##iter_t *iter, keytype *keyp, void **valp); \ + int prefix##iter_done(prefix##iter_t *iter); \ + void prefix##assert_ok(const maptype *map) + +/* Map from const char * to void *. Implemented with a hash table. */ +DECLARE_MAP_FNS(strmap_t, const char *, strmap_); +/* Map from const char[DIGEST_LEN] to void *. Implemented with a hash table. */ +DECLARE_MAP_FNS(digestmap_t, const char *, digestmap_); +/* Map from const uint8_t[DIGEST256_LEN] to void *. Implemented with a hash + * table. */ +DECLARE_MAP_FNS(digest256map_t, const uint8_t *, digest256map_); + +#define MAP_FREE_AND_NULL(maptype, map, fn) \ + do { \ + maptype ## _free_((map), (fn)); \ + (map) = NULL; \ + } while (0) + +#define strmap_free(map, fn) MAP_FREE_AND_NULL(strmap, (map), (fn)) +#define digestmap_free(map, fn) MAP_FREE_AND_NULL(digestmap, (map), (fn)) +#define digest256map_free(map, fn) MAP_FREE_AND_NULL(digest256map, (map), (fn)) + +#undef DECLARE_MAP_FNS + +/** Iterates over the key-value pairs in a map <b>map</b> in order. + * <b>prefix</b> is as for DECLARE_MAP_FNS (i.e., strmap_ or digestmap_). + * The map's keys and values are of type keytype and valtype respectively; + * each iteration assigns them to keyvar and valvar. + * + * Example use: + * MAP_FOREACH(digestmap_, m, const char *, k, routerinfo_t *, r) { + * // use k and r + * } MAP_FOREACH_END. + */ +/* Unpacks to, approximately: + * { + * digestmap_iter_t *k_iter; + * for (k_iter = digestmap_iter_init(m); !digestmap_iter_done(k_iter); + * k_iter = digestmap_iter_next(m, k_iter)) { + * const char *k; + * void *r_voidp; + * routerinfo_t *r; + * digestmap_iter_get(k_iter, &k, &r_voidp); + * r = r_voidp; + * // use k and r + * } + * } + */ +#define MAP_FOREACH(prefix, map, keytype, keyvar, valtype, valvar) \ + STMT_BEGIN \ + prefix##iter_t *keyvar##_iter; \ + for (keyvar##_iter = prefix##iter_init(map); \ + !prefix##iter_done(keyvar##_iter); \ + keyvar##_iter = prefix##iter_next(map, keyvar##_iter)) { \ + keytype keyvar; \ + void *valvar##_voidp; \ + valtype valvar; \ + prefix##iter_get(keyvar##_iter, &keyvar, &valvar##_voidp); \ + valvar = valvar##_voidp; + +/** As MAP_FOREACH, except allows members to be removed from the map + * during the iteration via MAP_DEL_CURRENT. Example use: + * + * Example use: + * MAP_FOREACH(digestmap_, m, const char *, k, routerinfo_t *, r) { + * if (is_very_old(r)) + * MAP_DEL_CURRENT(k); + * } MAP_FOREACH_END. + **/ +/* Unpacks to, approximately: + * { + * digestmap_iter_t *k_iter; + * int k_del=0; + * for (k_iter = digestmap_iter_init(m); !digestmap_iter_done(k_iter); + * k_iter = k_del ? digestmap_iter_next(m, k_iter) + * : digestmap_iter_next_rmv(m, k_iter)) { + * const char *k; + * void *r_voidp; + * routerinfo_t *r; + * k_del=0; + * digestmap_iter_get(k_iter, &k, &r_voidp); + * r = r_voidp; + * if (is_very_old(r)) { + * k_del = 1; + * } + * } + * } + */ +#define MAP_FOREACH_MODIFY(prefix, map, keytype, keyvar, valtype, valvar) \ + STMT_BEGIN \ + prefix##iter_t *keyvar##_iter; \ + int keyvar##_del=0; \ + for (keyvar##_iter = prefix##iter_init(map); \ + !prefix##iter_done(keyvar##_iter); \ + keyvar##_iter = keyvar##_del ? \ + prefix##iter_next_rmv(map, keyvar##_iter) : \ + prefix##iter_next(map, keyvar##_iter)) { \ + keytype keyvar; \ + void *valvar##_voidp; \ + valtype valvar; \ + keyvar##_del=0; \ + prefix##iter_get(keyvar##_iter, &keyvar, &valvar##_voidp); \ + valvar = valvar##_voidp; + +/** Used with MAP_FOREACH_MODIFY to remove the currently-iterated-upon + * member of the map. */ +#define MAP_DEL_CURRENT(keyvar) \ + STMT_BEGIN \ + keyvar##_del = 1; \ + STMT_END + +/** Used to end a MAP_FOREACH() block. */ +#define MAP_FOREACH_END } STMT_END ; + +/** As MAP_FOREACH, but does not require declaration of prefix or keytype. + * Example use: + * DIGESTMAP_FOREACH(m, k, routerinfo_t *, r) { + * // use k and r + * } DIGESTMAP_FOREACH_END. + */ +#define DIGESTMAP_FOREACH(map, keyvar, valtype, valvar) \ + MAP_FOREACH(digestmap_, map, const char *, keyvar, valtype, valvar) + +/** As MAP_FOREACH_MODIFY, but does not require declaration of prefix or + * keytype. + * Example use: + * DIGESTMAP_FOREACH_MODIFY(m, k, routerinfo_t *, r) { + * if (is_very_old(r)) + * MAP_DEL_CURRENT(k); + * } DIGESTMAP_FOREACH_END. + */ +#define DIGESTMAP_FOREACH_MODIFY(map, keyvar, valtype, valvar) \ + MAP_FOREACH_MODIFY(digestmap_, map, const char *, keyvar, valtype, valvar) +/** Used to end a DIGESTMAP_FOREACH() block. */ +#define DIGESTMAP_FOREACH_END MAP_FOREACH_END + +#define DIGEST256MAP_FOREACH(map, keyvar, valtype, valvar) \ + MAP_FOREACH(digest256map_, map, const uint8_t *, keyvar, valtype, valvar) +#define DIGEST256MAP_FOREACH_MODIFY(map, keyvar, valtype, valvar) \ + MAP_FOREACH_MODIFY(digest256map_, map, const uint8_t *, \ + keyvar, valtype, valvar) +#define DIGEST256MAP_FOREACH_END MAP_FOREACH_END + +#define STRMAP_FOREACH(map, keyvar, valtype, valvar) \ + MAP_FOREACH(strmap_, map, const char *, keyvar, valtype, valvar) +#define STRMAP_FOREACH_MODIFY(map, keyvar, valtype, valvar) \ + MAP_FOREACH_MODIFY(strmap_, map, const char *, keyvar, valtype, valvar) +#define STRMAP_FOREACH_END MAP_FOREACH_END + +void* strmap_set_lc(strmap_t *map, const char *key, void *val); +void* strmap_get_lc(const strmap_t *map, const char *key); +void* strmap_remove_lc(strmap_t *map, const char *key); + +#define DECLARE_TYPED_DIGESTMAP_FNS(prefix, maptype, valtype) \ + typedef struct maptype maptype; \ + typedef struct prefix##iter_t *prefix##iter_t; \ + ATTR_UNUSED static inline maptype* \ + prefix##new(void) \ + { \ + return (maptype*)digestmap_new(); \ + } \ + ATTR_UNUSED static inline digestmap_t* \ + prefix##to_digestmap(maptype *map) \ + { \ + return (digestmap_t*)map; \ + } \ + ATTR_UNUSED static inline valtype* \ + prefix##get(maptype *map, const char *key) \ + { \ + return (valtype*)digestmap_get((digestmap_t*)map, key); \ + } \ + ATTR_UNUSED static inline valtype* \ + prefix##set(maptype *map, const char *key, valtype *val) \ + { \ + return (valtype*)digestmap_set((digestmap_t*)map, key, val); \ + } \ + ATTR_UNUSED static inline valtype* \ + prefix##remove(maptype *map, const char *key) \ + { \ + return (valtype*)digestmap_remove((digestmap_t*)map, key); \ + } \ + ATTR_UNUSED static inline void \ + prefix##f##ree_(maptype *map, void (*free_val)(void*)) \ + { \ + digestmap_free_((digestmap_t*)map, free_val); \ + } \ + ATTR_UNUSED static inline int \ + prefix##isempty(maptype *map) \ + { \ + return digestmap_isempty((digestmap_t*)map); \ + } \ + ATTR_UNUSED static inline int \ + prefix##size(maptype *map) \ + { \ + return digestmap_size((digestmap_t*)map); \ + } \ + ATTR_UNUSED static inline \ + prefix##iter_t *prefix##iter_init(maptype *map) \ + { \ + return (prefix##iter_t*) digestmap_iter_init((digestmap_t*)map); \ + } \ + ATTR_UNUSED static inline \ + prefix##iter_t *prefix##iter_next(maptype *map, prefix##iter_t *iter) \ + { \ + return (prefix##iter_t*) digestmap_iter_next( \ + (digestmap_t*)map, (digestmap_iter_t*)iter); \ + } \ + ATTR_UNUSED static inline prefix##iter_t* \ + prefix##iter_next_rmv(maptype *map, prefix##iter_t *iter) \ + { \ + return (prefix##iter_t*) digestmap_iter_next_rmv( \ + (digestmap_t*)map, (digestmap_iter_t*)iter); \ + } \ + ATTR_UNUSED static inline void \ + prefix##iter_get(prefix##iter_t *iter, \ + const char **keyp, \ + valtype **valp) \ + { \ + void *v; \ + digestmap_iter_get((digestmap_iter_t*) iter, keyp, &v); \ + *valp = v; \ + } \ + ATTR_UNUSED static inline int \ + prefix##iter_done(prefix##iter_t *iter) \ + { \ + return digestmap_iter_done((digestmap_iter_t*)iter); \ + } + +#endif /* !defined(TOR_CONTAINER_H) */ diff --git a/src/lib/container/order.c b/src/lib/container/order.c new file mode 100644 index 0000000000..f6503a124e --- /dev/null +++ b/src/lib/container/order.c @@ -0,0 +1,48 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file order.c + * \brief Functions for finding the n'th element of an array. + **/ + +#include <stdlib.h> + +#include "lib/container/order.h" +#include "lib/log/util_bug.h" + +/** Declare a function called <b>funcname</b> that acts as a find_nth_FOO + * function for an array of type <b>elt_t</b>*. + * + * NOTE: The implementation kind of sucks: It's O(n log n), whereas finding + * the kth element of an n-element list can be done in O(n). Then again, this + * implementation is not in critical path, and it is obviously correct. */ +#define IMPLEMENT_ORDER_FUNC(funcname, elt_t) \ + static int \ + _cmp_ ## elt_t(const void *_a, const void *_b) \ + { \ + const elt_t *a = _a, *b = _b; \ + if (*a<*b) \ + return -1; \ + else if (*a>*b) \ + return 1; \ + else \ + return 0; \ + } \ + elt_t \ + funcname(elt_t *array, int n_elements, int nth) \ + { \ + tor_assert(nth >= 0); \ + tor_assert(nth < n_elements); \ + qsort(array, n_elements, sizeof(elt_t), _cmp_ ##elt_t); \ + return array[nth]; \ + } + +IMPLEMENT_ORDER_FUNC(find_nth_int, int) +IMPLEMENT_ORDER_FUNC(find_nth_time, time_t) +IMPLEMENT_ORDER_FUNC(find_nth_double, double) +IMPLEMENT_ORDER_FUNC(find_nth_uint32, uint32_t) +IMPLEMENT_ORDER_FUNC(find_nth_int32, int32_t) +IMPLEMENT_ORDER_FUNC(find_nth_long, long) diff --git a/src/lib/container/order.h b/src/lib/container/order.h new file mode 100644 index 0000000000..a176d6d8a6 --- /dev/null +++ b/src/lib/container/order.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_ORDER_H +#define TOR_ORDER_H + +/** + * \file order.h + * + * \brief Header for order.c. + **/ + +#include "lib/cc/compat_compiler.h" +#include "lib/cc/torint.h" + +/* These functions, given an <b>array</b> of <b>n_elements</b>, return the + * <b>nth</b> lowest element. <b>nth</b>=0 gives the lowest element; + * <b>n_elements</b>-1 gives the highest; and (<b>n_elements</b>-1) / 2 gives + * the median. As a side effect, the elements of <b>array</b> are sorted. */ +int find_nth_int(int *array, int n_elements, int nth); +time_t find_nth_time(time_t *array, int n_elements, int nth); +double find_nth_double(double *array, int n_elements, int nth); +int32_t find_nth_int32(int32_t *array, int n_elements, int nth); +uint32_t find_nth_uint32(uint32_t *array, int n_elements, int nth); +long find_nth_long(long *array, int n_elements, int nth); +static inline int +median_int(int *array, int n_elements) +{ + return find_nth_int(array, n_elements, (n_elements-1)/2); +} +static inline time_t +median_time(time_t *array, int n_elements) +{ + return find_nth_time(array, n_elements, (n_elements-1)/2); +} +static inline double +median_double(double *array, int n_elements) +{ + return find_nth_double(array, n_elements, (n_elements-1)/2); +} +static inline uint32_t +median_uint32(uint32_t *array, int n_elements) +{ + return find_nth_uint32(array, n_elements, (n_elements-1)/2); +} +static inline int32_t +median_int32(int32_t *array, int n_elements) +{ + return find_nth_int32(array, n_elements, (n_elements-1)/2); +} + +static inline uint32_t +third_quartile_uint32(uint32_t *array, int n_elements) +{ + return find_nth_uint32(array, n_elements, (n_elements*3)/4); +} + +#endif /* !defined(TOR_CONTAINER_H) */ diff --git a/src/lib/container/smartlist.c b/src/lib/container/smartlist.c new file mode 100644 index 0000000000..3ab2797d68 --- /dev/null +++ b/src/lib/container/smartlist.c @@ -0,0 +1,866 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file smartlist.c + * + * \brief Higher-level functions for the "smartlist" resizeable array + * abstraction. + * + * The functions declared here use higher-level functionality than those in + * smartlist_core.c, and handle things like smartlists of different types, + * sorting, searching, heap-structured smartlists, and other convenience + * functions. + **/ + +#include "lib/container/smartlist.h" +#include "lib/err/torerr.h" +#include "lib/malloc/malloc.h" +#include "lib/defs/digest_sizes.h" +#include "lib/ctime/di_ops.h" +#include "lib/string/compat_ctype.h" +#include "lib/string/compat_string.h" +#include "lib/string/util_string.h" +#include "lib/string/printf.h" + +#include "lib/log/util_bug.h" + +#include <stdlib.h> +#include <string.h> + +/** Append the string produced by tor_asprintf(<b>pattern</b>, <b>...</b>) + * to <b>sl</b>. */ +void +smartlist_add_asprintf(struct smartlist_t *sl, const char *pattern, ...) +{ + va_list ap; + va_start(ap, pattern); + smartlist_add_vasprintf(sl, pattern, ap); + va_end(ap); +} + +/** va_list-based backend of smartlist_add_asprintf. */ +void +smartlist_add_vasprintf(struct smartlist_t *sl, const char *pattern, + va_list args) +{ + char *str = NULL; + + tor_vasprintf(&str, pattern, args); + tor_assert(str != NULL); + + smartlist_add(sl, str); +} + +/** Reverse the order of the items in <b>sl</b>. */ +void +smartlist_reverse(smartlist_t *sl) +{ + int i, j; + void *tmp; + tor_assert(sl); + for (i = 0, j = sl->num_used-1; i < j; ++i, --j) { + tmp = sl->list[i]; + sl->list[i] = sl->list[j]; + sl->list[j] = tmp; + } +} + +/** If there are any strings in sl equal to element, remove and free them. + * Does not preserve order. */ +void +smartlist_string_remove(smartlist_t *sl, const char *element) +{ + int i; + tor_assert(sl); + tor_assert(element); + for (i = 0; i < sl->num_used; ++i) { + if (!strcmp(element, sl->list[i])) { + tor_free(sl->list[i]); + sl->list[i] = sl->list[--sl->num_used]; /* swap with the end */ + i--; /* so we process the new i'th element */ + sl->list[sl->num_used] = NULL; + } + } +} + +/** Return true iff <b>sl</b> has some element E such that + * !strcmp(E,<b>element</b>) + */ +int +smartlist_contains_string(const smartlist_t *sl, const char *element) +{ + int i; + if (!sl) return 0; + for (i=0; i < sl->num_used; i++) + if (strcmp((const char*)sl->list[i],element)==0) + return 1; + return 0; +} + +/** If <b>element</b> is equal to an element of <b>sl</b>, return that + * element's index. Otherwise, return -1. */ +int +smartlist_string_pos(const smartlist_t *sl, const char *element) +{ + int i; + if (!sl) return -1; + for (i=0; i < sl->num_used; i++) + if (strcmp((const char*)sl->list[i],element)==0) + return i; + return -1; +} + +/** If <b>element</b> is the same pointer as an element of <b>sl</b>, return + * that element's index. Otherwise, return -1. */ +int +smartlist_pos(const smartlist_t *sl, const void *element) +{ + int i; + if (!sl) return -1; + for (i=0; i < sl->num_used; i++) + if (element == sl->list[i]) + return i; + return -1; +} + +/** Return true iff <b>sl</b> has some element E such that + * !strcasecmp(E,<b>element</b>) + */ +int +smartlist_contains_string_case(const smartlist_t *sl, const char *element) +{ + int i; + if (!sl) return 0; + for (i=0; i < sl->num_used; i++) + if (strcasecmp((const char*)sl->list[i],element)==0) + return 1; + return 0; +} + +/** Return true iff <b>sl</b> has some element E such that E is equal + * to the decimal encoding of <b>num</b>. + */ +int +smartlist_contains_int_as_string(const smartlist_t *sl, int num) +{ + char buf[32]; /* long enough for 64-bit int, and then some. */ + tor_snprintf(buf,sizeof(buf),"%d", num); + return smartlist_contains_string(sl, buf); +} + +/** Return true iff the two lists contain the same strings in the same + * order, or if they are both NULL. */ +int +smartlist_strings_eq(const smartlist_t *sl1, const smartlist_t *sl2) +{ + if (sl1 == NULL) + return sl2 == NULL; + if (sl2 == NULL) + return 0; + if (smartlist_len(sl1) != smartlist_len(sl2)) + return 0; + SMARTLIST_FOREACH(sl1, const char *, cp1, { + const char *cp2 = smartlist_get(sl2, cp1_sl_idx); + if (strcmp(cp1, cp2)) + return 0; + }); + return 1; +} + +/** Return true iff the two lists contain the same int pointer values in + * the same order, or if they are both NULL. */ +int +smartlist_ints_eq(const smartlist_t *sl1, const smartlist_t *sl2) +{ + if (sl1 == NULL) + return sl2 == NULL; + if (sl2 == NULL) + return 0; + if (smartlist_len(sl1) != smartlist_len(sl2)) + return 0; + SMARTLIST_FOREACH(sl1, int *, cp1, { + int *cp2 = smartlist_get(sl2, cp1_sl_idx); + if (*cp1 != *cp2) + return 0; + }); + return 1; +} + +/** + * Return true if there is shallow equality between smartlists - + * i.e. all indices correspond to exactly same object (pointer + * values are matching). Otherwise, return false. + */ +int +smartlist_ptrs_eq(const smartlist_t *s1, const smartlist_t *s2) +{ + if (s1 == s2) + return 1; + + // Note: pointers cannot both be NULL at this point, because + // above check. + if (s1 == NULL || s2 == NULL) + return 0; + + if (smartlist_len(s1) != smartlist_len(s2)) + return 0; + + for (int i = 0; i < smartlist_len(s1); i++) { + if (smartlist_get(s1, i) != smartlist_get(s2, i)) + return 0; + } + + return 1; +} + +/** Return true iff <b>sl</b> has some element E such that + * tor_memeq(E,<b>element</b>,DIGEST_LEN) + */ +int +smartlist_contains_digest(const smartlist_t *sl, const char *element) +{ + int i; + if (!sl) return 0; + for (i=0; i < sl->num_used; i++) + if (tor_memeq((const char*)sl->list[i],element,DIGEST_LEN)) + return 1; + return 0; +} + +/** Return true iff some element E of sl2 has smartlist_contains(sl1,E). + */ +int +smartlist_overlap(const smartlist_t *sl1, const smartlist_t *sl2) +{ + int i; + for (i=0; i < sl2->num_used; i++) + if (smartlist_contains(sl1, sl2->list[i])) + return 1; + return 0; +} + +/** Remove every element E of sl1 such that !smartlist_contains(sl2,E). + * Does not preserve the order of sl1. + */ +void +smartlist_intersect(smartlist_t *sl1, const smartlist_t *sl2) +{ + int i; + for (i=0; i < sl1->num_used; i++) + if (!smartlist_contains(sl2, sl1->list[i])) { + sl1->list[i] = sl1->list[--sl1->num_used]; /* swap with the end */ + i--; /* so we process the new i'th element */ + sl1->list[sl1->num_used] = NULL; + } +} + +/** Remove every element E of sl1 such that smartlist_contains(sl2,E). + * Does not preserve the order of sl1. + */ +void +smartlist_subtract(smartlist_t *sl1, const smartlist_t *sl2) +{ + int i; + for (i=0; i < sl2->num_used; i++) + smartlist_remove(sl1, sl2->list[i]); +} + +/** Allocate and return a new string containing the concatenation of + * the elements of <b>sl</b>, in order, separated by <b>join</b>. If + * <b>terminate</b> is true, also terminate the string with <b>join</b>. + * If <b>len_out</b> is not NULL, set <b>len_out</b> to the length of + * the returned string. Requires that every element of <b>sl</b> is + * NUL-terminated string. + */ +char * +smartlist_join_strings(smartlist_t *sl, const char *join, + int terminate, size_t *len_out) +{ + return smartlist_join_strings2(sl,join,strlen(join),terminate,len_out); +} + +/** As smartlist_join_strings, but instead of separating/terminated with a + * NUL-terminated string <b>join</b>, uses the <b>join_len</b>-byte sequence + * at <b>join</b>. (Useful for generating a sequence of NUL-terminated + * strings.) + */ +char * +smartlist_join_strings2(smartlist_t *sl, const char *join, + size_t join_len, int terminate, size_t *len_out) +{ + int i; + size_t n = 0; + char *r = NULL, *dst, *src; + + tor_assert(sl); + tor_assert(join); + + if (terminate) + n = join_len; + + for (i = 0; i < sl->num_used; ++i) { + n += strlen(sl->list[i]); + if (i+1 < sl->num_used) /* avoid double-counting the last one */ + n += join_len; + } + dst = r = tor_malloc(n+1); + for (i = 0; i < sl->num_used; ) { + for (src = sl->list[i]; *src; ) + *dst++ = *src++; + if (++i < sl->num_used) { + memcpy(dst, join, join_len); + dst += join_len; + } + } + if (terminate) { + memcpy(dst, join, join_len); + dst += join_len; + } + *dst = '\0'; + + if (len_out) + *len_out = dst-r; + return r; +} + +/** Sort the members of <b>sl</b> into an order defined by + * the ordering function <b>compare</b>, which returns less then 0 if a + * precedes b, greater than 0 if b precedes a, and 0 if a 'equals' b. + */ +void +smartlist_sort(smartlist_t *sl, int (*compare)(const void **a, const void **b)) +{ + if (!sl->num_used) + return; + qsort(sl->list, sl->num_used, sizeof(void*), + (int (*)(const void *,const void*))compare); +} + +/** Given a smartlist <b>sl</b> sorted with the function <b>compare</b>, + * return the most frequent member in the list. Break ties in favor of + * later elements. If the list is empty, return NULL. If count_out is + * non-null, set it to the count of the most frequent member. + */ +void * +smartlist_get_most_frequent_(const smartlist_t *sl, + int (*compare)(const void **a, const void **b), + int *count_out) +{ + const void *most_frequent = NULL; + int most_frequent_count = 0; + + const void *cur = NULL; + int i, count=0; + + if (!sl->num_used) { + if (count_out) + *count_out = 0; + return NULL; + } + for (i = 0; i < sl->num_used; ++i) { + const void *item = sl->list[i]; + if (cur && 0 == compare(&cur, &item)) { + ++count; + } else { + if (cur && count >= most_frequent_count) { + most_frequent = cur; + most_frequent_count = count; + } + cur = item; + count = 1; + } + } + if (cur && count >= most_frequent_count) { + most_frequent = cur; + most_frequent_count = count; + } + if (count_out) + *count_out = most_frequent_count; + return (void*)most_frequent; +} + +/** Given a sorted smartlist <b>sl</b> and the comparison function used to + * sort it, remove all duplicate members. If free_fn is provided, calls + * free_fn on each duplicate. Otherwise, just removes them. Preserves order. + */ +void +smartlist_uniq(smartlist_t *sl, + int (*compare)(const void **a, const void **b), + void (*free_fn)(void *a)) +{ + int i; + for (i=1; i < sl->num_used; ++i) { + if (compare((const void **)&(sl->list[i-1]), + (const void **)&(sl->list[i])) == 0) { + if (free_fn) + free_fn(sl->list[i]); + smartlist_del_keeporder(sl, i--); + } + } +} + +/** Assuming the members of <b>sl</b> are in order, return a pointer to the + * member that matches <b>key</b>. Ordering and matching are defined by a + * <b>compare</b> function that returns 0 on a match; less than 0 if key is + * less than member, and greater than 0 if key is greater then member. + */ +void * +smartlist_bsearch(const smartlist_t *sl, const void *key, + int (*compare)(const void *key, const void **member)) +{ + int found, idx; + idx = smartlist_bsearch_idx(sl, key, compare, &found); + return found ? smartlist_get(sl, idx) : NULL; +} + +/** Assuming the members of <b>sl</b> are in order, return the index of the + * member that matches <b>key</b>. If no member matches, return the index of + * the first member greater than <b>key</b>, or smartlist_len(sl) if no member + * is greater than <b>key</b>. Set <b>found_out</b> to true on a match, to + * false otherwise. Ordering and matching are defined by a <b>compare</b> + * function that returns 0 on a match; less than 0 if key is less than member, + * and greater than 0 if key is greater then member. + */ +int +smartlist_bsearch_idx(const smartlist_t *sl, const void *key, + int (*compare)(const void *key, const void **member), + int *found_out) +{ + int hi, lo, cmp, mid, len, diff; + + tor_assert(sl); + tor_assert(compare); + tor_assert(found_out); + + len = smartlist_len(sl); + + /* Check for the trivial case of a zero-length list */ + if (len == 0) { + *found_out = 0; + /* We already know smartlist_len(sl) is 0 in this case */ + return 0; + } + + /* Okay, we have a real search to do */ + tor_assert(len > 0); + lo = 0; + hi = len - 1; + + /* + * These invariants are always true: + * + * For all i such that 0 <= i < lo, sl[i] < key + * For all i such that hi < i <= len, sl[i] > key + */ + + while (lo <= hi) { + diff = hi - lo; + /* + * We want mid = (lo + hi) / 2, but that could lead to overflow, so + * instead diff = hi - lo (non-negative because of loop condition), and + * then hi = lo + diff, mid = (lo + lo + diff) / 2 = lo + (diff / 2). + */ + mid = lo + (diff / 2); + cmp = compare(key, (const void**) &(sl->list[mid])); + if (cmp == 0) { + /* sl[mid] == key; we found it */ + *found_out = 1; + return mid; + } else if (cmp > 0) { + /* + * key > sl[mid] and an index i such that sl[i] == key must + * have i > mid if it exists. + */ + + /* + * Since lo <= mid <= hi, hi can only decrease on each iteration (by + * being set to mid - 1) and hi is initially len - 1, mid < len should + * always hold, and this is not symmetric with the left end of list + * mid > 0 test below. A key greater than the right end of the list + * should eventually lead to lo == hi == mid == len - 1, and then + * we set lo to len below and fall out to the same exit we hit for + * a key in the middle of the list but not matching. Thus, we just + * assert for consistency here rather than handle a mid == len case. + */ + tor_assert(mid < len); + /* Move lo to the element immediately after sl[mid] */ + lo = mid + 1; + } else { + /* This should always be true in this case */ + tor_assert(cmp < 0); + + /* + * key < sl[mid] and an index i such that sl[i] == key must + * have i < mid if it exists. + */ + + if (mid > 0) { + /* Normal case, move hi to the element immediately before sl[mid] */ + hi = mid - 1; + } else { + /* These should always be true in this case */ + tor_assert(mid == lo); + tor_assert(mid == 0); + /* + * We were at the beginning of the list and concluded that every + * element e compares e > key. + */ + *found_out = 0; + return 0; + } + } + } + + /* + * lo > hi; we have no element matching key but we have elements falling + * on both sides of it. The lo index points to the first element > key. + */ + tor_assert(lo == hi + 1); /* All other cases should have been handled */ + tor_assert(lo >= 0); + tor_assert(lo <= len); + tor_assert(hi >= 0); + tor_assert(hi <= len); + + if (lo < len) { + cmp = compare(key, (const void **) &(sl->list[lo])); + tor_assert(cmp < 0); + } else { + cmp = compare(key, (const void **) &(sl->list[len-1])); + tor_assert(cmp > 0); + } + + *found_out = 0; + return lo; +} + +/** Helper: compare two const char **s. */ +static int +compare_string_ptrs_(const void **_a, const void **_b) +{ + return strcmp((const char*)*_a, (const char*)*_b); +} + +/** Sort a smartlist <b>sl</b> containing strings into lexically ascending + * order. */ +void +smartlist_sort_strings(smartlist_t *sl) +{ + smartlist_sort(sl, compare_string_ptrs_); +} + +/** Return the most frequent string in the sorted list <b>sl</b> */ +const char * +smartlist_get_most_frequent_string(smartlist_t *sl) +{ + return smartlist_get_most_frequent(sl, compare_string_ptrs_); +} + +/** Return the most frequent string in the sorted list <b>sl</b>. + * If <b>count_out</b> is provided, set <b>count_out</b> to the + * number of times that string appears. + */ +const char * +smartlist_get_most_frequent_string_(smartlist_t *sl, int *count_out) +{ + return smartlist_get_most_frequent_(sl, compare_string_ptrs_, count_out); +} + +/** Remove duplicate strings from a sorted list, and free them with tor_free(). + */ +void +smartlist_uniq_strings(smartlist_t *sl) +{ + smartlist_uniq(sl, compare_string_ptrs_, tor_free_); +} + +/** Helper: compare two pointers. */ +static int +compare_ptrs_(const void **_a, const void **_b) +{ + const void *a = *_a, *b = *_b; + if (a<b) + return -1; + else if (a==b) + return 0; + else + return 1; +} + +/** Sort <b>sl</b> in ascending order of the pointers it contains. */ +void +smartlist_sort_pointers(smartlist_t *sl) +{ + smartlist_sort(sl, compare_ptrs_); +} + +/* Heap-based priority queue implementation for O(lg N) insert and remove. + * Recall that the heap property is that, for every index I, h[I] < + * H[LEFT_CHILD[I]] and h[I] < H[RIGHT_CHILD[I]]. + * + * For us to remove items other than the topmost item, each item must store + * its own index within the heap. When calling the pqueue functions, tell + * them about the offset of the field that stores the index within the item. + * + * Example: + * + * typedef struct timer_t { + * struct timeval tv; + * int heap_index; + * } timer_t; + * + * static int compare(const void *p1, const void *p2) { + * const timer_t *t1 = p1, *t2 = p2; + * if (t1->tv.tv_sec < t2->tv.tv_sec) { + * return -1; + * } else if (t1->tv.tv_sec > t2->tv.tv_sec) { + * return 1; + * } else { + * return t1->tv.tv_usec - t2->tv_usec; + * } + * } + * + * void timer_heap_insert(smartlist_t *heap, timer_t *timer) { + * smartlist_pqueue_add(heap, compare, offsetof(timer_t, heap_index), + * timer); + * } + * + * void timer_heap_pop(smartlist_t *heap) { + * return smartlist_pqueue_pop(heap, compare, + * offsetof(timer_t, heap_index)); + * } + */ + +/** @{ */ +/** Functions to manipulate heap indices to find a node's parent and children. + * + * For a 1-indexed array, we would use LEFT_CHILD[x] = 2*x and RIGHT_CHILD[x] + * = 2*x + 1. But this is C, so we have to adjust a little. */ + +/* MAX_PARENT_IDX is the largest IDX in the smartlist which might have + * children whose indices fit inside an int. + * LEFT_CHILD(MAX_PARENT_IDX) == INT_MAX-2; + * RIGHT_CHILD(MAX_PARENT_IDX) == INT_MAX-1; + * LEFT_CHILD(MAX_PARENT_IDX + 1) == INT_MAX // impossible, see max list size. + */ +#define MAX_PARENT_IDX ((INT_MAX - 2) / 2) +/* If this is true, then i is small enough to potentially have children + * in the smartlist, and it is save to use LEFT_CHILD/RIGHT_CHILD on it. */ +#define IDX_MAY_HAVE_CHILDREN(i) ((i) <= MAX_PARENT_IDX) +#define LEFT_CHILD(i) ( 2*(i) + 1 ) +#define RIGHT_CHILD(i) ( 2*(i) + 2 ) +#define PARENT(i) ( ((i)-1) / 2 ) +/** }@ */ + +/** @{ */ +/** Helper macros for heaps: Given a local variable <b>idx_field_offset</b> + * set to the offset of an integer index within the heap element structure, + * IDX_OF_ITEM(p) gives you the index of p, and IDXP(p) gives you a pointer to + * where p's index is stored. Given additionally a local smartlist <b>sl</b>, + * UPDATE_IDX(i) sets the index of the element at <b>i</b> to the correct + * value (that is, to <b>i</b>). + */ +#define IDXP(p) ((int*)STRUCT_VAR_P(p, idx_field_offset)) + +#define UPDATE_IDX(i) do { \ + void *updated = sl->list[i]; \ + *IDXP(updated) = i; \ + } while (0) + +#define IDX_OF_ITEM(p) (*IDXP(p)) +/** @} */ + +/** Helper. <b>sl</b> may have at most one violation of the heap property: + * the item at <b>idx</b> may be greater than one or both of its children. + * Restore the heap property. */ +static inline void +smartlist_heapify(smartlist_t *sl, + int (*compare)(const void *a, const void *b), + int idx_field_offset, + int idx) +{ + while (1) { + if (! IDX_MAY_HAVE_CHILDREN(idx)) { + /* idx is so large that it cannot have any children, since doing so + * would mean the smartlist was over-capacity. Therefore it cannot + * violate the heap property by being greater than a child (since it + * doesn't have any). */ + return; + } + + int left_idx = LEFT_CHILD(idx); + int best_idx; + + if (left_idx >= sl->num_used) + return; + if (compare(sl->list[idx],sl->list[left_idx]) < 0) + best_idx = idx; + else + best_idx = left_idx; + if (left_idx+1 < sl->num_used && + compare(sl->list[left_idx+1],sl->list[best_idx]) < 0) + best_idx = left_idx + 1; + + if (best_idx == idx) { + return; + } else { + void *tmp = sl->list[idx]; + sl->list[idx] = sl->list[best_idx]; + sl->list[best_idx] = tmp; + UPDATE_IDX(idx); + UPDATE_IDX(best_idx); + + idx = best_idx; + } + } +} + +/** Insert <b>item</b> into the heap stored in <b>sl</b>, where order is + * determined by <b>compare</b> and the offset of the item in the heap is + * stored in an int-typed field at position <b>idx_field_offset</b> within + * item. + */ +void +smartlist_pqueue_add(smartlist_t *sl, + int (*compare)(const void *a, const void *b), + int idx_field_offset, + void *item) +{ + int idx; + smartlist_add(sl,item); + UPDATE_IDX(sl->num_used-1); + + for (idx = sl->num_used - 1; idx; ) { + int parent = PARENT(idx); + if (compare(sl->list[idx], sl->list[parent]) < 0) { + void *tmp = sl->list[parent]; + sl->list[parent] = sl->list[idx]; + sl->list[idx] = tmp; + UPDATE_IDX(parent); + UPDATE_IDX(idx); + idx = parent; + } else { + return; + } + } +} + +/** Remove and return the top-priority item from the heap stored in <b>sl</b>, + * where order is determined by <b>compare</b> and the item's position is + * stored at position <b>idx_field_offset</b> within the item. <b>sl</b> must + * not be empty. */ +void * +smartlist_pqueue_pop(smartlist_t *sl, + int (*compare)(const void *a, const void *b), + int idx_field_offset) +{ + void *top; + tor_assert(sl->num_used); + + top = sl->list[0]; + *IDXP(top)=-1; + if (--sl->num_used) { + sl->list[0] = sl->list[sl->num_used]; + sl->list[sl->num_used] = NULL; + UPDATE_IDX(0); + smartlist_heapify(sl, compare, idx_field_offset, 0); + } + sl->list[sl->num_used] = NULL; + return top; +} + +/** Remove the item <b>item</b> from the heap stored in <b>sl</b>, + * where order is determined by <b>compare</b> and the item's position is + * stored at position <b>idx_field_offset</b> within the item. <b>sl</b> must + * not be empty. */ +void +smartlist_pqueue_remove(smartlist_t *sl, + int (*compare)(const void *a, const void *b), + int idx_field_offset, + void *item) +{ + int idx = IDX_OF_ITEM(item); + tor_assert(idx >= 0); + tor_assert(sl->list[idx] == item); + --sl->num_used; + *IDXP(item) = -1; + if (idx == sl->num_used) { + sl->list[sl->num_used] = NULL; + return; + } else { + sl->list[idx] = sl->list[sl->num_used]; + sl->list[sl->num_used] = NULL; + UPDATE_IDX(idx); + smartlist_heapify(sl, compare, idx_field_offset, idx); + } +} + +/** Assert that the heap property is correctly maintained by the heap stored + * in <b>sl</b>, where order is determined by <b>compare</b>. */ +void +smartlist_pqueue_assert_ok(smartlist_t *sl, + int (*compare)(const void *a, const void *b), + int idx_field_offset) +{ + int i; + for (i = sl->num_used - 1; i >= 0; --i) { + if (i>0) + tor_assert(compare(sl->list[PARENT(i)], sl->list[i]) <= 0); + tor_assert(IDX_OF_ITEM(sl->list[i]) == i); + } +} + +/** Helper: compare two DIGEST_LEN digests. */ +static int +compare_digests_(const void **_a, const void **_b) +{ + return tor_memcmp((const char*)*_a, (const char*)*_b, DIGEST_LEN); +} + +/** Sort the list of DIGEST_LEN-byte digests into ascending order. */ +void +smartlist_sort_digests(smartlist_t *sl) +{ + smartlist_sort(sl, compare_digests_); +} + +/** Remove duplicate digests from a sorted list, and free them with tor_free(). + */ +void +smartlist_uniq_digests(smartlist_t *sl) +{ + smartlist_uniq(sl, compare_digests_, tor_free_); +} + +/** Helper: compare two DIGEST256_LEN digests. */ +static int +compare_digests256_(const void **_a, const void **_b) +{ + return tor_memcmp((const char*)*_a, (const char*)*_b, DIGEST256_LEN); +} + +/** Sort the list of DIGEST256_LEN-byte digests into ascending order. */ +void +smartlist_sort_digests256(smartlist_t *sl) +{ + smartlist_sort(sl, compare_digests256_); +} + +/** Return the most frequent member of the sorted list of DIGEST256_LEN + * digests in <b>sl</b> */ +const uint8_t * +smartlist_get_most_frequent_digest256(smartlist_t *sl) +{ + return smartlist_get_most_frequent(sl, compare_digests256_); +} + +/** Remove duplicate 256-bit digests from a sorted list, and free them with + * tor_free(). + */ +void +smartlist_uniq_digests256(smartlist_t *sl) +{ + smartlist_uniq(sl, compare_digests256_, tor_free_); +} diff --git a/src/lib/container/smartlist.h b/src/lib/container/smartlist.h new file mode 100644 index 0000000000..77682db03e --- /dev/null +++ b/src/lib/container/smartlist.h @@ -0,0 +1,168 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2019, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_SMARTLIST_H +#define TOR_SMARTLIST_H + +/** + * \file smartlist.h + * + * \brief Header for smartlist.c + **/ + +#include <stdarg.h> + +#include "lib/smartlist_core/smartlist_core.h" +#include "lib/smartlist_core/smartlist_foreach.h" +#include "lib/smartlist_core/smartlist_split.h" + +void smartlist_add_asprintf(struct smartlist_t *sl, const char *pattern, ...) + CHECK_PRINTF(2, 3); +void smartlist_add_vasprintf(struct smartlist_t *sl, const char *pattern, + va_list args) + CHECK_PRINTF(2, 0); +void smartlist_reverse(smartlist_t *sl); +void smartlist_string_remove(smartlist_t *sl, const char *element); +int smartlist_contains_string(const smartlist_t *sl, const char *element); +int smartlist_pos(const smartlist_t *sl, const void *element); +int smartlist_string_pos(const smartlist_t *, const char *elt); +int smartlist_contains_string_case(const smartlist_t *sl, const char *element); +int smartlist_contains_int_as_string(const smartlist_t *sl, int num); +int smartlist_strings_eq(const smartlist_t *sl1, const smartlist_t *sl2); +int smartlist_contains_digest(const smartlist_t *sl, const char *element); +int smartlist_ints_eq(const smartlist_t *sl1, const smartlist_t *sl2); +int smartlist_overlap(const smartlist_t *sl1, const smartlist_t *sl2); +void smartlist_intersect(smartlist_t *sl1, const smartlist_t *sl2); +void smartlist_subtract(smartlist_t *sl1, const smartlist_t *sl2); + +int smartlist_ptrs_eq(const smartlist_t *s1, + const smartlist_t *s2); + +void smartlist_sort(smartlist_t *sl, + int (*compare)(const void **a, const void **b)); +void *smartlist_get_most_frequent_(const smartlist_t *sl, + int (*compare)(const void **a, const void **b), + int *count_out); +#define smartlist_get_most_frequent(sl, compare) \ + smartlist_get_most_frequent_((sl), (compare), NULL) +void smartlist_uniq(smartlist_t *sl, + int (*compare)(const void **a, const void **b), + void (*free_fn)(void *elt)); + +void smartlist_sort_strings(smartlist_t *sl); +void smartlist_sort_digests(smartlist_t *sl); +void smartlist_sort_digests256(smartlist_t *sl); +void smartlist_sort_pointers(smartlist_t *sl); + +const char *smartlist_get_most_frequent_string(smartlist_t *sl); +const char *smartlist_get_most_frequent_string_(smartlist_t *sl, + int *count_out); +const uint8_t *smartlist_get_most_frequent_digest256(smartlist_t *sl); + +void smartlist_uniq_strings(smartlist_t *sl); +void smartlist_uniq_digests(smartlist_t *sl); +void smartlist_uniq_digests256(smartlist_t *sl); +void *smartlist_bsearch(const smartlist_t *sl, const void *key, + int (*compare)(const void *key, const void **member)); +int smartlist_bsearch_idx(const smartlist_t *sl, const void *key, + int (*compare)(const void *key, const void **member), + int *found_out); + +void smartlist_pqueue_add(smartlist_t *sl, + int (*compare)(const void *a, const void *b), + int idx_field_offset, + void *item); +void *smartlist_pqueue_pop(smartlist_t *sl, + int (*compare)(const void *a, const void *b), + int idx_field_offset); +void smartlist_pqueue_remove(smartlist_t *sl, + int (*compare)(const void *a, const void *b), + int idx_field_offset, + void *item); +void smartlist_pqueue_assert_ok(smartlist_t *sl, + int (*compare)(const void *a, const void *b), + int idx_field_offset); + +char *smartlist_join_strings(smartlist_t *sl, const char *join, int terminate, + size_t *len_out) ATTR_MALLOC; +char *smartlist_join_strings2(smartlist_t *sl, const char *join, + size_t join_len, int terminate, size_t *len_out) + ATTR_MALLOC; + +/* Helper: Given two lists of items, possibly of different types, such that + * both lists are sorted on some common field (as determined by a comparison + * expression <b>cmpexpr</b>), and such that one list (<b>sl1</b>) has no + * duplicates on the common field, loop through the lists in lockstep, and + * execute <b>unmatched_var2</b> on items in var2 that do not appear in + * var1. + * + * WARNING: It isn't safe to add remove elements from either list while the + * loop is in progress. + * + * Example use: + * SMARTLIST_FOREACH_JOIN(routerstatus_list, routerstatus_t *, rs, + * routerinfo_list, routerinfo_t *, ri, + * tor_memcmp(rs->identity_digest, ri->identity_digest, 20), + * log_info(LD_GENERAL,"No match for %s", ri->nickname)) { + * log_info(LD_GENERAL, "%s matches routerstatus %p", ri->nickname, rs); + * } SMARTLIST_FOREACH_JOIN_END(rs, ri); + **/ +/* The example above unpacks (approximately) to: + * int rs_sl_idx = 0, rs_sl_len = smartlist_len(routerstatus_list); + * int ri_sl_idx, ri_sl_len = smartlist_len(routerinfo_list); + * int rs_ri_cmp; + * routerstatus_t *rs; + * routerinfo_t *ri; + * for (; ri_sl_idx < ri_sl_len; ++ri_sl_idx) { + * ri = smartlist_get(routerinfo_list, ri_sl_idx); + * while (rs_sl_idx < rs_sl_len) { + * rs = smartlist_get(routerstatus_list, rs_sl_idx); + * rs_ri_cmp = tor_memcmp(rs->identity_digest, ri->identity_digest, 20); + * if (rs_ri_cmp > 0) { + * break; + * } else if (rs_ri_cmp == 0) { + * goto matched_ri; + * } else { + * ++rs_sl_idx; + * } + * } + * log_info(LD_GENERAL,"No match for %s", ri->nickname); + * continue; + * matched_ri: { + * log_info(LD_GENERAL,"%s matches with routerstatus %p",ri->nickname,rs); + * } + * } + */ +#define SMARTLIST_FOREACH_JOIN(sl1, type1, var1, sl2, type2, var2, \ + cmpexpr, unmatched_var2) \ + STMT_BEGIN \ + int var1 ## _sl_idx = 0, var1 ## _sl_len=(sl1)->num_used; \ + int var2 ## _sl_idx = 0, var2 ## _sl_len=(sl2)->num_used; \ + int var1 ## _ ## var2 ## _cmp; \ + type1 var1; \ + type2 var2; \ + for (; var2##_sl_idx < var2##_sl_len; ++var2##_sl_idx) { \ + var2 = (sl2)->list[var2##_sl_idx]; \ + while (var1##_sl_idx < var1##_sl_len) { \ + var1 = (sl1)->list[var1##_sl_idx]; \ + var1##_##var2##_cmp = (cmpexpr); \ + if (var1##_##var2##_cmp > 0) { \ + break; \ + } else if (var1##_##var2##_cmp == 0) { \ + goto matched_##var2; \ + } else { \ + ++var1##_sl_idx; \ + } \ + } \ + /* Ran out of v1, or no match for var2. */ \ + unmatched_var2; \ + continue; \ + matched_##var2: ; \ + +#define SMARTLIST_FOREACH_JOIN_END(var1, var2) \ + } \ + STMT_END + +#endif /* !defined(TOR_CONTAINER_H) */ |