author    | Nick Mathewson <nickm@torproject.org> | 2005-11-23 04:18:45 +0000
committer | Nick Mathewson <nickm@torproject.org> | 2005-11-23 04:18:45 +0000
commit    | a39269572fbca21eeba3ac98e2d4b74a094cd212 (patch)
tree      | 81a5cf4433a2b5895f11b10d3f7dc0a85ba8464a /src/common/ht.h
parent    | ae67b87f9ac2927e5e6009461e3095da7a01cce3 (diff)
download  | tor-a39269572fbca21eeba3ac98e2d4b74a094cd212.tar.gz
          | tor-a39269572fbca21eeba3ac98e2d4b74a094cd212.zip
Replace balanced trees with hash tables: this should make stuff significantly faster.
svn:r5441
Diffstat (limited to 'src/common/ht.h')
-rw-r--r-- | src/common/ht.h | 495
1 file changed, 495 insertions, 0 deletions
diff --git a/src/common/ht.h b/src/common/ht.h
new file mode 100644
index 0000000000..54b451613a
--- /dev/null
+++ b/src/common/ht.h
@@ -0,0 +1,495 @@
+/* Copyright 2002 Christopher Clark */
+/* Copyright 2005 Nick Mathewson */
+/* See license at end. */
+/* $Id$ */
+
+/* Based on ideas by Christopher Clark and interfaces from Niels Provos. */
+
+#ifndef __HT_H
+#define __HT_H
+#define HT_H_ID "$Id$"
+
+#define HT_HEAD(name, type) \
+  struct name { \
+    /* How long is the hash table? */ \
+    unsigned hth_table_length; \
+    /* The hash table itself. */ \
+    struct type **hth_table; \
+    /* How many elements does the table contain? */ \
+    unsigned hth_n_entries; \
+    /* How many elements will we allow in the table before resizing it? */ \
+    unsigned hth_load_limit; \
+    /* Position of hth_table_length in the primes table. */ \
+    int hth_prime_idx; \
+  }
+
+#define HT_INITIALIZER() \
+  { 0, NULL, 0, 0, -1 }
+
+#define HT_INIT(root) do { \
+    (root)->hth_table_length = 0; \
+    (root)->hth_table = NULL; \
+    (root)->hth_n_entries = 0; \
+    (root)->hth_load_limit = 0; \
+    (root)->hth_prime_idx = -1; \
+  } while (0)
+
+#define HT_ENTRY(type) \
+  struct { \
+    struct type *hte_next; \
+    unsigned hte_hash; \
+  }
+
+#define HT_EMPTY(head) \
+  ((head)->hth_n_entries == 0)
+
+/* Helper: alias for the bucket containing 'elm'. */
+#define _HT_BUCKET(head, field, elm) \
+  ((head)->hth_table[elm->field.hte_hash % head->hth_table_length])
+
+/* How many elements in 'head'? */
+#define HT_SIZE(head) \
+  ((head)->hth_n_entries)
+
+#define HT_FIND(name, head, elm)     name##_HT_FIND((head), (elm))
+#define HT_INSERT(name, head, elm)   name##_HT_INSERT((head), (elm))
+#define HT_REPLACE(name, head, elm)  name##_HT_REPLACE((head), (elm))
+#define HT_REMOVE(name, head, elm)   name##_HT_REMOVE((head), (elm))
+#define HT_START(name, head)         name##_HT_START(head)
+#define HT_NEXT(name, head, elm)     name##_HT_NEXT((head), (elm))
+#define HT_NEXT_RMV(name, head, elm) name##_HT_NEXT_RMV((head), (elm))
+#define HT_CLEAR(name, head)         name##_HT_CLEAR(head)
+
+/* Helper: */
+static __inline unsigned
+ht_improve_hash(unsigned h)
+{
+  /* Aim to protect against poor hash functions by adding logic here
+   * - logic taken from java 1.4 hashtable source */
+  h += ~(h << 9);
+  h ^= ((h >> 14) | (h << 18)); /* >>> */
+  h += (h << 4);
+  h ^= ((h >> 10) | (h << 22)); /* >>> */
+  return h;
+}
+
+/** Basic string hash function, from Java standard String.hashCode(). */
+static __inline unsigned
+ht_string_hash(const char *s)
+{
+  unsigned h = 0;
+  int m = 1;
+  while (*s) {
+    h += ((signed char)*s++)*m;
+    m = (m<<5)-1; /* m *= 31 */
+  }
+  return h;
+}
+
+#define _HT_SET_HASH(elm, field, hashfn) \
+  do { \
+    elm->field.hte_next = NULL; \
+    elm->field.hte_hash = hashfn(elm); \
+  } while (0)
+
+#define HT_FOREACH(x, name, head) \
+  for ((x) = HT_START(name, head); \
+       (x) != NULL; \
+       (x) = HT_NEXT(name, head, x))
+
+#define HT_PROTOTYPE(name, type, field, hashfn, eqfn) \
+  int name##_HT_GROW(struct name *ht, unsigned min_capacity); \
+  void name##_HT_CLEAR(struct name *ht); \
+  int _##name##_HT_REP_OK(struct name *ht); \
+  /* Helper: returns a pointer to the right location in the table \
+   * 'head' to find or insert the element 'elm'. \
+   */ \
+  static __inline struct type ** \
+  _##name##_HT_FIND_P(struct name *head, struct type *elm) \
+  { \
+    struct type **p; \
+    if (!head->hth_table) \
+      return NULL; \
+    p = &_HT_BUCKET(head, field, elm); \
+    while (*p) { \
+      if (eqfn(*p, elm)) \
+        return p; \
+      p = &(*p)->field.hte_next; \
+    } \
+    return p; \
+  } \
+  /* Return a pointer to the element in the table 'head' matching 'elm', \
+   * or NULL if no such element exists */ \
+  static __inline struct type * \
+  name##_HT_FIND(struct name *head, struct type *elm) \
+  { \
+    struct type **p; \
+    _HT_SET_HASH(elm, field, hashfn); \
+    p = _##name##_HT_FIND_P(head, elm); \
+    return p ? *p : NULL; \
+  } \
+  /* Insert the element 'elm' into the table 'head'.  Do not call this \
+   * function if the table might already contain a matching element. */ \
+  static __inline void \
+  name##_HT_INSERT(struct name *head, struct type *elm) \
+  { \
+    struct type **p; \
+    if (!head->hth_table || head->hth_n_entries >= head->hth_load_limit) \
+      name##_HT_GROW(head, head->hth_n_entries+1); \
+    ++head->hth_n_entries; \
+    _HT_SET_HASH(elm, field, hashfn); \
+    p = &_HT_BUCKET(head, field, elm); \
+    elm->field.hte_next = *p; \
+    *p = elm; \
+  } \
+  /* Insert the element 'elm' into the table 'head'.  If there is already \
+   * a matching element in the table, replace that element and return \
+   * it. */ \
+  static __inline struct type * \
+  name##_HT_REPLACE(struct name *head, struct type *elm) \
+  { \
+    struct type **p, *r; \
+    if (!head->hth_table || head->hth_n_entries >= head->hth_load_limit) \
+      name##_HT_GROW(head, head->hth_n_entries+1); \
+    _HT_SET_HASH(elm, field, hashfn); \
+    p = _##name##_HT_FIND_P(head, elm); \
+    r = *p; \
+    *p = elm; \
+    if (r && (r!=elm)) { \
+      elm->field.hte_next = r->field.hte_next; \
+      r->field.hte_next = NULL; \
+      return r; \
+    } else { \
+      ++head->hth_n_entries; \
+      return NULL; \
+    } \
+  } \
+  /* Remove any element matching 'elm' from the table 'head'.  If such \
+   * an element is found, return it; otherwise return NULL. */ \
+  static __inline struct type * \
+  name##_HT_REMOVE(struct name *head, struct type *elm) \
+  { \
+    struct type **p, *r; \
+    _HT_SET_HASH(elm, field, hashfn); \
+    p = _##name##_HT_FIND_P(head,elm); \
+    if (!p || !*p) \
+      return NULL; \
+    r = *p; \
+    *p = r->field.hte_next; \
+    r->field.hte_next = NULL; \
+    --head->hth_n_entries; \
+    return r; \
+  } \
+  /* Invoke the function 'fn' on every element of the table 'head', \
+   * using 'data' as its second argument.  If the function returns \
+   * nonzero, remove the most recently examined element before invoking \
+   * the function again. */ \
+  static __inline void \
+  name##_HT_FOREACH_FN(struct name *head, \
+                       int (*fn)(struct type *, void *), \
+                       void *data) \
+  { \
+    /* XXXX use tricks to prevent concurrent mod? */ \
+    unsigned idx; \
+    int remove; \
+    struct type **p, **nextp, *next; \
+    if (!head->hth_table) \
+      return; \
+    for (idx=0; idx < head->hth_table_length; ++idx) { \
+      p = &head->hth_table[idx]; \
+      while (*p) { \
+        nextp = &(*p)->field.hte_next; \
+        next = *nextp; \
+        remove = fn(*p, data); \
+        if (remove) { \
+          --head->hth_n_entries; \
+          *p = next; \
+        } else { \
+          p = nextp; \
+        } \
+      } \
+    } \
+  } \
+  /* Return a pointer to the first element in the table 'head', under \
+   * an arbitrary order.  This order is stable under remove operations, \
+   * but not under others.  If the table is empty, return NULL. \
+   */ \
+  static __inline struct type ** \
+  name##_HT_START(struct name *head) \
+  { \
+    unsigned b = 0; \
+    while (b < head->hth_table_length) { \
+      if (head->hth_table[b]) \
+        return &head->hth_table[b]; \
+      ++b; \
+    } \
+    return NULL; \
+  } \
+  /* Return the next element in 'head' after 'elm', under the arbitrary \
+   * order used by HT_START.  If there are no more elements, return \
+   * NULL.  If 'elm' is to be removed from the table, you must call \
+   * this function for the next value before you remove it. \
+   */ \
+  static __inline struct type ** \
+  name##_HT_NEXT(struct name *head, struct type **elm) \
+  { \
+    if ((*elm)->field.hte_next) { \
+      return &(*elm)->field.hte_next; \
+    } else { \
+      unsigned b = ((*elm)->field.hte_hash % head->hth_table_length)+1; \
+      while (b < head->hth_table_length) { \
+        if (head->hth_table[b]) \
+          return &head->hth_table[b]; \
+        ++b; \
+      } \
+      return NULL; \
+    } \
+  } \
+  static __inline struct type ** \
+  name##_HT_NEXT_RMV(struct name *head, struct type **elm) \
+  { \
+    unsigned h = (*elm)->field.hte_hash; \
+    *elm = (*elm)->field.hte_next; \
+    --head->hth_n_entries; \
+    if (*elm) { \
+      return elm; \
+    } else { \
+      unsigned b = (h % head->hth_table_length)+1; \
+      while (b < head->hth_table_length) { \
+        if (head->hth_table[b]) \
+          return &head->hth_table[b]; \
+        ++b; \
+      } \
+      return NULL; \
+    } \
+  }
+
+
+#if 0
+/* Helpers for an iterator type that saves some mod operations at the expense
+ * of many branches.  Not worth it, it seems. */
+
+#define HT_ITER(type) \
+  struct type##_ITER { \
+    struct type **hti_nextp; \
+    unsigned hti_bucket; \
+  }
+
+  static __inline void \
+  name##_HT_ITER_START(struct name *head, struct type##_ITER *iter) \
+  { \
+    /* XXXX Magic to stop modifications? */ \
+    iter->hti_bucket = 0; \
+    while (iter->hti_bucket < head->hth_table_length) { \
+      iter->hti_nextp = &head->hth_table[iter->hti_bucket]; \
+      if (*iter->hti_nextp) \
+        return; \
+      ++iter->hti_bucket; \
+    } \
+    iter->hti_nextp = NULL; \
+  } \
+  static __inline int \
+  name##_HT_ITER_DONE(struct name *head, struct type##_ITER *iter) \
+  { \
+    return iter->hti_nextp == NULL; \
+  } \
+  static __inline struct type * \
+  name##_HT_ITER_GET(struct name *head, struct type##_ITER *iter) \
+  { \
+    return *iter->hti_nextp; \
+  } \
+  static __inline void \
+  name##_HT_ITER_NEXT(struct name *head, struct type##_ITER *iter) \
+  { \
+    if (!iter->hti_nextp) \
+      return; \
+    if ((*iter->hti_nextp)->field.hte_next) { \
+      iter->hti_nextp = &(*iter->hti_nextp)->field.hte_next; \
+      return; \
+    } \
+    while (++iter->hti_bucket < head->hth_table_length) { \
+      iter->hti_nextp = &head->hth_table[iter->hti_bucket]; \
+      if (*iter->hti_nextp) \
+        return; \
+      ++iter->hti_bucket; \
+    } \
+    iter->hti_nextp = NULL; \
+  } \
+  static __inline void \
+  name##_HT_ITER_NEXT_RMV(struct name *head, struct type##_ITER *iter) \
+  { \
+    if (!iter->hti_nextp) \
+      return; \
+    --head->hth_n_entries; \
+    if ((*iter->hti_nextp)->field.hte_next) { \
+      *iter->hti_nextp = (*iter->hti_nextp)->field.hte_next; \
+      if (*iter->hti_nextp) \
+        return; \
+    } \
+    while (++iter->hti_bucket < head->hth_table_length) { \
+      iter->hti_nextp = &head->hth_table[iter->hti_bucket]; \
+      if (*iter->hti_nextp) \
+        return; \
+      ++iter->hti_bucket; \
+    } \
+    iter->hti_nextp = NULL; \
+  }
+#endif
+
+#define HT_GENERATE(name, type, field, hashfn, eqfn, load, mallocfn, reallocfn, freefn) \
+  static unsigned name##_PRIMES[] = { \
+    53, 97, 193, 389, \
+    769, 1543, 3079, 6151, \
+    12289, 24593, 49157, 98317, \
+    196613, 393241, 786433, 1572869, \
+    3145739, 6291469, 12582917, 25165843, \
+    50331653, 100663319, 201326611, 402653189, \
+    805306457, 1610612741 \
+  }; \
+  static unsigned name##_N_PRIMES = \
+    sizeof(name##_PRIMES)/sizeof(name##_PRIMES[0]); \
+  /* Expand the internal table of 'head' until it is large enough to \
+   * hold 'size' elements.  Return 0 on success, -1 on allocation \
+   * failure. */ \
+  int \
+  name##_HT_GROW(struct name *head, unsigned size) \
+  { \
+    unsigned new_len, new_load_limit; \
+    int prime_idx; \
+    struct type **new_table; \
+    if (head->hth_prime_idx == (int)name##_N_PRIMES - 1) \
+      return 0; \
+    if (head->hth_load_limit > size) \
+      return 0; \
+    prime_idx = head->hth_prime_idx; \
+    do { \
+      new_len = name##_PRIMES[++prime_idx]; \
+      new_load_limit = (unsigned)(load*new_len); \
+    } while (new_load_limit <= size && \
+             prime_idx < (int)name##_N_PRIMES); \
+    if ((new_table = mallocfn(new_len*sizeof(struct type*)))) { \
+      unsigned b; \
+      memset(new_table, 0, new_len*sizeof(struct type*)); \
+      for (b = 0; b < head->hth_table_length; ++b) { \
+        struct type *elm, *next; \
+        unsigned b2; \
+        elm = head->hth_table[b]; \
+        while (elm) { \
+          next = elm->field.hte_next; \
+          b2 = elm->field.hte_hash % new_len; \
+          elm->field.hte_next = new_table[b2]; \
+          new_table[b2] = elm; \
+          elm = next; \
+        } \
+      } \
+      freefn(head->hth_table); \
+      head->hth_table = new_table; \
+    } else { \
+      unsigned b, b2; \
+      new_table = reallocfn(head->hth_table, new_len*sizeof(struct type*)); \
+      if (!new_table) return -1; \
+      memset(new_table + head->hth_table_length, 0, \
+             (new_len - head->hth_table_length)*sizeof(struct type*)); \
+      for (b=0; b < head->hth_table_length; ++b) { \
+        struct type *e, **pE; \
+        for (pE = &new_table[b], e = *pE; e != NULL; e = *pE) { \
+          b2 = e->field.hte_hash % new_len; \
+          if (b2 == b) { \
+            pE = &e->field.hte_next; \
+          } else { \
+            *pE = e->field.hte_next; \
+            e->field.hte_next = new_table[b2]; \
+            new_table[b2] = e; \
+          } \
+        } \
+      } \
+      head->hth_table = new_table; \
+    } \
+    head->hth_table_length = new_len; \
+    head->hth_prime_idx = prime_idx; \
+    head->hth_load_limit = new_load_limit; \
+    return 0; \
+  } \
+  /* Free all storage held by 'head'.  Does not free 'head' itself, or \
+   * individual elements. */ \
+  void \
+  name##_HT_CLEAR(struct name *head) \
+  { \
+    if (head->hth_table) \
+      freefn(head->hth_table); \
+    head->hth_table_length = 0; \
+    HT_INIT(head); \
+  } \
+  /* Debugging helper: return true iff the representation of 'head' is \
+   * internally consistent. */ \
+  int \
+  _##name##_HT_REP_OK(struct name *head) \
+  { \
+    unsigned n, i; \
+    struct type *elm; \
+    if (!head->hth_table_length) { \
+      return !head->hth_table && !head->hth_n_entries && \
+        !head->hth_load_limit && head->hth_prime_idx == -1; \
+    } \
+    if (!head->hth_table || head->hth_prime_idx < 0 || \
+        !head->hth_load_limit) \
+      return 0; \
+    if (head->hth_n_entries > head->hth_load_limit) \
+      return 0; \
+    if (head->hth_table_length != name##_PRIMES[head->hth_prime_idx]) \
+      return 0; \
+    if (head->hth_load_limit != (unsigned)(load*head->hth_table_length)) \
+      return 0; \
+    for (n = i = 0; i < head->hth_table_length; ++i) { \
+      for (elm = head->hth_table[i]; elm; elm = elm->field.hte_next) { \
+        if (elm->field.hte_hash != hashfn(elm)) \
+          return 0; \
+        if ((elm->field.hte_hash % head->hth_table_length) != i) \
+          return 0; \
+        ++n; \
+      } \
+    } \
+    if (n != head->hth_n_entries) \
+      return 0; \
+    return 1; \
+  }
+
+/*
+ * Copyright 2005, Nick Mathewson.  Implementation logic is adapted from code
+ * by Christopher Clark, retrofitted to allow drop-in memory management, and to
+ * use the same interface as Niels Provos's HT_H.  I'm not sure whether this
+ * is a derived work any more, but whether it is or not, the license below
+ * applies.
+ *
+ * Copyright (c) 2002, Christopher Clark
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of the original author; nor the names of any contributors
+ *   may be used to endorse or promote products derived from this software
+ *   without specific prior written permission.
+ *
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#endif
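
A minimal usage sketch follows; it is not part of the diff above. It shows how the new macros are meant to be wired together: declare a string-keyed table with HT_HEAD/HT_ENTRY, generate its functions with HT_PROTOTYPE/HT_GENERATE, and exercise HT_INSERT, HT_FIND, and HT_CLEAR. The demo_entry_t type, the demo_entry_hash/demo_entries_eq helpers, and the 0.6 load factor are assumptions made for the example; only the HT_* macros and the ht_string_hash/ht_improve_hash helpers come from the header itself.

/* Illustrative sketch only; demo_* names and the 0.6 load factor are made up. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ht.h"

typedef struct demo_entry_t {
  HT_ENTRY(demo_entry_t) node;   /* per-element hte_next/hte_hash bookkeeping */
  const char *key;
  int value;
} demo_entry_t;

static unsigned
demo_entry_hash(demo_entry_t *e)
{
  /* Mix the basic string hash through ht_improve_hash, as the header suggests
   * for weak hash functions. */
  return ht_improve_hash(ht_string_hash(e->key));
}

static int
demo_entries_eq(demo_entry_t *a, demo_entry_t *b)
{
  return !strcmp(a->key, b->key);
}

/* Declare 'struct demo_table' and generate the demo_table_HT_* functions. */
HT_HEAD(demo_table, demo_entry_t);
HT_PROTOTYPE(demo_table, demo_entry_t, node, demo_entry_hash, demo_entries_eq)
HT_GENERATE(demo_table, demo_entry_t, node, demo_entry_hash, demo_entries_eq,
            0.6, malloc, realloc, free)

int
main(void)
{
  struct demo_table table = HT_INITIALIZER();
  demo_entry_t a, lookup, *found;

  memset(&a, 0, sizeof(a));
  a.key = "circuit";
  a.value = 1;
  HT_INSERT(demo_table, &table, &a);    /* key must not already be present */

  memset(&lookup, 0, sizeof(lookup));
  lookup.key = "circuit";               /* search uses a dummy element as the key */
  found = HT_FIND(demo_table, &table, &lookup);
  if (found)
    printf("%s -> %d\n", found->key, found->value);

  HT_CLEAR(demo_table, &table);         /* frees the bucket array, not the elements */
  return 0;
}

Because HT_PROTOTYPE emits static __inline find/insert/remove/iteration functions while HT_GENERATE emits the out-of-line grow, clear, and consistency-check functions, a caller would typically expand HT_PROTOTYPE wherever the table type is visible and HT_GENERATE exactly once per table type, supplying whatever malloc/realloc/free trio should own the bucket array.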