aboutsummaryrefslogtreecommitdiff
path: root/src/feature
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2018-08-28 11:34:31 -0400
committerNick Mathewson <nickm@torproject.org>2018-11-19 08:26:10 -0500
commit83be4d2bbd7a4ed584f42d043558a4026c4a449d (patch)
treeb6e2f286a66ae95a7a1a2d89729599947bf3c879 /src/feature
parenta182301152afe9cd066516ae02f588840b2efc43 (diff)
downloadtor-83be4d2bbd7a4ed584f42d043558a4026c4a449d.tar.gz
tor-83be4d2bbd7a4ed584f42d043558a4026c4a449d.zip
Backend for compact node-family representation.
This representation is meant to save memory in microdescriptors -- we can't use it in routerinfo_t yet, since those families need to be encoded losslessly for directory voting to work. This representation saves memory in three ways: 1. It uses only one allocation per family. (The old way used a smartlist (2 allocs) plus one strdup per entry.) 2. It stores identity digests in binary, not hex. 3. It keeps families in a canonical format, memoizes, and reference-counts them. Part of #27359.
Diffstat (limited to 'src/feature')
-rw-r--r--src/feature/nodelist/nodefamily.c373
-rw-r--r--src/feature/nodelist/nodefamily.h47
-rw-r--r--src/feature/nodelist/nodefamily_st.h48
3 files changed, 468 insertions, 0 deletions
diff --git a/src/feature/nodelist/nodefamily.c b/src/feature/nodelist/nodefamily.c
new file mode 100644
index 0000000000..6b504c0ac4
--- /dev/null
+++ b/src/feature/nodelist/nodefamily.c
@@ -0,0 +1,373 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file nodefamily.c
+ * \brief Code to manipulate encoded, reference-counted node families. We
+ * use these tricks to save space, since these families would otherwise
+ * require a large number of tiny allocations.
+ **/
+
+#include "core/or/or.h"
+#include "feature/nodelist/nickname.h"
+#include "feature/nodelist/nodefamily.h"
+#include "feature/nodelist/nodefamily_st.h"
+#include "feature/nodelist/nodelist.h"
+#include "feature/relay/router.h"
+#include "feature/nodelist/routerlist.h"
+
+#include "ht.h"
+#include "siphash.h"
+
+#include "lib/container/smartlist.h"
+#include "lib/ctime/di_ops.h"
+#include "lib/defs/digest_sizes.h"
+#include "lib/log/util_bug.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Allocate and return a blank node family with space to hold <b>n_members</b>
+ * members.
+ */
+static nodefamily_t *
+nodefamily_alloc(int n_members)
+{
+ size_t alloc_len = offsetof(nodefamily_t, family_members) +
+ NODEFAMILY_ARRAY_SIZE(n_members);
+ nodefamily_t *nf = tor_malloc_zero(alloc_len);
+ nf->n_members = n_members;
+ return nf;
+}
+
+/**
+ * Hashtable hash implementation.
+ */
+static inline unsigned int
+nodefamily_hash(const nodefamily_t *nf)
+{
+ return (unsigned) siphash24g(nf->family_members,
+ NODEFAMILY_ARRAY_SIZE(nf->n_members));
+}
+
+/**
+ * Hashtable equality implementation.
+ */
+static inline unsigned int
+nodefamily_eq(const nodefamily_t *a, const nodefamily_t *b)
+{
+ return (a->n_members == b->n_members) &&
+ fast_memeq(a->family_members, b->family_members,
+ NODEFAMILY_ARRAY_SIZE(a->n_members));
+}
+
+static HT_HEAD(nodefamily_map, nodefamily_t) the_node_families
+ = HT_INITIALIZER();
+
+HT_PROTOTYPE(nodefamily_map, nodefamily_t, ht_ent, nodefamily_hash,
+ nodefamily_eq)
+HT_GENERATE2(nodefamily_map, nodefamily_t, ht_ent, nodefamily_hash,
+ node_family_eq, 0.6, tor_reallocarray_, tor_free_)
+
+/**
+ * Parse the family declaration in <b>s</b>, returning the canonical
+ * <b>nodefamily_t</b> for its members. Return NULL on error.
+ *
+ * If <b>rsa_id_self</b> is provided, it is a DIGEST_LEN-byte digest
+ * for the router that declared this family: insert it into the
+ * family declaration if it is not there already.
+ *
+ * If NF_WARN_MALFORMED is set in <b>flags</b>, warn about any
+ * elements that we can't parse. (By default, we log at info.)
+ *
+ * If NF_REJECT_MALFORMED is set in <b>flags</b>, treat any unparseable
+ * elements as an error. (By default, we simply omit them.)
+ **/
+nodefamily_t *
+nodefamily_parse(const char *s, const uint8_t *rsa_id_self,
+ unsigned flags)
+{
+ smartlist_t *sl = smartlist_new();
+ smartlist_split_string(sl, s, NULL, SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0);
+ nodefamily_t *result = nodefamily_from_members(sl, rsa_id_self, flags);
+ SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+ smartlist_free(sl);
+ return result;
+}
+
+/**
+ * qsort helper for encoded nodefamily elements.
+ **/
+static int
+compare_members(const void *a, const void *b)
+{
+ return fast_memcmp(a, b, NODEFAMILY_MEMBER_LEN);
+}
+
+/**
+ * Parse the member strings in <b>members</b>, returning their canonical
+ * <b>nodefamily_t</b>. Return NULL on error.
+ *
+ * If <b>rsa_id_self</b> is provided, it is a DIGEST_LEN-byte digest
+ * for the router that declared this family: insert it into the
+ * family declaration if it is not there already.
+ *
+ * The <b>flags</b> element is interpreted as in nodefamily_parse().
+ **/
+nodefamily_t *
+nodefamily_from_members(const smartlist_t *members,
+ const uint8_t *rsa_id_self,
+ unsigned flags)
+{
+ const int n_self = rsa_id_self ? 1 : 0;
+ int n_bad_elements = 0;
+ int n_members = smartlist_len(members) + n_self;
+ nodefamily_t *tmp = nodefamily_alloc(n_members);
+ uint8_t *ptr = NODEFAMILY_MEMBER_PTR(tmp, 0);
+
+ SMARTLIST_FOREACH_BEGIN(members, const char *, cp) {
+ bool bad_element = true;
+ if (is_legal_nickname(cp)) {
+ ptr[0] = NODEFAMILY_BY_NICKNAME;
+ tor_assert(strlen(cp) < DIGEST_LEN); // guaranteed by is_legal_nickname
+ memcpy(ptr+1, cp, strlen(cp));
+ bad_element = false;
+ } else if (is_legal_hexdigest(cp)) {
+ char digest_buf[DIGEST_LEN];
+ char nn_buf[MAX_NICKNAME_LEN+1];
+ char nn_char=0;
+ if (hex_digest_nickname_decode(cp, digest_buf, &nn_char, nn_buf)==0) {
+ bad_element = false;
+ ptr[0] = NODEFAMILY_BY_RSA_ID;
+ memcpy(ptr+1, digest_buf, DIGEST_LEN);
+ }
+ }
+
+ if (bad_element) {
+ const int severity = (flags & NF_WARN_MALFORMED) ? LOG_WARN : LOG_INFO;
+ log_fn(severity, LD_GENERAL,
+ "Bad element %s while parsing a node family.",
+ escaped(cp));
+ ++n_bad_elements;
+ } else {
+ ptr += NODEFAMILY_MEMBER_LEN;
+ }
+ } SMARTLIST_FOREACH_END(cp);
+
+ if (n_bad_elements && (flags & NF_REJECT_MALFORMED))
+ goto err;
+
+ if (rsa_id_self) {
+ /* Add self. */
+ ptr[0] = NODEFAMILY_BY_RSA_ID;
+ memcpy(ptr+1, rsa_id_self, DIGEST_LEN);
+ }
+
+ n_members -= n_bad_elements;
+
+ /* Sort tmp into canonical order. */
+ qsort(tmp->family_members, n_members, NODEFAMILY_MEMBER_LEN,
+ compare_members);
+
+ /* Remove duplicates. */
+ int i;
+ for (i = 0; i < n_members-1; ++i) {
+ uint8_t *thisptr = NODEFAMILY_MEMBER_PTR(tmp, i);
+ uint8_t *nextptr = NODEFAMILY_MEMBER_PTR(tmp, i+1);
+ if (fast_memeq(thisptr, nextptr, NODEFAMILY_MEMBER_LEN)) {
+ memmove(thisptr, nextptr, (n_members-i-1)*NODEFAMILY_MEMBER_LEN);
+ --n_members;
+ --i;
+ }
+ }
+ int n_members_alloc = tmp->n_members;
+ tmp->n_members = n_members;
+
+ /* See if we already allocated this family. */
+ nodefamily_t *found = HT_FIND(nodefamily_map, &the_node_families, tmp);
+ if (found) {
+ /* If we did, great: incref it and return it. */
+ ++found->refcnt;
+ tor_free(tmp);
+ return found;
+ } else {
+ /* If not, insert it into the hashtable. */
+ if (n_members_alloc != n_members) {
+ /* Compact the family if needed */
+ nodefamily_t *tmp2 = nodefamily_alloc(n_members);
+ memcpy(tmp2->family_members, tmp->family_members,
+ n_members * NODEFAMILY_MEMBER_LEN);
+ tor_free(tmp);
+ tmp = tmp2;
+ }
+
+ tmp->refcnt = 1;
+ HT_INSERT(nodefamily_map, &the_node_families, tmp);
+ return tmp;
+ }
+
+ err:
+ tor_free(tmp);
+ return NULL;
+}
+
+/**
+ * Drop our reference to <b>family</b>, freeing it if there are no more
+ * references.
+ */
+void
+nodefamily_free_(nodefamily_t *family)
+{
+ if (family == NULL)
+ return;
+
+ --family->refcnt;
+
+ if (family->refcnt == 0) {
+ HT_REMOVE(nodefamily_map, &the_node_families, family);
+ tor_free(family);
+ }
+}
+
+/**
+ * Return true iff <b>family</b> contains the SHA1 RSA1024 identity
+ * <b>rsa_id</b>.
+ */
+bool
+nodefamily_contains_rsa_id(const nodefamily_t *family,
+ const uint8_t *rsa_id)
+{
+ if (family == NULL)
+ return false;
+
+ unsigned i;
+ for (i = 0; i < family->n_members; ++i) {
+ const uint8_t *ptr = NODEFAMILY_MEMBER_PTR(family, i);
+ if (ptr[0] == NODEFAMILY_BY_RSA_ID &&
+ fast_memeq(ptr+1, rsa_id, DIGEST_LEN)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * Return true iff <b>family</b> contains the nickname <b>name</b>.
+ */
+bool
+nodefamily_contains_nickname(const nodefamily_t *family,
+ const char *name)
+{
+ if (family == NULL)
+ return false;
+
+ unsigned i;
+ for (i = 0; i < family->n_members; ++i) {
+ const uint8_t *ptr = NODEFAMILY_MEMBER_PTR(family, i);
+ // note that the strcasecmp() is safe because there is always at least one
+ // NUL in the encoded nickname, because all legal nicknames are less than
+ // DIGEST_LEN bytes long.
+ if (ptr[0] == NODEFAMILY_BY_NICKNAME && !strcasecmp((char*)ptr+1, name)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * Return true if <b>family</b> contains the nickname or the RSA ID for
+ * <b>node</b>
+ **/
+bool
+nodefamily_contains_node(const nodefamily_t *family,
+ const node_t *node)
+{
+ return
+ nodefamily_contains_nickname(family, node_get_nickname(node))
+ ||
+ nodefamily_contains_rsa_id(family, node_get_rsa_id_digest(node));
+}
+
+/**
+ * Look up every entry in <b>family</b>, and add add the corresponding
+ * node_t to <b>out</b>.
+ **/
+void
+nodefamily_add_nodes_to_smartlist(const nodefamily_t *family,
+ smartlist_t *out)
+{
+ if (!family)
+ return;
+
+ unsigned i;
+ for (i = 0; i < family->n_members; ++i) {
+ const uint8_t *ptr = NODEFAMILY_MEMBER_PTR(family, i);
+ const node_t *node = NULL;
+ switch (ptr[0]) {
+ case NODEFAMILY_BY_NICKNAME:
+ node = node_get_by_nickname((char*)ptr+1, NNF_NO_WARN_UNNAMED);
+ break;
+ case NODEFAMILY_BY_RSA_ID:
+ node = node_get_by_id((char*)ptr+1);
+ break;
+ default:
+ /* LCOV_EXCL_START */
+ tor_assert_nonfatal_unreached();
+ break;
+ /* LCOV_EXCL_STOP */
+ }
+ if (node)
+ smartlist_add(out, (void *)node);
+ }
+}
+
+/**
+ * Encode <b>family</b> as a space-separated string.
+ */
+char *
+nodefamily_format(const nodefamily_t *family)
+{
+ if (!family)
+ return tor_strdup("");
+
+ unsigned i;
+ smartlist_t *sl = smartlist_new();
+ for (i = 0; i < family->n_members; ++i) {
+ const uint8_t *ptr = NODEFAMILY_MEMBER_PTR(family, i);
+ switch (ptr[0]) {
+ case NODEFAMILY_BY_NICKNAME:
+ smartlist_add_strdup(sl, (char*)ptr+1);
+ break;
+ case NODEFAMILY_BY_RSA_ID: {
+ char buf[HEX_DIGEST_LEN+2];
+ buf[0]='$';
+ base16_encode(buf+1, sizeof(buf)-1, (char*)ptr+1, DIGEST_LEN);
+ smartlist_add_strdup(sl, buf);
+ break;
+ }
+ default:
+ /* LCOV_EXCL_START */
+ tor_assert_nonfatal_unreached();
+ break;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+
+ char *result = smartlist_join_strings(sl, " ", 0, NULL);
+ SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+ smartlist_free(sl);
+ return result;
+}
+
+/**
+ * Free all storage held in the nodefamily map.
+ **/
+void
+nodefamily_free_all(void)
+{
+ HT_CLEAR(nodefamily_map, &the_node_families);
+}
diff --git a/src/feature/nodelist/nodefamily.h b/src/feature/nodelist/nodefamily.h
new file mode 100644
index 0000000000..342f161a07
--- /dev/null
+++ b/src/feature/nodelist/nodefamily.h
@@ -0,0 +1,47 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file nodefamily.h
+ * \brief Header file for nodefamily.c.
+ **/
+
+#ifndef TOR_NODEFAMILY_H
+#define TOR_NODEFAMILY_H
+
+#include "lib/malloc/malloc.h"
+#include <stdbool.h>
+
+typedef struct nodefamily_t nodefamily_t;
+struct node_t;
+struct smartlist_t;
+
+#define NF_WARN_MALFORMED (1u<<0)
+#define NF_REJECT_MALFORMED (1u<<1)
+
+nodefamily_t *nodefamily_parse(const char *s,
+ const uint8_t *rsa_id_self,
+ unsigned flags);
+nodefamily_t *nodefamily_from_members(const struct smartlist_t *members,
+ const uint8_t *rsa_id_self,
+ unsigned flags);
+void nodefamily_free_(nodefamily_t *family);
+#define nodefamily_free(family) \
+ FREE_AND_NULL(nodefamily_t, nodefamily_free_, (family))
+
+bool nodefamily_contains_rsa_id(const nodefamily_t *family,
+ const uint8_t *rsa_id);
+bool nodefamily_contains_nickname(const nodefamily_t *family,
+ const char *name);
+bool nodefamily_contains_node(const nodefamily_t *family,
+ const struct node_t *node);
+void nodefamily_add_nodes_to_smartlist(const nodefamily_t *family,
+ struct smartlist_t *out);
+char *nodefamily_format(const nodefamily_t *family);
+
+void nodefamily_free_all(void);
+
+#endif
diff --git a/src/feature/nodelist/nodefamily_st.h b/src/feature/nodelist/nodefamily_st.h
new file mode 100644
index 0000000000..f88ada494a
--- /dev/null
+++ b/src/feature/nodelist/nodefamily_st.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_NODEFAMILY_ST_H
+#define TOR_NODEFAMILY_ST_H
+
+#include "orconfig.h"
+#include "ht.h"
+
+struct nodefamily_t {
+ /** Entry for this nodefamily_t within the hashtable. */
+ HT_ENTRY(nodefamily_t) ht_ent;
+ /** Reference count. (The hashtable is not treated as a reference */
+ uint32_t refcnt;
+ /** Number of items encoded in <b>family_members</b>. */
+ uint32_t n_members;
+ /* A byte-array encoding the members of this family. We encode each member
+ * as one byte to indicate whether it's a nickname or a fingerprint, plus
+ * DIGEST_LEN bytes of data. The entries are lexically sorted.
+ */
+ uint8_t family_members[FLEXIBLE_ARRAY_MEMBER];
+};
+
+#define NODEFAMILY_MEMBER_LEN (1+DIGEST_LEN)
+
+/** Tag byte, indicates that the following bytes are a NUL-padded nickname.
+ */
+#define NODEFAMILY_BY_NICKNAME 0
+/** Tag byte, indicates that the following bytes are a RSA1024 SHA1 ID.
+ */
+#define NODEFAMILY_BY_RSA_ID 1
+
+/**
+ * Number of bytes to allocate in the array for a nodefamily_t with N members.
+ **/
+#define NODEFAMILY_ARRAY_SIZE(n) \
+ ((n) * NODEFAMILY_MEMBER_LEN)
+
+/**
+ * Pointer to the i'th member of <b>nf</b>, as encoded.
+ */
+#define NODEFAMILY_MEMBER_PTR(nf, i) \
+ (&((nf)->family_members[(i) * NODEFAMILY_MEMBER_LEN]))
+
+#endif