summaryrefslogtreecommitdiff
path: root/src/or
diff options
context:
space:
mode:
Diffstat (limited to 'src/or')
-rw-r--r--src/or/consdiff.c337
-rw-r--r--src/or/consdiff.h30
2 files changed, 240 insertions, 127 deletions
diff --git a/src/or/consdiff.c b/src/or/consdiff.c
index dfb97d432a..848ea13ea7 100644
--- a/src/or/consdiff.c
+++ b/src/or/consdiff.c
@@ -28,12 +28,19 @@
* and will send small couples of slices to calc_changes, keeping the running
* time near-linear. This is explained in more detail in the gen_ed_diff
* comments.
+ *
+ * The allocation strategy tries to save time and memory by avoiding needless
+ * copies. Instead of actually splitting the inputs into separate strings, we
+ * allocate cdline_t objects, each of which represents a line in the original
+ * object or in the output. We use memarea_t allocators to manage the
+ * temporary memory we use when generating or applying diffs.
**/
#define CONSDIFF_PRIVATE
#include "or.h"
#include "consdiff.h"
+#include "memarea.h"
#include "routerparse.h"
static const char* ns_diff_version = "network-status-diff-version 1";
@@ -41,6 +48,36 @@ static const char* hash_token = "hash";
static char *consensus_join_lines(const smartlist_t *inp);
+/** Return true iff a and b have the same contents. */
+STATIC int
+lines_eq(const cdline_t *a, const cdline_t *b)
+{
+ return a->len == b->len && fast_memeq(a->s, b->s, a->len);
+}
+
+/** Return true iff a has the same contents as the nul-terminated string b. */
+STATIC int
+line_str_eq(const cdline_t *a, const char *b)
+{
+ const size_t len = strlen(b);
+ tor_assert(len <= UINT32_MAX);
+ cdline_t bline = { b, (uint32_t)len };
+ return lines_eq(a, &bline);
+}
+
+/** Add a cdline_t to <b>lst</b> holding as its contents the nul-terminated
+ * string s. Use the provided memory area for storage. */
+STATIC void
+smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
+{
+ size_t len = strlen(s);
+ const char *ss = memarea_memdup(area, s, len);
+ cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
+ line->s = ss;
+ line->len = (uint32_t)len;
+ smartlist_add(lst, line);
+}
+
/** Compute the digest of <b>cons</b>, and store the result in
* <b>digest_out</b>. Return 0 on success, -1 on failure. */
/* This is a separate, mockable function so that we can override it when
@@ -114,7 +151,7 @@ lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2,
for (int i = 0; i < slice1->len; ++i, si+=direction) {
- const char *line1 = smartlist_get(slice1->list, si);
+ const cdline_t *line1 = smartlist_get(slice1->list, si);
/* Store the last results. */
memcpy(prev, result, a_size);
@@ -125,8 +162,8 @@ lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2,
for (int j = 0; j < slice2->len; ++j, sj+=direction) {
- const char *line2 = smartlist_get(slice2->list, sj);
- if (!strcmp(line1, line2)) {
+ const cdline_t *line2 = smartlist_get(slice2->list, sj);
+ if (lines_eq(line1, line2)) {
/* If the lines are equal, the lcs is one line longer. */
result[j + 1] = prev[j] + 1;
} else {
@@ -146,9 +183,9 @@ STATIC void
trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
{
while (slice1->len>0 && slice2->len>0) {
- const char *line1 = smartlist_get(slice1->list, slice1->offset);
- const char *line2 = smartlist_get(slice2->list, slice2->offset);
- if (strcmp(line1, line2)) {
+ const cdline_t *line1 = smartlist_get(slice1->list, slice1->offset);
+ const cdline_t *line2 = smartlist_get(slice2->list, slice2->offset);
+ if (!lines_eq(line1, line2)) {
break;
}
slice1->offset++; slice1->len--;
@@ -159,9 +196,9 @@ trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
int i2 = (slice2->offset+slice2->len)-1;
while (slice1->len>0 && slice2->len>0) {
- const char *line1 = smartlist_get(slice1->list, i1);
- const char *line2 = smartlist_get(slice2->list, i2);
- if (strcmp(line1, line2)) {
+ const cdline_t *line1 = smartlist_get(slice1->list, i1);
+ const cdline_t *line2 = smartlist_get(slice2->list, i2);
+ if (!lines_eq(line1, line2)) {
break;
}
i1--;
@@ -171,15 +208,17 @@ trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
}
}
-/** Like smartlist_string_pos, but limited to the bounds of the slice.
+/** Like smartlist_string_pos, but uses a cdline_t, and is restricted to the
+ * bounds of the slice.
*/
STATIC int
-smartlist_slice_string_pos(const smartlist_slice_t *slice, const char *string)
+smartlist_slice_string_pos(const smartlist_slice_t *slice,
+ const cdline_t *string)
{
int end = slice->offset + slice->len;
for (int i = slice->offset; i < end; ++i) {
- const char *el = smartlist_get(slice->list, i);
- if (!strcmp(el, string)) {
+ const cdline_t *el = smartlist_get(slice->list, i);
+ if (lines_eq(el, string)) {
return i;
}
}
@@ -199,7 +238,7 @@ set_changed(bitarray_t *changed1, bitarray_t *changed2,
tor_assert(slice1->len == 0 || slice1->len == 1);
if (slice1->len == 1) {
- const char *line_common = smartlist_get(slice1->list, slice1->offset);
+ const cdline_t *line_common = smartlist_get(slice1->list, slice1->offset);
toskip = smartlist_slice_string_pos(slice2, line_common);
if (toskip == -1) {
bitarray_set(changed1, slice1->offset);
@@ -318,16 +357,20 @@ static const uint8_t base64_compare_table[256] = {
/** Helper: Get the identity hash from a router line, assuming that the line
* at least appears to be a router line and thus starts with "r ".
+ *
+ * If an identity hash is found, store it (without decoding it) in
+ * <b>hash_out</b>, and return 0. On failure, return -1.
*/
-STATIC const char *
-get_id_hash(const char *r_line)
+STATIC int
+get_id_hash(const cdline_t *line, cdline_t *hash_out)
{
- r_line += strlen("r ");
+ if (line->len < 2)
+ return -1;
/* Skip the router name. */
- const char *hash = strchr(r_line, ' ');
+ const char *hash = memchr(line->s + 2, ' ', line->len - 2);
if (!hash) {
- return NULL;
+ return -1;
}
hash++;
@@ -335,28 +378,34 @@ get_id_hash(const char *r_line)
/* Stop when the first non-base64 character is found. Use unsigned chars to
* avoid negative indexes causing crashes.
*/
- while (base64_compare_table[*((unsigned char*)hash_end)] != X) {
+ while (base64_compare_table[*((unsigned char*)hash_end)] != X &&
+ hash_end < line->s + line->len) {
hash_end++;
}
/* Empty hash. */
if (hash_end == hash) {
- return NULL;
+ return -1;
}
- return hash;
+ hash_out->s = hash;
+ /* Always true because lines are limited to this length */
+ tor_assert(hash_end - hash <= UINT32_MAX);
+ hash_out->len = (uint32_t)(hash_end - hash);
+
+ return 0;
}
/** Helper: Check that a line is a valid router entry. We must at least be
* able to fetch a proper identity hash from it for it to be valid.
*/
STATIC int
-is_valid_router_entry(const char *line)
+is_valid_router_entry(const cdline_t *line)
{
- if (strcmpstart(line, "r ") != 0) {
+ if (line->len < 2 || fast_memneq(line->s, "r ", 2))
return 0;
- }
- return (get_id_hash(line) != NULL);
+ cdline_t tmp;
+ return (get_id_hash(line, &tmp) == 0);
}
/** Helper: Find the next router line starting at the current position.
@@ -374,7 +423,7 @@ next_router(const smartlist_t *cons, int cur)
return len;
}
- const char *line = smartlist_get(cons, cur);
+ const cdline_t *line = smartlist_get(cons, cur);
while (!is_valid_router_entry(line)) {
if (++cur >= len) {
return len;
@@ -384,26 +433,28 @@ next_router(const smartlist_t *cons, int cur)
return cur;
}
-/** Helper: compare two base64-encoded identity hashes which may be of
+/** Helper: compare two base64-encoded identity hashes, which may be of
* different lengths. Comparison ends when the first non-base64 char is found.
*/
STATIC int
-base64cmp(const char *hash1, const char *hash2)
+base64cmp(const cdline_t *hash1, const cdline_t *hash2)
{
/* NULL is always lower, useful for last_hash which starts at NULL. */
- if (!hash1 && !hash2) {
+ if (!hash1->s && !hash2->s) {
return 0;
}
- if (!hash1) {
+ if (!hash1->s) {
return -1;
}
- if (!hash2) {
+ if (!hash2->s) {
return 1;
}
/* Don't index with a char; char may be signed. */
- const unsigned char *a = (unsigned char*)hash1;
- const unsigned char *b = (unsigned char*)hash2;
+ const unsigned char *a = (unsigned char*)hash1->s;
+ const unsigned char *b = (unsigned char*)hash2->s;
+ const unsigned char *a_end = a + hash1->len;
+ const unsigned char *b_end = b + hash2->len;
while (1) {
uint8_t av = base64_compare_table[*a];
uint8_t bv = base64_compare_table[*b];
@@ -427,6 +478,15 @@ base64cmp(const char *hash1, const char *hash2)
} else {
a++;
b++;
+ if (a == a_end) {
+ if (b == b_end) {
+ return 0;
+ } else {
+ return -1;
+ }
+ } else if (b == b_end) {
+ return 1;
+ }
}
}
}
@@ -436,6 +496,9 @@ base64cmp(const char *hash1, const char *hash2)
* happen if any lines the script had to add matched "." or if the routers
* were not properly ordered.
*
+ * All cdline_t objects in the resulting object are either references to lines
+ * in one of the inputs, or are newly allocated lines in the provided memarea.
+ *
* This implementation is consensus-specific. To generate an ed diff for any
* given input in quadratic time, you can replace all the code until the
* navigation in reverse order with the following:
@@ -449,7 +512,8 @@ base64cmp(const char *hash1, const char *hash2)
* calc_changes(cons1_sl, cons2_sl, changed1, changed2);
*/
STATIC smartlist_t *
-gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
+gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2,
+ memarea_t *area)
{
int len1 = smartlist_len(cons1);
int len2 = smartlist_len(cons2);
@@ -463,12 +527,9 @@ gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
int i1=-1, i2=-1;
int start1=0, start2=0;
- const char *hash1 = NULL;
- const char *hash2 = NULL;
-
/* To check that hashes are ordered properly */
- const char *last_hash1 = NULL;
- const char *last_hash2 = NULL;
+ cdline_t hash1 = { NULL, 0 }, hash2 = { NULL, 0 };
+ cdline_t last_hash1 = { NULL, 0 }, last_hash2 = { NULL, 0 };
/* i1 and i2 are initialized at the first line of each consensus. They never
* reach past len1 and len2 respectively, since next_router doesn't let that
@@ -485,9 +546,9 @@ gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
if (i1 < len1) {
i1 = next_router(cons1, i1);
if (i1 != len1) {
- last_hash1 = hash1;
- hash1 = get_id_hash(smartlist_get(cons1, i1));
- if (base64cmp(hash1, last_hash1) <= 0) {
+ memcpy(&last_hash1, &hash1, sizeof(cdline_t));
+ if (get_id_hash(smartlist_get(cons1, i1), &hash1) < 0 ||
+ base64cmp(&hash1, &last_hash1) <= 0) {
log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
"the base consensus doesn't have its router entries "
"sorted properly.");
@@ -499,9 +560,9 @@ gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
if (i2 < len2) {
i2 = next_router(cons2, i2);
if (i2 != len2) {
- last_hash2 = hash2;
- hash2 = get_id_hash(smartlist_get(cons2, i2));
- if (base64cmp(hash2, last_hash2) <= 0) {
+ memcpy(&last_hash2, &hash2, sizeof(cdline_t));
+ if (get_id_hash(smartlist_get(cons2, i2), &hash2) < 0 ||
+ base64cmp(&hash2, &last_hash2) <= 0) {
log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
"the target consensus doesn't have its router entries "
"sorted properly.");
@@ -525,7 +586,7 @@ gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
* consensus has already reached the end, both are extended to their
* respecting ends since we are done.
*/
- int cmp = base64cmp(hash1, hash2);
+ int cmp = base64cmp(&hash1, &hash2);
while (cmp != 0) {
if (i1 < len1 && cmp < 0) {
i1 = next_router(cons1, i1);
@@ -536,9 +597,9 @@ gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
i2 = len2;
break;
}
- last_hash1 = hash1;
- hash1 = get_id_hash(smartlist_get(cons1, i1));
- if (base64cmp(hash1, last_hash1) <= 0) {
+ memcpy(&last_hash1, &hash1, sizeof(cdline_t));
+ if (get_id_hash(smartlist_get(cons1, i1), &hash1) < 0 ||
+ base64cmp(&hash1, &last_hash1) <= 0) {
log_warn(LD_CONSDIFF, "Refusing to generate consensus diff "
"because the base consensus doesn't have its router entries "
"sorted properly.");
@@ -553,9 +614,9 @@ gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
i1 = len1;
break;
}
- last_hash2 = hash2;
- hash2 = get_id_hash(smartlist_get(cons2, i2));
- if (base64cmp(hash2, last_hash2) <= 0) {
+ memcpy(&last_hash2, &hash2, sizeof(cdline_t));
+ if (get_id_hash(smartlist_get(cons2, i2), &hash2) < 0 ||
+ base64cmp(&hash2, &last_hash2) <= 0) {
log_warn(LD_CONSDIFF, "Refusing to generate consensus diff "
"because the target consensus doesn't have its router entries "
"sorted properly.");
@@ -566,7 +627,7 @@ gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
i2 = len2;
break;
}
- cmp = base64cmp(hash1, hash2);
+ cmp = base64cmp(&hash1, &hash2);
}
}
@@ -595,6 +656,7 @@ gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
* each chunk of changes.
*/
i1=len1-1, i2=len2-1;
+ char buf[128];
while (i1 > 0 || i2 > 0) {
int start1x, start2x, end1, end2, added, deleted;
@@ -626,31 +688,36 @@ gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
if (added == 0) {
if (deleted == 1) {
- smartlist_add_asprintf(result, "%id", start1x+1);
+ tor_snprintf(buf, sizeof(buf), "%id", start1x+1);
+ smartlist_add_linecpy(result, area, buf);
} else {
- smartlist_add_asprintf(result, "%i,%id", start1x+1, start1x+deleted);
+ tor_snprintf(buf, sizeof(buf), "%i,%id", start1x+1, start1x+deleted);
+ smartlist_add_linecpy(result, area, buf);
}
} else {
int i;
if (deleted == 0) {
- smartlist_add_asprintf(result, "%ia", start1x);
+ tor_snprintf(buf, sizeof(buf), "%ia", start1x);
+ smartlist_add_linecpy(result, area, buf);
} else if (deleted == 1) {
- smartlist_add_asprintf(result, "%ic", start1x+1);
+ tor_snprintf(buf, sizeof(buf), "%ic", start1x+1);
+ smartlist_add_linecpy(result, area, buf);
} else {
- smartlist_add_asprintf(result, "%i,%ic", start1x+1, start1x+deleted);
+ tor_snprintf(buf, sizeof(buf), "%i,%ic", start1x+1, start1x+deleted);
+ smartlist_add_linecpy(result, area, buf);
}
for (i = start2x; i <= end2; ++i) {
- const char *line = smartlist_get(cons2, i);
- if (!strcmp(line, ".")) {
+ cdline_t *line = smartlist_get(cons2, i);
+ if (line_str_eq(line, ".")) {
log_warn(LD_CONSDIFF, "Cannot generate consensus diff because "
"one of the lines to be added is \".\".");
goto error_cleanup;
}
- smartlist_add(result, tor_strdup(line));
+ smartlist_add(result, line);
}
- smartlist_add_asprintf(result, ".");
+ smartlist_add_linecpy(result, area, ".");
}
}
@@ -664,7 +731,6 @@ gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
bitarray_free(changed1);
bitarray_free(changed2);
- SMARTLIST_FOREACH(result, char*, line, tor_free(line));
smartlist_free(result);
return NULL;
@@ -673,6 +739,9 @@ gen_ed_diff(const smartlist_t *cons1, const smartlist_t *cons2)
/** Apply the ed diff, starting at <b>diff_starting_line</b>, to the consensus
* and return a new consensus, also as a line-based smartlist. Will return
* NULL if the ed diff is not properly formatted.
+ *
+ * All cdline_t objects in the resulting object are references to lines
+ * in one of the inputs; nothing is copied.
*/
STATIC smartlist_t *
apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
@@ -683,9 +752,17 @@ apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
smartlist_t *cons2 = smartlist_new();
for (int i=diff_starting_line; i<diff_len; ++i) {
- const char *diff_line = smartlist_get(diff, i);
+ const cdline_t *diff_cdline = smartlist_get(diff, i);
+ char diff_line[128];
char *endptr1, *endptr2;
+ if (diff_cdline->len > sizeof(diff_line) - 1) {
+ log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
+ "an ed command was far too long");
+ goto error_cleanup;
+ }
+ memcpy(diff_line, diff_cdline->s, diff_cdline->len);
+ diff_line[diff_cdline->len] = 0;
int start = (int)strtol(diff_line, &endptr1, 10);
int end;
if (endptr1 == diff_line) {
@@ -748,8 +825,8 @@ apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
/* Add unchanged lines. */
for (; j && j > end; --j) {
- const char *cons_line = smartlist_get(cons1, j-1);
- smartlist_add(cons2, tor_strdup(cons_line));
+ cdline_t *cons_line = smartlist_get(cons1, j-1);
+ smartlist_add(cons2, cons_line);
}
/* Ignore removed lines. */
@@ -767,7 +844,7 @@ apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
i++; /* Skip the line with the range and command. */
while (i < diff_len) {
- if (!strcmp(smartlist_get(diff, i), ".")) {
+ if (line_str_eq(smartlist_get(diff, i), ".")) {
break;
}
if (++i == diff_len) {
@@ -787,16 +864,16 @@ apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
}
while (added_i > added_end) {
- const char *added_line = smartlist_get(diff, added_i--);
- smartlist_add(cons2, tor_strdup(added_line));
+ cdline_t *added_line = smartlist_get(diff, added_i--);
+ smartlist_add(cons2, added_line);
}
}
}
/* Add remaining unchanged lines. */
for (; j > 0; --j) {
- const char *cons_line = smartlist_get(cons1, j-1);
- smartlist_add(cons2, tor_strdup(cons_line));
+ cdline_t *cons_line = smartlist_get(cons1, j-1);
+ smartlist_add(cons2, cons_line);
}
/* Reverse the whole thing since we did it from the end. */
@@ -805,7 +882,6 @@ apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
error_cleanup:
- SMARTLIST_FOREACH(cons2, char*, line, tor_free(line));
smartlist_free(cons2);
return NULL;
@@ -817,11 +893,13 @@ apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
* up to the caller to free their resources.
*/
smartlist_t *
-consdiff_gen_diff(const smartlist_t *cons1, const smartlist_t *cons2,
+consdiff_gen_diff(const smartlist_t *cons1,
+ const smartlist_t *cons2,
const consensus_digest_t *digests1,
- const consensus_digest_t *digests2)
+ const consensus_digest_t *digests2,
+ memarea_t *area)
{
- smartlist_t *ed_diff = gen_ed_diff(cons1, cons2);
+ smartlist_t *ed_diff = gen_ed_diff(cons1, cons2, area);
/* ed diff could not be generated - reason already logged by gen_ed_diff. */
if (!ed_diff) {
goto error_cleanup;
@@ -838,8 +916,18 @@ consdiff_gen_diff(const smartlist_t *cons1, const smartlist_t *cons2,
/* LCOV_EXCL_STOP */
}
- int cons2_eq = smartlist_strings_eq(cons2, ed_cons2);
- SMARTLIST_FOREACH(ed_cons2, char*, line, tor_free(line));
+ int cons2_eq = 1;
+ if (smartlist_len(cons2) == smartlist_len(ed_cons2)) {
+ SMARTLIST_FOREACH_BEGIN(cons2, const cdline_t *, line1) {
+ const cdline_t *line2 = smartlist_get(ed_cons2, line1_sl_idx);
+ if (! lines_eq(line1, line2) ) {
+ cons2_eq = 0;
+ break;
+ }
+ } SMARTLIST_FOREACH_END(line1);
+ } else {
+ cons2_eq = 0;
+ }
smartlist_free(ed_cons2);
if (!cons2_eq) {
/* LCOV_EXCL_START -- impossible if diff generation is correct. */
@@ -858,10 +946,13 @@ consdiff_gen_diff(const smartlist_t *cons1, const smartlist_t *cons2,
(const char*)digests2->sha3_256, DIGEST256_LEN);
/* Create the resulting consensus diff. */
+ char buf[160];
smartlist_t *result = smartlist_new();
- smartlist_add_asprintf(result, "%s", ns_diff_version);
- smartlist_add_asprintf(result, "%s %s %s", hash_token,
+ tor_snprintf(buf, sizeof(buf), "%s", ns_diff_version);
+ smartlist_add_linecpy(result, area, buf);
+ tor_snprintf(buf, sizeof(buf), "%s %s %s", hash_token,
cons1_hash_hex, cons2_hash_hex);
+ smartlist_add_linecpy(result, area, buf);
smartlist_add_all(result, ed_diff);
smartlist_free(ed_diff);
return result;
@@ -870,7 +961,6 @@ consdiff_gen_diff(const smartlist_t *cons1, const smartlist_t *cons2,
if (ed_diff) {
/* LCOV_EXCL_START -- ed_diff is NULL except in unreachable cases above */
- SMARTLIST_FOREACH(ed_diff, char *, cp, tor_free(cp));
smartlist_free(ed_diff);
/* LCOV_EXCL_STOP */
}
@@ -888,7 +978,7 @@ consdiff_get_digests(const smartlist_t *diff,
char *digest2_out)
{
smartlist_t *hash_words = NULL;
- const char *format;
+ const cdline_t *format;
char cons1_hash[DIGEST256_LEN], cons2_hash[DIGEST256_LEN];
char *cons1_hash_hex, *cons2_hash_hex;
if (smartlist_len(diff) < 2) {
@@ -898,14 +988,19 @@ consdiff_get_digests(const smartlist_t *diff,
/* Check that it's the format and version we know. */
format = smartlist_get(diff, 0);
- if (strcmp(format, ns_diff_version)) {
+ if (!line_str_eq(format, ns_diff_version)) {
log_warn(LD_CONSDIFF, "The provided consensus diff format is not known.");
goto error_cleanup;
}
/* Grab the base16 digests. */
hash_words = smartlist_new();
- smartlist_split_string(hash_words, smartlist_get(diff, 1), " ", 0, 0);
+ {
+ const cdline_t *line2 = smartlist_get(diff, 1);
+ char *h = tor_memdup_nulterm(line2->s, line2->len);
+ smartlist_split_string(hash_words, h, " ", 0, 0);
+ tor_free(h);
+ }
/* There have to be three words, the first of which must be hash_token. */
if (smartlist_len(hash_words) != 3 ||
@@ -1038,7 +1133,6 @@ consdiff_apply_diff(const smartlist_t *cons1,
done:
if (cons2) {
- SMARTLIST_FOREACH(cons2, char *, cp, tor_free(cp));
smartlist_free(cons2);
}
@@ -1046,34 +1140,40 @@ consdiff_apply_diff(const smartlist_t *cons1,
}
/**
- * Helper: For every NL-terminated line in <b>s</b>, add that line
- * (without trailing newline) to <b>out</b>. Return -1 if there are any
- * non-NL terminated lines; 0 otherwise.
- *
- * Modifies <b>s</b> in place: don't do anything with <b>s</b> after you're
- * done here, besides freeing it.
+ * Helper: For every NL-terminated line in <b>s</b>, add a cdline referring to
+ * that line (without trailing newline) to <b>out</b>. Return -1 if there are
+ * any non-NL terminated lines; 0 otherwise.
*
* Unlike tor_split_lines, this function avoids ambiguity on its
* handling of a final line that isn't NL-terminated.
+ *
+ * All cdline_t objects are allocated in the provided memarea. Strings
+ * are not copied: if <b>s</b> changes or becomes invalid, then all
+ * generated cdlines will become invalid.
*/
-static int
-consensus_split_lines(smartlist_t *out, char *s)
+STATIC int
+consensus_split_lines(smartlist_t *out, const char *s, memarea_t *area)
{
- /* XXXX If we used string slices, we could avoid a bunch of copies here. */
while (*s) {
- char *eol = strchr(s, '\n');
+ const char *eol = strchr(s, '\n');
if (!eol) {
/* File doesn't end with newline. */
return -1;
}
- *eol = 0;
- smartlist_add(out, s);
+ if (eol - s > UINT32_MAX) {
+ /* Line is far too long. */
+ return -1;
+ }
+ cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
+ line->s = s;
+ line->len = (uint32_t)(eol - s);
+ smartlist_add(out, line);
s = eol+1;
}
return 0;
}
-/** Given a list of lines, return a newly allocated string containing
+/** Given a list of cdline_t, return a newly allocated string containing
* all of the lines, terminated with NL, concatenated.
*
* Unlike smartlist_join_strings(), avoids lossy operations on empty
@@ -1082,16 +1182,15 @@ static char *
consensus_join_lines(const smartlist_t *inp)
{
size_t n = 0;
- SMARTLIST_FOREACH(inp, const char *, cp, n += strlen(cp) + 1);
+ SMARTLIST_FOREACH(inp, const cdline_t *, cdline, n += cdline->len + 1);
n += 1;
char *result = tor_malloc(n);
char *out = result;
- SMARTLIST_FOREACH_BEGIN(inp, const char *, cp) {
- size_t len = strlen(cp);
- memcpy(out, cp, len);
- out += len;
+ SMARTLIST_FOREACH_BEGIN(inp, const cdline_t *, cdline) {
+ memcpy(out, cdline->s, cdline->len);
+ out += cdline->len;
*out++ = '\n';
- } SMARTLIST_FOREACH_END(cp);
+ } SMARTLIST_FOREACH_END(cdline);
*out++ = '\0';
tor_assert(out == result+n);
return result;
@@ -1114,28 +1213,26 @@ consensus_diff_generate(const char *cons1,
if (BUG(r1 < 0 || r2 < 0))
return NULL; // LCOV_EXCL_LINE
- char *cons1_copy = tor_strdup(cons1);
- char *cons2_copy = tor_strdup(cons2);
+ memarea_t *area = memarea_new();
lines1 = smartlist_new();
lines2 = smartlist_new();
- if (consensus_split_lines(lines1, cons1_copy) < 0)
+ if (consensus_split_lines(lines1, cons1, area) < 0)
goto done;
- if (consensus_split_lines(lines2, cons2_copy) < 0)
+ if (consensus_split_lines(lines2, cons2, area) < 0)
goto done;
- result_lines = consdiff_gen_diff(lines1, lines2, &d1, &d2);
+ result_lines = consdiff_gen_diff(lines1, lines2, &d1, &d2, area);
done:
- smartlist_free(lines1);
- smartlist_free(lines2);
- tor_free(cons1_copy);
- tor_free(cons2_copy);
-
if (result_lines) {
result = consensus_join_lines(result_lines);
- SMARTLIST_FOREACH(result_lines, char *, cp, tor_free(cp));
smartlist_free(result_lines);
}
+
+ memarea_drop_all(area);
+ smartlist_free(lines1);
+ smartlist_free(lines2);
+
return result;
}
@@ -1150,18 +1247,17 @@ consensus_diff_apply(const char *consensus,
smartlist_t *lines1 = NULL, *lines2 = NULL;
int r1;
char *result = NULL;
+ memarea_t *area = memarea_new();
r1 = consensus_compute_digest(consensus, &d1);
if (BUG(r1 < 0))
return NULL; // LCOV_EXCL_LINE
- char *cons_copy = tor_strdup(consensus);
- char *diff_copy = tor_strdup(diff);
lines1 = smartlist_new();
lines2 = smartlist_new();
- if (consensus_split_lines(lines1, cons_copy) < 0)
+ if (consensus_split_lines(lines1, consensus, area) < 0)
goto done;
- if (consensus_split_lines(lines2, diff_copy) < 0)
+ if (consensus_split_lines(lines2, diff, area) < 0)
goto done;
result = consdiff_apply_diff(lines1, lines2, &d1);
@@ -1169,8 +1265,7 @@ consensus_diff_apply(const char *consensus,
done:
smartlist_free(lines1);
smartlist_free(lines2);
- tor_free(cons_copy);
- tor_free(diff_copy);
+ memarea_drop_all(area);
return result;
}
diff --git a/src/or/consdiff.h b/src/or/consdiff.h
index 7797067f32..e9d175136e 100644
--- a/src/or/consdiff.h
+++ b/src/or/consdiff.h
@@ -13,6 +13,16 @@ char *consensus_diff_apply(const char *consensus,
const char *diff);
#ifdef CONSDIFF_PRIVATE
+struct memarea_t;
+
+/** Line type used for constructing consensus diffs. Each of these lines
+ * refers to a chunk of memory allocated elsewhere, and is not necessarily
+ * NUL-terminated: this helps us avoid copies and save memory. */
+typedef struct cdline_t {
+ const char *s;
+ uint32_t len;
+} cdline_t;
+
typedef struct consensus_digest_t {
uint8_t sha3_256[DIGEST256_LEN];
} consensus_digest_t;
@@ -20,7 +30,8 @@ typedef struct consensus_digest_t {
STATIC smartlist_t *consdiff_gen_diff(const smartlist_t *cons1,
const smartlist_t *cons2,
const consensus_digest_t *digests1,
- const consensus_digest_t *digests2);
+ const consensus_digest_t *digests2,
+ struct memarea_t *area);
STATIC char *consdiff_apply_diff(const smartlist_t *cons1,
const smartlist_t *diff,
const consensus_digest_t *digests1);
@@ -41,7 +52,8 @@ typedef struct smartlist_slice_t {
int len;
} smartlist_slice_t;
STATIC smartlist_t *gen_ed_diff(const smartlist_t *cons1,
- const smartlist_t *cons2);
+ const smartlist_t *cons2,
+ struct memarea_t *area);
STATIC smartlist_t *apply_ed_diff(const smartlist_t *cons1,
const smartlist_t *diff,
int start_line);
@@ -54,14 +66,20 @@ STATIC int *lcs_lengths(const smartlist_slice_t *slice1,
const smartlist_slice_t *slice2,
int direction);
STATIC void trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2);
-STATIC int base64cmp(const char *hash1, const char *hash2);
-STATIC const char *get_id_hash(const char *r_line);
-STATIC int is_valid_router_entry(const char *line);
+STATIC int base64cmp(const cdline_t *hash1, const cdline_t *hash2);
+STATIC int get_id_hash(const cdline_t *line, cdline_t *hash_out);
+STATIC int is_valid_router_entry(const cdline_t *line);
STATIC int smartlist_slice_string_pos(const smartlist_slice_t *slice,
- const char *string);
+ const cdline_t *string);
STATIC void set_changed(bitarray_t *changed1, bitarray_t *changed2,
const smartlist_slice_t *slice1,
const smartlist_slice_t *slice2);
+STATIC int consensus_split_lines(smartlist_t *out, const char *s,
+ struct memarea_t *area);
+STATIC void smartlist_add_linecpy(smartlist_t *lst, struct memarea_t *area,
+ const char *s);
+STATIC int lines_eq(const cdline_t *a, const cdline_t *b);
+STATIC int line_str_eq(const cdline_t *a, const char *b);
MOCK_DECL(STATIC int,
consensus_compute_digest,(const char *cons,