From 99b87d7ca48c5ed80aec5f24902843f72975bfea Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Wed, 3 Apr 2019 13:53:06 -0400 Subject: Even more diagnostic messages for bug 28223. Try to figure out _where exactly_ we are first encountering NULs in microdescriptors, and what we are doing when that happens. --- changes/diagnostic_28223_redux | 4 ++++ src/feature/dirparse/microdesc_parse.c | 17 ++++++++++++++++- src/feature/nodelist/microdesc.c | 21 +++++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 changes/diagnostic_28223_redux diff --git a/changes/diagnostic_28223_redux b/changes/diagnostic_28223_redux new file mode 100644 index 0000000000..0d7499832e --- /dev/null +++ b/changes/diagnostic_28223_redux @@ -0,0 +1,4 @@ + o Minor features (diagnostic): + - Add more diagnostic log messages in an attempt to solve + the issue of NUL bytes appearing in a microdescriptor cache. + Related to ticket 28223. diff --git a/src/feature/dirparse/microdesc_parse.c b/src/feature/dirparse/microdesc_parse.c index 5a75af3994..1e26901ce8 100644 --- a/src/feature/dirparse/microdesc_parse.c +++ b/src/feature/dirparse/microdesc_parse.c @@ -159,7 +159,22 @@ microdescs_parse_from_string(const char *s, const char *eos, if (tokenize_string(area, s, start_of_next_microdesc, tokens, microdesc_token_table, flags)) { - log_warn(LD_DIR, "Unparseable microdescriptor"); + const char *location; + switch (where) { + case SAVED_NOWHERE: + location = "download or generated string"; + break; + case SAVED_IN_CACHE: + location = "cache"; + break; + case SAVED_IN_JOURNAL: + location = "journal"; + break; + default: + location = "unknown location"; + break; + } + log_warn(LD_DIR, "Unparseable microdescriptor found in %s", location); goto next; } diff --git a/src/feature/nodelist/microdesc.c b/src/feature/nodelist/microdesc.c index dafaabb5e5..af4f608d2f 100644 --- a/src/feature/nodelist/microdesc.c +++ b/src/feature/nodelist/microdesc.c @@ -221,6 +221,13 @@ dump_microdescriptor(int fd, microdesc_t *md, size_t *annotation_len_out) } md->off = tor_fd_getpos(fd); + const char *nulpos = memchr(md->body, 0, md->bodylen); + if (BUG(nulpos)) { + log_warn(LD_BUG, "About to dump a NUL into a microdescriptor file. " + "offset %"PRId64", bodylen %zu, nul position %zu", + (int64_t)md->off, md->bodylen, + (size_t)(nulpos - md->body)); + } written = write_all_to_fd(fd, md->body, md->bodylen); if (written != (ssize_t)md->bodylen) { written = written < 0 ? 0 : written; @@ -480,6 +487,17 @@ microdesc_cache_clear(microdesc_cache_t *cache) cache->bytes_dropped = 0; } +static void +warn_if_nul_found(const char *inp, size_t len, const char *description) +{ + const char *nul_found = memchr(inp, 0, len); + if (BUG(nul_found)) { + log_warn(LD_BUG, "Found unexpected NUL while reading %s, at " + "position %zu/%zu.", + description, (nul_found - inp), len); + } +} + /** Reload the contents of cache from disk. If it is empty, load it * for the first time. Return 0 on success, -1 on failure. */ int @@ -497,6 +515,7 @@ microdesc_cache_reload(microdesc_cache_t *cache) mm = cache->cache_content = tor_mmap_file(cache->cache_fname); if (mm) { + warn_if_nul_found(mm->data, mm->size, "microdesc cache"); added = microdescs_add_to_cache(cache, mm->data, mm->data+mm->size, SAVED_IN_CACHE, 0, -1, NULL); if (added) { @@ -509,6 +528,8 @@ microdesc_cache_reload(microdesc_cache_t *cache) RFTS_IGNORE_MISSING, &st); if (journal_content) { cache->journal_len = (size_t) st.st_size; + warn_if_nul_found(journal_content, cache->journal_len, + "microdesc journal"); added = microdescs_add_to_cache(cache, journal_content, journal_content+st.st_size, SAVED_IN_JOURNAL, 0, -1, NULL); -- cgit v1.2.3-54-g00ecf From 821d29e42040789c59d271ba2f1d87a54d6d1435 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Wed, 3 Apr 2019 13:53:36 -0400 Subject: fdio.c: add more includes. This is just in case there is some rogue platform that uses a nonstandard value for SEEK_*, and does not define that macro in unistd.h. I think that's unlikely, but it's conceivable. --- src/lib/fdio/fdio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib/fdio/fdio.c b/src/lib/fdio/fdio.c index 6c87af791d..078af6a9ba 100644 --- a/src/lib/fdio/fdio.c +++ b/src/lib/fdio/fdio.c @@ -17,12 +17,16 @@ #ifdef _WIN32 #include #endif +#ifdef HAVE_SYS_TYPES_H +#include +#endif #include "lib/fdio/fdio.h" #include "lib/cc/torint.h" #include "lib/err/torerr.h" #include +#include /** @{ */ /** Some old versions of Unix didn't define constants for these values, -- cgit v1.2.3-54-g00ecf From 0c42ddf28c330280f66feb7cc92b63f3998468e1 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 15 Apr 2019 15:21:18 -0400 Subject: fixup! Even more diagnostic messages for bug 28223. Use TOR_PRIuSZ in place of %zu. --- src/feature/nodelist/microdesc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/feature/nodelist/microdesc.c b/src/feature/nodelist/microdesc.c index af4f608d2f..eaa7f0cafd 100644 --- a/src/feature/nodelist/microdesc.c +++ b/src/feature/nodelist/microdesc.c @@ -224,7 +224,8 @@ dump_microdescriptor(int fd, microdesc_t *md, size_t *annotation_len_out) const char *nulpos = memchr(md->body, 0, md->bodylen); if (BUG(nulpos)) { log_warn(LD_BUG, "About to dump a NUL into a microdescriptor file. " - "offset %"PRId64", bodylen %zu, nul position %zu", + "offset %"PRId64", bodylen %"TOR_PRIuSZ", " + "nul position %"TOR_PRIuSZ".", (int64_t)md->off, md->bodylen, (size_t)(nulpos - md->body)); } @@ -493,7 +494,7 @@ warn_if_nul_found(const char *inp, size_t len, const char *description) const char *nul_found = memchr(inp, 0, len); if (BUG(nul_found)) { log_warn(LD_BUG, "Found unexpected NUL while reading %s, at " - "position %zu/%zu.", + "position %"TOR_PRIuSZ"/%"TOR_PRIuSZ".", description, (nul_found - inp), len); } } -- cgit v1.2.3-54-g00ecf From 950d890f77d8f060702b100e6075d5083d536577 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 15 Apr 2019 15:33:09 -0400 Subject: In warn_if_nul_found, log surrounding context. We need to encode here instead of doing escaped(), since fwict escaped() does not currently handle NUL bytes. Also, use warn_if_nul_found in more cases to avoid duplication. --- src/feature/nodelist/microdesc.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/feature/nodelist/microdesc.c b/src/feature/nodelist/microdesc.c index eaa7f0cafd..f4fa8eeae2 100644 --- a/src/feature/nodelist/microdesc.c +++ b/src/feature/nodelist/microdesc.c @@ -69,6 +69,8 @@ struct microdesc_cache_t { }; static microdesc_cache_t *get_microdesc_cache_noload(void); +static void warn_if_nul_found(const char *inp, size_t len, int64_t offset, + const char *activity); /** Helper: computes a hash of md to place it in a hash table. */ static inline unsigned int @@ -221,14 +223,8 @@ dump_microdescriptor(int fd, microdesc_t *md, size_t *annotation_len_out) } md->off = tor_fd_getpos(fd); - const char *nulpos = memchr(md->body, 0, md->bodylen); - if (BUG(nulpos)) { - log_warn(LD_BUG, "About to dump a NUL into a microdescriptor file. " - "offset %"PRId64", bodylen %"TOR_PRIuSZ", " - "nul position %"TOR_PRIuSZ".", - (int64_t)md->off, md->bodylen, - (size_t)(nulpos - md->body)); - } + warn_if_nul_found(md->body, md->bodylen, (int64_t) md->off, + "dumping a microdescriptor"); written = write_all_to_fd(fd, md->body, md->bodylen); if (written != (ssize_t)md->bodylen) { written = written < 0 ? 0 : written; @@ -489,13 +485,23 @@ microdesc_cache_clear(microdesc_cache_t *cache) } static void -warn_if_nul_found(const char *inp, size_t len, const char *description) +warn_if_nul_found(const char *inp, size_t len, int64_t offset, + const char *activity) { const char *nul_found = memchr(inp, 0, len); if (BUG(nul_found)) { - log_warn(LD_BUG, "Found unexpected NUL while reading %s, at " - "position %"TOR_PRIuSZ"/%"TOR_PRIuSZ".", - description, (nul_found - inp), len); + log_warn(LD_BUG, "Found unexpected NUL while %s, offset %"PRId64 + "at position %"TOR_PRIuSZ"/%"TOR_PRIuSZ".", + activity, offset, (nul_found - inp), len); + const char *start_excerpt_at, *eos = inp + len; + if ((nul_found - inp) >= 16) + start_excerpt_at = nul_found - 16; + else + start_excerpt_at = inp; + size_t excerpt_len = MIN(32, eos - start_excerpt_at); + char tmp[65]; + base16_encode(tmp, sizeof(tmp), start_excerpt_at, excerpt_len); + log_warn(LD_BUG, " surrounding string: %s", tmp); } } @@ -516,7 +522,7 @@ microdesc_cache_reload(microdesc_cache_t *cache) mm = cache->cache_content = tor_mmap_file(cache->cache_fname); if (mm) { - warn_if_nul_found(mm->data, mm->size, "microdesc cache"); + warn_if_nul_found(mm->data, mm->size, 0, "scanning microdesc cache"); added = microdescs_add_to_cache(cache, mm->data, mm->data+mm->size, SAVED_IN_CACHE, 0, -1, NULL); if (added) { @@ -529,8 +535,8 @@ microdesc_cache_reload(microdesc_cache_t *cache) RFTS_IGNORE_MISSING, &st); if (journal_content) { cache->journal_len = (size_t) st.st_size; - warn_if_nul_found(journal_content, cache->journal_len, - "microdesc journal"); + warn_if_nul_found(journal_content, cache->journal_len, 0, + "reading microdesc journal"); added = microdescs_add_to_cache(cache, journal_content, journal_content+st.st_size, SAVED_IN_JOURNAL, 0, -1, NULL); -- cgit v1.2.3-54-g00ecf