diff options
-rw-r--r-- | changes/bug10169 | 4 |
-rw-r--r-- | changes/bug9686 | 3 |
-rw-r--r-- | doc/tor.1.txt | 10 |
-rw-r--r-- | src/or/buffers.c | 59 |
-rw-r--r-- | src/or/buffers.h | 5 |
-rw-r--r-- | src/or/circuitlist.c | 151 |
-rw-r--r-- | src/or/config.c | 9 |
-rw-r--r-- | src/or/or.h | 8 |
-rw-r--r-- | src/or/relay.c | 3 |
9 files changed, 207 insertions, 45 deletions
diff --git a/changes/bug10169 b/changes/bug10169 new file mode 100644 index 0000000000..979c4e4288 --- /dev/null +++ b/changes/bug10169 @@ -0,0 +1,4 @@ + o Major features: + - Also consider stream buffer sizes when calculating OOM + conditions. Rename MaxMemInCellQueues to MaxMemInQueues. Fixes + bug 10169. diff --git a/changes/bug9686 b/changes/bug9686 new file mode 100644 index 0000000000..82a8f44471 --- /dev/null +++ b/changes/bug9686 @@ -0,0 +1,3 @@ + o Minor changes: + - Decrease the lower limit of MaxMemInQueues to 256 MBytes, to + appease raspberry pi users. Fixes bug 9686.
\ No newline at end of file diff --git a/doc/tor.1.txt b/doc/tor.1.txt index b571b26cbb..7f0465a7ef 100644 --- a/doc/tor.1.txt +++ b/doc/tor.1.txt @@ -1727,13 +1727,13 @@ is non-zero): localhost, RFC1918 addresses, and so on. This can create security issues; you should probably leave it off. (Default: 0) -[[MaxMemInCellQueues]] **MaxMemInCellQueues** __N__ **bytes**|**KB**|**MB**|**GB**:: +[[MaxMemInQueues]] **MaxMemInQueues** __N__ **bytes**|**KB**|**MB**|**GB**:: This option configures a threshold above which Tor will assume that it - needs to stop queueing cells because it's about to run out of memory. - If it hits this threshold, it will begin killing circuits until it - has recovered at least 10% of this memory. Do not set this option too + needs to stop queueing or buffering data because it's about to run out of + memory. If it hits this threshold, it will begin killing circuits until + it has recovered at least 10% of this memory. Do not set this option too low, or your relay may be unreliable under load. This option only - affects circuit queues, so the actual process size will be larger than + affects some queues, so the actual process size will be larger than this. (Default: 8GB) DIRECTORY SERVER OPTIONS diff --git a/src/or/buffers.c b/src/or/buffers.c index 50016d3a86..50b1d9b45a 100644 --- a/src/or/buffers.c +++ b/src/or/buffers.c @@ -70,6 +70,8 @@ typedef struct chunk_t { size_t datalen; /**< The number of bytes stored in this chunk */ size_t memlen; /**< The number of usable bytes of storage in <b>mem</b>. */ char *data; /**< A pointer to the first byte of data stored in <b>mem</b>. */ + uint32_t inserted_time; /**< Timestamp in truncated ms since epoch + * when this chunk was inserted. */ char mem[FLEXIBLE_ARRAY_MEMBER]; /**< The actual memory used for storage in * this chunk. */ } chunk_t; @@ -141,6 +143,9 @@ static chunk_freelist_t freelists[] = { * could help with? 
*/ static uint64_t n_freelist_miss = 0; +/** DOCDOC */ +static size_t total_bytes_allocated_in_chunks = 0; + static void assert_freelist_ok(chunk_freelist_t *fl); /** Return the freelist to hold chunks of size <b>alloc</b>, or NULL if @@ -174,6 +179,8 @@ chunk_free_unchecked(chunk_t *chunk) } else { if (freelist) ++freelist->n_free; + tor_assert(total_bytes_allocated_in_chunks >= alloc); + total_bytes_allocated_in_chunks -= alloc; tor_free(chunk); } } @@ -200,6 +207,7 @@ chunk_new_with_alloc_size(size_t alloc) else ++n_freelist_miss; ch = tor_malloc(alloc); + total_bytes_allocated_in_chunks += alloc; } ch->next = NULL; ch->datalen = 0; @@ -211,6 +219,10 @@ chunk_new_with_alloc_size(size_t alloc) static void chunk_free_unchecked(chunk_t *chunk) { + if (!chunk) + return; + tor_assert(total_bytes_allocated_in_chunks >= CHUNK_ALLOC_SIZE(chunk->memlen)); + total_bytes_allocated_in_chunks -= CHUNK_ALLOC_SIZE(chunk->memlen); tor_free(chunk); } static INLINE chunk_t * @@ -221,6 +233,7 @@ chunk_new_with_alloc_size(size_t alloc) ch->next = NULL; ch->datalen = 0; ch->memlen = CHUNK_SIZE_WITH_ALLOC(alloc); + total_bytes_allocated_in_chunks += alloc; ch->data = &ch->mem[0]; return ch; } @@ -232,11 +245,13 @@ static INLINE chunk_t * chunk_grow(chunk_t *chunk, size_t sz) { off_t offset; + size_t memlen_orig = chunk->memlen; tor_assert(sz > chunk->memlen); offset = chunk->data - chunk->mem; chunk = tor_realloc(chunk, CHUNK_ALLOC_SIZE(sz)); chunk->memlen = sz; chunk->data = chunk->mem + offset; + total_bytes_allocated_in_chunks += CHUNK_ALLOC_SIZE(sz) - CHUNK_ALLOC_SIZE(memlen_orig); return chunk; } @@ -261,12 +276,14 @@ preferred_chunk_size(size_t target) } /** Remove from the freelists most chunks that have not been used since the - * last call to buf_shrink_freelists(). */ -void + * last call to buf_shrink_freelists(). Return the amount of memory + * freed. 
*/ +size_t buf_shrink_freelists(int free_all) { #ifdef ENABLE_BUF_FREELISTS int i; + size_t total_freed = 0; disable_control_logging(); for (i = 0; freelists[i].alloc_size; ++i) { int slack = freelists[i].slack; @@ -298,6 +315,9 @@ buf_shrink_freelists(int free_all) *chp = NULL; while (chunk) { chunk_t *next = chunk->next; + tor_assert(total_bytes_allocated_in_chunks >= CHUNK_ALLOC_SIZE(chunk->memlen)); + total_bytes_allocated_in_chunks -= CHUNK_ALLOC_SIZE(chunk->memlen); + total_freed += CHUNK_ALLOC_SIZE(chunk->memlen); tor_free(chunk); chunk = next; --n_to_free; @@ -315,18 +335,21 @@ buf_shrink_freelists(int free_all) } // tor_assert(!n_to_free); freelists[i].cur_length = new_length; + tor_assert(orig_n_to_skip == new_length); log_info(LD_MM, "Cleaned freelist for %d-byte chunks: original " - "length %d, kept %d, dropped %d.", + "length %d, kept %d, dropped %d. New length is %d", (int)freelists[i].alloc_size, orig_length, - orig_n_to_skip, orig_n_to_free); + orig_n_to_skip, orig_n_to_free, new_length); } freelists[i].lowest_length = freelists[i].cur_length; assert_freelist_ok(&freelists[i]); } done: enable_control_logging(); + return total_freed; #else (void) free_all; + return 0; #endif } @@ -531,7 +554,7 @@ buf_allocation(const buf_t *buf) size_t total = 0; const chunk_t *chunk; for (chunk = buf->head; chunk; chunk = chunk->next) { - total += chunk->memlen; + total += CHUNK_ALLOC_SIZE(chunk->memlen); } return total; } @@ -564,6 +587,7 @@ static chunk_t * chunk_copy(const chunk_t *in_chunk) { chunk_t *newch = tor_memdup(in_chunk, CHUNK_ALLOC_SIZE(in_chunk->memlen)); + total_bytes_allocated_in_chunks += CHUNK_ALLOC_SIZE(in_chunk->memlen); newch->next = NULL; if (in_chunk->data) { off_t offset = in_chunk->data - in_chunk->mem; @@ -599,6 +623,7 @@ static chunk_t * buf_add_chunk_with_capacity(buf_t *buf, size_t capacity, int capped) { chunk_t *chunk; + struct timeval now; if (CHUNK_ALLOC_SIZE(capacity) < buf->default_chunk_size) { chunk = 
chunk_new_with_alloc_size(buf->default_chunk_size); } else if (capped && CHUNK_ALLOC_SIZE(capacity) > MAX_CHUNK_ALLOC) { @@ -606,6 +631,10 @@ buf_add_chunk_with_capacity(buf_t *buf, size_t capacity, int capped) } else { chunk = chunk_new_with_alloc_size(preferred_chunk_size(capacity)); } + + tor_gettimeofday_cached(&now); + chunk->inserted_time = (uint32_t)tv_to_msec(&now); + if (buf->tail) { tor_assert(buf->head); buf->tail->next = chunk; @@ -618,6 +647,26 @@ buf_add_chunk_with_capacity(buf_t *buf, size_t capacity, int capped) return chunk; } +/** Return the age of the oldest chunk in the buffer <b>buf</b>, in + * milliseconds. Requires the current time, in truncated milliseconds since + * the epoch, as its input <b>now</b>. + */ +uint32_t +buf_get_oldest_chunk_timestamp(const buf_t *buf, uint32_t now) +{ + if (buf->head) { + return now - buf->head->inserted_time; + } else { + return 0; + } +} + +size_t +buf_get_total_allocation(void) +{ + return total_bytes_allocated_in_chunks; +} + /** Read up to <b>at_most</b> bytes from the socket <b>fd</b> into * <b>chunk</b> (which must be on <b>buf</b>). If we get an EOF, set * *<b>reached_eof</b> to 1. 
Return -1 on error, 0 on eof or blocking, diff --git a/src/or/buffers.h b/src/or/buffers.h index 48b1185204..70539bf55d 100644 --- a/src/or/buffers.h +++ b/src/or/buffers.h @@ -20,13 +20,16 @@ void buf_free(buf_t *buf); void buf_clear(buf_t *buf); buf_t *buf_copy(const buf_t *buf); void buf_shrink(buf_t *buf); -void buf_shrink_freelists(int free_all); +size_t buf_shrink_freelists(int free_all); void buf_dump_freelist_sizes(int severity); size_t buf_datalen(const buf_t *buf); size_t buf_allocation(const buf_t *buf); size_t buf_slack(const buf_t *buf); +uint32_t buf_get_oldest_chunk_timestamp(const buf_t *buf, uint32_t now); +size_t buf_get_total_allocation(void); + int read_to_buf(tor_socket_t s, size_t at_most, buf_t *buf, int *reached_eof, int *socket_error); int read_to_buf_tls(tor_tls_t *tls, size_t at_most, buf_t *buf); diff --git a/src/or/circuitlist.c b/src/or/circuitlist.c index 9474896367..eb652301ed 100644 --- a/src/or/circuitlist.c +++ b/src/or/circuitlist.c @@ -1612,6 +1612,38 @@ marked_circuit_free_cells(circuit_t *circ) cell_queue_clear(& TO_OR_CIRCUIT(circ)->p_chan_cells); } +/** Aggressively free buffer contents on all the buffers of all streams in the + * list starting at <b>stream</b>. Return the number of bytes recovered. */ +static size_t +marked_circuit_streams_free_bytes(edge_connection_t *stream) +{ + size_t result = 0; + for ( ; stream; stream = stream->next_stream) { + connection_t *conn = TO_CONN(stream); + if (conn->inbuf) { + result += buf_allocation(conn->inbuf); + buf_clear(conn->inbuf); + } + if (conn->outbuf) { + result += buf_allocation(conn->outbuf); + buf_clear(conn->outbuf); + } + } + return result; +} + +/** Aggressively free buffer contents on all the buffers of all streams on + * circuit <b>c</b>. Return the number of bytes recovered. 
*/ +static size_t +marked_circuit_free_stream_bytes(circuit_t *c) +{ + if (CIRCUIT_IS_ORIGIN(c)) { + return marked_circuit_streams_free_bytes(TO_ORIGIN_CIRCUIT(c)->p_streams); + } else { + return marked_circuit_streams_free_bytes(TO_OR_CIRCUIT(c)->n_streams); + } +} + /** Return the number of cells used by the circuit <b>c</b>'s cell queues. */ STATIC size_t n_cells_in_circ_queues(const circuit_t *c) @@ -1652,20 +1684,68 @@ circuit_max_queued_cell_age(const circuit_t *c, uint32_t now) return age; } -/** Temporary variable for circuits_compare_by_oldest_queued_cell_ This is a - * kludge to work around the fact that qsort doesn't provide a way for - * comparison functions to take an extra argument. */ -static uint32_t circcomp_now_tmp; +/** Return the age in milliseconds of the oldest buffer chunk on any stream in + * the linked list <b>stream</b>, where age is taken in milliseconds before + * the time <b>now</b> (in truncated milliseconds since the epoch). */ +static uint32_t +circuit_get_streams_max_data_age(const edge_connection_t *stream, uint32_t now) +{ + uint32_t age = 0, age2; + for (; stream; stream = stream->next_stream) { + const connection_t *conn = TO_CONN(stream); + if (conn->outbuf) { + age2 = buf_get_oldest_chunk_timestamp(conn->outbuf, now); + if (age2 > age) + age = age2; + } + if (conn->inbuf) { + age2 = buf_get_oldest_chunk_timestamp(conn->inbuf, now); + if (age2 > age) + age = age2; + } + } -/** Helper to sort a list of circuit_t by age of oldest cell, in descending - * order. Requires that circcomp_now_tmp is set correctly. */ + return age; +} + +/** Return the age in milliseconds of the oldest buffer chunk on any stream + * attached to the circuit <b>c</b>, where age is taken in milliseconds before + * the time <b>now</b> (in truncated milliseconds since the epoch). 
*/ +static uint32_t +circuit_max_queued_data_age(const circuit_t *c, uint32_t now) +{ + if (CIRCUIT_IS_ORIGIN(c)) { + return circuit_get_streams_max_data_age( + TO_ORIGIN_CIRCUIT((circuit_t*)c)->p_streams, now); + } else { + return circuit_get_streams_max_data_age( + TO_OR_CIRCUIT((circuit_t*)c)->n_streams, now); + } +} + +/** Return the age of the oldest cell or stream buffer chunk on the circuit + * <b>c</b>, where age is taken in milliseconds before the time <b>now</b> (in + * truncated milliseconds since the epoch). */ +static uint32_t +circuit_max_queued_item_age(const circuit_t *c, uint32_t now) +{ + uint32_t cell_age = circuit_max_queued_cell_age(c, now); + uint32_t data_age = circuit_max_queued_data_age(c, now); + if (cell_age > data_age) + return cell_age; + else + return data_age; +} + +/** Helper to sort a list of circuit_t by age of oldest item, in descending + * order. */ static int -circuits_compare_by_oldest_queued_cell_(const void **a_, const void **b_) +circuits_compare_by_oldest_queued_item_(const void **a_, const void **b_) { const circuit_t *a = *a_; const circuit_t *b = *b_; - uint32_t age_a = circuit_max_queued_cell_age(a, circcomp_now_tmp); - uint32_t age_b = circuit_max_queued_cell_age(b, circcomp_now_tmp); + uint32_t age_a = a->age_tmp; + uint32_t age_b = b->age_tmp; if (age_a < age_b) return 1; @@ -1675,66 +1755,85 @@ circuits_compare_by_oldest_queued_cell_(const void **a_, const void **b_) return -1; } -#define FRACTION_OF_CELLS_TO_RETAIN_ON_OOM 0.90 +#define FRACTION_OF_DATA_TO_RETAIN_ON_OOM 0.90 /** We're out of memory for cells, having allocated <b>current_allocation</b> * bytes' worth. Kill the 'worst' circuits until we're under - * FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM of our maximum usage. */ + * FRACTION_OF_DATA_TO_RETAIN_ON_OOM of our maximum usage. */ void circuits_handle_oom(size_t current_allocation) { /* Let's hope there's enough slack space for this allocation here... 
*/ smartlist_t *circlist = smartlist_new(); circuit_t *circ; - size_t n_cells_removed=0, n_cells_to_remove; + size_t mem_to_recover; + size_t mem_recovered=0; int n_circuits_killed=0; struct timeval now; + uint32_t now_ms; log_notice(LD_GENERAL, "We're low on memory. Killing circuits with " "over-long queues. (This behavior is controlled by " - "MaxMemInCellQueues.)"); + "MaxMemInQueues.)"); + + { + const size_t recovered = buf_shrink_freelists(1); + if (recovered >= current_allocation) { + log_warn(LD_BUG, "We somehow recovered more memory from freelists " + "than we thought we had allocated"); + current_allocation = 0; + } else { + current_allocation -= recovered; + } + } { - size_t mem_target = (size_t)(get_options()->MaxMemInCellQueues * - FRACTION_OF_CELLS_TO_RETAIN_ON_OOM); - size_t mem_to_recover; + size_t mem_target = (size_t)(get_options()->MaxMemInQueues * + FRACTION_OF_DATA_TO_RETAIN_ON_OOM); if (current_allocation <= mem_target) return; mem_to_recover = current_allocation - mem_target; - n_cells_to_remove = CEIL_DIV(mem_to_recover, packed_cell_mem_cost()); } + tor_gettimeofday_cached(&now); + now_ms = (uint32_t)tv_to_msec(&now); + /* This algorithm itself assumes that you've got enough memory slack * to actually run it. */ - TOR_LIST_FOREACH(circ, &global_circuitlist, head) + TOR_LIST_FOREACH(circ, &global_circuitlist, head) { + circ->age_tmp = circuit_max_queued_item_age(circ, now_ms); smartlist_add(circlist, circ); - - /* Set circcomp_now_tmp so that the sort can work. */ - tor_gettimeofday_cached(&now); - circcomp_now_tmp = (uint32_t)tv_to_msec(&now); + } /* This is O(n log n); there are faster algorithms we could use instead. * Let's hope this doesn't happen enough to be in the critical path. */ - smartlist_sort(circlist, circuits_compare_by_oldest_queued_cell_); + smartlist_sort(circlist, circuits_compare_by_oldest_queued_item_); /* Okay, now the worst circuits are at the front of the list. Let's mark * them, and reclaim their storage aggressively. 
*/ SMARTLIST_FOREACH_BEGIN(circlist, circuit_t *, circ) { size_t n = n_cells_in_circ_queues(circ); + size_t freed; if (! circ->marked_for_close) { circuit_mark_for_close(circ, END_CIRC_REASON_RESOURCELIMIT); } marked_circuit_free_cells(circ); + freed = marked_circuit_free_stream_bytes(circ); ++n_circuits_killed; - n_cells_removed += n; - if (n_cells_removed >= n_cells_to_remove) + + mem_recovered += n * packed_cell_mem_cost(); + mem_recovered += freed; + + if (mem_recovered >= mem_to_recover) break; } SMARTLIST_FOREACH_END(circ); clean_cell_pool(); /* In case this helps. */ + buf_shrink_freelists(1); /* This is necessary to actually release buffer + chunks. */ log_notice(LD_GENERAL, "Removed "U64_FORMAT" bytes by killing %d circuits.", - U64_PRINTF_ARG(n_cells_removed * packed_cell_mem_cost()), + U64_PRINTF_ARG(mem_recovered), n_circuits_killed); smartlist_free(circlist); diff --git a/src/or/config.c b/src/or/config.c index d2981771c0..e5694e46eb 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -85,6 +85,7 @@ static config_abbrev_t option_abbrevs_[] = { { "DirFetchPostPeriod", "StatusFetchPeriod", 0, 0}, { "DirServer", "DirAuthority", 0, 0}, /* XXXX024 later, make this warn? 
*/ { "MaxConn", "ConnLimit", 0, 1}, + { "MaxMemInCellQueues", "MaxMemInQueues", 0, 0}, { "ORBindAddress", "ORListenAddress", 0, 0}, { "DirBindAddress", "DirListenAddress", 0, 0}, { "SocksBindAddress", "SocksListenAddress", 0, 0}, @@ -306,7 +307,7 @@ static config_var_t option_vars_[] = { V(MaxAdvertisedBandwidth, MEMUNIT, "1 GB"), V(MaxCircuitDirtiness, INTERVAL, "10 minutes"), V(MaxClientCircuitsPending, UINT, "32"), - V(MaxMemInCellQueues, MEMUNIT, "8 GB"), + V(MaxMemInQueues, MEMUNIT, "8 GB"), OBSOLETE("MaxOnionsPending"), V(MaxOnionQueueDelay, MSEC_INTERVAL, "1750 msec"), V(MinMeasuredBWsForAuthToIgnoreAdvertised, INT, "500"), @@ -2754,10 +2755,10 @@ options_validate(or_options_t *old_options, or_options_t *options, REJECT("If EntryNodes is set, UseEntryGuards must be enabled."); } - if (options->MaxMemInCellQueues < (500 << 20)) { - log_warn(LD_CONFIG, "MaxMemInCellQueues must be at least 500 MB for now. " + if (options->MaxMemInQueues < (256 << 20)) { + log_warn(LD_CONFIG, "MaxMemInQueues must be at least 256 MB for now. " "Ideally, have it as large as you can afford."); - options->MaxMemInCellQueues = (500 << 20); + options->MaxMemInQueues = (256 << 20); } options->AllowInvalid_ = 0; diff --git a/src/or/or.h b/src/or/or.h index cc4e5ed9d8..4727c6ba04 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2825,6 +2825,9 @@ typedef struct circuit_t { * more. */ int deliver_window; + /** Temporary field used during circuits_handle_oom. */ + uint32_t age_tmp; + /** For storage while n_chan is pending (state CIRCUIT_STATE_CHAN_WAIT). */ struct create_cell_t *n_chan_create_cell; @@ -3478,9 +3481,8 @@ typedef struct { config_line_t *DirPort_lines; config_line_t *DNSPort_lines; /**< Ports to listen on for DNS requests. 
*/ - uint64_t MaxMemInCellQueues; /**< If we have more memory than this allocated - * for circuit cell queues, run the OOM handler - */ + uint64_t MaxMemInQueues; /**< If we have more memory than this allocated + * for queues and buffers, run the OOM handler */ /** @name port booleans * diff --git a/src/or/relay.c b/src/or/relay.c index 94016b49f9..041a9e8b5c 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -2205,7 +2205,8 @@ static int cell_queues_check_size(void) { size_t alloc = total_cells_allocated * packed_cell_mem_cost(); - if (alloc >= get_options()->MaxMemInCellQueues) { + alloc += buf_get_total_allocation(); + if (alloc >= get_options()->MaxMemInQueues) { circuits_handle_oom(alloc); return 1; } |