diff options
author | Nick Mathewson <nickm@torproject.org> | 2009-08-26 11:36:40 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2009-08-26 11:36:40 -0400 |
commit | 1d9b8a1e16b3007999a29879b01c63c7836e2e12 (patch) | |
tree | 6c901960facca0df950d04243f69bffe018da825 | |
parent | c9203749a2ead39433fa4f62452a2360e0901e42 (diff) | |
parent | 8c29b7920ae18a46ce0527806507275783d1ae42 (diff) | |
download | tor-1d9b8a1e16b3007999a29879b01c63c7836e2e12.tar.gz tor-1d9b8a1e16b3007999a29879b01c63c7836e2e12.zip |
Merge commit 'karsten/proposal-166-impl-master'
-rw-r--r-- | ChangeLog | 35 | ||||
-rw-r--r-- | configure.in | 28 | ||||
-rw-r--r-- | doc/spec/dir-spec.txt | 194 | ||||
-rw-r--r-- | doc/spec/proposals/000-index.txt | 4 | ||||
-rw-r--r-- | doc/spec/proposals/166-statistics-extra-info-docs.txt | 16 | ||||
-rw-r--r-- | doc/tor.1.in | 30 | ||||
-rw-r--r-- | src/or/circuitlist.c | 4 | ||||
-rw-r--r-- | src/or/config.c | 67 | ||||
-rw-r--r-- | src/or/connection.c | 8 | ||||
-rw-r--r-- | src/or/connection_edge.c | 9 | ||||
-rw-r--r-- | src/or/directory.c | 4 | ||||
-rw-r--r-- | src/or/geoip.c | 253 | ||||
-rw-r--r-- | src/or/main.c | 47 | ||||
-rw-r--r-- | src/or/or.h | 77 | ||||
-rw-r--r-- | src/or/relay.c | 94 | ||||
-rw-r--r-- | src/or/rephist.c | 274 | ||||
-rw-r--r-- | src/or/router.c | 147 | ||||
-rw-r--r-- | src/or/routerparse.c | 50 | ||||
-rw-r--r-- | src/or/test.c | 6 |
19 files changed, 903 insertions, 444 deletions
@@ -7,23 +7,24 @@ Changes in version 0.2.2.1-alpha - 2009-0?-?? Code by Christopher Davis. o New options for gathering stats safely: - - Directories that configure with --enable-dirreq-stats and set - "DirReqStatistics 1" write directory request stats to disk every - 24 hours. As compared to the --enable-geoip-stats flag in 0.2.1.x, - there are a few improvements: 1) stats are written to disk exactly - every 24 hours; 2) estimated shares of v2 and v3 requests are - determined as mean values, not at the end of a measurement period; - 3) unresolved requests are listed with country code '??'; - 4) directories also measure download times. - - Exit nodes that configure with --enable-exit-stats and set - "ExitPortStatistics 1" write statistics on the number of exit - streams and transferred bytes per port to disk every 24 hours. - - Relays that configure with --enable-buffer-stats and set - "CellStatistics 1" write statistics to disk every 24 hours on how - long cells spend in their circuit queues. - - Entry nodes that configure with --enable-entry-stats and set - "EntryStatistics 1" write statistics to disk every 24 hours on - the rough number and origins of connecting clients. + - Directories that set "DirReqStatistics 1" write statistics on + directory request to disk every 24 hours. As compared to the + --enable-geoip-stats flag in 0.2.1.x, there are a few improvements: + 1) stats are written to disk exactly every 24 hours; 2) estimated + shares of v2 and v3 requests are determined as mean values, not at + the end of a measurement period; 3) unresolved requests are listed + with country code '??'; 4) directories also measure download times. + - Exit nodes that set "ExitPortStatistics 1" write statistics on the + number of exit streams and transferred bytes per port to disk every + 24 hours. + - Relays that set "CellStatistics 1" write statistics on how long + cells spend in their circuit queues to disk every 24 hours. 
+ - Entry nodes that set "EntryStatistics 1" write statistics on the + rough number and origins of connecting clients to disk every 24 + hours. + - Relays that write any of the above statistics to disk and set + "ExtraInfoStatistics 1" include the past 24 hours of statistics in + their extra-info documents. o Minor features: - New --digests command-line switch to output the digests of the diff --git a/configure.in b/configure.in index c21c130a05..1ac2cdb721 100644 --- a/configure.in +++ b/configure.in @@ -85,34 +85,6 @@ case $host in ;; esac -AC_ARG_ENABLE(exit-stats, - AS_HELP_STRING(--enable-exit-stats, enable code for exits to collect per-port statistics)) - -if test "$enable_exit_stats" = "yes"; then - AC_DEFINE(ENABLE_EXIT_STATS, 1, [Defined if we try to collect per-port statistics on exits]) -fi - -AC_ARG_ENABLE(dirreq-stats, - AS_HELP_STRING(--enable-dirreq-stats, enable code for directories to collect per-country statistics)) - -if test "$enable_dirreq_stats" = "yes"; then - AC_DEFINE(ENABLE_DIRREQ_STATS, 1, [Defined if we try to collect per-country statistics]) -fi - -AC_ARG_ENABLE(buffer-stats, - AS_HELP_STRING(--enable-buffer-stats, enable code for relays to collect buffer statistics)) - -if test "$enable_buffer_stats" = "yes"; then - AC_DEFINE(ENABLE_BUFFER_STATS, 1, [Defined if we try to collect buffer statistics]) -fi - -AC_ARG_ENABLE(entry-stats, - AS_HELP_STRING(--enable-entry-stats, enable code for entry guards to collect per-country statistics)) - -if test "$enable_entry_stats" = "yes"; then - AC_DEFINE(ENABLE_ENTRY_STATS, 1, [Defined if we try to collect per-country statistics]) -fi - AC_ARG_ENABLE(gcc-warnings, AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings)) diff --git a/doc/spec/dir-spec.txt b/doc/spec/dir-spec.txt index 4bec7b00d0..483a33b73c 100644 --- a/doc/spec/dir-spec.txt +++ b/doc/spec/dir-spec.txt @@ -641,6 +641,200 @@ "geoip-start" is the time at which we began collecting geoip statistics. 
+ "dirreq-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL + [At most once.] + + YYYY-MM-DD HH:MM:SS defines the end of the included measurement + interval of length NSEC seconds (86400 seconds by default). + + A "dirreq-stats-end" line, as well as any other "dirreq-*" line, + is only added when the relay has opened its Dir port and after 24 + hours of measuring directory requests. + + "dirreq-v2-ips" CC=N,CC=N,... NL + [At most once.] + "dirreq-v3-ips" CC=N,CC=N,... NL + [At most once.] + + List of mappings from two-letter country codes to the number of + unique IP addresses that have connected from that country to + request a v2/v3 network status, rounded up to the nearest multiple + of 8. Only those IP addresses are counted that the directory can + answer with a 200 OK status code. + + "dirreq-v2-reqs" CC=N,CC=N,... NL + [At most once.] + "dirreq-v3-reqs" CC=N,CC=N,... NL + [At most once.] + + List of mappings from two-letter country codes to the number of + requests for v2/v3 network statuses from that country, rounded up + to the nearest multiple of 8. Only those requests are counted that + the directory can answer with a 200 OK status code. + + "dirreq-v2-share" num% NL + [At most once.] + "dirreq-v3-share" num% NL + [At most once.] + + The share of v2/v3 network status requests that the directory + expects to receive from clients based on its advertised bandwidth + compared to the overall network bandwidth capacity. Shares are + formatted in percent with two decimal places. Shares are + calculated as means over the whole 24-hour interval. + + "dirreq-v2-resp" status=num,... NL + [At most once.] + "dirreq-v3-resp" status=num,... NL + [At most once.] + + List of mappings from response statuses to the number of requests + for v2/v3 network statuses that were answered with that response + status, rounded up to the nearest multiple of 4. Only response + statuses with at least 1 response are reported. New response + statuses can be added at any time. 
The current list of response + statuses is as follows: + + "ok": a network status request is answered; this number + corresponds to the sum of all requests as reported in + "dirreq-v2-reqs" or "dirreq-v3-reqs", respectively, before + rounding up. + "not-enough-sigs": a version 3 network status is not signed by a + sufficient number of requested authorities. + "unavailable": a requested network status object is unavailable. + "not-found": a requested network status is not found. + "not-modified": a network status has not been modified since the + If-Modified-Since time that is included in the request. + "busy": the directory is busy. + + "dirreq-v2-direct-dl" key=val,... NL + [At most once.] + "dirreq-v3-direct-dl" key=val,... NL + [At most once.] + "dirreq-v2-tunneled-dl" key=val,... NL + [At most once.] + "dirreq-v3-tunneled-dl" key=val,... NL + [At most once.] + + List of statistics about possible failures in the download process + of v2/v3 network statuses. Requests are either "direct" + HTTP-encoded requests over the relay's directory port, or + "tunneled" requests using a BEGIN_DIR cell over the relay's OR + port. The list of possible statistics can change, and statistics + can be left out from reporting. The current list of statistics is + as follows: + + Successful downloads and failures: + + "complete": a client has finished the download successfully. + "timeout": a download did not finish within 10 minutes after + starting to send the response. + "running": a download is still running at the end of the + measurement period for less than 10 minutes after starting to + send the response. + + Download times: + + "min", "max": smallest and largest measured bandwidth in B/s. + "d[1-4,6-9]": 1st to 4th and 6th to 9th decile of measured + bandwidth in B/s. For a given decile i, i/10 of all downloads + had a smaller bandwidth than di, and (10-i)/10 of all downloads + had a larger bandwidth than di. + "q[1,3]": 1st and 3rd quartile of measured bandwidth in B/s. 
One + fourth of all downloads had a smaller bandwidth than q1, one + fourth of all downloads had a larger bandwidth than q3, and the + remaining half of all downloads had a bandwidth between q1 and + q3. + "md": median of measured bandwidth in B/s. Half of the downloads + had a smaller bandwidth than md, the other half had a larger + bandwidth than md. + + "entry-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL + [At most once.] + + YYYY-MM-DD HH:MM:SS defines the end of the included measurement + interval of length NSEC seconds (86400 seconds by default). + + An "entry-stats-end" line, as well as any other "entry-*" + line, is first added after the relay has been running for at least + 24 hours. + + "entry-ips" CC=N,CC=N,... NL + [At most once.] + + List of mappings from two-letter country codes to the number of + unique IP addresses that have connected from that country to the + relay and which are not known to be other relays, rounded up to the + nearest multiple of 8. + + "cell-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL + [At most once.] + + YYYY-MM-DD HH:MM:SS defines the end of the included measurement + interval of length NSEC seconds (86400 seconds by default). + + A "cell-stats-end" line, as well as any other "cell-*" line, + is first added after the relay has been running for at least 24 + hours. + + "cell-processed-cells" num,...,num NL + [At most once.] + + Mean number of processed cells per circuit, subdivided into + deciles of circuits by the number of cells they have processed in + descending order from loudest to quietest circuits. + + "cell-queued-cells" num,...,num NL + [At most once.] + + Mean number of cells contained in queues by circuit decile. These + means are calculated by 1) determining the mean number of cells in + a single circuit between its creation and its termination and 2) + calculating the mean for all circuits in a given decile as + determined in "cell-processed-cells". Numbers have a precision of + two decimal places. 
+ + "cell-time-in-queue" num,...,num NL + [At most once.] + + Mean time cells spend in circuit queues in milliseconds. Times are + calculated by 1) determining the mean time cells spend in the + queue of a single circuit and 2) calculating the mean for all + circuits in a given decile as determined in + "cell-processed-cells". + + "cell-circuits-per-decile" num NL + [At most once.] + + Mean number of circuits that are included in any of the deciles, + rounded up to the next integer. + + "exit-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL + [At most once.] + + YYYY-MM-DD HH:MM:SS defines the end of the included measurement + interval of length NSEC seconds (86400 seconds by default). + + An "exit-stats-end" line, as well as any other "exit-*" line, is + first added after the relay has been running for at least 24 hours + and only if the relay permits exiting (where exiting to a single + port and IP address is sufficient). + + "exit-kibibytes-written" port=N,port=N,... NL + [At most once.] + "exit-kibibytes-read" port=N,port=N,... NL + [At most once.] + + List of mappings from ports to the number of kibibytes that the + relay has written to or read from exit connections to that port, + rounded up to the next full kibibyte. + + "exit-streams-opened" port=N,port=N,... NL + [At most once.] + + List of mappings from ports to the number of opened exit streams + to that port, rounded up to the nearest multiple of 4. + "router-signature" NL Signature NL [At end, exactly once.] 
diff --git a/doc/spec/proposals/000-index.txt b/doc/spec/proposals/000-index.txt index af1f40bf6d..d2d3ca5d72 100644 --- a/doc/spec/proposals/000-index.txt +++ b/doc/spec/proposals/000-index.txt @@ -86,7 +86,7 @@ Proposals by number: 163 Detecting whether a connection comes from a client [OPEN] 164 Reporting the status of server votes [OPEN] 165 Easy migration for voting authority sets [OPEN] -166 Including Network Statistics in Extra-Info Documents [OPEN] +166 Including Network Statistics in Extra-Info Documents [ACCEPTED] Proposals by status: @@ -114,7 +114,6 @@ Proposals by status: 163 Detecting whether a connection comes from a client [for 0.2.2] 164 Reporting the status of server votes [for 0.2.2] 165 Easy migration for voting authority sets - 166 Including Network Statistics in Extra-Info Documents [for 0.2.2] ACCEPTED: 110 Avoiding infinite length circuits [for 0.2.1.x] [in 0.2.1.3-alpha] 117 IPv6 exits [for 0.2.1.x] @@ -122,6 +121,7 @@ Proposals by status: 140 Provide diffs between consensuses [for 0.2.2.x] 147 Eliminate the need for v2 directories in generating v3 directories [for 0.2.1.x] 157 Make certificate downloads specific [for 0.2.1.x] + 166 Including Network Statistics in Extra-Info Documents [for 0.2.2] META: 000 Index of Tor Proposals 001 The Tor Proposal Process diff --git a/doc/spec/proposals/166-statistics-extra-info-docs.txt b/doc/spec/proposals/166-statistics-extra-info-docs.txt index 3716c049cc..ab2716a71c 100644 --- a/doc/spec/proposals/166-statistics-extra-info-docs.txt +++ b/doc/spec/proposals/166-statistics-extra-info-docs.txt @@ -3,7 +3,7 @@ Title: Including Network Statistics in Extra-Info Documents Author: Karsten Loesing Created: 21-Jul-2009 Target: 0.2.2 -Status: Open +Status: Accepted Change history: @@ -298,7 +298,7 @@ Exit statistics: The last type of statistics affects exit nodes counting the number of bytes written and read and the number of streams opened per port and - per 24 hours. 
Exit port statistics can be measured from looking of + per 24 hours. Exit port statistics can be measured from looking at headers of BEGIN and DATA cells. A BEGIN cell contains the exit port that is required for the exit node to open a new exit stream. Subsequent DATA cells coming from the client or being sent back to the @@ -361,7 +361,7 @@ Implementation notes: basically means renaming keywords. 2. The timing of writing the four *-stats files should be unified, so - that they are written exactly after 24 hours after starting the + that they are written exactly 24 hours after starting the relay. Right now, the measurement intervals for dirreq, entry, and exit stats starts with the first observed request, and files are written when observing the first request that occurs more than 24 @@ -373,14 +373,14 @@ Implementation notes: directory until they are included in extra-info documents. The reason is that the 24-hour measurement interval can be very different from the 18-hour publication interval of extra-info - documents. When a relay crashed after finishing a measurement + documents. When a relay crashes after finishing a measurement interval, but before publishing the next extra-info document, statistics would get lost. Therefore, statistics are written to disk when finishing a measurement interval and read from disk when - generating an extra-info document. As a result, the *-stats files - need to be overwritten after 24 hours, rather than appending new - statistics to them. Further, the contents of the *-stats files need - to be checked in the process of generating extra-info documents. + generating an extra-info document. Only the statistics that were + appended to the *-stats files within the past 24 hours are included + in extra-info documents. Further, the contents of the *-stats files + need to be checked in the process of generating extra-info documents. 4. 
With the statistics patches being tested, the ./configure options should be removed and the statistics code be compiled by default. diff --git a/doc/tor.1.in b/doc/tor.1.in index 9386b90a14..ba703079c8 100644 --- a/doc/tor.1.in +++ b/doc/tor.1.in @@ -1075,6 +1075,36 @@ behalf of clients. .TP \fBGeoIPFile \fR\fIfilename\fP A filename containing GeoIP data, for use with BridgeRecordUsageByCountry. +.LP +.TP +\fBCellStatistics \fR\fB0\fR|\fB1\fR\fP +When this option is enabled, Tor writes statistics on the mean time that +cells spend in circuit queues to disk every 24 hours. Cannot be changed +while Tor is running. (Default: 0) +.LP +.TP +\fBDirReqStatistics \fR\fB0\fR|\fB1\fR\fP +When this option is enabled, Tor writes statistics on the number and +response time of network status requests to disk every 24 hours. Cannot be +changed while Tor is running. (Default: 0) +.LP +.TP +\fBEntryStatistics \fR\fB0\fR|\fB1\fR\fP +When this option is enabled, Tor writes statistics on the number of +directly connecting clients to disk every 24 hours. Cannot be changed +while Tor is running. (Default: 0) +.LP +.TP +\fBExitPortStatistics \fR\fB0\fR|\fB1\fR\fP +When this option is enabled, Tor writes statistics on the number of +relayed bytes and opened streams per exit port to disk every 24 hours. +Cannot be changed while Tor is running. (Default: 0) +.LP +.TP +\fBExtraInfoStatistics \fR\fB0\fR|\fB1\fR\fP +When this option is enabled, Tor includes previously gathered statistics +in its extra-info documents that it uploads to the directory authorities. +(Default: 0) .SH DIRECTORY SERVER OPTIONS .PP diff --git a/src/or/circuitlist.c b/src/or/circuitlist.c index 5a20e7ebde..065559620c 100644 --- a/src/or/circuitlist.c +++ b/src/or/circuitlist.c @@ -447,11 +447,9 @@ circuit_free(circuit_t *circ) rend_data_free(ocirc->rend_data); } else { or_circuit_t *ocirc = TO_OR_CIRCUIT(circ); -#ifdef ENABLE_BUFFER_STATS /* Remember cell statistics for this circuit before deallocating. 
*/ if (get_options()->CellStatistics) - add_circ_to_buffer_stats(circ, time(NULL)); -#endif + rep_hist_buffer_stats_add_circ(circ, time(NULL)); mem = ocirc; memlen = sizeof(or_circuit_t); tor_assert(circ->magic == OR_CIRCUIT_MAGIC); diff --git a/src/or/config.c b/src/or/config.c index 7944aa1001..b7317c75d3 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -188,12 +188,10 @@ static config_var_t _option_vars[] = { V(DirPort, UINT, "0"), V(DirPortFrontPage, FILENAME, NULL), OBSOLETE("DirPostPeriod"), -#ifdef ENABLE_DIRREQ_STATS OBSOLETE("DirRecordUsageByCountry"), OBSOLETE("DirRecordUsageGranularity"), OBSOLETE("DirRecordUsageRetainIPs"), OBSOLETE("DirRecordUsageSaveInterval"), -#endif V(DirReqStatistics, BOOL, "0"), VAR("DirServer", LINELIST, DirServers, NULL), V(DNSPort, UINT, "0"), @@ -210,6 +208,7 @@ static config_var_t _option_vars[] = { V(ExitPolicy, LINELIST, NULL), V(ExitPolicyRejectPrivate, BOOL, "1"), V(ExitPortStatistics, BOOL, "0"), + V(ExtraInfoStatistics, BOOL, "0"), V(FallbackNetworkstatusFile, FILENAME, SHARE_DATADIR PATH_SEPARATOR "tor" PATH_SEPARATOR "fallback-consensus"), V(FascistFirewall, BOOL, "0"), @@ -1413,47 +1412,13 @@ options_act(or_options_t *old_options) tor_free(actual_fname); } - if (options->DirReqStatistics) { -#ifdef ENABLE_DIRREQ_STATS + if (options->DirReqStatistics && !geoip_is_loaded()) { /* Check if GeoIP database could be loaded. */ - if (!geoip_is_loaded()) { - log_warn(LD_CONFIG, "Configured to measure directory request " - "statistics, but no GeoIP database found!"); - return -1; - } - log_notice(LD_CONFIG, "Configured to count directory requests by " - "country and write aggregate statistics to disk. 
Check the " - "dirreq-stats file in your data directory that will first " - "be written in 24 hours from now."); -#else - log_warn(LD_CONFIG, "DirReqStatistics enabled, but Tor was built " - "without support for directory request statistics."); -#endif + log_warn(LD_CONFIG, "Configured to measure directory request " + "statistics, but no GeoIP database found!"); + return -1; } -#ifdef ENABLE_EXIT_STATS - if (options->ExitPortStatistics) - log_notice(LD_CONFIG, "Configured to measure exit port statistics. " - "Look for the exit-stats file that will first be written to " - "the data directory in 24 hours from now."); -#else - if (options->ExitPortStatistics) - log_warn(LD_CONFIG, "ExitPortStatistics enabled, but Tor was built " - "without port statistics support."); -#endif - -#ifdef ENABLE_BUFFER_STATS - if (options->CellStatistics) - log_notice(LD_CONFIG, "Configured to measure cell statistics. Look " - "for the buffer-stats file that will first be written to " - "the data directory in 24 hours from now."); -#else - if (options->CellStatistics) - log_warn(LD_CONFIG, "CellStatistics enabled, but Tor was built " - "without cell statistics support."); -#endif - -#ifdef ENABLE_ENTRY_STATS if (options->EntryStatistics) { if (should_record_bridge_info(options)) { /* Don't allow measuring statistics on entry guards when configured @@ -1466,17 +1431,9 @@ options_act(or_options_t *old_options) log_warn(LD_CONFIG, "Configured to measure entry node statistics, " "but no GeoIP database found!"); return -1; - } else - log_notice(LD_CONFIG, "Configured to measure entry node " - "statistics. Look for the entry-stats file that will " - "first be written to the data directory in 24 hours " - "from now."); + } } -#else - if (options->EntryStatistics) - log_warn(LD_CONFIG, "EntryStatistics enabled, but Tor was built " - "without entry node statistics support."); -#endif + /* Check if we need to parse and add the EntryNodes config option. 
*/ if (options->EntryNodes && (!old_options || @@ -3861,6 +3818,16 @@ options_transition_allowed(or_options_t *old, or_options_t *new_val, return -1; } + if (old->CellStatistics != new_val->CellStatistics || + old->DirReqStatistics != new_val->DirReqStatistics || + old->EntryStatistics != new_val->EntryStatistics || + old->ExitPortStatistics != new_val->ExitPortStatistics) { + *msg = tor_strdup("While Tor is running, changing either " + "CellStatistics, DirReqStatistics, EntryStatistics, " + "or ExitPortStatistics is not allowed."); + return -1; + } + return 0; } diff --git a/src/or/connection.c b/src/or/connection.c index fac56593b7..49c94c111e 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -2054,12 +2054,12 @@ connection_buckets_decrement(connection_t *conn, time_t now, if (num_read > 0) { if (conn->type == CONN_TYPE_EXIT) - rep_hist_note_exit_bytes_read(conn->port, num_read, now); + rep_hist_note_exit_bytes_read(conn->port, num_read); rep_hist_note_bytes_read(num_read, now); } if (num_written > 0) { if (conn->type == CONN_TYPE_EXIT) - rep_hist_note_exit_bytes_written(conn->port, num_written, now); + rep_hist_note_exit_bytes_written(conn->port, num_written); rep_hist_note_bytes_written(num_written, now); } @@ -2652,13 +2652,13 @@ connection_handle_write(connection_t *conn, int force) /* else open, or closing */ result = flush_buf_tls(or_conn->tls, conn->outbuf, max_to_write, &conn->outbuf_flushlen); -#ifdef ENABLE_DIRREQ_STATS + /* If we just flushed the last bytes, check if this tunneled dir * request is done. 
*/ if (buf_datalen(conn->outbuf) == 0 && conn->dirreq_id) geoip_change_dirreq_state(conn->dirreq_id, DIRREQ_TUNNELED, DIRREQ_OR_CONN_BUFFER_FLUSHED); -#endif + switch (result) { CASE_TOR_TLS_ERROR_ANY: case TOR_TLS_CLOSE: diff --git a/src/or/connection_edge.c b/src/or/connection_edge.c index 04fffd9f57..f25202725e 100644 --- a/src/or/connection_edge.c +++ b/src/or/connection_edge.c @@ -333,7 +333,7 @@ connection_edge_finished_connecting(edge_connection_t *edge_conn) escaped_safe_str(conn->address),conn->port, safe_str(fmt_addr(&conn->addr))); - rep_hist_note_exit_stream_opened(conn->port, approx_time()); + rep_hist_note_exit_stream_opened(conn->port); conn->state = EXIT_CONN_STATE_OPEN; connection_watch_events(conn, READ_EVENT); /* stop writing, keep reading */ @@ -2544,11 +2544,11 @@ connection_exit_begin_conn(cell_t *cell, circuit_t *circ) log_debug(LD_EXIT,"Creating new exit connection."); n_stream = edge_connection_new(CONN_TYPE_EXIT, AF_INET); -#ifdef ENABLE_DIRREQ_STATS + /* Remember the tunneled request ID in the new edge connection, so that * we can measure download times. */ TO_CONN(n_stream)->dirreq_id = circ->dirreq_id; -#endif + n_stream->_base.purpose = EXIT_PURPOSE_CONNECT; n_stream->stream_id = rh.stream_id; @@ -2785,11 +2785,10 @@ connection_exit_connect_dir(edge_connection_t *exitconn) dirconn->_base.purpose = DIR_PURPOSE_SERVER; dirconn->_base.state = DIR_CONN_STATE_SERVER_COMMAND_WAIT; -#ifdef ENABLE_DIRREQ_STATS /* Note that the new dir conn belongs to the same tunneled request as * the edge conn, so that we can measure download times. 
*/ TO_CONN(dirconn)->dirreq_id = TO_CONN(exitconn)->dirreq_id; -#endif + connection_link_connections(TO_CONN(dirconn), TO_CONN(exitconn)); if (connection_add(TO_CONN(exitconn))<0) { diff --git a/src/or/directory.c b/src/or/directory.c index 3a72b94327..d37da18e65 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -2573,7 +2573,6 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers, goto done; } -#ifdef ENABLE_DIRREQ_STATS { struct in_addr in; if (tor_inet_aton((TO_CONN(conn))->address, &in)) { @@ -2589,7 +2588,6 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers, DIRREQ_DIRECT); } } -#endif // note_request(request_type,dlen); (void) request_type; @@ -3221,7 +3219,6 @@ connection_dir_finished_flushing(dir_connection_t *conn) tor_assert(conn); tor_assert(conn->_base.type == CONN_TYPE_DIR); -#ifdef ENABLE_DIRREQ_STATS /* Note that we have finished writing the directory response. For direct * connections this means we're done, for tunneled connections its only * an intermediate step. */ @@ -3232,7 +3229,6 @@ connection_dir_finished_flushing(dir_connection_t *conn) geoip_change_dirreq_state(TO_CONN(conn)->global_identifier, DIRREQ_DIRECT, DIRREQ_FLUSHING_DIR_CONN_FINISHED); -#endif switch (conn->_base.state) { case DIR_CONN_STATE_CLIENT_SENDING: log_debug(LD_DIR,"client finished sending command."); diff --git a/src/or/geoip.c b/src/or/geoip.c index c61ac3c5fd..befc3d9e06 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -12,8 +12,6 @@ #include "ht.h" static void clear_geoip_db(void); -static void dump_geoip_stats(void); -static void dump_entry_stats(void); /** An entry from the GeoIP file: maps an IP range to a country. 
*/ typedef struct geoip_entry_t { @@ -347,7 +345,6 @@ geoip_determine_shares(time_t now) last_time_determined_shares = now; } -#ifdef ENABLE_DIRREQ_STATS /** Calculate which fraction of v2 and v3 directory requests aimed at caches * have been sent to us since the last call of this function up to time * <b>now</b>. Set *<b>v2_share_out</b> and *<b>v3_share_out</b> to the @@ -367,7 +364,23 @@ geoip_get_mean_shares(time_t now, double *v2_share_out, share_seconds = 0; return 0; } -#endif + +/* Rotate period of v2 and v3 network status requests. */ +static void +rotate_request_period(void) +{ + SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, { + memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1], + sizeof(uint32_t)*(REQUEST_HIST_LEN-1)); + memmove(&c->n_v3_ns_requests[0], &c->n_v3_ns_requests[1], + sizeof(uint32_t)*(REQUEST_HIST_LEN-1)); + c->n_v2_ns_requests[REQUEST_HIST_LEN-1] = 0; + c->n_v3_ns_requests[REQUEST_HIST_LEN-1] = 0; + }); + current_request_period_starts += REQUEST_HIST_PERIOD; + if (n_old_request_periods < REQUEST_HIST_LEN-1) + ++n_old_request_periods; +} /** Note that we've seen a client connect from the IP <b>addr</b> (host order) * at time <b>now</b>. Ignored by all but bridges and directories if @@ -379,55 +392,37 @@ geoip_note_client_seen(geoip_client_action_t action, or_options_t *options = get_options(); clientmap_entry_t lookup, *ent; if (action == GEOIP_CLIENT_CONNECT) { -#ifdef ENABLE_ENTRY_STATS - if (!options->EntryStatistics) + /* Only remember statistics as entry guard or as bridge. */ + if (!options->EntryStatistics || + (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))) return; -#else - if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry)) - return; -#endif /* Did we recently switch from bridge to relay or back? 
*/ if (client_history_starts > now) return; } else { -#ifndef ENABLE_DIRREQ_STATS - return; -#else if (options->BridgeRelay || options->BridgeAuthoritativeDir || !options->DirReqStatistics) return; -#endif } - /* Rotate the current request period. */ - while (current_request_period_starts + REQUEST_HIST_PERIOD < now) { - if (!geoip_countries) - geoip_countries = smartlist_create(); - if (!current_request_period_starts) { - current_request_period_starts = now; - break; + /* As a bridge that doesn't rotate request periods every 24 hours, + * possibly rotate now. */ + if (options->BridgeRelay) { + while (current_request_period_starts + REQUEST_HIST_PERIOD < now) { + if (!geoip_countries) + geoip_countries = smartlist_create(); + if (!current_request_period_starts) { + current_request_period_starts = now; + break; + } + /* Also discard all items in the client history that are too old. + * (This only works here because bridge and directory stats are + * independent. Otherwise, we'd only want to discard those items + * with action GEOIP_CLIENT_NETWORKSTATUS{_V2}.) */ + geoip_remove_old_clients(current_request_period_starts); + /* Now rotate request period */ + rotate_request_period(); } - /* Also discard all items in the client history that are too old. - * (This only works here because bridge and directory stats are - * independent. Otherwise, we'd only want to discard those items - * with action GEOIP_CLIENT_NETWORKSTATUS{_V2}.) */ - geoip_remove_old_clients(current_request_period_starts); - /* Before rotating, write the current stats to disk. 
*/ - dump_geoip_stats(); - if (get_options()->EntryStatistics) - dump_entry_stats(); - /* Now rotate request period */ - SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, { - memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1], - sizeof(uint32_t)*(REQUEST_HIST_LEN-1)); - memmove(&c->n_v3_ns_requests[0], &c->n_v3_ns_requests[1], - sizeof(uint32_t)*(REQUEST_HIST_LEN-1)); - c->n_v2_ns_requests[REQUEST_HIST_LEN-1] = 0; - c->n_v3_ns_requests[REQUEST_HIST_LEN-1] = 0; - }); - current_request_period_starts += REQUEST_HIST_PERIOD; - if (n_old_request_periods < REQUEST_HIST_LEN-1) - ++n_old_request_periods; } lookup.ipaddr = addr; @@ -495,7 +490,6 @@ geoip_remove_old_clients(time_t cutoff) client_history_starts = cutoff; } -#ifdef ENABLE_DIRREQ_STATS /** How many responses are we giving to clients requesting v2 network * statuses? */ static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM]; @@ -503,7 +497,6 @@ static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM]; /** How many responses are we giving to clients requesting v3 network * statuses? */ static uint32_t ns_v3_responses[GEOIP_NS_RESPONSE_NUM]; -#endif /** Note that we've rejected a client's request for a v2 or v3 network * status, encoded in <b>action</b> for reason <b>reason</b> at time @@ -512,7 +505,6 @@ void geoip_note_ns_response(geoip_client_action_t action, geoip_ns_response_t response) { -#ifdef ENABLE_DIRREQ_STATS static int arrays_initialized = 0; if (!get_options()->DirReqStatistics) return; @@ -528,10 +520,6 @@ geoip_note_ns_response(geoip_client_action_t action, ns_v3_responses[response]++; else ns_v2_responses[response]++; -#else - (void) action; - (void) response; -#endif } /** Do not mention any country from which fewer than this number of IPs have @@ -709,7 +697,6 @@ geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type, } } -#ifdef ENABLE_DIRREQ_STATS /** Return a newly allocated comma-separated string containing statistics * on network status downloads. 
The string contains the number of completed * requests, timeouts, and still running requests as well as the download @@ -811,25 +798,18 @@ geoip_get_dirreq_history(geoip_client_action_t action, smartlist_free(dirreq_completed); return result; } -#endif /** How long do we have to have observed per-country request history before we * are willing to talk about it? */ #define GEOIP_MIN_OBSERVATION_TIME (12*60*60) -/** Return a newly allocated comma-separated string containing entries for all - * the countries from which we've seen enough clients connect. The entry - * format is cc=num where num is the number of IPs we've seen connecting from - * that country, and cc is a lowercased country code. Returns NULL if we don't - * want to export geoip data yet. */ -char * -geoip_get_client_history(time_t now, geoip_client_action_t action) +/** Helper for geoip_get_client_history_dirreq() and + * geoip_get_client_history_bridge(). */ +static char * +geoip_get_client_history(time_t now, geoip_client_action_t action, + int min_observation_time, unsigned granularity) { char *result = NULL; - int min_observation_time = GEOIP_MIN_OBSERVATION_TIME; -#ifdef ENABLE_DIRREQ_STATS - min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME; -#endif if (!geoip_is_loaded()) return NULL; if (client_history_starts < (now - min_observation_time)) { @@ -841,10 +821,6 @@ geoip_get_client_history(time_t now, geoip_client_action_t action) clientmap_entry_t **ent; unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries); unsigned total = 0; - unsigned granularity = IP_GRANULARITY; -#ifdef ENABLE_DIRREQ_STATS - granularity = DIR_RECORD_USAGE_GRANULARITY; -#endif HT_FOREACH(ent, clientmap, &client_history) { int country; if ((*ent)->action != (int)action) @@ -900,6 +876,34 @@ geoip_get_client_history(time_t now, geoip_client_action_t action) return result; } +/** Return a newly allocated comma-separated string containing entries for + * all the countries from which we've seen enough 
clients connect as a + * directory. The entry format is cc=num where num is the number of IPs + * we've seen connecting from that country, and cc is a lowercased country + * code. Returns NULL if we don't want to export geoip data yet. */ +char * +geoip_get_client_history_dirreq(time_t now, + geoip_client_action_t action) +{ + return geoip_get_client_history(now, action, + DIR_RECORD_USAGE_MIN_OBSERVATION_TIME, + DIR_RECORD_USAGE_GRANULARITY); +} + +/** Return a newly allocated comma-separated string containing entries for + * all the countries from which we've seen enough clients connect as a + * bridge. The entry format is cc=num where num is the number of IPs + * we've seen connecting from that country, and cc is a lowercased country + * code. Returns NULL if we don't want to export geoip data yet. */ +char * +geoip_get_client_history_bridge(time_t now, + geoip_client_action_t action) +{ + return geoip_get_client_history(now, action, + GEOIP_MIN_OBSERVATION_TIME, + IP_GRANULARITY); +} + /** Return a newly allocated string holding the per-country request history * for <b>action</b> in a format suitable for an extra-info document, or NULL * on failure. */ @@ -910,10 +914,6 @@ geoip_get_request_history(time_t now, geoip_client_action_t action) char *result; unsigned granularity = IP_GRANULARITY; int min_observation_time = GEOIP_MIN_OBSERVATION_TIME; -#ifdef ENABLE_DIRREQ_STATS - granularity = DIR_RECORD_USAGE_GRANULARITY; - min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME; -#endif if (client_history_starts >= (now - min_observation_time)) return NULL; @@ -955,16 +955,23 @@ geoip_get_request_history(time_t now, geoip_client_action_t action) return result; } -/** Store all our geoip statistics into $DATADIR/dirreq-stats. */ -static void -dump_geoip_stats(void) +/** Start time of directory request stats. */ +static time_t start_of_dirreq_stats_interval; + +/** Initialize directory request stats. 
*/ +void +geoip_dirreq_stats_init(time_t now) +{ + start_of_dirreq_stats_interval = now; +} + +/** Write dirreq statistics to $DATADIR/stats/dirreq-stats. */ +void +geoip_dirreq_stats_write(time_t now) { -#ifdef ENABLE_DIRREQ_STATS - time_t now = time(NULL); - time_t request_start; - char *filename = get_datadir_fname("dirreq-stats"); + char *statsdir = NULL, *filename = NULL; char *data_v2 = NULL, *data_v3 = NULL; - char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1]; + char written[ISO_TIME_LEN+1]; open_file_t *open_file = NULL; double v2_share = 0.0, v3_share = 0.0; FILE *out; @@ -973,28 +980,33 @@ dump_geoip_stats(void) if (!get_options()->DirReqStatistics) goto done; - data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2); - data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS); - format_iso_time(since, geoip_get_history_start()); + /* Discard all items in the client history that are too old. */ + geoip_remove_old_clients(start_of_dirreq_stats_interval); + + statsdir = get_datadir_fname("stats"); + if (check_private_dir(statsdir, CPD_CREATE) < 0) + goto done; + filename = get_datadir_fname("stats"PATH_SEPARATOR"dirreq-stats"); + data_v2 = geoip_get_client_history_dirreq(now, + GEOIP_CLIENT_NETWORKSTATUS_V2); + data_v3 = geoip_get_client_history_dirreq(now, + GEOIP_CLIENT_NETWORKSTATUS); format_iso_time(written, now); out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND, 0600, &open_file); if (!out) goto done; - if (fprintf(out, "written %s\nstarted-at %s\nns-ips %s\nns-v2-ips %s\n", - written, since, + if (fprintf(out, "dirreq-stats-end %s (%d s)\ndirreq-v3-ips %s\n" + "dirreq-v2-ips %s\n", written, + (unsigned) (now - start_of_dirreq_stats_interval), data_v3 ? data_v3 : "", data_v2 ? 
data_v2 : "") < 0) goto done; tor_free(data_v2); tor_free(data_v3); - request_start = current_request_period_starts - - (n_old_request_periods * REQUEST_HIST_PERIOD); - format_iso_time(since, request_start); data_v2 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2); data_v3 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS); - if (fprintf(out, "requests-start %s\nn-ns-reqs %s\nn-v2-ns-reqs %s\n", - since, + if (fprintf(out, "dirreq-v3-reqs %s\ndirreq-v2-reqs %s\n", data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0) goto done; #define RESPONSE_GRANULARITY 8 @@ -1005,7 +1017,7 @@ dump_geoip_stats(void) ns_v3_responses[i], RESPONSE_GRANULARITY); } #undef RESPONSE_GRANULARITY - if (fprintf(out, "n-ns-resp ok=%u,not-enough-sigs=%u,unavailable=%u," + if (fprintf(out, "dirreq-v3-resp ok=%u,not-enough-sigs=%u,unavailable=%u," "not-found=%u,not-modified=%u,busy=%u\n", ns_v3_responses[GEOIP_SUCCESS], ns_v3_responses[GEOIP_REJECT_NOT_ENOUGH_SIGS], @@ -1014,7 +1026,7 @@ dump_geoip_stats(void) ns_v3_responses[GEOIP_REJECT_NOT_MODIFIED], ns_v3_responses[GEOIP_REJECT_BUSY]) < 0) goto done; - if (fprintf(out, "n-v2-ns-resp ok=%u,unavailable=%u," + if (fprintf(out, "dirreq-v2-resp ok=%u,unavailable=%u," "not-found=%u,not-modified=%u,busy=%u\n", ns_v2_responses[GEOIP_SUCCESS], ns_v2_responses[GEOIP_REJECT_UNAVAILABLE], @@ -1025,9 +1037,9 @@ dump_geoip_stats(void) memset(ns_v2_responses, 0, sizeof(ns_v2_responses)); memset(ns_v3_responses, 0, sizeof(ns_v3_responses)); if (!geoip_get_mean_shares(now, &v2_share, &v3_share)) { - if (fprintf(out, "v2-ns-share %0.2lf%%\n", v2_share*100) < 0) + if (fprintf(out, "dirreq-v2-share %0.2lf%%\n", v2_share*100) < 0) goto done; - if (fprintf(out, "v3-ns-share %0.2lf%%\n", v3_share*100) < 0) + if (fprintf(out, "dirreq-v3-share %0.2lf%%\n", v3_share*100) < 0) goto done; } @@ -1035,7 +1047,7 @@ dump_geoip_stats(void) DIRREQ_DIRECT); data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS, DIRREQ_DIRECT); - if 
(fprintf(out, "ns-direct-dl %s\nns-v2-direct-dl %s\n", + if (fprintf(out, "dirreq-v3-direct-dl %s\ndirreq-v2-direct-dl %s\n", data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0) goto done; tor_free(data_v2); @@ -1044,53 +1056,78 @@ dump_geoip_stats(void) DIRREQ_TUNNELED); data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS, DIRREQ_TUNNELED); - if (fprintf(out, "ns-tunneled-dl %s\nns-v2-tunneled-dl %s\n", + if (fprintf(out, "dirreq-v3-tunneled-dl %s\ndirreq-v2-tunneled-dl %s\n", data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0) goto done; finish_writing_to_file(open_file); open_file = NULL; + + /* Rotate request period */ + rotate_request_period(); + + start_of_dirreq_stats_interval = now; + done: if (open_file) abort_writing_to_file(open_file); tor_free(filename); + tor_free(statsdir); tor_free(data_v2); tor_free(data_v3); -#endif } -/** Store all our geoip statistics as entry guards into - * $DATADIR/entry-stats. */ -static void -dump_entry_stats(void) +/** Start time of entry stats. */ +static time_t start_of_entry_stats_interval; + +/** Initialize entry stats. */ +void +geoip_entry_stats_init(time_t now) +{ + start_of_entry_stats_interval = now; +} + +/** Write entry statistics to $DATADIR/stats/entry-stats. */ +void +geoip_entry_stats_write(time_t now) { -#ifdef ENABLE_ENTRY_STATS - time_t now = time(NULL); - char *filename = get_datadir_fname("entry-stats"); + char *statsdir = NULL, *filename = NULL; char *data = NULL; - char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1]; + char written[ISO_TIME_LEN+1]; open_file_t *open_file = NULL; FILE *out; - data = geoip_get_client_history(now, GEOIP_CLIENT_CONNECT); - format_iso_time(since, geoip_get_history_start()); + if (!get_options()->EntryStatistics) + goto done; + + /* Discard all items in the client history that are too old. 
*/ + geoip_remove_old_clients(start_of_entry_stats_interval); + + statsdir = get_datadir_fname("stats"); + if (check_private_dir(statsdir, CPD_CREATE) < 0) + goto done; + filename = get_datadir_fname("stats"PATH_SEPARATOR"entry-stats"); + data = geoip_get_client_history_dirreq(now, GEOIP_CLIENT_CONNECT); format_iso_time(written, now); out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND, 0600, &open_file); if (!out) goto done; - if (fprintf(out, "written %s\nstarted-at %s\nips %s\n", - written, since, data ? data : "") < 0) + if (fprintf(out, "entry-stats-end %s (%u s)\nentry-ips %s\n", + written, (unsigned) (now - start_of_entry_stats_interval), + data ? data : "") < 0) goto done; + start_of_entry_stats_interval = now; + finish_writing_to_file(open_file); open_file = NULL; done: if (open_file) abort_writing_to_file(open_file); tor_free(filename); + tor_free(statsdir); tor_free(data); -#endif } /** Helper used to implement GETINFO ip-to-country/... controller command. */ diff --git a/src/or/main.c b/src/or/main.c index 52cc0282b3..5701727ad1 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -830,9 +830,7 @@ run_scheduled_events(time_t now) static time_t time_to_clean_caches = 0; static time_t time_to_recheck_bandwidth = 0; static time_t time_to_check_for_expired_networkstatus = 0; -#ifdef ENABLE_BUFFER_STATS - static time_t time_to_dump_buffer_stats = 0; -#endif + static time_t time_to_write_stats_files = 0; static time_t time_to_retry_dns_init = 0; or_options_t *options = get_options(); int i; @@ -960,13 +958,44 @@ run_scheduled_events(time_t now) time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL; } -#ifdef ENABLE_BUFFER_STATS - if (time_to_dump_buffer_stats < now) { - if (get_options()->CellStatistics && time_to_dump_buffer_stats) - dump_buffer_stats(); - time_to_dump_buffer_stats = now + DUMP_BUFFER_STATS_INTERVAL; + /* 1g. Check whether we should write statistics to disk. 
+ */ + if (time_to_write_stats_files >= 0 && time_to_write_stats_files < now) { +#define WRITE_STATS_INTERVAL (24*60*60) + if (options->CellStatistics || options->DirReqStatistics || + options->EntryStatistics || options->ExitPortStatistics) { + if (!time_to_write_stats_files) { + /* Initialize stats. */ + if (options->CellStatistics) + rep_hist_buffer_stats_init(now); + if (options->DirReqStatistics) + geoip_dirreq_stats_init(now); + if (options->EntryStatistics) + geoip_entry_stats_init(now); + if (options->ExitPortStatistics) + rep_hist_exit_stats_init(now); + log_notice(LD_CONFIG, "Configured to measure statistics. Look for " + "the *-stats files that will first be written to the " + "data directory in %d hours from now.", + WRITE_STATS_INTERVAL / (60 * 60)); + time_to_write_stats_files = now + WRITE_STATS_INTERVAL; + } else { + /* Write stats to disk. */ + time_to_write_stats_files += WRITE_STATS_INTERVAL; + if (options->CellStatistics) + rep_hist_buffer_stats_write(time_to_write_stats_files); + if (options->DirReqStatistics) + geoip_dirreq_stats_write(time_to_write_stats_files); + if (options->EntryStatistics) + geoip_entry_stats_write(time_to_write_stats_files); + if (options->ExitPortStatistics) + rep_hist_exit_stats_write(time_to_write_stats_files); + } + } else { + /* Never write stats to disk */ + time_to_write_stats_files = -1; + } } -#endif /* Remove old information from rephist and the rend cache. */ if (time_to_clean_caches < now) { diff --git a/src/or/or.h b/src/or/or.h index 1aa40af61b..aaae9053ee 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -20,12 +20,6 @@ #ifndef INSTRUMENT_DOWNLOADS #define INSTRUMENT_DOWNLOADS 1 #endif -#ifndef ENABLE_DIRREQ_STATS -#define ENABLE_DIRREQ_STATS 1 -#endif -#ifndef ENABLE_BUFFER_STATS -#define ENABLE_BUFFER_STATS 1 -#endif #endif #ifdef MS_WINDOWS @@ -854,17 +848,30 @@ typedef struct var_cell_t { typedef struct packed_cell_t { struct packed_cell_t *next; /**< Next cell queued on this circuit. 
*/ char body[CELL_NETWORK_SIZE]; /**< Cell as packed for network. */ -#ifdef ENABLE_BUFFER_STATS - struct timeval packed_timeval; /**< When was this cell packed? */ -#endif } packed_cell_t; +/** Number of cells added to a circuit queue including their insertion + * time on 10 millisecond detail; used for buffer statistics. */ +typedef struct insertion_time_elem_t { + struct insertion_time_elem_t *next; /**< Next element in queue. */ + uint32_t insertion_time; /**< When were cells inserted (in 10 ms steps + * starting at 0:00 of the current day)? */ + unsigned counter; /**< How many cells were inserted? */ +} insertion_time_elem_t; + +/** Queue of insertion times. */ +typedef struct insertion_time_queue_t { + struct insertion_time_elem_t *first; /**< First element in queue. */ + struct insertion_time_elem_t *last; /**< Last element in queue. */ +} insertion_time_queue_t; + /** A queue of cells on a circuit, waiting to be added to the * or_connection_t's outbuf. */ typedef struct cell_queue_t { packed_cell_t *head; /**< The first cell, or NULL if the queue is empty. */ packed_cell_t *tail; /**< The last cell, or NULL if the queue is empty. */ int n; /**< The number of cells in the queue. */ + insertion_time_queue_t *insertion_times; /**< Insertion times of cells. */ } cell_queue_t; /** Beginning of a RELAY cell payload. */ @@ -991,11 +998,8 @@ typedef struct connection_t { * to the evdns_server_port is uses to listen to and answer connections. */ struct evdns_server_port *dns_server_port; -#ifdef ENABLE_DIRREQ_STATS /** Unique ID for measuring tunneled network status requests. */ uint64_t dirreq_id; -#endif - } connection_t; /** Stores flags and information related to the portion of a v2 Tor OR @@ -1985,10 +1989,9 @@ typedef struct circuit_t { * linked to an OR connection. */ struct circuit_t *prev_active_on_n_conn; struct circuit_t *next; /**< Next circuit in linked list of all circuits. 
*/ -#ifdef ENABLE_DIRREQ_STATS + /** Unique ID for measuring tunneled network status requests. */ uint64_t dirreq_id; -#endif } circuit_t; /** Largest number of relay_early cells that we can send on a given @@ -2112,7 +2115,6 @@ typedef struct or_circuit_t { /** True iff this circuit was made with a CREATE_FAST cell. */ unsigned int is_first_hop : 1; -#ifdef ENABLE_BUFFER_STATS /** Number of cells that were removed from circuit queue; reset every * time when writing buffer stats to disk. */ uint32_t processed_cells; @@ -2121,7 +2123,6 @@ typedef struct or_circuit_t { * exit-ward queues of this circuit; reset every time when writing * buffer stats to disk. */ uint64_t total_cell_waiting_time; -#endif } or_circuit_t; /** Convert a circuit subtype to a circuit_t.*/ @@ -2558,6 +2559,9 @@ typedef struct { /** If true, the user wants us to collect statistics as entry node. */ int EntryStatistics; + /** If true, include statistics file contents in extra-info documents. */ + int ExtraInfoStatistics; + /** If true, do not believe anybody who tells us that a domain resolves * to an internal address, or that an internal address has a PTR mapping. * Helps avoid some cross-site attacks. */ @@ -3697,15 +3701,11 @@ int dnsserv_launch_request(const char *name, int is_reverse); * leaking information. */ #define DIR_RECORD_USAGE_GRANULARITY 8 /** Time interval: Flush geoip data to disk this often. */ -#define DIR_RECORD_USAGE_RETAIN_IPS (24*60*60) +#define DIR_ENTRY_RECORD_USAGE_RETAIN_IPS (24*60*60) /** How long do we have to have observed per-country request history before * we are willing to talk about it? */ #define DIR_RECORD_USAGE_MIN_OBSERVATION_TIME (24*60*60) -/** Time interval: Flush geoip data to disk this often when measuring on an - * entry guard. 
*/ -#define ENTRY_RECORD_USAGE_RETAIN_IPS (24*60*60) - #ifdef GEOIP_PRIVATE int geoip_parse_entry(const char *line); #endif @@ -3752,7 +3752,10 @@ typedef enum { void geoip_note_ns_response(geoip_client_action_t action, geoip_ns_response_t response); time_t geoip_get_history_start(void); -char *geoip_get_client_history(time_t now, geoip_client_action_t action); +char *geoip_get_client_history_dirreq(time_t now, + geoip_client_action_t action); +char *geoip_get_client_history_bridge(time_t now, + geoip_client_action_t action); char *geoip_get_request_history(time_t now, geoip_client_action_t action); int getinfo_helper_geoip(control_connection_t *control_conn, const char *question, char **answer); @@ -3792,6 +3795,11 @@ void geoip_start_dirreq(uint64_t dirreq_id, size_t response_size, void geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type, dirreq_state_t new_state); +void geoip_dirreq_stats_init(time_t now); +void geoip_dirreq_stats_write(time_t now); +void geoip_entry_stats_init(time_t now); +void geoip_entry_stats_write(time_t now); + /********************************* hibernate.c **********************/ int accounting_parse_options(or_options_t *options, int validate_only); @@ -4133,17 +4141,11 @@ void rep_hist_note_extend_failed(const char *from_name, const char *to_name); void rep_hist_dump_stats(time_t now, int severity); void rep_hist_note_bytes_read(size_t num_bytes, time_t when); void rep_hist_note_bytes_written(size_t num_bytes, time_t when); -#ifdef ENABLE_EXIT_STATS -void rep_hist_note_exit_bytes_read(uint16_t port, size_t num_bytes, - time_t now); -void rep_hist_note_exit_bytes_written(uint16_t port, size_t num_bytes, - time_t now); -void rep_hist_note_exit_stream_opened(uint16_t port, time_t now); -#else -#define rep_hist_note_exit_bytes_read(p,n,t) STMT_NIL -#define rep_hist_note_exit_bytes_written(p,n,t) STMT_NIL -#define rep_hist_note_exit_stream_opened(p,t) STMT_NIL -#endif +void rep_hist_note_exit_bytes_read(uint16_t port, size_t 
num_bytes); +void rep_hist_note_exit_bytes_written(uint16_t port, size_t num_bytes); +void rep_hist_note_exit_stream_opened(uint16_t port); +void rep_hist_exit_stats_init(time_t now); +void rep_hist_exit_stats_write(time_t now); int rep_hist_bandwidth_assess(void); char *rep_hist_get_bandwidth_lines(int for_extrainfo); void rep_hist_update_state(or_state_t *state); @@ -4195,11 +4197,10 @@ void hs_usage_note_fetch_successful(const char *service_id, time_t now); void hs_usage_write_statistics_to_file(time_t now); void hs_usage_free_all(void); -#ifdef ENABLE_BUFFER_STATS -#define DUMP_BUFFER_STATS_INTERVAL (24*60*60) -void add_circ_to_buffer_stats(circuit_t *circ, time_t end_of_interval); -void dump_buffer_stats(void); -#endif +void rep_hist_buffer_stats_init(time_t now); +void rep_hist_buffer_stats_add_circ(circuit_t *circ, + time_t end_of_interval); +void rep_hist_buffer_stats_write(time_t now); /********************************* rendclient.c ***************************/ diff --git a/src/or/relay.c b/src/or/relay.c index 76577848f3..c81b8311a0 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -533,13 +533,11 @@ relay_send_command_from_edge(uint16_t stream_id, circuit_t *circ, log_debug(LD_OR,"delivering %d cell %s.", relay_command, cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward"); -#ifdef ENABLE_DIRREQ_STATS /* If we are sending an END cell and this circuit is used for a tunneled * directory request, advance its state. */ if (relay_command == RELAY_COMMAND_END && circ->dirreq_id) geoip_change_dirreq_state(circ->dirreq_id, DIRREQ_TUNNELED, DIRREQ_END_CELL_SENT); -#endif if (cell_direction == CELL_DIRECTION_OUT && circ->n_conn) { /* if we're using relaybandwidthrate, this conn wants priority */ @@ -1047,7 +1045,6 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ, "Begin cell for known stream. 
Dropping."); return 0; } -#ifdef ENABLE_DIRREQ_STATS if (rh.command == RELAY_COMMAND_BEGIN_DIR) { /* Assign this circuit and its app-ward OR connection a unique ID, * so that we can measure download times. The local edge and dir @@ -1057,7 +1054,6 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ, circ->dirreq_id = ++next_id; TO_CONN(TO_OR_CIRCUIT(circ)->p_conn)->dirreq_id = circ->dirreq_id; } -#endif return connection_exit_begin_conn(cell, circ); case RELAY_COMMAND_DATA: @@ -1529,6 +1525,10 @@ static int total_cells_allocated = 0; /** A memory pool to allocate packed_cell_t objects. */ static mp_pool_t *cell_pool = NULL; +/** Memory pool to allocate insertion_time_elem_t objects used for cell + * statistics. */ +static mp_pool_t *it_pool = NULL; + /** Allocate structures to hold cells. */ void init_cell_pool(void) @@ -1537,7 +1537,8 @@ init_cell_pool(void) cell_pool = mp_pool_new(sizeof(packed_cell_t), 128*1024); } -/** Free all storage used to hold cells. */ +/** Free all storage used to hold cells (and insertion times if we measure + * cell statistics). */ void free_cell_pool(void) { @@ -1546,6 +1547,10 @@ free_cell_pool(void) mp_pool_destroy(cell_pool); cell_pool = NULL; } + if (it_pool) { + mp_pool_destroy(it_pool); + it_pool = NULL; + } } /** Free excess storage in cell pool. */ @@ -1621,11 +1626,35 @@ void cell_queue_append_packed_copy(cell_queue_t *queue, const cell_t *cell) { packed_cell_t *copy = packed_cell_copy(cell); -#ifdef ENABLE_BUFFER_STATS - /* Remember the exact time when this cell was put in the queue. */ - if (get_options()->CellStatistics) - tor_gettimeofday(©->packed_timeval); -#endif + /* Remember the time when this cell was put in the queue. 
*/ + if (get_options()->CellStatistics) { + struct timeval now; + uint32_t added; + insertion_time_queue_t *it_queue = queue->insertion_times; + if (!it_pool) + it_pool = mp_pool_new(sizeof(insertion_time_elem_t), 1024); + tor_gettimeofday(&now); +#define SECONDS_IN_A_DAY 86400L + added = (now.tv_sec % SECONDS_IN_A_DAY) * 100L + now.tv_usec / 10000L; + if (!it_queue) { + it_queue = tor_malloc_zero(sizeof(insertion_time_queue_t)); + queue->insertion_times = it_queue; + } + if (it_queue->last && it_queue->last->insertion_time == added) { + it_queue->last->counter++; + } else { + insertion_time_elem_t *elem = mp_pool_get(it_pool); + elem->next = NULL; + elem->insertion_time = added; + elem->counter = 1; + if (it_queue->last) { + it_queue->last->next = elem; + it_queue->last = elem; + } else { + it_queue->first = it_queue->last = elem; + } + } + } cell_queue_append(queue, copy); } @@ -1642,6 +1671,14 @@ cell_queue_clear(cell_queue_t *queue) } queue->head = queue->tail = NULL; queue->n = 0; + if (queue->insertion_times) { + while (queue->insertion_times->first) { + insertion_time_elem_t *elem = queue->insertion_times->first; + queue->insertion_times->first = elem->next; + mp_pool_release(elem); + } + tor_free(queue->insertion_times); + } } /** Extract and return the cell at the head of <b>queue</b>; return NULL if @@ -1835,28 +1872,41 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, packed_cell_t *cell = cell_queue_pop(queue); tor_assert(*next_circ_on_conn_p(circ,conn)); -#ifdef ENABLE_BUFFER_STATS /* Calculate the exact time that this cell has spent in the queue. 
*/ if (get_options()->CellStatistics && !CIRCUIT_IS_ORIGIN(circ)) { - struct timeval flushed_from_queue; + struct timeval now; + uint32_t flushed; uint32_t cell_waiting_time; - or_circuit_t *orcirc = TO_OR_CIRCUIT(circ); - tor_gettimeofday(&flushed_from_queue); - cell_waiting_time = (uint32_t) - tv_mdiff(&cell->packed_timeval, &flushed_from_queue); - - orcirc->total_cell_waiting_time += cell_waiting_time; - orcirc->processed_cells++; + insertion_time_queue_t *it_queue = queue->insertion_times; + tor_gettimeofday(&now); + flushed = (now.tv_sec % SECONDS_IN_A_DAY) * 100L + + now.tv_usec / 10000L; + if (!it_queue || !it_queue->first) { + log_warn(LD_BUG, "Cannot determine insertion time of cell."); + } else { + or_circuit_t *orcirc = TO_OR_CIRCUIT(circ); + insertion_time_elem_t *elem = it_queue->first; + cell_waiting_time = (flushed * 10L + SECONDS_IN_A_DAY * 1000L - + elem->insertion_time * 10L) % (SECONDS_IN_A_DAY * 1000L); +#undef SECONDS_IN_A_DAY + elem->counter--; + if (elem->counter < 1) { + it_queue->first = elem->next; + if (elem == it_queue->last) + it_queue->last = NULL; + mp_pool_release(elem); + } + orcirc->total_cell_waiting_time += cell_waiting_time; + orcirc->processed_cells++; + } } -#endif -#ifdef ENABLE_DIRREQ_STATS + /* If we just flushed our queue and this circuit is used for a * tunneled directory request, possibly advance its state. */ if (queue->n == 0 && TO_CONN(conn)->dirreq_id) geoip_change_dirreq_state(TO_CONN(conn)->dirreq_id, DIRREQ_TUNNELED, DIRREQ_CIRC_QUEUE_FLUSHED); -#endif connection_write_to_buf(cell->body, CELL_NETWORK_SIZE, TO_CONN(conn)); diff --git a/src/or/rephist.c b/src/or/rephist.c index c0b9ae14a4..8d78ac26c3 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -1320,10 +1320,7 @@ rep_hist_note_bytes_read(size_t num_bytes, time_t when) add_obs(read_array, when, num_bytes); } -#ifdef ENABLE_EXIT_STATS /* Some constants */ -/** How long are the intervals for measuring exit stats? 
*/ -#define EXIT_STATS_INTERVAL_SEC (24 * 60 * 60) /** To what multiple should byte numbers be rounded up? */ #define EXIT_STATS_ROUND_UP_BYTES 1024 /** To what multiple should stream counts be rounded up? */ @@ -1337,118 +1334,137 @@ rep_hist_note_bytes_read(size_t num_bytes, time_t when) /* The following data structures are arrays and no fancy smartlists or maps, * so that all write operations can be done in constant time. This comes at * the price of some memory (1.25 MB) and linear complexity when writing - * stats. */ + * stats for measuring relays. */ /** Number of bytes read in current period by exit port */ -static uint64_t exit_bytes_read[EXIT_STATS_NUM_PORTS]; +static uint64_t *exit_bytes_read = NULL; /** Number of bytes written in current period by exit port */ -static uint64_t exit_bytes_written[EXIT_STATS_NUM_PORTS]; +static uint64_t *exit_bytes_written = NULL; /** Number of streams opened in current period by exit port */ -static uint32_t exit_streams[EXIT_STATS_NUM_PORTS]; +static uint32_t *exit_streams = NULL; /** When does the current exit stats period end? */ -static time_t end_of_current_exit_stats_period = 0; +static time_t start_of_exit_stats_interval; -/** Write exit stats for the current period to disk and reset counters. */ -static void -write_exit_stats(time_t when) +/** Initialize exit port stats. */ +void +rep_hist_exit_stats_init(time_t now) +{ + start_of_exit_stats_interval = now; + exit_bytes_read = tor_malloc_zero(EXIT_STATS_NUM_PORTS * + sizeof(uint64_t)); + exit_bytes_written = tor_malloc_zero(EXIT_STATS_NUM_PORTS * + sizeof(uint64_t)); + exit_streams = tor_malloc_zero(EXIT_STATS_NUM_PORTS * + sizeof(uint32_t)); +} + +/** Write exit stats to $DATADIR/stats/exit-stats and reset counters. 
*/ +void +rep_hist_exit_stats_write(time_t now) { char t[ISO_TIME_LEN+1]; int r, i, comma; uint64_t *b, total_bytes, threshold_bytes, other_bytes; uint32_t other_streams; - char *filename = get_datadir_fname("exit-stats"); + char *statsdir = NULL, *filename = NULL; open_file_t *open_file = NULL; FILE *out = NULL; - log_debug(LD_HIST, "Considering writing exit port statistics to disk.."); - while (when > end_of_current_exit_stats_period) { - format_iso_time(t, end_of_current_exit_stats_period); - log_info(LD_HIST, "Writing exit port statistics to disk for period " - "ending at %s.", t); - - if (!open_file) { - out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND, - 0600, &open_file); - if (!out) { - log_warn(LD_HIST, "Couldn't open '%s'.", filename); - goto done; - } - } + if (!exit_streams) + return; /* Not initialized */ - /* written yyyy-mm-dd HH:MM:SS (n s) */ - if (fprintf(out, "written %s (%d s)\n", t, EXIT_STATS_INTERVAL_SEC) < 0) + statsdir = get_datadir_fname("stats"); + if (check_private_dir(statsdir, CPD_CREATE) < 0) + goto done; + filename = get_datadir_fname("stats"PATH_SEPARATOR"exit-stats"); + format_iso_time(t, now); + log_info(LD_HIST, "Writing exit port statistics to disk for period " + "ending at %s.", t); + + if (!open_file) { + out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND, + 0600, &open_file); + if (!out) { + log_warn(LD_HIST, "Couldn't open '%s'.", filename); goto done; - - /* Count the total number of bytes, so that we can attribute all - * observations below a threshold of 1 / EXIT_STATS_THRESHOLD_RECIPROCAL - * of all bytes to a special port 'other'. */ - total_bytes = 0; - for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) { - total_bytes += exit_bytes_read[i]; - total_bytes += exit_bytes_written[i]; } - threshold_bytes = total_bytes / EXIT_STATS_THRESHOLD_RECIPROCAL; - - /* kibibytes-(read|written) port=kibibytes,.. */ - for (r = 0; r < 2; r++) { - b = r ? 
exit_bytes_read : exit_bytes_written; - tor_assert(b); - if (fprintf(out, "%s ", - r ? "kibibytes-read" : "kibibytes-written")<0) - goto done; - - comma = 0; - other_bytes = 0; - for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) { - if (b[i] > 0) { - if (exit_bytes_read[i] + exit_bytes_written[i] > threshold_bytes) { - uint64_t num = round_uint64_to_next_multiple_of(b[i], - EXIT_STATS_ROUND_UP_BYTES); - num /= 1024; - if (fprintf(out, "%s%d="U64_FORMAT, - comma++ ? "," : "", i, - U64_PRINTF_ARG(num)) < 0) - goto done; - } else - other_bytes += b[i]; - } - } - other_bytes = round_uint64_to_next_multiple_of(other_bytes, - EXIT_STATS_ROUND_UP_BYTES); - other_bytes /= 1024; - if (fprintf(out, "%sother="U64_FORMAT"\n", - comma ? "," : "", U64_PRINTF_ARG(other_bytes))<0) - goto done; - } - /* streams-opened port=num,.. */ - if (fprintf(out, "streams-opened ")<0) + } + + /* written yyyy-mm-dd HH:MM:SS (n s) */ + if (fprintf(out, "exit-stats-end %s (%d s)\n", t, + (unsigned) (now - start_of_exit_stats_interval)) < 0) + goto done; + + /* Count the total number of bytes, so that we can attribute all + * observations below a threshold of 1 / EXIT_STATS_THRESHOLD_RECIPROCAL + * of all bytes to a special port 'other'. */ + total_bytes = 0; + for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) { + total_bytes += exit_bytes_read[i]; + total_bytes += exit_bytes_written[i]; + } + threshold_bytes = total_bytes / EXIT_STATS_THRESHOLD_RECIPROCAL; + + /* exit-kibibytes-(read|written) port=kibibytes,.. */ + for (r = 0; r < 2; r++) { + b = r ? exit_bytes_read : exit_bytes_written; + tor_assert(b); + if (fprintf(out, "%s ", + r ? 
"exit-kibibytes-read" + : "exit-kibibytes-written") < 0) goto done; + comma = 0; - other_streams = 0; + other_bytes = 0; for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) { - if (exit_streams[i] > 0) { + if (b[i] > 0) { if (exit_bytes_read[i] + exit_bytes_written[i] > threshold_bytes) { - uint32_t num = round_uint32_to_next_multiple_of(exit_streams[i], - EXIT_STATS_ROUND_UP_STREAMS); - if (fprintf(out, "%s%d=%u", - comma++ ? "," : "", i, num)<0) + uint64_t num = round_uint64_to_next_multiple_of(b[i], + EXIT_STATS_ROUND_UP_BYTES); + num /= 1024; + if (fprintf(out, "%s%d="U64_FORMAT, + comma++ ? "," : "", i, + U64_PRINTF_ARG(num)) < 0) goto done; } else - other_streams += exit_streams[i]; + other_bytes += b[i]; } } - other_streams = round_uint32_to_next_multiple_of(other_streams, - EXIT_STATS_ROUND_UP_STREAMS); - if (fprintf(out, "%sother=%u\n", - comma ? "," : "", other_streams)<0) + other_bytes = round_uint64_to_next_multiple_of(other_bytes, + EXIT_STATS_ROUND_UP_BYTES); + other_bytes /= 1024; + if (fprintf(out, "%sother="U64_FORMAT"\n", + comma ? "," : "", U64_PRINTF_ARG(other_bytes))<0) goto done; - /* Reset counters */ - memset(exit_bytes_read, 0, sizeof(exit_bytes_read)); - memset(exit_bytes_written, 0, sizeof(exit_bytes_written)); - memset(exit_streams, 0, sizeof(exit_streams)); - end_of_current_exit_stats_period += EXIT_STATS_INTERVAL_SEC; } + /* exit-streams-opened port=num,.. */ + if (fprintf(out, "exit-streams-opened ") < 0) + goto done; + comma = 0; + other_streams = 0; + for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) { + if (exit_streams[i] > 0) { + if (exit_bytes_read[i] + exit_bytes_written[i] > threshold_bytes) { + uint32_t num = round_uint32_to_next_multiple_of(exit_streams[i], + EXIT_STATS_ROUND_UP_STREAMS); + if (fprintf(out, "%s%d=%u", + comma++ ? 
"," : "", i, num)<0) + goto done; + } else + other_streams += exit_streams[i]; + } + } + other_streams = round_uint32_to_next_multiple_of(other_streams, + EXIT_STATS_ROUND_UP_STREAMS); + if (fprintf(out, "%sother=%u\n", + comma ? "," : "", other_streams)<0) + goto done; + /* Reset counters */ + memset(exit_bytes_read, 0, sizeof(exit_bytes_read)); + memset(exit_bytes_written, 0, sizeof(exit_bytes_written)); + memset(exit_streams, 0, sizeof(exit_streams)); + start_of_exit_stats_interval = now; if (open_file) finish_writing_to_file(open_file); @@ -1457,63 +1473,48 @@ write_exit_stats(time_t when) if (open_file) abort_writing_to_file(open_file); tor_free(filename); -} - -/** Prepare to add an exit stats observation at second <b>when</b> by - * checking whether this observation lies in the current observation - * period; if not, shift the current period forward by one until the - * reported event fits it and write all results in between to disk. */ -static void -add_exit_obs(time_t when) -{ - if (when > end_of_current_exit_stats_period) { - if (end_of_current_exit_stats_period) - write_exit_stats(when); - else - end_of_current_exit_stats_period = when + EXIT_STATS_INTERVAL_SEC; - } + tor_free(statsdir); } /** Note that we wrote <b>num_bytes</b> to an exit connection to - * <b>port</b> in second <b>when</b>. */ + * <b>port</b>. */ void -rep_hist_note_exit_bytes_written(uint16_t port, size_t num_bytes, - time_t when) +rep_hist_note_exit_bytes_written(uint16_t port, size_t num_bytes) { if (!get_options()->ExitPortStatistics) return; - add_exit_obs(when); + if (!exit_bytes_written) + return; /* Not initialized */ exit_bytes_written[port] += num_bytes; log_debug(LD_HIST, "Written %lu bytes to exit connection to port %d.", (unsigned long)num_bytes, port); } /** Note that we read <b>num_bytes</b> from an exit connection to - * <b>port</b> in second <b>when</b>. */ + * <b>port</b>. 
*/ void -rep_hist_note_exit_bytes_read(uint16_t port, size_t num_bytes, - time_t when) +rep_hist_note_exit_bytes_read(uint16_t port, size_t num_bytes) { if (!get_options()->ExitPortStatistics) return; - add_exit_obs(when); + if (!exit_bytes_read) + return; /* Not initialized */ exit_bytes_read[port] += num_bytes; log_debug(LD_HIST, "Read %lu bytes from exit connection to port %d.", (unsigned long)num_bytes, port); } -/** Note that we opened an exit stream to <b>port</b> in second - * <b>when</b>. */ +/** Note that we opened an exit stream to <b>port</b>. */ void -rep_hist_note_exit_stream_opened(uint16_t port, time_t when) +rep_hist_note_exit_stream_opened(uint16_t port) { if (!get_options()->ExitPortStatistics) return; - add_exit_obs(when); + if (!exit_streams) + return; /* Not initialized */ exit_streams[port]++; log_debug(LD_HIST, "Opened exit stream to port %d", port); } -#endif /** Helper: Return the largest value in b->maxima. (This is equal to the * most bandwidth used in any NUM_SECS_ROLLING_MEASURE period for the last @@ -2049,6 +2050,9 @@ rep_hist_free_all(void) tor_free(read_array); tor_free(write_array); tor_free(last_stability_doc); + tor_free(exit_bytes_read); + tor_free(exit_bytes_written); + tor_free(exit_streams); built_last_stability_doc_at = 0; predicted_ports_free(); } @@ -2603,9 +2607,15 @@ hs_usage_write_statistics_to_file(time_t now) /*** cell statistics ***/ -#ifdef ENABLE_BUFFER_STATS /** Start of the current buffer stats interval. */ -time_t start_of_buffer_stats_interval; +static time_t start_of_buffer_stats_interval; + +/** Initialize buffer stats. */ +void +rep_hist_buffer_stats_init(time_t now) +{ + start_of_buffer_stats_interval = now; +} typedef struct circ_buffer_stats_t { uint32_t processed_cells; @@ -2621,7 +2631,7 @@ smartlist_t *circuits_for_buffer_stats = NULL; * <b>end_of_interval</b> and reset cell counters in case the circuit * remains open in the next measurement interval. 
*/ void -add_circ_to_buffer_stats(circuit_t *circ, time_t end_of_interval) +rep_hist_buffer_stats_add_circ(circuit_t *circ, time_t end_of_interval) { circ_buffer_stats_t *stat; time_t start_of_interval; @@ -2667,12 +2677,11 @@ _buffer_stats_compare_entries(const void **_a, const void **_b) return 0; } -/** Append buffer statistics to local file. */ +/** Write buffer statistics to $DATADIR/stats/buffer-stats. */ void -dump_buffer_stats(void) +rep_hist_buffer_stats_write(time_t now) { - time_t now = time(NULL); - char *filename; + char *statsdir = NULL, *filename = NULL; char written[ISO_TIME_LEN+1]; open_file_t *open_file = NULL; FILE *out; @@ -2686,7 +2695,7 @@ dump_buffer_stats(void) circuit_t *circ; /* add current circuits to stats */ for (circ = _circuit_get_global_list(); circ; circ = circ->next) - add_circ_to_buffer_stats(circ, now); + rep_hist_buffer_stats_add_circ(circ, now); /* calculate deciles */ memset(processed_cells, 0, SHARES * sizeof(int)); memset(circs_in_share, 0, SHARES * sizeof(int)); @@ -2711,14 +2720,17 @@ dump_buffer_stats(void) stat, tor_free(stat)); smartlist_clear(circuits_for_buffer_stats); /* write to file */ - filename = get_datadir_fname("buffer-stats"); + statsdir = get_datadir_fname("stats"); + if (check_private_dir(statsdir, CPD_CREATE) < 0) + goto done; + filename = get_datadir_fname("stats"PATH_SEPARATOR"buffer-stats"); out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND, 0600, &open_file); if (!out) goto done; format_iso_time(written, now); - if (fprintf(out, "written %s (%d s)\n", written, - DUMP_BUFFER_STATS_INTERVAL) < 0) + if (fprintf(out, "cell-stats-end %s (%d s)\n", written, + (unsigned) (now - start_of_buffer_stats_interval)) < 0) goto done; for (i = 0; i < SHARES; i++) { tor_snprintf(buf, sizeof(buf), "%d", !circs_in_share[i] ? 
0 : @@ -2726,7 +2738,7 @@ dump_buffer_stats(void) smartlist_add(str_build, tor_strdup(buf)); } str = smartlist_join_strings(str_build, ",", 0, NULL); - if (fprintf(out, "processed-cells %s\n", str) < 0) + if (fprintf(out, "cell-processed-cells %s\n", str) < 0) goto done; tor_free(str); SMARTLIST_FOREACH(str_build, char *, c, tor_free(c)); @@ -2737,7 +2749,7 @@ dump_buffer_stats(void) smartlist_add(str_build, tor_strdup(buf)); } str = smartlist_join_strings(str_build, ",", 0, NULL); - if (fprintf(out, "queued-cells %s\n", str) < 0) + if (fprintf(out, "cell-queued-cells %s\n", str) < 0) goto done; tor_free(str); SMARTLIST_FOREACH(str_build, char *, c, tor_free(c)); @@ -2748,13 +2760,13 @@ dump_buffer_stats(void) smartlist_add(str_build, tor_strdup(buf)); } str = smartlist_join_strings(str_build, ",", 0, NULL); - if (fprintf(out, "time-in-queue %s\n", str) < 0) + if (fprintf(out, "cell-time-in-queue %s\n", str) < 0) goto done; tor_free(str); SMARTLIST_FOREACH(str_build, char *, c, tor_free(c)); smartlist_free(str_build); str_build = NULL; - if (fprintf(out, "number-of-circuits-per-share %d\n", + if (fprintf(out, "cell-circuits-per-decile %d\n", (number_of_circuits + SHARES - 1) / SHARES) < 0) goto done; finish_writing_to_file(open_file); @@ -2763,6 +2775,7 @@ dump_buffer_stats(void) if (open_file) abort_writing_to_file(open_file); tor_free(filename); + tor_free(statsdir); if (str_build) { SMARTLIST_FOREACH(str_build, char *, c, tor_free(c)); smartlist_free(str_build); @@ -2770,5 +2783,4 @@ dump_buffer_stats(void) tor_free(str); #undef SHARES } -#endif diff --git a/src/or/router.c b/src/or/router.c index f2747f5141..ea7a430fa2 100644 --- a/src/or/router.c +++ b/src/or/router.c @@ -1269,6 +1269,7 @@ router_rebuild_descriptor(int force) uint32_t addr; char platform[256]; int hibernating = we_are_hibernating(); + size_t ei_size; or_options_t *options = get_options(); if (desc_clean_since && !force) @@ -1382,9 +1383,10 @@ router_rebuild_descriptor(int force) 
ei->cache_info.published_on = ri->cache_info.published_on; memcpy(ei->cache_info.identity_digest, ri->cache_info.identity_digest, DIGEST_LEN); - ei->cache_info.signed_descriptor_body = tor_malloc(8192); - if (extrainfo_dump_to_string(ei->cache_info.signed_descriptor_body, 8192, - ei, get_identity_key()) < 0) { + ei_size = options->ExtraInfoStatistics ? MAX_EXTRAINFO_UPLOAD_SIZE : 8192; + ei->cache_info.signed_descriptor_body = tor_malloc(ei_size); + if (extrainfo_dump_to_string(ei->cache_info.signed_descriptor_body, + ei_size, ei, get_identity_key()) < 0) { log_warn(LD_BUG, "Couldn't generate extra-info descriptor."); extrainfo_free(ei); return -1; @@ -1822,6 +1824,57 @@ router_dump_router_to_string(char *s, size_t maxlen, routerinfo_t *router, return (int)written+1; } +/** Load the contents of <b>filename</b>, find the last line starting with + * <b>end_line</b>, ensure that its timestamp is not more than 25 hours in + * the past or more than 1 hour in the future with respect to <b>now</b>, + * and write the file contents starting with that line to **<b>out</b>. + * Return 1 for success, 0 if the file does not exist or does not contain + * a line matching these criteria, or -1 for failure. */ +static int +load_stats_file(const char *filename, const char *end_line, time_t now, + char **out) +{ + int r = -1; + char *fname = get_datadir_fname(filename); + char *contents, *start = NULL, *tmp, timestr[ISO_TIME_LEN+1]; + time_t written; + switch (file_status(fname)) { + case FN_FILE: + /* X022 Find an alternative to reading the whole file to memory. 
*/ + if ((contents = read_file_to_str(fname, 0, NULL))) { + tmp = strstr(contents, end_line); + /* Find last block starting with end_line */ + while (tmp) { + start = tmp; + tmp = strstr(tmp + 1, end_line); + } + if (!start) + goto notfound; + if (strlen(start) < strlen(end_line) + 1 + sizeof(timestr)) + goto notfound; + strlcpy(timestr, start + 1 + strlen(end_line), sizeof(timestr)); + if (parse_iso_time(timestr, &written) < 0) + goto notfound; + if (written < now - (25*60*60) || written > now + (1*60*60)) + goto notfound; + *out = tor_strdup(start); + r = 1; + } + notfound: + tor_free(contents); + break; + case FN_NOENT: + r = 0; + break; + case FN_ERROR: + case FN_DIR: + default: + break; + } + tor_free(fname); + return r; +} + /** Write the contents of <b>extrainfo</b> to the <b>maxlen</b>-byte string * <b>s</b>, signing them with <b>ident_key</b>. Return 0 on success, * negative on failure. */ @@ -1836,6 +1889,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, char *bandwidth_usage; int result; size_t len; + static int write_stats_to_extrainfo = 1; base16_encode(identity, sizeof(identity), extrainfo->cache_info.identity_digest, DIGEST_LEN); @@ -1847,6 +1901,61 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, "published %s\n%s", extrainfo->nickname, identity, published, bandwidth_usage); + + if (options->ExtraInfoStatistics && write_stats_to_extrainfo) { + char *contents = NULL; + time_t since = time(NULL) - (24*60*60); + log_info(LD_GENERAL, "Adding stats to extra-info descriptor."); + if (options->DirReqStatistics && + load_stats_file("stats"PATH_SEPARATOR"dirreq-stats", + "dirreq-stats-end", since, &contents) > 0) { + int pos = strlen(s); + if (strlcpy(s + pos, contents, maxlen - strlen(s)) != + strlen(contents)) { + log_warn(LD_DIR, "Could not write dirreq-stats to extra-info " + "descriptor."); + s[pos] = '\0'; + } + tor_free(contents); + } + if (options->EntryStatistics && + 
load_stats_file("stats"PATH_SEPARATOR"entry-stats", + "entry-stats-end", since, &contents) > 0) { + int pos = strlen(s); + if (strlcpy(s + pos, contents, maxlen - strlen(s)) != + strlen(contents)) { + log_warn(LD_DIR, "Could not write entry-stats to extra-info " + "descriptor."); + s[pos] = '\0'; + } + tor_free(contents); + } + if (options->CellStatistics && + load_stats_file("stats"PATH_SEPARATOR"buffer-stats", + "cell-stats-end", since, &contents) > 0) { + int pos = strlen(s); + if (strlcpy(s + pos, contents, maxlen - strlen(s)) != + strlen(contents)) { + log_warn(LD_DIR, "Could not write buffer-stats to extra-info " + "descriptor."); + s[pos] = '\0'; + } + tor_free(contents); + } + if (options->ExitPortStatistics && + load_stats_file("stats"PATH_SEPARATOR"exit-stats", + "exit-stats-end", since, &contents) > 0) { + int pos = strlen(s); + if (strlcpy(s + pos, contents, maxlen - strlen(s)) != + strlen(contents)) { + log_warn(LD_DIR, "Could not write exit-stats to extra-info " + "descriptor."); + s[pos] = '\0'; + } + tor_free(contents); + } + } + tor_free(bandwidth_usage); if (result<0) return -1; @@ -1875,7 +1984,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, if (router_append_dirobj_signature(s+len, maxlen-len, digest, ident_key)<0) return -1; -#ifdef DEBUG_ROUTER_DUMP_ROUTER_TO_STRING { char *cp, *s_dup; extrainfo_t *ei_tmp; @@ -1890,7 +1998,24 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, tor_free(s_dup); extrainfo_free(ei_tmp); } -#endif + + if (options->ExtraInfoStatistics && write_stats_to_extrainfo) { + char *cp, *s_dup; + extrainfo_t *ei_tmp; + cp = s_dup = tor_strdup(s); + ei_tmp = extrainfo_parse_entry_from_string(cp, NULL, 1, NULL); + if (!ei_tmp) { + log_warn(LD_GENERAL, + "We just generated an extra-info descriptor with " + "statistics that we can't parse. Not adding statistics to " + "this or any future extra-info descriptors. 
Descriptor " + "was:\n%s", s); + write_stats_to_extrainfo = 0; + extrainfo_dump_to_string(s, maxlen, extrainfo, ident_key); + } + tor_free(s_dup); + extrainfo_free(ei_tmp); + } return (int)strlen(s)+1; } @@ -1905,13 +2030,9 @@ char * extrainfo_get_client_geoip_summary(time_t now) { static time_t last_purged_at = 0; - int geoip_purge_interval = 48*60*60; -#ifdef ENABLE_DIRREQ_STATS - geoip_purge_interval = DIR_RECORD_USAGE_RETAIN_IPS; -#endif -#ifdef ENABLE_ENTRY_STATS - geoip_purge_interval = ENTRY_RECORD_USAGE_RETAIN_IPS; -#endif + int geoip_purge_interval = + (get_options()->DirReqStatistics || get_options()->EntryStatistics) ? + DIR_ENTRY_RECORD_USAGE_RETAIN_IPS : 48*60*60; if (now > last_purged_at+geoip_purge_interval) { /* (Note that this also discards items in the client history with * action GEOIP_CLIENT_NETWORKSTATUS{_V2}, which doesn't matter @@ -1920,7 +2041,7 @@ extrainfo_get_client_geoip_summary(time_t now) geoip_remove_old_clients(now-geoip_purge_interval); last_purged_at = now; } - return geoip_get_client_history(now, GEOIP_CLIENT_CONNECT); + return geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT); } /** Return true iff <b>s</b> is a legally valid server nickname. 
*/ diff --git a/src/or/routerparse.c b/src/or/routerparse.c index 9736c4e000..8fecd9ab47 100644 --- a/src/or/routerparse.c +++ b/src/or/routerparse.c @@ -62,6 +62,31 @@ typedef enum { K_HIDDEN_SERVICE_DIR, K_ALLOW_SINGLE_HOP_EXITS, + K_DIRREQ_END, + K_DIRREQ_V2_IPS, + K_DIRREQ_V3_IPS, + K_DIRREQ_V2_REQS, + K_DIRREQ_V3_REQS, + K_DIRREQ_V2_SHARE, + K_DIRREQ_V3_SHARE, + K_DIRREQ_V2_RESP, + K_DIRREQ_V3_RESP, + K_DIRREQ_V2_DIR, + K_DIRREQ_V3_DIR, + K_DIRREQ_V2_TUN, + K_DIRREQ_V3_TUN, + K_ENTRY_END, + K_ENTRY_IPS, + K_CELL_END, + K_CELL_PROCESSED, + K_CELL_QUEUED, + K_CELL_TIME, + K_CELL_CIRCS, + K_EXIT_END, + K_EXIT_WRITTEN, + K_EXIT_READ, + K_EXIT_OPENED, + K_DIR_KEY_CERTIFICATE_VERSION, K_DIR_IDENTITY_KEY, K_DIR_KEY_PUBLISHED, @@ -257,6 +282,31 @@ static token_rule_t extrainfo_token_table[] = { T0N("opt", K_OPT, CONCAT_ARGS, OBJ_OK ), T01("read-history", K_READ_HISTORY, ARGS, NO_OBJ ), T01("write-history", K_WRITE_HISTORY, ARGS, NO_OBJ ), + T01("dirreq-stats-end", K_DIRREQ_END, ARGS, NO_OBJ ), + T01("dirreq-v2-ips", K_DIRREQ_V2_IPS, ARGS, NO_OBJ ), + T01("dirreq-v3-ips", K_DIRREQ_V3_IPS, ARGS, NO_OBJ ), + T01("dirreq-v2-reqs", K_DIRREQ_V2_REQS, ARGS, NO_OBJ ), + T01("dirreq-v3-reqs", K_DIRREQ_V3_REQS, ARGS, NO_OBJ ), + T01("dirreq-v2-share", K_DIRREQ_V2_SHARE, ARGS, NO_OBJ ), + T01("dirreq-v3-share", K_DIRREQ_V3_SHARE, ARGS, NO_OBJ ), + T01("dirreq-v2-resp", K_DIRREQ_V2_RESP, ARGS, NO_OBJ ), + T01("dirreq-v3-resp", K_DIRREQ_V3_RESP, ARGS, NO_OBJ ), + T01("dirreq-v2-direct-dl", K_DIRREQ_V2_DIR, ARGS, NO_OBJ ), + T01("dirreq-v3-direct-dl", K_DIRREQ_V3_DIR, ARGS, NO_OBJ ), + T01("dirreq-v2-tunneled-dl", K_DIRREQ_V2_TUN, ARGS, NO_OBJ ), + T01("dirreq-v3-tunneled-dl", K_DIRREQ_V3_TUN, ARGS, NO_OBJ ), + T01("entry-stats-end", K_ENTRY_END, ARGS, NO_OBJ ), + T01("entry-ips", K_ENTRY_IPS, ARGS, NO_OBJ ), + T01("cell-stats-end", K_CELL_END, ARGS, NO_OBJ ), + T01("cell-processed-cells", K_CELL_PROCESSED, ARGS, NO_OBJ ), + T01("cell-queued-cells", K_CELL_QUEUED, ARGS, NO_OBJ ), + 
T01("cell-time-in-queue", K_CELL_TIME, ARGS, NO_OBJ ), + T01("cell-circuits-per-decile", K_CELL_CIRCS, ARGS, NO_OBJ ), + T01("exit-stats-end", K_EXIT_END, ARGS, NO_OBJ ), + T01("exit-kibibytes-written", K_EXIT_WRITTEN, ARGS, NO_OBJ ), + T01("exit-kibibytes-read", K_EXIT_READ, ARGS, NO_OBJ ), + T01("exit-streams-opened", K_EXIT_OPENED, ARGS, NO_OBJ ), + T1_START( "extra-info", K_EXTRA_INFO, GE(2), NO_OBJ ), END_OF_TABLE diff --git a/src/or/test.c b/src/or/test.c index 0f121a84bb..d4afdeeb3c 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -4774,14 +4774,16 @@ test_geoip(void) /* and 17 observations in ZZ... */ for (i=110; i < 127; ++i) geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now); - s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT); + s = geoip_get_client_history_bridge(now+5*24*60*60, + GEOIP_CLIENT_CONNECT); test_assert(s); test_streq("zz=24,ab=16,xy=8", s); tor_free(s); /* Now clear out all the AB observations. */ geoip_remove_old_clients(now-6000); - s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT); + s = geoip_get_client_history_bridge(now+5*24*60*60, + GEOIP_CLIENT_CONNECT); test_assert(s); test_streq("zz=24,xy=8", s); |