summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2008-06-10 18:08:56 +0000
committerNick Mathewson <nickm@torproject.org>2008-06-10 18:08:56 +0000
commitd0a4ad3a1cda334f5213b5abdd4d15483d3eb805 (patch)
tree262dd58b46d1760983d9759b58150bc90ee00d49
parentf4e12fa66db7d3b1d621aaf3396c080aac573a02 (diff)
downloadtor-d0a4ad3a1cda334f5213b5abdd4d15483d3eb805.tar.gz
tor-d0a4ad3a1cda334f5213b5abdd4d15483d3eb805.zip
r16127@tombo: nickm | 2008-06-10 14:03:01 -0400
Improved code for counting clients by country: support recording by number of directory status requests in addition to number of IPs seen. svn:r15097
-rw-r--r--ChangeLog3
-rw-r--r--src/common/util.c8
-rw-r--r--src/or/geoip.c130
-rw-r--r--src/or/or.h1
4 files changed, 121 insertions, 21 deletions
diff --git a/ChangeLog b/ChangeLog
index 0735c3fb88..ba742486e3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -107,7 +107,8 @@ Changes in version 0.2.1.1-alpha - 2008-??-??
- Allow comments in geoip file.
- New configure/torrc options (--enable-geoip-stats,
DirRecordUsageByCountry) to record how many IPs we've served directory
- info to in each country code.
+ info to in each country code, and how many status documents total
+ we've sent to each country code.
- Never use OpenSSL compression: it wastes RAM and CPU trying to
compress cells, which are basically all encrypted, compressed, or
both.
diff --git a/src/common/util.c b/src/common/util.c
index a048779872..17962fc723 100644
--- a/src/common/util.c
+++ b/src/common/util.c
@@ -1604,8 +1604,8 @@ fdopen_file(open_file_t *file_data)
return file_data->stdio_file;
tor_assert(file_data->fd >= 0);
if (!(file_data->stdio_file = fdopen(file_data->fd, "a"))) {
- log_warn(LD_FS, "Couldn't fdopen \"%s\": %s", file_data->filename,
- strerror(errno));
+ log_warn(LD_FS, "Couldn't fdopen \"%s\" [%d]: %s", file_data->filename,
+ file_data->fd, strerror(errno));
}
return file_data->stdio_file;
}
@@ -1619,8 +1619,10 @@ start_writing_to_stdio_file(const char *fname, int open_flags, int mode,
FILE *res;
if (start_writing_to_file(fname, open_flags, mode, data_out)<0)
return NULL;
- if (!(res = fdopen_file(*data_out)))
+ if (!(res = fdopen_file(*data_out))) {
abort_writing_to_file(*data_out);
+ *data_out = NULL;
+ }
return res;
}
diff --git a/src/or/geoip.c b/src/or/geoip.c
index 241281317a..c3bde4f226 100644
--- a/src/or/geoip.c
+++ b/src/or/geoip.c
@@ -23,7 +23,17 @@ typedef struct geoip_entry_t {
intptr_t country; /**< An index into geoip_countries */
} geoip_entry_t;
-/** A list of lowercased two-letter country codes. */
+/** DOCDOC */
+#define REQUEST_HIST_LEN 3
+#define REQUEST_HIST_PERIOD (8*60*60)
+
+typedef struct geoip_country_t {
+ char countrycode[3];
+ uint32_t n_v2_ns_requests[REQUEST_HIST_LEN];
+ uint32_t n_v3_ns_requests[REQUEST_HIST_LEN];
+} geoip_country_t;
+
+/** A list of geoip_country_t */
static smartlist_t *geoip_countries = NULL;
/** A map from lowercased country codes to their position in geoip_countries.
* The index is encoded in the pointer, and 1 is added so that NULL can mean
@@ -48,15 +58,19 @@ geoip_add_entry(uint32_t low, uint32_t high, const char *country)
_idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
if (!_idxplus1) {
- char *c = tor_strdup(country);
- tor_strlower(c);
+ geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
+ strlcpy(c->countrycode, country, sizeof(c->countrycode));
+ tor_strlower(c->countrycode);
smartlist_add(geoip_countries, c);
idx = smartlist_len(geoip_countries) - 1;
strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
} else {
idx = ((uintptr_t)_idxplus1)-1;
}
- tor_assert(!strcasecmp(smartlist_get(geoip_countries, idx), country));
+ {
+ geoip_country_t *c = smartlist_get(geoip_countries, idx);
+ tor_assert(!strcasecmp(c->countrycode, country));
+ }
ent = tor_malloc_zero(sizeof(geoip_entry_t));
ent->ip_low = low;
ent->ip_high = high;
@@ -198,9 +212,10 @@ geoip_get_n_countries(void)
const char *
geoip_get_country_name(int num)
{
- if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries))
- return smartlist_get(geoip_countries, num);
- else
+ if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
+ geoip_country_t *c = smartlist_get(geoip_countries, num);
+ return c->countrycode;
+ } else
return "??";
}
@@ -226,9 +241,13 @@ typedef struct clientmap_entry_t {
/** Map from client IP address to last time seen. */
static HT_HEAD(clientmap, clientmap_entry_t) client_history =
HT_INITIALIZER();
-/** Time at which we started tracking client history. */
+/** Time at which we started tracking client IP history. */
static time_t client_history_starts = 0;
+/** DOCDOC */
+static time_t current_request_period_starts = 0;
+static int n_old_request_periods = 0;
+
/** Hashtable helper: compute a hash of a clientmap_entry_t. */
static INLINE unsigned
clientmap_entry_hash(const clientmap_entry_t *a)
@@ -268,8 +287,23 @@ geoip_note_client_seen(geoip_client_action_t action,
#endif
}
+ /* DOCDOC */
+ while (current_request_period_starts + REQUEST_HIST_PERIOD >= now) {
+ SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
+ memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1],
+ sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
+ memmove(&c->n_v3_ns_requests[0], &c->n_v3_ns_requests[1],
+ sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
+ c->n_v2_ns_requests[REQUEST_HIST_LEN-1] = 0;
+ c->n_v3_ns_requests[REQUEST_HIST_LEN-1] = 0;
+ });
+ current_request_period_starts += REQUEST_HIST_PERIOD;
+ if (n_old_request_periods < REQUEST_HIST_PERIOD-1)
+ ++n_old_request_periods;
+ }
+
/* We use the low 3 bits of the time to encode the action. Since we're
- * potentially remembering times of clients, we don't want to make
+ * potentially remembering tons of clients, we don't want to make
* clientmap_entry_t larger than it has to be. */
now = (now & ~ACTION_MASK) | (((int)action) & ACTION_MASK);
lookup.ipaddr = addr;
@@ -282,8 +316,23 @@ geoip_note_client_seen(geoip_client_action_t action,
ent->last_seen = now;
HT_INSERT(clientmap, &client_history, ent);
}
- if (!client_history_starts)
+
+ if (action == GEOIP_CLIENT_NETWORKSTATUS ||
+ action == GEOIP_CLIENT_NETWORKSTATUS_V2) {
+ int country_idx = geoip_get_country_by_ip(addr);
+ if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) {
+ geoip_country_t *country = smartlist_get(geoip_countries, country_idx);
+ if (action == GEOIP_CLIENT_NETWORKSTATUS)
+ ++country->n_v3_ns_requests[REQUEST_HIST_LEN-1];
+ else
+ ++country->n_v2_ns_requests[REQUEST_HIST_LEN-1];
+ }
+ }
+
+ if (!client_history_starts) {
client_history_starts = now;
+ current_request_period_starts = now;
+ }
}
/** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
@@ -350,6 +399,9 @@ _c_hist_compare(const void **_a, const void **_b)
return strcmp(a->country, b->country);
}
+/*DOCDOC*/
+#define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
+
/** Return a newly allocated comma-separated string containing entries for all
* the countries from which we've seen enough clients connect. The entry
* format is cc=num where num is the number of IPs we've seen connecting from
@@ -361,7 +413,7 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
char *result = NULL;
if (!geoip_is_loaded())
return NULL;
- if (client_history_starts < (now - 12*60*60)) {
+ if (client_history_starts < (now - GEOIP_MIN_OBSERVATION_TIME)) {
char buf[32];
smartlist_t *chunks = NULL;
smartlist_t *entries = NULL;
@@ -435,11 +487,43 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
return result;
}
+ /**DOCDOC*/
+char *
+geoip_get_request_history(time_t now, geoip_client_action_t action)
+{
+ smartlist_t *entries;
+ char *result;
+ if (client_history_starts >= (now - GEOIP_MIN_OBSERVATION_TIME))
+ return NULL;
+ if (action != GEOIP_CLIENT_NETWORKSTATUS &&
+ action != GEOIP_CLIENT_NETWORKSTATUS_V2)
+ return NULL;
+ if (!geoip_countries)
+ return NULL;
+ entries = smartlist_create();
+ SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
+ uint32_t *n = (action == GEOIP_CLIENT_NETWORKSTATUS)
+ ? c->n_v3_ns_requests : c->n_v2_ns_requests;
+ uint32_t tot = 0;
+ int i;
+ char buf[32];
+ for (i=0; i < REQUEST_HIST_LEN; ++i)
+ tot += n[i];
+ tor_snprintf(buf, sizeof(buf), "%s=%ld", c->countrycode, (long)n);
+ smartlist_add(entries, tor_strdup(buf));
+ });
+ smartlist_sort_strings(entries);
+ result = smartlist_join_strings(entries, ",", 0, NULL);
+ SMARTLIST_FOREACH(entries, char *, cp, tor_free(cp));
+ return result;
+}
+
void
dump_geoip_stats(void)
{
#ifdef ENABLE_GEOIP_STATS
time_t now = time(NULL);
+ time_t request_start;
char *filename = get_datadir_fname("geoip-stats");
char *data_v2 = NULL, *data_v3 = NULL;
char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
@@ -450,13 +534,25 @@ dump_geoip_stats(void)
data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
format_iso_time(since, geoip_get_history_start());
format_iso_time(written, now);
- if (!data_v2 || !data_v3)
- goto done;
- out = start_writing_to_stdio_file(filename, 0, 0600, &open_file);
+ out = start_writing_to_stdio_file(filename, OPEN_FLAGS_REPLACE,
+ 0600, &open_file);
if (!out)
goto done;
- if (fprintf(out, "written %s\nstarted-at %s\nns %s\nns-v2%s\n",
- written, since, data_v3, data_v2) < 0)
+ if (fprintf(out, "written %s\nstarted-at %s\nns-ips %s\nns-v2-ips%s\n",
+ written, since,
+ data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
+ goto done;
+ tor_free(data_v2);
+ tor_free(data_v3);
+
+ request_start = current_request_period_starts -
+ (n_old_request_periods * REQUEST_HIST_PERIOD);
+ format_iso_time(since, request_start);
+ data_v2 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
+ data_v3 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS);
+ if (fprintf(out, "requests-start %s\nn-ns-reqs %s\nn-v2-ns_reqs%s\n",
+ since,
+ data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
goto done;
finish_writing_to_file(open_file);
@@ -495,7 +591,7 @@ static void
clear_geoip_db(void)
{
if (geoip_countries) {
- SMARTLIST_FOREACH(geoip_countries, char *, cp, tor_free(cp));
+ SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
smartlist_free(geoip_countries);
}
if (country_idxplus1_by_lc_code)
diff --git a/src/or/or.h b/src/or/or.h
index 4ef2ed1cf4..3b7c6b5912 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -3342,6 +3342,7 @@ void geoip_note_client_seen(geoip_client_action_t action,
void geoip_remove_old_clients(time_t cutoff);
time_t geoip_get_history_start(void);
char *geoip_get_client_history(time_t now, geoip_client_action_t action);
+char *geoip_get_request_history(time_t now, geoip_client_action_t action);
int getinfo_helper_geoip(control_connection_t *control_conn,
const char *question, char **answer);
void geoip_free_all(void);