summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoger Dingledine <arma@torproject.org>2004-12-05 07:10:08 +0000
committerRoger Dingledine <arma@torproject.org>2004-12-05 07:10:08 +0000
commitef6c9d18e799e5b02505ba73bbf36bfe92ce5a8b (patch)
tree24e864902bc7196fb3e2ca1a39fc7463c2582d1c
parent32e74d352500dc228a1de5d5bc97e219897ef09b (diff)
downloadtor-ef6c9d18e799e5b02505ba73bbf36bfe92ce5a8b.tar.gz
tor-ef6c9d18e799e5b02505ba73bbf36bfe92ce5a8b.zip
New circuit building strategy: keep a list of ports that we've used in the past 6 hours, and always try to have 2 circuits open or on the way
that will handle each such port. (We can extend this to include addresses if exit policies shift to require that.) Seed us with port 80 so web browsers won't complain that Tor is "slow to start up". This was necessary because our old circuit building strategy just involved counting circuits, and as time went by we would build up a big pile of circuits that had peculiar exit policies (e.g. only exit to 9001-9100) which would take up space in the circuit pile but never get used. Fix router_compare_addr_to_addr_policy: it was not treating a port of * as always matching, so we were picking reject *:* nodes as exit nodes too. If you haven't used a clean circuit in an hour, throw it away, just to be on the safe side. This means after 6 hours a totally unused Tor client will have no circuits open. svn:r3078
-rw-r--r--src/or/circuitbuild.c87
-rw-r--r--src/or/circuituse.c70
-rw-r--r--src/or/connection_edge.c3
-rw-r--r--src/or/directory.c4
-rw-r--r--src/or/or.h6
-rw-r--r--src/or/rephist.c77
-rw-r--r--src/or/routerlist.c24
7 files changed, 228 insertions, 43 deletions
diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c
index 43dccf7dbb..373f1c78a8 100644
--- a/src/or/circuitbuild.c
+++ b/src/or/circuitbuild.c
@@ -787,6 +787,56 @@ static int new_route_len(double cw, uint8_t purpose, smartlist_t *routers) {
return routelen;
}
+/** Build the list of predicted ports that no open or in-progress circuit
+ * can serve yet.  The space-separated port string for time <b>now</b> is
+ * split into a freshly created smartlist of strings, entries that are
+ * already handled are dropped, and what is left is returned.  The list and
+ * the strings remaining in it are handed to the caller.
+ */
+static smartlist_t *
+circuit_get_unhandled_ports(time_t now) {
+  smartlist_t *unhandled;
+  char *port_text;
+
+  port_text = rep_hist_get_predicted_ports(now);
+  unhandled = smartlist_create();
+  smartlist_split_string(unhandled, port_text, " ",
+                         SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0);
+  tor_free(port_text); /* the split kept its own copies of the pieces */
+
+  circuit_remove_handled_ports(unhandled);
+  return unhandled;
+}
+
+/** Return 1 if we already have circuits present or on the way for
+ * all ports predicted as of time <b>now</b>. Return 0 if we should make
+ * more.
+ */
+int
+circuit_all_predicted_ports_handled(time_t now) {
+  int i;
+  int enough;
+  char *portstring;
+  smartlist_t *sl = circuit_get_unhandled_ports(now);
+
+  enough = (smartlist_len(sl) == 0);
+  /* The entries still in the list are allocated strings that nobody else
+   * refers to; release each one before dropping the list itself, otherwise
+   * they leak every time some port is still unhandled. */
+  for (i = 0; i < smartlist_len(sl); ++i) {
+    portstring = smartlist_get(sl, i);
+    tor_free(portstring);
+  }
+  smartlist_free(sl);
+  return enough;
+}
+
+/** Return 1 if <b>router</b> can handle one or more of the ports in
+ * <b>needed_ports</b>, else return 0.  <b>needed_ports</b> is a smartlist
+ * of decimal port strings; entries that do not parse to a port in
+ * 1..65535 are skipped.
+ */
+static int
+router_handles_some_port(routerinfo_t *router, smartlist_t *needed_ports) {
+  int i;
+  long port;
+
+  for (i = 0; i < smartlist_len(needed_ports); ++i) {
+    /* The elements are text, not uint16_t objects, so convert them
+     * instead of reinterpreting their first two bytes as a number. */
+    port = strtol(smartlist_get(needed_ports, i), NULL, 10);
+    if (port < 1 || port > 65535)
+      continue; /* not a usable port number, so nothing can satisfy it */
+    if (router_compare_addr_to_addr_policy(0, (uint16_t)port,
+                                           router->exit_policy) !=
+        ADDR_POLICY_REJECTED)
+      return 1;
+  }
+  return 0;
+}
+
+/** Minimum number of general-purpose circuits that must be able to carry
+ * a pending stream before the stream counts as being handled; passed as
+ * the <b>min</b> argument of circuit_stream_is_being_handled().
+ */
+#define MIN_CIRCUITS_HANDLING_STREAM 2
+
/** Return a pointer to a suitable router to be the exit node for the
* general-purpose circuit we're about to build.
*
@@ -820,7 +870,7 @@ static routerinfo_t *choose_good_exit_server_general(routerlist_t *dir)
if (carray[i]->type == CONN_TYPE_AP &&
carray[i]->state == AP_CONN_STATE_CIRCUIT_WAIT &&
!carray[i]->marked_for_close &&
- !circuit_stream_is_being_handled(carray[i]))
+ !circuit_stream_is_being_handled(carray[i], 0, MIN_CIRCUITS_HANDLING_STREAM))
++n_pending_connections;
}
// log_fn(LOG_DEBUG, "Choosing exit node; %d connections are pending",
@@ -873,7 +923,7 @@ static routerinfo_t *choose_good_exit_server_general(routerlist_t *dir)
if (carray[j]->type != CONN_TYPE_AP ||
carray[j]->state != AP_CONN_STATE_CIRCUIT_WAIT ||
carray[j]->marked_for_close ||
- circuit_stream_is_being_handled(carray[j]))
+ circuit_stream_is_being_handled(carray[j], 0, MIN_CIRCUITS_HANDLING_STREAM))
continue; /* Skip everything but APs in CIRCUIT_WAIT */
if (connection_ap_can_use_exit(carray[j], router)) {
++n_supported[i];
@@ -920,18 +970,35 @@ static routerinfo_t *choose_good_exit_server_general(routerlist_t *dir)
router = routerlist_sl_choose_by_bandwidth(sl);
} else {
/* Either there are no pending connections, or no routers even seem to
- * possibly support any of them. Choose a router at random. */
+ * possibly support any of them. Choose a router at random that satisfies
+ * at least one predicted exit port. */
+
+ int try;
+ smartlist_t *needed_ports = circuit_get_unhandled_ports(time(NULL));
+
if (best_support == -1) {
log(LOG_WARN, "All routers are down or middleman -- choosing a doomed exit at random.");
}
- for (i = 0; i < smartlist_len(dir->routers); i++)
- if (n_supported[i] != -1)
- smartlist_add(sl, smartlist_get(dir->routers, i));
+ for (try = 0; try < 2; try++) {
+ /* try once to pick only from routers that satisfy a needed port,
+ * then if there are none, pick from any that support exiting. */
+ for (i = 0; i < smartlist_len(dir->routers); i++) {
+ router = smartlist_get(dir->routers, i);
+ if (n_supported[i] != -1 &&
+ (try || router_handles_some_port(router, needed_ports))) {
+ log_fn(LOG_DEBUG,"Try %d: '%s' is a possibility.", try, router->nickname);
+ smartlist_add(sl, router);
+ }
+ }
- smartlist_subtract(sl,excludedexits);
- if (options->StrictExitNodes || smartlist_overlap(sl,preferredexits))
- smartlist_intersect(sl,preferredexits);
- router = routerlist_sl_choose_by_bandwidth(sl);
+ smartlist_subtract(sl,excludedexits);
+ if (options->StrictExitNodes || smartlist_overlap(sl,preferredexits))
+ smartlist_intersect(sl,preferredexits);
+ router = routerlist_sl_choose_by_bandwidth(sl);
+ if (router)
+ break;
+ }
+ smartlist_free(needed_ports);
}
smartlist_free(preferredexits);
diff --git a/src/or/circuituse.c b/src/or/circuituse.c
index 64604679da..e316522d0f 100644
--- a/src/or/circuituse.c
+++ b/src/or/circuituse.c
@@ -249,16 +249,34 @@ void circuit_expire_building(time_t now) {
}
}
-/** How many circuits do we want simultaneously in-progress to handle
- * a given stream?
+/** Remove any elements in <b>needed_ports</b> that are handled by an
+ * open or in-progress circuit.
*/
-#define MIN_CIRCUITS_HANDLING_STREAM 2
+void
+circuit_remove_handled_ports(smartlist_t *needed_ports) {
+  /* needed_ports holds allocated decimal port strings.  Every entry that
+   * is removed here is also freed here. */
+  int i;
+  long port;
+  char *portstring;
+
+  for (i = 0; i < smartlist_len(needed_ports); ++i) {
+    portstring = smartlist_get(needed_ports, i);
+    /* Convert the text to a number; reading the string's first two bytes
+     * as a uint16_t would yield an unrelated value. */
+    port = strtol(portstring, NULL, 10);
+    if (port < 1 || port > 65535) {
+      /* Not a port at all: no circuit can be built for it, so drop it. */
+      log_fn(LOG_WARN,"Ignoring malformed predicted port entry.");
+      smartlist_del(needed_ports, i--);
+      tor_free(portstring);
+      continue;
+    }
+    if (circuit_stream_is_being_handled(NULL, (uint16_t)port, 2)) {
+      /* Step i back so whatever now occupies this slot is examined too. */
+      smartlist_del(needed_ports, i--);
+      tor_free(portstring);
+    } else {
+      log_fn(LOG_DEBUG,"Port %d is not handled.", (int)port);
+    }
+  }
+}
-/** Return 1 if at least MIN_CIRCUITS_HANDLING_STREAM non-open
- * general-purpose circuits will have an acceptable exit node for
- * conn. Else return 0.
+/** Return 1 if at least <b>min</b> general-purpose circuits will have
+ * an acceptable exit node for conn if conn is defined, else for "*:port".
+ * Else return 0.
*/
-int circuit_stream_is_being_handled(connection_t *conn) {
+int circuit_stream_is_being_handled(connection_t *conn, uint16_t port, int min) {
circuit_t *circ;
routerinfo_t *exitrouter;
int num=0;
@@ -266,15 +284,19 @@ int circuit_stream_is_being_handled(connection_t *conn) {
for (circ=global_circuitlist;circ;circ = circ->next) {
if (CIRCUIT_IS_ORIGIN(circ) &&
- circ->state != CIRCUIT_STATE_OPEN &&
!circ->marked_for_close &&
circ->purpose == CIRCUIT_PURPOSE_C_GENERAL &&
(!circ->timestamp_dirty ||
circ->timestamp_dirty + get_options()->NewCircuitPeriod < now)) {
exitrouter = router_get_by_digest(circ->build_state->chosen_exit_digest);
- if (exitrouter && connection_ap_can_use_exit(conn, exitrouter))
- if (++num >= MIN_CIRCUITS_HANDLING_STREAM)
+ if (exitrouter &&
+ ((conn && connection_ap_can_use_exit(conn, exitrouter)) ||
+ (!conn &&
+ router_compare_addr_to_addr_policy(0, port, exitrouter->exit_policy) !=
+ ADDR_POLICY_REJECTED))) {
+ if (++num >= min)
return 1;
+ }
}
}
return 0;
@@ -316,6 +338,7 @@ void circuit_build_needed_circs(time_t now) {
}
}
+#if 0
/** How many simultaneous in-progress general-purpose circuits do we
* want to be building at once, if there are no open general-purpose
* circuits?
@@ -327,6 +350,14 @@ void circuit_build_needed_circs(time_t now) {
< CIRCUIT_MIN_BUILDING_GENERAL) {
circuit_launch_by_identity(CIRCUIT_PURPOSE_C_GENERAL, NULL);
}
+#endif
+
+ /* if we know of a port that's been requested recently and no
+ * circuit is currently available that can handle it, start one
+ * for that too. */
+ if (!circuit_all_predicted_ports_handled(now)) {
+ circuit_launch_by_identity(CIRCUIT_PURPOSE_C_GENERAL, NULL);
+ }
/* XXX count idle rendezvous circs and build more */
}
@@ -471,12 +502,19 @@ circuit_expire_old_circuits(void)
} else if (!circ->timestamp_dirty && CIRCUIT_IS_ORIGIN(circ) &&
circ->state == CIRCUIT_STATE_OPEN &&
circ->purpose == CIRCUIT_PURPOSE_C_GENERAL) {
- /* Also, gather a list of open unused general circuits that we created.
- * Because we add elements to the front of global_circuitlist,
- * the last elements of unused_open_circs will be the oldest
- * ones.
- */
- smartlist_add(unused_open_circs, circ);
+#define CIRCUIT_UNUSED_CIRC_TIMEOUT 3600 /* an hour */
+ if (circ->timestamp_created + CIRCUIT_UNUSED_CIRC_TIMEOUT < now) {
+ log_fn(LOG_DEBUG,"Closing circuit that has been unused for %d seconds.",
+ (int)(now - circ->timestamp_created));
+ circuit_mark_for_close(circ);
+ } else {
+ /* Also, gather a list of open unused general circuits that we created.
+ * Because we add elements to the front of global_circuitlist,
+ * the last elements of unused_open_circs will be the oldest
+ * ones.
+ */
+ smartlist_add(unused_open_circs, circ);
+ }
}
}
for (i = MAX_UNUSED_OPEN_CIRCUITS; i < smartlist_len(unused_open_circs); ++i) {
diff --git a/src/or/connection_edge.c b/src/or/connection_edge.c
index 1ef8242f5d..f25719a98a 100644
--- a/src/or/connection_edge.c
+++ b/src/or/connection_edge.c
@@ -405,6 +405,7 @@ static int connection_ap_handshake_process_socks(connection_t *conn) {
return -1;
}
conn->state = AP_CONN_STATE_CIRCUIT_WAIT;
+ rep_hist_note_used_port(socks->port, time(NULL)); /* help predict this next time */
return connection_ap_handshake_attach_circuit(conn);
} else {
/* it's a hidden-service request */
@@ -1016,7 +1017,7 @@ int connection_ap_can_use_exit(connection_t *conn, routerinfo_t *exit)
} else {
addr = client_dns_lookup_entry(conn->socks_request->address);
if (router_compare_addr_to_addr_policy(addr, conn->socks_request->port,
- exit->exit_policy) < 0)
+ exit->exit_policy) == ADDR_POLICY_REJECTED)
return 0;
}
return 1;
diff --git a/src/or/directory.c b/src/or/directory.c
index b58d3d35f7..66c30960ab 100644
--- a/src/or/directory.c
+++ b/src/or/directory.c
@@ -582,7 +582,7 @@ connection_dir_client_reached_eof(connection_t *conn)
if (conn->purpose == DIR_PURPOSE_FETCH_DIR) {
/* fetch/process the directory to learn about new routers. */
- log_fn(LOG_INFO,"Received directory (size %d):\n%s", (int)body_len, body);
+ log_fn(LOG_INFO,"Received directory (size %d)", (int)body_len);
if (status_code == 503 || body_len == 0) {
log_fn(LOG_INFO,"Empty directory. Ignoring.");
tor_free(body); tor_free(headers);
@@ -606,7 +606,7 @@ connection_dir_client_reached_eof(connection_t *conn)
running_routers_t *rrs;
routerlist_t *rl;
/* just update our list of running routers, if this list is new info */
- log_fn(LOG_INFO,"Received running-routers list (size %d):\n%s", (int)body_len, body);
+ log_fn(LOG_INFO,"Received running-routers list (size %d)", (int)body_len);
if (status_code != 200) {
log_fn(LOG_WARN,"Received http status code %d from dirserver. Failing.",
status_code);
diff --git a/src/or/or.h b/src/or/or.h
index a2fb27a75b..85d7d54dfc 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -1048,6 +1048,7 @@ int circuit_init_cpath_crypto(crypt_path_t *cpath, char *key_data, int reverse);
int circuit_finish_handshake(circuit_t *circ, char *reply);
int circuit_truncated(circuit_t *circ, crypt_path_t *layer);
int onionskin_answer(circuit_t *circ, unsigned char *payload, unsigned char *keys);
+int circuit_all_predicted_ports_handled(time_t now);
void onion_append_to_cpath(crypt_path_t **head_ptr, crypt_path_t *new_hop);
/********************************* circuitlist.c ***********************/
@@ -1082,7 +1083,8 @@ void assert_circuit_ok(const circuit_t *c);
/********************************* circuituse.c ************************/
void circuit_expire_building(time_t now);
-int circuit_stream_is_being_handled(connection_t *conn);
+void circuit_remove_handled_ports(smartlist_t *needed_ports);
+int circuit_stream_is_being_handled(connection_t *conn, uint16_t port, int min);
void circuit_build_needed_circs(time_t now);
void circuit_detach_stream(circuit_t *circ, connection_t *conn);
void circuit_about_to_close_connection(connection_t *conn);
@@ -1447,6 +1449,8 @@ void rep_hist_note_bytes_written(int num_bytes, time_t when);
int rep_hist_bandwidth_assess(void);
char *rep_hist_get_bandwidth_lines(void);
void rep_history_clean(time_t before);
+void rep_hist_note_used_port(uint16_t port, time_t now);
+char *rep_hist_get_predicted_ports(time_t now);
/********************************* rendclient.c ***************************/
diff --git a/src/or/rephist.c b/src/or/rephist.c
index 2f1c976f08..3095ad46ac 100644
--- a/src/or/rephist.c
+++ b/src/or/rephist.c
@@ -11,6 +11,7 @@ const char rephist_c_id[] = "$Id$";
#include "or.h"
static void bw_arrays_init(void);
+static void predicted_ports_init(void);
/** History of an OR-\>OR link. */
typedef struct link_history_t {
@@ -133,6 +134,7 @@ void rep_hist_init(void)
{
history_map = strmap_new();
bw_arrays_init();
+ predicted_ports_init();
}
/** Remember that an attempt to connect to the OR with identity digest
@@ -617,3 +619,78 @@ rep_hist_get_bandwidth_lines(void)
return buf;
}
+/** A list of port numbers that have been used recently.  Each element is
+ * a heap-allocated uint16_t; element i pairs with element i of
+ * predicted_ports_times. */
+static smartlist_t *predicted_ports_list=NULL;
+/** The corresponding most recently used time for each port, stored as a
+ * heap-allocated time_t at the same index as its port. */
+static smartlist_t *predicted_ports_times=NULL;
+
+/** Append <b>port</b>, with <b>now</b> as its last-used time, to the two
+ * parallel predicted-port lists.  Does not check whether the port is
+ * already present.
+ */
+static void
+add_predicted_port(uint16_t port, time_t now)
+{
+  uint16_t *port_copy = tor_malloc(sizeof *port_copy);
+  time_t *time_copy = tor_malloc(sizeof *time_copy);
+
+  *port_copy = port;
+  *time_copy = now;
+
+  /* Keep both lists in step: same index, same entry. */
+  smartlist_add(predicted_ports_list, port_copy);
+  smartlist_add(predicted_ports_times, time_copy);
+}
+
+/** Create both predicted-port lists and seed them with port 80, stamped
+ * with the current time.
+ */
+static void
+predicted_ports_init(void)
+{
+  const time_t started = time(NULL);
+
+  predicted_ports_times = smartlist_create();
+  predicted_ports_list = smartlist_create();
+  add_predicted_port(80, started); /* add one to kickstart us */
+}
+
+/** Note that a stream to <b>port</b> was requested at time <b>now</b>, so
+ * that circuits able to reach that port can be built ahead of demand.  A
+ * port that is already tracked just gets its timestamp refreshed; a
+ * <b>port</b> of 0 is ignored.
+ */
+void rep_hist_note_used_port(uint16_t port, time_t now) {
+  int idx;
+
+  tor_assert(predicted_ports_list);
+  tor_assert(predicted_ports_times);
+
+  if (port == 0)
+    return; /* record nothing */
+
+  for (idx = 0; idx < smartlist_len(predicted_ports_list); ++idx) {
+    uint16_t *known_port = smartlist_get(predicted_ports_list, idx);
+    time_t *last_used = smartlist_get(predicted_ports_times, idx);
+    if (*known_port != port)
+      continue;
+    *last_used = now; /* already tracked: only the time changes */
+    return;
+  }
+
+  /* first time we have seen this port */
+  add_predicted_port(port, now);
+}
+
+#define PREFERRED_PORTS_RELEVANCE_TIME (6*3600) /* 6 hours */
+
+/** Allocate and return a string of space-separated decimal port numbers
+ * that are likely to be asked for in the near future.  Ports last used
+ * more than PREFERRED_PORTS_RELEVANCE_TIME seconds before <b>now</b> are
+ * forgotten first.  The result is an empty string when no ports remain;
+ * the caller must free it.
+ */
+char *rep_hist_get_predicted_ports(time_t now) {
+  int i;
+  uint16_t *tmp_port;
+  time_t *tmp_time;
+  char *result;
+  char *cp;
+  size_t result_len;
+
+  tor_assert(predicted_ports_list);
+  tor_assert(predicted_ports_times);
+
+  /* clean out obsolete entries */
+  for (i = 0; i < smartlist_len(predicted_ports_list); ++i) {
+    tmp_time = smartlist_get(predicted_ports_times, i);
+    if (*tmp_time + PREFERRED_PORTS_RELEVANCE_TIME < now) {
+      tmp_port = smartlist_get(predicted_ports_list, i);
+      /* Delete the same index from both lists so they stay paired. */
+      smartlist_del(predicted_ports_list, i);
+      smartlist_del(predicted_ports_times, i);
+      tor_free(tmp_port);
+      tor_free(tmp_time);
+      i--; /* look at index i again on the next pass */
+    }
+  }
+
+  /* The list holds uint16_t objects, not C strings, so each port has to be
+   * formatted explicitly.  A port needs at most 5 digits plus a separating
+   * space, and one more byte holds the terminating NUL. */
+  result_len = (size_t)smartlist_len(predicted_ports_list) * 6 + 1;
+  result = tor_malloc(result_len);
+  result[0] = '\0';
+  cp = result;
+  for (i = 0; i < smartlist_len(predicted_ports_list); ++i) {
+    tmp_port = smartlist_get(predicted_ports_list, i);
+    cp += snprintf(cp, result_len - (size_t)(cp - result),
+                   i ? " %u" : "%u", (unsigned)*tmp_port);
+  }
+  return result;
+}
+
diff --git a/src/or/routerlist.c b/src/or/routerlist.c
index 9a364d4898..5abca8c149 100644
--- a/src/or/routerlist.c
+++ b/src/or/routerlist.c
@@ -936,10 +936,11 @@ router_resolve_routerlist(routerlist_t *rl)
/** Decide whether a given addr:port is definitely accepted, definitely
* rejected, or neither by a given policy. If <b>addr</b> is 0, we
- * don't know the IP of the target address.
+ * don't know the IP of the target address. If <b>port</b> is 0, we
+ * don't know the port of the target address.
*
- * Returns -1 for "rejected", 0 for "accepted", 1 for "maybe" (since IP is
- * unknown).
+ * Returns -1 for "rejected", 0 for "accepted", 1 for "maybe" (since IP or
+ * port is unknown).
*/
int router_compare_addr_to_addr_policy(uint32_t addr, uint16_t port,
addr_policy_t *policy)
@@ -948,7 +949,6 @@ int router_compare_addr_to_addr_policy(uint32_t addr, uint16_t port,
int maybe_accept = 0;
int match = 0;
int maybe = 0;
- struct in_addr in;
addr_policy_t *tmpe;
for (tmpe=policy; tmpe; tmpe=tmpe->next) {
@@ -956,7 +956,8 @@ int router_compare_addr_to_addr_policy(uint32_t addr, uint16_t port,
maybe = 0;
if (!addr) {
/* Address is unknown. */
- if (port >= tmpe->prt_min && port <= tmpe->prt_max) {
+ if ((port >= tmpe->prt_min && port <= tmpe->prt_max) ||
+ (!port && tmpe->prt_min<=1 && tmpe->prt_max>=65535)) {
/* The port definitely matches. */
if (tmpe->msk == 0) {
match = 1;
@@ -965,10 +966,6 @@ int router_compare_addr_to_addr_policy(uint32_t addr, uint16_t port,
}
} else if (!port) {
/* The port maybe matches. */
- /* XXX Nick: it looks port 0 only means something special for resolve
- * commands, which can currently be handled by any exit node.
- * Should we treat those specially elsewhere?
- */
maybe = 1;
}
} else {
@@ -989,9 +986,10 @@ int router_compare_addr_to_addr_policy(uint32_t addr, uint16_t port,
maybe_accept = 1;
}
if (match) {
- in.s_addr = htonl(addr);
- log_fn(LOG_DEBUG,"Address %s:%d matches policy '%s'",
- inet_ntoa(in), port, tmpe->string);
+// struct in_addr in;
+// in.s_addr = htonl(addr);
+// log_fn(LOG_DEBUG,"Address %s:%d matches policy '%s'",
+// inet_ntoa(in), port, tmpe->string);
if (tmpe->policy_type == ADDR_POLICY_ACCEPT) {
/* If we already hit a clause that might trigger a 'reject', than we
* can't be sure of this certain 'accept'.*/
@@ -1024,7 +1022,7 @@ int router_exit_policy_all_routers_reject(uint32_t addr, uint16_t port) {
/** Return true iff <b>router</b> does not permit exit streams.
*/
int router_exit_policy_rejects_all(routerinfo_t *router) {
- return router_compare_addr_to_addr_policy(0, 1, router->exit_policy)
+ return router_compare_addr_to_addr_policy(0, 0, router->exit_policy)
== ADDR_POLICY_REJECTED;
}