summaryrefslogtreecommitdiff
path: root/src/or
diff options
context:
space:
mode:
Diffstat (limited to 'src/or')
-rw-r--r--src/or/buffers.c20
-rw-r--r--src/or/circuitstats.c13
-rw-r--r--src/or/command.c20
-rw-r--r--src/or/connection_edge.c45
-rw-r--r--src/or/control.c26
-rw-r--r--src/or/cpuworker.c6
-rw-r--r--src/or/dircollate.c34
-rw-r--r--src/or/dirserv.c18
-rw-r--r--src/or/dns.c53
-rw-r--r--src/or/dns_structs.h12
-rw-r--r--src/or/fp_pair.c8
-rw-r--r--src/or/geoip.c18
-rw-r--r--src/or/hibernate.c24
-rw-r--r--src/or/keypin.c12
-rw-r--r--src/or/ntmain.c10
-rw-r--r--src/or/onion.c52
-rw-r--r--src/or/onion_fast.c18
-rw-r--r--src/or/onion_ntor.c11
-rw-r--r--src/or/onion_tap.c12
-rw-r--r--src/or/periodic.c4
-rw-r--r--src/or/protover.c24
-rw-r--r--src/or/protover.h7
-rw-r--r--src/or/reasons.c6
-rw-r--r--src/or/rephist.c70
-rw-r--r--src/or/replaycache.c12
-rw-r--r--src/or/routerlist.c86
-rw-r--r--src/or/routerparse.c63
-rw-r--r--src/or/routerset.c14
-rw-r--r--src/or/statefile.c17
-rw-r--r--src/or/status.c8
-rw-r--r--src/or/tor_main.c6
31 files changed, 694 insertions, 35 deletions
diff --git a/src/or/buffers.c b/src/or/buffers.c
index c08da63a0d..412879606a 100644
--- a/src/or/buffers.c
+++ b/src/or/buffers.c
@@ -6,10 +6,22 @@
/**
* \file buffers.c
- * \brief Implements a generic interface buffer. Buffers are
- * fairly opaque string holders that can read to or flush from:
- * memory, file descriptors, or TLS connections. Buffers are implemented
- * as linked lists of memory chunks.
+ * \brief Implements a generic buffer interface.
+ *
+ * A buf_t is a (fairly) opaque byte-oriented FIFO that can read to or flush
+ * from memory, sockets, file descriptors, TLS connections, or another buf_t.
+ * Buffers are implemented as linked lists of memory chunks.
+ *
+ * All socket-backed and TLS-based connection_t objects have a pair of
+ * buffers: one for incoming data, and one for outcoming data. These are fed
+ * and drained from functions in connection.c, trigged by events that are
+ * monitored in main.c.
+ *
+ * This module has basic support for reading and writing on buf_t objects. It
+ * also contains specialized functions for handling particular protocols
+ * on a buf_t backend, including SOCKS (used in connection_edge.c), Tor cells
+ * (used in connection_or.c and channeltls.c), HTTP (used in directory.c), and
+ * line-oriented communication (used in control.c).
**/
#define BUFFERS_PRIVATE
#include "or.h"
diff --git a/src/or/circuitstats.c b/src/or/circuitstats.c
index 3d64113521..418acc0024 100644
--- a/src/or/circuitstats.c
+++ b/src/or/circuitstats.c
@@ -9,6 +9,18 @@
*
* \brief Maintains and analyzes statistics about circuit built times, so we
* can tell how long we may need to wait for a fast circuit to be constructed.
+ *
+ * By keeping these statistics, a client learns when it should time out a slow
+ * circuit for being too slow, and when it should keep a circuit open in order
+ * to wait for it to complete.
+ *
+ * The information here is kept in a circuit_built_times_t structure, which is
+ * currently a singleton, but doesn't need to be. It's updated by calls to
+ * circuit_build_times_count_timeout() from circuituse.c,
+ * circuit_build_times_count_close() from circuituse.c, and
+ * circuit_build_times_add_time() from circuitbuild.c, and inspected by other
+ * calls into this module, mostly from circuitlist.c. Observations are
+ * persisted to disk via the or_state_t-related calls.
*/
#define CIRCUITSTATS_PRIVATE
@@ -329,7 +341,6 @@ circuit_build_times_min_timeout(void)
"circuit_build_times_min_timeout() called, cbtmintimeout is %d",
num);
}
-
return num;
}
diff --git a/src/or/command.c b/src/or/command.c
index 5ad92bed1e..5866c386e4 100644
--- a/src/or/command.c
+++ b/src/or/command.c
@@ -7,6 +7,26 @@
/**
* \file command.c
* \brief Functions for processing incoming cells.
+ *
+ * When we receive a cell from a client or a relay, it arrives on some
+ * channel, and tells us what to do with it. In this module, we dispatch based
+ * on the cell type using the functions command_process_cell() and
+ * command_process_var_cell(), and deal with the cell accordingly. (These
+ * handlers are installed on a channel with the command_setup_channel()
+ * function.)
+ *
+ * Channels have a chance to handle some cell types on their own before they
+ * are ever passed here --- typically, they do this for cells that are
+ * specific to a given channel type. For example, in channeltls.c, the cells
+ * for the initial connection handshake are handled before we get here. (Of
+ * course, the fact that there _is_ only one channel type for now means that
+ * we may have gotten the factoring wrong here.)
+ *
+ * Handling other cell types is mainly farmed off to other modules, after
+ * initial sanity-checking. CREATE* cells are handled ultimately in onion.c,
+ * CREATED* cells trigger circuit creation in circuitbuild.c, DESTROY cells
+ * are handled here (since they're simple), and RELAY cells, in all their
+ * complexity, are passed off to relay.c.
**/
/* In-points to command.c:
diff --git a/src/or/connection_edge.c b/src/or/connection_edge.c
index 788b7ee066..24842e4107 100644
--- a/src/or/connection_edge.c
+++ b/src/or/connection_edge.c
@@ -7,6 +7,51 @@
/**
* \file connection_edge.c
* \brief Handle edge streams.
+ *
+ * An edge_connection_t is a subtype of a connection_t, and represents two
+ * critical concepts in Tor: a stream, and an edge connection. From the Tor
+ * protocol's point of view, a stream is a bi-directional channel that is
+ * multiplexed on a single circuit. Each stream on a circuit is identified
+ * with a separate 16-bit stream ID, local to the (circuit,exit) pair.
+ * Streams are created in response to client requests.
+ *
+ * An edge connection is one thing that can implement a stream: it is either a
+ * TCP application socket that has arrived via (e.g.) a SOCKS request, or an
+ * exit connection.
+ *
+ * Not every instance of edge_connection_t truly represents an edge connction,
+ * however. (Sorry!) We also create edge_connection_t objects for streams that
+ * we will not be handling with TCP. The types of these streams are:
+ * <ul>
+ * <li>DNS lookup streams, created on the client side in response to
+ * a UDP DNS request received on a DNSPort, or a RESOLVE command
+ * on a controller.
+ * <li>DNS lookup streams, created on the exit side in response to
+ * a RELAY_RESOLVE cell from a client.
+ * <li>Tunneled directory streams, created on the directory cache side
+ * in response to a RELAY_BEGINDIR cell. These streams attach directly
+ * to a dir_connection_t object without ever using TCP.
+ * </ul>
+ *
+ * This module handles general-purpose functionality having to do with
+ * edge_connection_t. On the client side, it accepts various types of
+ * application requests on SocksPorts, TransPorts, and NATDPorts, and
+ * creates streams appropriately.
+ *
+ * This module is also responsible for implementing stream isolation:
+ * ensuring that streams that should not be linkable to one another are
+ * kept to different circuits.
+ *
+ * On the exit side, this module handles the various stream-creating
+ * type of RELAY cells by launching appropriate outgoing connections,
+ * DNS requests, or directory connection objects.
+ *
+ * And for all edge connections, this module is responsible for handling
+ * incoming and outdoing data as it arrives or leaves in the relay.c
+ * module. (Outgoing data will be packaged in
+ * connection_edge_process_inbuf() as it calls
+ * connection_edge_package_raw_inbuf(); incoming data from RELAY_DATA
+ * cells is applied in connection_edge_process_relay_cell().)
**/
#define CONNECTION_EDGE_PRIVATE
diff --git a/src/or/control.c b/src/or/control.c
index 8962075e1e..a4f8e3713b 100644
--- a/src/or/control.c
+++ b/src/or/control.c
@@ -5,7 +5,31 @@
/**
* \file control.c
* \brief Implementation for Tor's control-socket interface.
- * See doc/spec/control-spec.txt for full details on protocol.
+ *
+ * A "controller" is an external program that monitors and controls a Tor
+ * instance via a text-based protocol. It connects to Tor via a connection
+ * to a local socket.
+ *
+ * The protocol is line-driven. The controller sends commands terminated by a
+ * CRLF. Tor sends lines that are either <em>replies</em> to what the
+ * controller has said, or <em>events</em> that Tor sends to the controller
+ * asynchronously based on occurrences in the Tor network model.
+ *
+ * See the control-spec.txt file in the torspec.git repository for full
+ * details on protocol.
+ *
+ * This module generally has two kinds of entry points: those based on having
+ * received a command on a controller socket, which are handled in
+ * connection_control_process_inbuf(), and dispatched to individual functions
+ * with names like control_handle_COMMANDNAME(); and those based on events
+ * that occur elsewhere in Tor, which are handled by functions with names like
+ * control_event_EVENTTYPE().
+ *
+ * Controller events are not sent immediately; rather, they are inserted into
+ * the queued_control_events array, and flushed later from
+ * flush_queued_events_cb(). Doing this simplifies our callgraph greatly,
+ * by limiting the number of places in Tor that can call back into the network
+ * stack.
**/
#define CONTROL_PRIVATE
diff --git a/src/or/cpuworker.c b/src/or/cpuworker.c
index 2e76ea5b78..26bc54b55c 100644
--- a/src/or/cpuworker.c
+++ b/src/or/cpuworker.c
@@ -8,7 +8,11 @@
* \brief Uses the workqueue/threadpool code to farm CPU-intensive activities
* out to subprocesses.
*
- * Right now, we only use this for processing onionskins.
+ * The multithreading backend for this module is in workqueue.c; this module
+ * specializes workqueue.c.
+ *
+ * Right now, we only use this for processing onionskins, and invoke it mostly
+ * from onion.c.
**/
#include "or.h"
#include "channel.h"
diff --git a/src/or/dircollate.c b/src/or/dircollate.c
index c6afd95926..033a7afe0f 100644
--- a/src/or/dircollate.c
+++ b/src/or/dircollate.c
@@ -8,6 +8,17 @@
*
* \brief Collation code for figuring out which identities to vote for in
* the directory voting process.
+ *
+ * During the consensus calculation, when an authority is looking at the vote
+ * documents from all the authorities, it needs to compute the consensus for
+ * each relay listed by at least one authority. But the notion of "each
+ * relay" can be tricky: some relays have Ed25519 keys, and others don't.
+ *
+ * Moreover, older consensus methods did RSA-based ID collation alone, and
+ * ignored Ed25519 keys. We need to support those too until we're completely
+ * sure that authorities will never downgrade.
+ *
+ * This module is invoked exclusively from dirvote.c.
*/
#define DIRCOLLATE_PRIVATE
@@ -21,6 +32,9 @@ static void dircollator_collate_by_ed25519(dircollator_t *dc);
* RSA SHA1 digest) to an array of vote_routerstatus_t. */
typedef struct ddmap_entry_s {
HT_ENTRY(ddmap_entry_s) node;
+ /** A SHA1-RSA1024 identity digest and Ed25519 identity key,
+ * concatenated. (If there is no ed25519 identity key, there is no
+ * entry in this table.) */
uint8_t d[DIGEST_LEN + DIGEST256_LEN];
/* The nth member of this array corresponds to the vote_routerstatus_t (if
* any) received for this digest pair from the nth voter. */
@@ -43,12 +57,16 @@ ddmap_entry_new(int n_votes)
sizeof(vote_routerstatus_t *) * n_votes);
}
+/** Helper: compute a hash of a single ddmap_entry_t's identity (or
+ * identities) */
static unsigned
ddmap_entry_hash(const ddmap_entry_t *ent)
{
return (unsigned) siphash24g(ent->d, sizeof(ent->d));
}
+/** Helper: return true if <b>a</b> and <b>b</b> have the same
+ * identity/identities. */
static unsigned
ddmap_entry_eq(const ddmap_entry_t *a, const ddmap_entry_t *b)
{
@@ -56,7 +74,7 @@ ddmap_entry_eq(const ddmap_entry_t *a, const ddmap_entry_t *b)
}
/** Record the RSA identity of <b>ent</b> as <b>rsa_sha1</b>, and the
- * ed25519 identity as <b>ed25519</b>. */
+ * ed25519 identity as <b>ed25519</b>. Both must be provided. */
static void
ddmap_entry_set_digests(ddmap_entry_t *ent,
const uint8_t *rsa_sha1,
@@ -72,8 +90,12 @@ HT_GENERATE2(double_digest_map, ddmap_entry_s, node, ddmap_entry_hash,
ddmap_entry_eq, 0.6, tor_reallocarray, tor_free_)
/** Helper: add a single vote_routerstatus_t <b>vrs</b> to the collator
- * <b>dc</b>, indexing it by its RSA key digest, and by the 2-tuple of
- * its RSA key digest and Ed25519 key. */
+ * <b>dc</b>, indexing it by its RSA key digest, and by the 2-tuple of its RSA
+ * key digest and Ed25519 key. It must come from the <b>vote_num</b>th
+ * vote.
+ *
+ * Requires that the vote is well-formed -- that is, that it has no duplicate
+ * routerstatus entries. We already checked for that when parsing the vote. */
static void
dircollator_add_routerstatus(dircollator_t *dc,
int vote_num,
@@ -82,9 +104,12 @@ dircollator_add_routerstatus(dircollator_t *dc,
{
const char *id = vrs->status.identity_digest;
+ /* Clear this flag; we might set it later during the voting process */
vrs->ed25519_reflects_consensus = 0;
- (void) vote;
+ (void) vote; // We don't currently need this.
+
+ /* First, add this item to the appropriate RSA-SHA-Id array. */
vote_routerstatus_t **vrs_lst = digestmap_get(dc->by_rsa_sha1, id);
if (NULL == vrs_lst) {
vrs_lst = tor_calloc(dc->n_votes, sizeof(vote_routerstatus_t *));
@@ -98,6 +123,7 @@ dircollator_add_routerstatus(dircollator_t *dc,
if (! vrs->has_ed25519_listing)
return;
+ /* Now add it to the appropriate <Ed,RSA-SHA-Id> array. */
ddmap_entry_t search, *found;
memset(&search, 0, sizeof(search));
ddmap_entry_set_digests(&search, (const uint8_t *)id, ed);
diff --git a/src/or/dirserv.c b/src/or/dirserv.c
index e8d60d0db8..41945fe1d1 100644
--- a/src/or/dirserv.c
+++ b/src/or/dirserv.c
@@ -36,6 +36,24 @@
* \file dirserv.c
* \brief Directory server core implementation. Manages directory
* contents and generates directories.
+ *
+ * This module implements most of directory cache functionality, and some of
+ * the directory authority functionality. The directory.c module delegates
+ * here in order to handle incoming requests from clients, via
+ * connection_dirserv_flushed_some() and its kin. In order to save RAM, this
+ * module is reponsible for spooling directory objects (in whole or in part)
+ * onto buf_t instances, and then closing the dir_connection_t once the
+ * objects are totally flushed.
+ *
+ * The directory.c module also delegates here for handling descriptor uploads
+ * via dirserv_add_multiple_descriptors().
+ *
+ * Additionally, this module handles some aspects of voting, including:
+ * deciding how to vote on individual flags (based on decisions reached in
+ * rephist.c), of formatting routerstatus lines, and deciding what relays to
+ * include in an authority's vote. (TODO: Those functions could profitably be
+ * split off. They only live in this file because historically they were
+ * shared among the v1, v2, and v3 directory code.)
*/
/** How far in the future do we allow a router to get? (seconds) */
diff --git a/src/or/dns.c b/src/or/dns.c
index aaffad77fc..5f9813b912 100644
--- a/src/or/dns.c
+++ b/src/or/dns.c
@@ -9,6 +9,42 @@
* This is implemented as a wrapper around Adam Langley's eventdns.c code.
* (We can't just use gethostbyname() and friends because we really need to
* be nonblocking.)
+ *
+ * There are three main cases when a Tor relay uses dns.c to launch a DNS
+ * request:
+ * <ol>
+ * <li>To check whether the DNS server is working more or less correctly.
+ * This happens via dns_launch_correctness_checks(). The answer is
+ * reported in the return value from later calls to
+ * dns_seems_to_be_broken().
+ * <li>When a client has asked the relay, in a RELAY_BEGIN cell, to connect
+ * to a given server by hostname. This happens via dns_resolve().
+ * <li>When a client has asked the rela, in a RELAY_RESOLVE cell, to look
+ * up a given server's IP address(es) by hostname. This also happens via
+ * dns_resolve().
+ * </ol>
+ *
+ * Each of these gets handled a little differently.
+ *
+ * To check for correctness, we look up some hostname we expect to exist and
+ * have real entries, some hostnames which we expect to definitely not exist,
+ * and some hostnames that we expect to probably not exist. If too many of
+ * the hostnames that shouldn't exist do exist, that's a DNS hijacking
+ * attempt. If too many of the hostnames that should exist have the same
+ * addresses as the ones that shouldn't exist, that's a very bad DNS hijacking
+ * attempt, or a very naughty captive portal. And if the hostnames that
+ * should exist simply don't exist, we probably have a broken nameserver.
+ *
+ * To handle client requests, we first check our cache for answers. If there
+ * isn't something up-to-date, we've got to launch A or AAAA requests as
+ * appropriate. How we handle responses to those in particular is a bit
+ * complex; see dns_lookup() and set_exitconn_info_from_resolve().
+ *
+ * When a lookup is finally complete, the inform_pending_connections()
+ * function will tell all of the streams that have been waiting for the
+ * resolve, by calling connection_exit_connect() if the client sent a
+ * RELAY_BEGIN cell, and by calling send_resolved_cell() or
+ * send_hostname_cell() if the client sent a RELAY_RESOLVE cell.
**/
#define DNS_PRIVATE
@@ -793,8 +829,14 @@ dns_resolve_impl,(edge_connection_t *exitconn, int is_resolve,
}
/** Given an exit connection <b>exitconn</b>, and a cached_resolve_t
- * <b>resolve</b> whose DNS lookups have all succeeded or failed, update the
- * appropriate fields (address_ttl and addr) of <b>exitconn</b>.
+ * <b>resolve</b> whose DNS lookups have all either succeeded or failed,
+ * update the appropriate fields (address_ttl and addr) of <b>exitconn</b>.
+ *
+ * The logic can be complicated here, since we might have launched both
+ * an A lookup and an AAAA lookup, and since either of those might have
+ * succeeded or failed, and since we want to answer a RESOLVE cell with
+ * a full answer but answer a BEGIN cell with whatever answer the client
+ * would accept <i>and</i> we could still connect to.
*
* If this is a reverse lookup, set *<b>hostname_out</b> to a newly allocated
* copy of the name resulting hostname.
@@ -1137,7 +1179,12 @@ dns_found_answer(const char *address, uint8_t query_type,
/** Given a pending cached_resolve_t that we just finished resolving,
* inform every connection that was waiting for the outcome of that
- * resolution. */
+ * resolution.
+ *
+ * Do this by sending a RELAY_RESOLVED cell (if the pending stream had sent us
+ * RELAY_RESOLVE cell), or by launching an exit connection (if the pending
+ * stream had send us a RELAY_BEGIN cell).
+ */
static void
inform_pending_connections(cached_resolve_t *resolve)
{
diff --git a/src/or/dns_structs.h b/src/or/dns_structs.h
index bb67459d7b..bc6067213d 100644
--- a/src/or/dns_structs.h
+++ b/src/or/dns_structs.h
@@ -1,3 +1,15 @@
+/* Copyright (c) 2003-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2016, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file dns_structs.h
+ *
+ * \brief Structures used in dns.c. Exposed to dns.c, and to the unit tests
+ * that declare DNS_PRIVATE.
+ */
+
#ifndef TOR_DNS_STRUCTS_H
#define TOR_DNS_STRUCTS_H
diff --git a/src/or/fp_pair.c b/src/or/fp_pair.c
index 53b311e580..eeeb0f1de3 100644
--- a/src/or/fp_pair.c
+++ b/src/or/fp_pair.c
@@ -7,6 +7,14 @@
* \brief Manages data structures for associating pairs of fingerprints. Used
* to handle combinations of identity/signing-key fingerprints for
* authorities.
+ *
+ * This is a nice, simple, compact data structure module that handles a map
+ * from (signing key fingerprint, identity key fingerprint) to void *. The
+ * fingerprints here are SHA1 digests of RSA keys.
+ *
+ * This structure is used in directory.c and in routerlist.c for handling
+ * handling authority certificates, since we never want more than a single
+ * certificate for any (ID key, signing key) pair.
**/
#include "or.h"
diff --git a/src/or/geoip.c b/src/or/geoip.c
index 6eb0ce7669..ba65dfe56c 100644
--- a/src/or/geoip.c
+++ b/src/or/geoip.c
@@ -7,6 +7,24 @@
* to summarizing client connections by country to entry guards, bridges,
* and directory servers; and for statistics on answering network status
* requests.
+ *
+ * There are two main kinds of functions in this module: geoip functions,
+ * which map groups of IPv4 and IPv6 addresses to country codes, and
+ * statistical functions, which collect statistics about different kinds of
+ * per-country usage.
+ *
+ * The geoip lookup tables are implemented as sorted lists of disjoint address
+ * ranges, each mapping to a singleton geoip_country_t. These country objects
+ * are also indexed by their names in a hashtable.
+ *
+ * The tables are populated from disk at startup by the geoip_load_file()
+ * function. For more information on the file format they read, see that
+ * function. See the scripts and the README file in src/config for more
+ * information about how those files are generated.
+ *
+ * Tor uses GeoIP information in order to implement user requests (such as
+ * ExcludeNodes {cc}), and to keep track of how much usage relays are getting
+ * for each country.
*/
#define GEOIP_PRIVATE
diff --git a/src/or/hibernate.c b/src/or/hibernate.c
index 7e25306234..aaf5c4bdcd 100644
--- a/src/or/hibernate.c
+++ b/src/or/hibernate.c
@@ -8,6 +8,12 @@
* etc in preparation for closing down or going dormant; and to track
* bandwidth and time intervals to know when to hibernate and when to
* stop hibernating.
+ *
+ * Ordinarily a Tor relay is "Live".
+ *
+ * A live relay can stop accepting connections for one of two reasons: either
+ * it is trying to conserve bandwidth because of bandwidth accounting rules
+ * ("soft hibernation"), or it is about to shut down ("exiting").
**/
/*
@@ -49,8 +55,10 @@ typedef enum {
UNIT_MONTH=1, UNIT_WEEK=2, UNIT_DAY=3,
} time_unit_t;
-/* Fields for accounting logic. Accounting overview:
+/*
+ * @file hibernate.c
*
+ * <h4>Accounting</h4>
* Accounting is designed to ensure that no more than N bytes are sent in
* either direction over a given interval (currently, one month, one week, or
* one day) We could
@@ -64,17 +72,21 @@ typedef enum {
*
* Each interval runs as follows:
*
- * 1. We guess our bandwidth usage, based on how much we used
+ * <ol>
+ * <li>We guess our bandwidth usage, based on how much we used
* last time. We choose a "wakeup time" within the interval to come up.
- * 2. Until the chosen wakeup time, we hibernate.
- * 3. We come up at the wakeup time, and provide bandwidth until we are
+ * <li>Until the chosen wakeup time, we hibernate.
+ * <li> We come up at the wakeup time, and provide bandwidth until we are
* "very close" to running out.
- * 4. Then we go into low-bandwidth mode, and stop accepting new
+ * <li> Then we go into low-bandwidth mode, and stop accepting new
* connections, but provide bandwidth until we run out.
- * 5. Then we hibernate until the end of the interval.
+ * <li> Then we hibernate until the end of the interval.
*
* If the interval ends before we run out of bandwidth, we go back to
* step one.
+ *
+ * Accounting is controlled by the AccountingMax, AccountingRule, and
+ * AccountingStart options.
*/
/** How many bytes have we read in this accounting interval? */
diff --git a/src/or/keypin.c b/src/or/keypin.c
index 335c793cd9..2d4c4e92d2 100644
--- a/src/or/keypin.c
+++ b/src/or/keypin.c
@@ -39,16 +39,28 @@
* @brief Key-pinning for RSA and Ed25519 identity keys at directory
* authorities.
*
+ * Many older clients, and many internal interfaces, still refer to relays by
+ * their RSA1024 identity keys. We can make this more secure, however:
+ * authorities use this module to track which RSA keys have been used along
+ * with which Ed25519 keys, and force such associations to be permanent.
+ *
* This module implements a key-pinning mechanism to ensure that it's safe
* to use RSA keys as identitifers even as we migrate to Ed25519 keys. It
* remembers, for every Ed25519 key we've seen, what the associated Ed25519
* key is. This way, if we see a different Ed25519 key with that RSA key,
* we'll know that there's a mismatch.
*
+ * (As of this writing, these key associations are advisory only, mostly
+ * because some relay operators kept mishandling their Ed25519 keys during
+ * the initial Ed25519 rollout. We should fix this problem, and then toggle
+ * the AuthDirPinKeys option.)
+ *
* We persist these entries to disk using a simple format, where each line
* has a base64-encoded RSA SHA1 hash, then a base64-endoded Ed25519 key.
* Empty lines, misformed lines, and lines beginning with # are
* ignored. Lines beginning with @ are reserved for future extensions.
+ *
+ * The dirserv.c module is the main user of these functions.
*/
static int keypin_journal_append_entry(const uint8_t *rsa_id_digest,
diff --git a/src/or/ntmain.c b/src/or/ntmain.c
index a1b886bb5a..4c65805b32 100644
--- a/src/or/ntmain.c
+++ b/src/or/ntmain.c
@@ -6,7 +6,15 @@
/**
* \file ntmain.c
*
- * \brief Entry points for running/configuring Tor as Windows Service.
+ * \brief Entry points for running/configuring Tor as a Windows Service.
+ *
+ * Windows Services expect to be registered with the operating system, and to
+ * have entry points for starting, stopping, and monitoring them. This module
+ * implements those entry points so that a tor relay or client or hidden
+ * service can run as a Windows service. Therefore, this module
+ * is only compiled when building for Windows.
+ *
+ * Warning: this module is not very well tested or very well maintained.
*/
#ifdef _WIN32
diff --git a/src/or/onion.c b/src/or/onion.c
index 8a566af766..a987883802 100644
--- a/src/or/onion.c
+++ b/src/or/onion.c
@@ -8,6 +8,58 @@
* \file onion.c
* \brief Functions to queue create cells, wrap the various onionskin types,
* and parse and create the CREATE cell and its allies.
+ *
+ * This module has a few functions, all related to the CREATE/CREATED
+ * handshake that we use on links in order to create a circuit, and the
+ * related EXTEND/EXTENDED handshake that we use over circuits in order to
+ * extend them an additional hop.
+ *
+ * In this module, we provide a set of abstractions to create a uniform
+ * interface over the three circuit extension handshakes that Tor has used
+ * over the years (TAP, CREATE_FAST, and ntor). These handshakes are
+ * implemented in onion_tap.c, onion_fast.c, and onion_ntor.c respectively.
+ *
+ * All[*] of these handshakes follow a similar pattern: a client, knowing
+ * some key from the relay it wants to extend through, generates the
+ * first part of a handshake. A relay receives that handshake, and sends
+ * a reply. Once the client handles the reply, it knows that it is
+ * talking to the right relay, and it shares some freshly negotiated key
+ * material with that relay.
+ *
+ * We sometimes call the client's part of the handshake an "onionskin".
+ * We do this because historically, Onion Routing used a multi-layer
+ * structure called an "onion" to construct circuits. Each layer of the
+ * onion contained key material chosen by the client, the identity of
+ * the next relay in the circuit, and a smaller onion, encrypted with
+ * the key of the next relay. When we changed Tor to use a telescoping
+ * circuit extension design, it corresponded to sending each layer of the
+ * onion separately -- as a series of onionskins.
+ *
+ * Clients invoke these functions when creating or extending a circuit,
+ * from circuitbuild.c.
+ *
+ * Relays invoke these functions when they receive a CREATE or EXTEND
+ * cell in command.c or relay.c, in order to queue the pending request.
+ * They also invoke them from cpuworker.c, which handles dispatching
+ * onionskin requests to different worker threads.
+ *
+ * <br>
+ *
+ * This module also handles:
+ * <ul>
+ * <li> Queueing incoming onionskins on the relay side before passing
+ * them to worker threads.
+ * <li>Expiring onionskins on the relay side if they have waited for
+ * too long.
+ * <li>Packaging private keys on the server side in order to pass
+ * them to worker threads.
+ * <li>Encoding and decoding CREATE, CREATED, CREATE2, and CREATED2 cells.
+ * <li>Encoding and decodign EXTEND, EXTENDED, EXTEND2, and EXTENDED2
+ * relay cells.
+ * </ul>
+ *
+ * [*] The CREATE_FAST handshake is weaker than described here; see
+ * onion_fast.c for more information.
**/
#include "or.h"
diff --git a/src/or/onion_fast.c b/src/or/onion_fast.c
index 6b5d12e407..8dcbfe22d8 100644
--- a/src/or/onion_fast.c
+++ b/src/or/onion_fast.c
@@ -7,6 +7,24 @@
/**
* \file onion_fast.c
* \brief Functions implement the CREATE_FAST circuit handshake.
+ *
+ * The "CREATE_FAST" handshake is an unauthenticated, non-forward-secure
+ * key derivation mechanism based on SHA1. We used to use it for the
+ * first hop of each circuit, since the TAP handshake provided no
+ * additional security beyond the security already provided by the TLS
+ * handshake [*].
+ *
+ * When we switched to ntor, we deprecated CREATE_FAST, since ntor is
+ * stronger than our TLS handshake was, and fast enough to not be worrisome.
+ *
+ * This handshake, like the other circuit-extension handshakes, is
+ * invoked from onion.c.
+ *
+ * [*]Actually, it's possible that TAP _was_ a little better than TLS with
+ * RSA1024 certificates and EDH1024 for forward secrecy, if you
+ * hypothesize an adversary who can compute discrete logarithms on a
+ * small number of targetted DH1024 fields, but who can't break all that
+ * many RSA1024 keys.
**/
#include "or.h"
diff --git a/src/or/onion_ntor.c b/src/or/onion_ntor.c
index d1a268f4cd..ded97ee73d 100644
--- a/src/or/onion_ntor.c
+++ b/src/or/onion_ntor.c
@@ -5,6 +5,17 @@
* \file onion_ntor.c
*
* \brief Implementation for the ntor handshake.
+ *
+ * The ntor circuit-extension handshake was developed as a replacement
+ * for the old TAP handshake. It uses Elliptic-curve Diffie-Hellman and
+ * a hash function in order to perform a one-way authenticated key
+ * exchange. The ntor handshake is meant to replace the old "TAP"
+ * handshake.
+ *
+ * We instantiate ntor with curve25519, HMAC-SHA256, and HKDF.
+ *
+ * This handshake, like the other circuit-extension handshakes, is
+ * invoked from onion.c.
*/
#include "orconfig.h"
diff --git a/src/or/onion_tap.c b/src/or/onion_tap.c
index abe779351f..2769300945 100644
--- a/src/or/onion_tap.c
+++ b/src/or/onion_tap.c
@@ -9,10 +9,22 @@
* \brief Functions to implement the original Tor circuit extension handshake
* (a.k.a TAP).
*
+ * The "TAP" handshake is the first one that was widely used in Tor: It
+ * combines RSA1024-OAEP and AES128-CTR to perform a hybrid encryption over
+ * the first message DH1024 key exchange. (The RSA-encrypted part of the
+ * encryption is authenticated; the AES-encrypted part isn't. This was
+ * not a smart choice.)
+ *
* We didn't call it "TAP" ourselves -- Ian Goldberg named it in "On the
* Security of the Tor Authentication Protocol". (Spoiler: it's secure, but
* its security is kind of fragile and implementation dependent. Never modify
* this implementation without reading and understanding that paper at least.)
+ *
+ * We have deprecated TAP since the ntor handshake came into general use. It
+ * is still used for hidden service IP and RP connections, however.
+ *
+ * This handshake, like the other circuit-extension handshakes, is
+ * invoked from onion.c.
**/
#include "or.h"
diff --git a/src/or/periodic.c b/src/or/periodic.c
index 0bccc6ec20..d02d4a7bbb 100644
--- a/src/or/periodic.c
+++ b/src/or/periodic.c
@@ -5,6 +5,10 @@
* \file periodic.c
*
* \brief Generic backend for handling periodic events.
+ *
+ * The events in this module are used by main.c to track items that need
+ * to fire once every N seconds, possibly picking a new interval each time
+ * that they fire. See periodic_events[] in main.c for examples.
*/
#include "or.h"
diff --git a/src/or/protover.c b/src/or/protover.c
index 9e0391a410..a6a5d18f36 100644
--- a/src/or/protover.c
+++ b/src/or/protover.c
@@ -1,3 +1,24 @@
+/* Copyright (c) 2016, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file protover.c
+ * \brief Versioning information for different pieces of the Tor protocol.
+ *
+ * Starting in version 0.2.9.3-alpha, Tor places separate version numbers on
+ * each of the different components of its protocol. Relays use these numbers
+ * to advertise what versions of the protocols they can support, and clients
+ * use them to find what they can ask a given relay to do. Authorities vote
+ * on the supported protocol versions for each relay, and also vote on the
+ * which protocols you should have to support in order to be on the Tor
+ * network. All Tor instances use these required/recommended protocol versions
+ * to
+ *
+ * The main advantage of these protocol versions numbers over using Tor
+ * version numbers is that they allow different implementations of the Tor
+ * protocols to develop independently, without having to claim compatibility
+ * with specific versions of Tor.
+ **/
#define PROTOVER_PRIVATE
@@ -699,6 +720,9 @@ protover_compute_for_old_tor(const char *version)
}
}
+/**
+ * Release all storage held by static fields in protover.c
+ */
void
protover_free_all(void)
{
diff --git a/src/or/protover.h b/src/or/protover.h
index 075405e7ca..5c658931ea 100644
--- a/src/or/protover.h
+++ b/src/or/protover.h
@@ -1,3 +1,10 @@
+/* Copyright (c) 2016, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file protover.h
+ * \brief Headers and type declarations for protover.c
+ **/
#ifndef TOR_PROTOVER_H
#define TOR_PROTOVER_H
diff --git a/src/or/reasons.c b/src/or/reasons.c
index 36921cafcd..a1566e2299 100644
--- a/src/or/reasons.c
+++ b/src/or/reasons.c
@@ -6,6 +6,12 @@
* \file reasons.c
* \brief Convert circuit, stream, and orconn error reasons to and/or from
* strings and errno values.
+ *
+ * This module is just a bunch of functions full of case statements that
+ * convert from one representation of our error codes to another. These are
+ * mainly used in generating log messages, in sending messages to the
+ * controller in control.c, and in converting errors from one protocol layer
+ * to another.
**/
#include "or.h"
diff --git a/src/or/rephist.c b/src/or/rephist.c
index 6a54904746..8bcd7396aa 100644
--- a/src/or/rephist.c
+++ b/src/or/rephist.c
@@ -4,10 +4,74 @@
/**
* \file rephist.c
- * \brief Basic history and "reputation" functionality to remember
+ * \brief Basic history and performance-tracking functionality.
+ *
+ * Basic history and performance-tracking functionality to remember
* which servers have worked in the past, how much bandwidth we've
* been using, which ports we tend to want, and so on; further,
* exit port statistics, cell statistics, and connection statistics.
+ *
+ * The history and information tracked in this module could sensibly be
+ * divided into several categories:
+ *
+ * <ul><li>Statistics used by authorities to remember the uptime and
+ * stability information about various relays, including "uptime",
+ * "weighted fractional uptime" and "mean time between failures".
+ *
+ * <li>Bandwidth usage history, used by relays to self-report how much
+ * bandwidth they've used for different purposes over last day or so,
+ * in order to generate the {dirreq-,}{read,write}-history lines in
+ * that they publish.
+ *
+ * <li>Predicted ports, used by clients to remember how long it's been
+ * since they opened an exit connection to each given target
+ * port. Clients use this information in order to try to keep circuits
+ * open to exit nodes that can connect to the ports that they care
+ * about. (The predicted ports mechanism also handles predicted circuit
+ * usage that _isn't_ port-specific, such as resolves, internal circuits,
+ * and so on.)
+ *
+ * <li>Public key operation counters, for tracking how many times we've
+ * done each public key operation. (This is unmaintained and we should
+ * remove it.)
+ *
+ * <li>Exit statistics by port, used by exits to keep track of the
+ * number of streams and bytes they've served at each exit port, so they
+ * can generate their exit-kibibytes-{read,written} and
+ * exit-streams-opened statistics.
+ *
+ * <li>Circuit stats, used by relays instances to tract circuit
+ * queue fullness and delay over time, and generate cell-processed-cells,
+ * cell-queued-cells, cell-time-in-queue, and cell-circuits-per-decile
+ * statistics.
+ *
+ * <li>Descriptor serving statistics, used by directory caches to track
+ * how many descriptors they've served.
+ *
+ * <li>Connection statistics, used by relays to track one-way and
+ * bidirectional connections.
+ *
+ * <li>Onion handshake statistics, used by relays to count how many
+ * TAP and ntor handshakes they've handled.
+ *
+ * <li>Hidden service statistics, used by relays to count rendezvous
+ * traffic and HSDir-stored descriptors.
+ *
+ * <li>Link protocol statistics, used by relays to count how many times
+ * each link protocol has been used.
+ *
+ * </ul>
+ *
+ * The entry points for this module are scattered throughout the
+ * codebase. Sending data, receiving data, connecting to a relay,
+ * losing a connection to a relay, and so on can all trigger a change in
+ * our current stats. Relays also invoke this module in order to
+ * extract their statistics when building routerinfo and extrainfo
+ * objects in router.c.
+ *
+ * TODO: This module should be broken up.
+ *
+ * (The "rephist" name originally stood for "reputation and history". )
**/
#include "or.h"
@@ -2650,7 +2714,9 @@ rep_hist_desc_stats_write(time_t now)
return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL;
}
-/* DOCDOC rep_hist_note_desc_served */
+/** Called to note that we've served a given descriptor (by
+ * digest). Incrememnts the count of descriptors served, and the number
+ * of times we've served this descriptor. */
void
rep_hist_note_desc_served(const char * desc)
{
diff --git a/src/or/replaycache.c b/src/or/replaycache.c
index 23a1737b18..c17cba5f87 100644
--- a/src/or/replaycache.c
+++ b/src/or/replaycache.c
@@ -5,6 +5,18 @@
* \file replaycache.c
*
* \brief Self-scrubbing replay cache for rendservice.c
+ *
+ * To prevent replay attacks, hidden services need to recognize INTRODUCE2
+ * cells that they've already seen, and drop them. If they didn't, then
+ * sending the same INTRODUCE2 cell over and over would force the hidden
+ * service to make a huge number of circuits to the same rendezvous
+ * point, aiding traffic analysis.
+ *
+ * (It's not that simple, actually. We only check for replays in the
+ * RSA-encrypted portion of the handshake, since the rest of the handshake is
+ * malleable.)
+ *
+ * This module is used from rendservice.c.
*/
#define REPLAYCACHE_PRIVATE
diff --git a/src/or/routerlist.c b/src/or/routerlist.c
index 0e637f4833..85eb999ad6 100644
--- a/src/or/routerlist.c
+++ b/src/or/routerlist.c
@@ -9,6 +9,85 @@
* \brief Code to
* maintain and access the global list of routerinfos for known
* servers.
+ *
+ * A "routerinfo_t" object represents a single self-signed router
+ * descriptor, as generated by a Tor relay in order to tell the rest of
+ * the world about its keys, address, and capabilities. An
+ * "extrainfo_t" object represents an adjunct "extra-info" object,
+ * certified by a corresponding router descriptor, reporting more
+ * information about the relay that nearly all users will not need.
+ *
+ * Most users will not use router descriptors for most relays. Instead,
+ * they use the information in microdescriptors and in the consensus
+ * networkstatus.
+ *
+ * Right now, routerinfo_t objects are used in these ways:
+ * <ul>
+ * <li>By clients, in order to learn about bridge keys and capabilities.
+ * (Bridges aren't listed in the consensus networkstatus, so they
+ * can't have microdescriptors.)
+ * <li>By relays, since relays want more information about other relays
+ * than they can learn from microdescriptors. (TODO: Is this still true?)
+ * <li>By authorities, which receive them and use them to generate the
+ * consensus and the microdescriptors.
+ * <li>By all directory caches, which download them in case somebody
+ * else wants them.
+ * </ul>
+ *
+ * Routerinfos are mostly created by parsing them from a string, in
+ * routerparse.c. We store them to disk on receiving them, and
+ * periodically discard the ones we don't need. On restarting, we
+ * re-read them from disk. (This also applies to extrainfo documents, if
+ * we are configured to fetch them.)
+ *
+ * In order to keep our list of routerinfos up-to-date, we periodically
+ * check whether there are any listed in the latest consensus (or in the
+ * votes from other authorities, if we are an authority) that we don't
+ * have. (This also applies to extrainfo documents, if we are
+ * configured to fetch them.)
+ *
+ * Almost nothing in Tor should use a routerinfo_t to refer directly to
+ * a relay; instead, almost everything should use node_t (implemented in
+ * nodelist.c), which provides a common interface to routerinfo_t,
+ * routerstatus_t, and microdescriptor_t.
+ *
+ * <br>
+ *
+ * This module also has some of the functions used for choosing random
+ * nodes according to different rules and weights. Historically, they
+ * were all in this module. Now, they are spread across this module,
+ * nodelist.c, and networkstatus.c. (TODO: Fix that.)
+ *
+ * <br>
+ *
+ * (For historical reasons) this module also contains code for handling
+ * the list of fallback directories, the list of directory authorities,
+ * and the list of authority certificates.
+ *
+ * For the directory authorities, we have a list containing the public
+ * identity key, and contact points, for each authority. The
+ * authorities receive descriptors from relays, and publish consensuses,
+ * descriptors, and microdescriptors. This list is pre-configured.
+ *
+ * Fallback directories are well-known, stable, but untrusted directory
+ * caches that clients which have not yet bootstrapped can use to get
+ * their first networkstatus consensus, in order to find out where the
+ * Tor network really is. This list is pre-configured in
+ * fallback_dirs.inc. Every authority also serves as a fallback.
+ *
+ * Both fallback directories and directory authorities are are
+ * represented by a dir_server_t.
+ *
+ * Authority certificates are signed with authority identity keys; they
+ * are used to authenticate shorter-term authority signing keys. We
+ * fetch them when we find a consensus or a vote that has been signed
+ * with a signing key we don't recognize. We cache them on disk and
+ * load them on startup. Authority operators generate them with the
+ * "tor-gencert" utility.
+ *
+ * TODO: Authority certificates should be a separate module.
+ *
+ * TODO: dir_server_t stuff should be in a separate module.
**/
#define ROUTERLIST_PRIVATE
@@ -46,6 +125,9 @@
/****************************************************************************/
+/* Typed wrappers for different digestmap types; used to avoid type
+ * confusion. */
+
DECLARE_TYPED_DIGESTMAP_FNS(sdmap_, digest_sd_map_t, signed_descriptor_t)
DECLARE_TYPED_DIGESTMAP_FNS(rimap_, digest_ri_map_t, routerinfo_t)
DECLARE_TYPED_DIGESTMAP_FNS(eimap_, digest_ei_map_t, extrainfo_t)
@@ -800,7 +882,9 @@ static const char *BAD_SIGNING_KEYS[] = {
NULL,
};
-/* DOCDOC */
+/** Return true iff <b>cert</b> authenticates some atuhority signing key
+ * which, because of the old openssl heartbleed vulnerability, should
+ * never be trusted. */
int
authority_cert_is_blacklisted(const authority_cert_t *cert)
{
diff --git a/src/or/routerparse.c b/src/or/routerparse.c
index df9b76800d..93484660d2 100644
--- a/src/or/routerparse.c
+++ b/src/or/routerparse.c
@@ -6,7 +6,51 @@
/**
* \file routerparse.c
- * \brief Code to parse and validate router descriptors and directories.
+ * \brief Code to parse and validate router descriptors, consenus directories,
+ * and similar objects.
+ *
+ * The objects parsed by this module use a common text-based metaformat,
+ * documented in dir-spec.txt in torspec.git. This module is itself divided
+ * into two major kinds of function: code to handle the metaformat, and code
+ * to convert from particular instances of the metaformat into the
+ * objects that Tor uses.
+ *
+ * The generic parsing code works by calling a table-based tokenizer on the
+ * input string. Each token corresponds to a single line with a token, plus
+ * optional arguments on that line, plus an optional base-64 encoded object
+ * after that line. Each token has a definition in a table of token_rule_t
+ * entries that describes how many arguments it can take, whether it takes an
+ * object, how many times it may appear, whether it must appear first, and so
+ * on.
+ *
+ * The tokenizer function tokenize_string() converts its string input into a
+ * smartlist full of instances of directory_token_t, according to a provided
+ * table of token_rule_t.
+ *
+ * The generic parts of this module additionally include functions for
+ * finding the start and end of signed information inside a signed object, and
+ * computing the digest that will be signed.
+ *
+ * There are also functions for saving objects to disk that have caused
+ * parsing to fail.
+ *
+ * The specific parts of this module describe conversions between
+ * particular lists of directory_token_t and particular objects. The
+ * kinds of objects that can be parsed here are:
+ * <ul>
+ * <li>router descriptors (managed from routerlist.c)
+ * <li>extra-info documents (managed from routerlist.c)
+ * <li>microdescriptors (managed from microdesc.c)
+ * <li>vote and consensus networkstatus documents, and the routerstatus_t
+ * objects that they comprise (managed from networkstatus.c)
+ * <li>detached-signature objects used by authorities for gathering
+ * signatures on the networkstatus consensus (managed from dirvote.c)
+ * <li>authority key certificates (managed from routerlist.c)
+ * <li>hidden service descriptors (managed from rendcommon.c and rendcache.c)
+ * </ul>
+ *
+ * For no terribly good reason, the functions to <i>generate</i> signatures on
+ * the above directory objects are also in this module.
**/
#define ROUTERPARSE_PRIVATE
@@ -258,12 +302,14 @@ typedef struct token_rule_t {
int is_annotation;
} token_rule_t;
-/*
+/**
+ * @name macros for defining token rules
+ *
* Helper macros to define token tables. 's' is a string, 't' is a
* directory_keyword, 'a' is a trio of argument multiplicities, and 'o' is an
* object syntax.
- *
*/
+/**@{*/
/** Appears to indicate the end of a table. */
#define END_OF_TABLE { NULL, NIL_, 0,0,0, NO_OBJ, 0, INT_MAX, 0, 0 }
@@ -284,16 +330,17 @@ typedef struct token_rule_t {
/** An annotation that must appear no more than once */
#define A01(s,t,a,o) { s, t, a, o, 0, 1, 0, 1 }
-/* Argument multiplicity: any number of arguments. */
+/** Argument multiplicity: any number of arguments. */
#define ARGS 0,INT_MAX,0
-/* Argument multiplicity: no arguments. */
+/** Argument multiplicity: no arguments. */
#define NO_ARGS 0,0,0
-/* Argument multiplicity: concatenate all arguments. */
+/** Argument multiplicity: concatenate all arguments. */
#define CONCAT_ARGS 1,1,1
-/* Argument multiplicity: at least <b>n</b> arguments. */
+/** Argument multiplicity: at least <b>n</b> arguments. */
#define GE(n) n,INT_MAX,0
-/* Argument multiplicity: exactly <b>n</b> arguments. */
+/** Argument multiplicity: exactly <b>n</b> arguments. */
#define EQ(n) n,n,0
+/**@}*/
/** List of tokens recognized in router descriptors */
static token_rule_t routerdesc_token_table[] = {
diff --git a/src/or/routerset.c b/src/or/routerset.c
index f260914f4b..58b66ea777 100644
--- a/src/or/routerset.c
+++ b/src/or/routerset.c
@@ -9,6 +9,20 @@
*
* \brief Functions and structures to handle set-type selection of routers
* by name, ID, address, etc.
+ *
+ * This module implements the routerset_t data structure, whose purpose
+ * is to specify a set of relays based on a list of their identities or
+ * properties. Routersets can restrict relays by IP address mask,
+ * identity fingerprint, country codes, and nicknames (deprecated).
+ *
+ * Routersets are typically used for user-specified restrictions, and
+ * are created by invoking routerset_new and routerset_parse from
+ * config.c and confparse.c. To use a routerset, invoke one of
+ * routerset_contains_...() functions , or use
+ * routerstatus_get_all_nodes() / routerstatus_subtract_nodes() to
+ * manipulate a smartlist of node_t pointers.
+ *
+ * Country-code restrictions are implemented in geoip.c.
*/
#define ROUTERSET_PRIVATE
diff --git a/src/or/statefile.c b/src/or/statefile.c
index adf9d9f038..8fa4324b25 100644
--- a/src/or/statefile.c
+++ b/src/or/statefile.c
@@ -9,6 +9,23 @@
*
* \brief Handles parsing and encoding the persistent 'state' file that carries
* miscellaneous persistent state between Tor invocations.
+ *
+ * This 'state' file is a typed key-value store that allows multiple
+ * entries for the same key. It follows the same metaformat as described
+ * in confparse.c, and uses the same code to read and write itself.
+ *
+ * The state file is most suitable for small values that don't change too
+ * frequently. For values that become very large, we typically use a separate
+ * file -- for example, see how we handle microdescriptors, by storing them in
+ * a separate file with a journal.
+ *
+ * The current state is accessed via get_or_state(), which returns a singleton
+ * or_state_t object. Functions that change it should call
+ * or_state_mark_dirty() to ensure that it will get written to disk.
+ *
+ * The or_state_save() function additionally calls various functioens
+ * throughout Tor that might want to flush more state to the the disk,
+ * including some in rephist.c, entrynodes.c, circuitstats.c, hibernate.c.
*/
#define STATEFILE_PRIVATE
diff --git a/src/or/status.c b/src/or/status.c
index 749cee4edf..fce6a10157 100644
--- a/src/or/status.c
+++ b/src/or/status.c
@@ -3,7 +3,13 @@
/**
* \file status.c
- * \brief Keep status information and log the heartbeat messages.
+ * \brief Collect status information and log heartbeat messages.
+ *
+ * This module is responsible for implementing the heartbeat log messages,
+ * which periodically inform users and operators about basic facts to
+ * do with their Tor instance. The log_heartbeat() function, invoked from
+ * main.c, is the principle entry point. It collects data from elsewhere
+ * in Tor, and logs it in a human-readable format.
**/
#define STATUS_PRIVATE
diff --git a/src/or/tor_main.c b/src/or/tor_main.c
index 21fbe3efb5..d67eda2ac9 100644
--- a/src/or/tor_main.c
+++ b/src/or/tor_main.c
@@ -17,8 +17,10 @@ const char tor_git_revision[] =
/**
* \file tor_main.c
- * \brief Stub module containing a main() function. Allows unit
- * test binary to link against main.c.
+ * \brief Stub module containing a main() function.
+ *
+ * We keep the main function in a separate module so that the unit
+ * tests, which have their own main()s, can link against main.c.
**/
int tor_main(int argc, char *argv[]);