diff options
author | Nick Mathewson <nickm@torproject.org> | 2016-10-14 20:08:51 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2016-10-17 10:16:59 -0400 |
commit | aae034d13e458dfe82b503d3a1b54b0e5200b6b8 (patch) | |
tree | f2f69832a97045fbe2384e7320c73d3ea7c86ba8 /src | |
parent | 55c468c5211d5b74acb677767f14d91cd0304771 (diff) | |
download | tor-aae034d13e458dfe82b503d3a1b54b0e5200b6b8.tar.gz tor-aae034d13e458dfe82b503d3a1b54b0e5200b6b8.zip |
Write a bunch of module documentation.
This commit adds or improves the module-level documentation for:
buffers.c circuitstats.c command.c connection_edge.c control.c
cpuworker.c crypto_curve25519.c crypto_curve25519.h
crypto_ed25519.c crypto_format.c dircollate.c dirserv.c dns.c
dns_structs.h fp_pair.c geoip.c hibernate.c keypin.c ntmain.c
onion.c onion_fast.c onion_ntor.c onion_tap.c periodic.c
protover.c protover.h reasons.c rephist.c replaycache.c
routerlist.c routerparse.c routerset.c statefile.c status.c
tor_main.c workqueue.c
In particular, I've tried to explain (for each documented module)
what each module does, what's in it, what the big idea is, why it
belongs in Tor, and who calls it. In a few cases, I've added TODO
notes about refactoring opportunities.
I've also renamed an argument, and fixed a few DOCDOC comments.
Diffstat (limited to 'src')
36 files changed, 801 insertions, 42 deletions
diff --git a/src/common/crypto_curve25519.c b/src/common/crypto_curve25519.c index 58ec923638..0a744ef56d 100644 --- a/src/common/crypto_curve25519.c +++ b/src/common/crypto_curve25519.c @@ -5,6 +5,14 @@ * \file crypto_curve25519.c * * \brief Wrapper code for a curve25519 implementation. + * + * Curve25519 is an Elliptic-Curve Diffie Hellman handshake, designed by + * Dan Bernstein. For more information, see https://cr.yp.to/ecdh.html + * + * Tor uses Curve25519 as the basis of its "ntor" circuit extension + * handshake, and in related code. The functions in this module are + * used to find the most suitable available Curve25519 implementation, + * to provide wrappers around it, and so on. */ #define CRYPTO_CURVE25519_PRIVATE @@ -39,15 +47,23 @@ int curve25519_donna(uint8_t *mypublic, static void pick_curve25519_basepoint_impl(void); +/** This is set to 1 if we have an optimized Ed25519-based + * implementation for multiplying a value by the basepoint; to 0 if we + * don't, and to -1 if we haven't checked. */ static int curve25519_use_ed = -1; +/** + * Helper function: call the most appropriate backend to compute the + * scalar "secret" times the point "point". Store the result in + * "output". Return 0 on success, negative on failure. + **/ STATIC int curve25519_impl(uint8_t *output, const uint8_t *secret, - const uint8_t *basepoint) + const uint8_t *point) { uint8_t bp[CURVE25519_PUBKEY_LEN]; int r; - memcpy(bp, basepoint, CURVE25519_PUBKEY_LEN); + memcpy(bp, point, CURVE25519_PUBKEY_LEN); /* Clear the high bit, in case our backend foolishly looks at it. */ bp[31] &= 0x7f; #ifdef USE_CURVE25519_DONNA @@ -61,6 +77,11 @@ curve25519_impl(uint8_t *output, const uint8_t *secret, return r; } +/** + * Helper function: Multiply the scalar "secret" by the Curve25519 + * basepoint (X=9), and store the result in "output". Return 0 on + * success, -1 on failure. 
+ */ STATIC int curve25519_basepoint_impl(uint8_t *output, const uint8_t *secret) { @@ -85,6 +106,10 @@ curve25519_basepoint_impl(uint8_t *output, const uint8_t *secret) return r; } +/** + * Override the decision of whether to use the Ed25519-based basepoint + * multiply function. Used for testing. + */ void curve25519_set_impl_params(int use_ed) { @@ -142,6 +167,10 @@ curve25519_secret_key_generate(curve25519_secret_key_t *key_out, return 0; } +/** + * Given a secret key in <b>seckey</b>, create the corresponding public + * key in <b>key_out</b>. + */ void curve25519_public_key_generate(curve25519_public_key_t *key_out, const curve25519_secret_key_t *seckey) @@ -149,6 +178,11 @@ curve25519_public_key_generate(curve25519_public_key_t *key_out, curve25519_basepoint_impl(key_out->public_key, seckey->secret_key); } +/** + * Construct a new keypair in *<b>keypair_out</b>. If <b>extra_strong</b> + * is true, this key is possibly going to get used more than once, so + * use a better-than-usual RNG. Return 0 on success, -1 on failure. + */ int curve25519_keypair_generate(curve25519_keypair_t *keypair_out, int extra_strong) @@ -159,7 +193,13 @@ curve25519_keypair_generate(curve25519_keypair_t *keypair_out, return 0; } -/* DOCDOC */ +/** Store the keypair <b>keypair</b>, including its secret and public + * parts, to the file <b>fname</b>. Use the string tag <b>tag</b> to + * distinguish this from other Curve25519 keypairs. Return 0 on success, + * -1 on failure. + * + * See crypto_write_tagged_contents_to_file() for more information on + * the metaformat used for these keys.*/ int curve25519_keypair_write_to_file(const curve25519_keypair_t *keypair, const char *fname, @@ -182,7 +222,10 @@ curve25519_keypair_write_to_file(const curve25519_keypair_t *keypair, return r; } -/* DOCDOC */ +/** Read a curve25519 keypair from a file named <b>fname</b> created by + * curve25519_keypair_write_to_file(). 
Store the keypair in + * <b>keypair_out</b>, and the associated tag string in <b>tag_out</b>. + * Return 0 on success, and -1 on failure. */ int curve25519_keypair_read_from_file(curve25519_keypair_t *keypair_out, char **tag_out, @@ -197,6 +240,7 @@ curve25519_keypair_read_from_file(curve25519_keypair_t *keypair_out, if (len != sizeof(content)) goto end; + /* Make sure that the public key matches the secret key */ memcpy(keypair_out->seckey.secret_key, content, CURVE25519_SECKEY_LEN); curve25519_public_key_generate(&keypair_out->pubkey, &keypair_out->seckey); if (tor_memneq(keypair_out->pubkey.public_key, diff --git a/src/common/crypto_curve25519.h b/src/common/crypto_curve25519.h index 547e393567..4011820949 100644 --- a/src/common/crypto_curve25519.h +++ b/src/common/crypto_curve25519.h @@ -14,12 +14,20 @@ /** Length of the result of a curve25519 handshake. */ #define CURVE25519_OUTPUT_LEN 32 -/** Wrapper type for a curve25519 public key */ +/** Wrapper type for a curve25519 public key. + * + * (We define a separate type for these to make it less likely that we'll + * mistake them for secret keys.) + * */ typedef struct curve25519_public_key_t { uint8_t public_key[CURVE25519_PUBKEY_LEN]; } curve25519_public_key_t; -/** Wrapper type for a curve25519 secret key */ +/** Wrapper type for a curve25519 secret key + * + * (We define a separate type for these to make it less likely that we'll + * mistake them for public keys.) + **/ typedef struct curve25519_secret_key_t { uint8_t secret_key[CURVE25519_SECKEY_LEN]; } curve25519_secret_key_t; diff --git a/src/common/crypto_ed25519.c b/src/common/crypto_ed25519.c index 817c1a271b..30ed772274 100644 --- a/src/common/crypto_ed25519.c +++ b/src/common/crypto_ed25519.c @@ -5,6 +5,14 @@ * \file crypto_ed25519.c * * \brief Wrapper code for an ed25519 implementation. + * + * Ed25519 is a Schnorr signature on a Twisted Edwards curve, defined + * by Dan Bernstein. 
For more information, see https://ed25519.cr.yp.to/ + * + * This module wraps our choice of Ed25519 backend, and provides a few + * convenience functions for checking and generating signatures. It also + * provides Tor-specific tools for key blinding and for converting Ed25519 + * keys to and from the corresponding Curve25519 keys. */ #include "orconfig.h" @@ -28,7 +36,7 @@ static void pick_ed25519_impl(void); static int ed25519_impl_spot_check(void); -/** An Ed25519 implementation */ +/** An Ed25519 implementation, as a set of function pointers. */ typedef struct { int (*selftest)(void); @@ -53,6 +61,8 @@ typedef struct { int); } ed25519_impl_t; +/** The Ref10 Ed25519 implementation. This one is pure C and lightly + * optimized. */ static const ed25519_impl_t impl_ref10 = { NULL, @@ -71,6 +81,8 @@ static const ed25519_impl_t impl_ref10 = { ed25519_ref10_pubkey_from_curve25519_pubkey, }; +/** The Donna Ed25519 implementation. This one is heavily optimized, but still + * mostly C. The C still tends to be heavily platform-specific. */ static const ed25519_impl_t impl_donna = { ed25519_donna_selftest, @@ -89,8 +101,15 @@ static const ed25519_impl_t impl_donna = { ed25519_donna_pubkey_from_curve25519_pubkey, }; +/** Which Ed25519 implementation are we using? NULL if we haven't decided + * yet. */ static const ed25519_impl_t *ed25519_impl = NULL; +/** Helper: Return our chosen Ed25519 implementation. + * + * This should only be called after we've picked an implementation, but + * it _does_ recover if you forget this. + **/ static inline const ed25519_impl_t * get_ed_impl(void) { @@ -101,7 +120,12 @@ get_ed_impl(void) } #ifdef TOR_UNIT_TESTS +/** For testing: used to remember our actual choice of Ed25519 + * implementation */ static const ed25519_impl_t *saved_ed25519_impl = NULL; +/** For testing: Use the Ed25519 implementation called <b>name</b> until + * crypto_ed25519_testing_restore_impl is called. Recognized names are + * "donna" and "ref10". 
*/ void crypto_ed25519_testing_force_impl(const char *name) { @@ -114,6 +138,9 @@ crypto_ed25519_testing_force_impl(const char *name) ed25519_impl = &impl_ref10; } } +/** For testing: go back to whatever Ed25519 implementation we had picked + * before crypto_ed25519_testing_force_impl was called. + */ void crypto_ed25519_testing_restore_impl(void) { diff --git a/src/common/crypto_format.c b/src/common/crypto_format.c index bdf9bfd613..2f6d847c83 100644 --- a/src/common/crypto_format.c +++ b/src/common/crypto_format.c @@ -123,6 +123,10 @@ crypto_read_tagged_contents_from_file(const char *fname, return r; } +/** Encode <b>pkey</b> as a base64-encoded string, without trailing "=" + * characters, in the buffer <b>output</b>, which must have at least + * CURVE25519_BASE64_PADDED_LEN+1 bytes available. Return 0 on success, -1 on + * failure. */ int curve25519_public_to_base64(char *output, const curve25519_public_key_t *pkey) @@ -135,6 +139,9 @@ curve25519_public_to_base64(char *output, return 0; } +/** Try to decode a base64-encoded curve25519 public key from <b>input</b> + * into the object at <b>pkey</b>. Return 0 on success, -1 on failure. + * Accepts keys with or without a trailing "=". */ int curve25519_public_from_base64(curve25519_public_key_t *pkey, const char *input) diff --git a/src/common/workqueue.c b/src/common/workqueue.c index 48c0cca01f..e1fb663a2a 100644 --- a/src/common/workqueue.c +++ b/src/common/workqueue.c @@ -6,6 +6,20 @@ * * \brief Implements worker threads, queues of work for them, and mechanisms * for them to send answers back to the main thread. + * + * The main structure here is a threadpool_t : it manages a set of worker + * threads, a queue of pending work, and a reply queue. Every piece of work + * is a workqueue_entry_t, containing data to process and a function to + * process it with. + * + * The main thread informs the worker threads of pending work by using a + * condition variable. 
The workers inform the main thread of completed work + * by using an alert_sockets_t object, as implemented in compat_threads.c. + * + * The main thread can also queue an "update" that will be handled by all the + * workers. This is useful for updating state that all the workers share. + * + * In Tor today, there is currently only one thread pool, used in cpuworker.c. */ #include "orconfig.h" diff --git a/src/or/buffers.c b/src/or/buffers.c index c08da63a0d..412879606a 100644 --- a/src/or/buffers.c +++ b/src/or/buffers.c @@ -6,10 +6,22 @@ /** * \file buffers.c - * \brief Implements a generic interface buffer. Buffers are - * fairly opaque string holders that can read to or flush from: - * memory, file descriptors, or TLS connections. Buffers are implemented - * as linked lists of memory chunks. + * \brief Implements a generic buffer interface. + * + * A buf_t is a (fairly) opaque byte-oriented FIFO that can read to or flush + * from memory, sockets, file descriptors, TLS connections, or another buf_t. + * Buffers are implemented as linked lists of memory chunks. + * + * All socket-backed and TLS-based connection_t objects have a pair of + * buffers: one for incoming data, and one for outgoing data. These are fed + * and drained from functions in connection.c, triggered by events that are + * monitored in main.c. + * + * This module has basic support for reading and writing on buf_t objects. It + * also contains specialized functions for handling particular protocols + * on a buf_t backend, including SOCKS (used in connection_edge.c), Tor cells + * (used in connection_or.c and channeltls.c), HTTP (used in directory.c), and + * line-oriented communication (used in control.c). 
**/ #define BUFFERS_PRIVATE #include "or.h" diff --git a/src/or/circuitstats.c b/src/or/circuitstats.c index 3d64113521..418acc0024 100644 --- a/src/or/circuitstats.c +++ b/src/or/circuitstats.c @@ -9,6 +9,18 @@ * * \brief Maintains and analyzes statistics about circuit built times, so we * can tell how long we may need to wait for a fast circuit to be constructed. + * + * By keeping these statistics, a client learns when it should time out a slow + * circuit for being too slow, and when it should keep a circuit open in order + * to wait for it to complete. + * + * The information here is kept in a circuit_built_times_t structure, which is + * currently a singleton, but doesn't need to be. It's updated by calls to + * circuit_build_times_count_timeout() from circuituse.c, + * circuit_build_times_count_close() from circuituse.c, and + * circuit_build_times_add_time() from circuitbuild.c, and inspected by other + * calls into this module, mostly from circuitlist.c. Observations are + * persisted to disk via the or_state_t-related calls. */ #define CIRCUITSTATS_PRIVATE @@ -329,7 +341,6 @@ circuit_build_times_min_timeout(void) "circuit_build_times_min_timeout() called, cbtmintimeout is %d", num); } - return num; } diff --git a/src/or/command.c b/src/or/command.c index 5ad92bed1e..5866c386e4 100644 --- a/src/or/command.c +++ b/src/or/command.c @@ -7,6 +7,26 @@ /** * \file command.c * \brief Functions for processing incoming cells. + * + * When we receive a cell from a client or a relay, it arrives on some + * channel, and tells us what to do with it. In this module, we dispatch based + * on the cell type using the functions command_process_cell() and + * command_process_var_cell(), and deal with the cell accordingly. (These + * handlers are installed on a channel with the command_setup_channel() + * function.) 
+ * + * Channels have a chance to handle some cell types on their own before they + * are ever passed here --- typically, they do this for cells that are + * specific to a given channel type. For example, in channeltls.c, the cells + * for the initial connection handshake are handled before we get here. (Of + * course, the fact that there _is_ only one channel type for now means that + * we may have gotten the factoring wrong here.) + * + * Handling other cell types is mainly farmed off to other modules, after + * initial sanity-checking. CREATE* cells are handled ultimately in onion.c, + * CREATED* cells trigger circuit creation in circuitbuild.c, DESTROY cells + * are handled here (since they're simple), and RELAY cells, in all their + * complexity, are passed off to relay.c. **/ /* In-points to command.c: diff --git a/src/or/connection_edge.c b/src/or/connection_edge.c index 788b7ee066..24842e4107 100644 --- a/src/or/connection_edge.c +++ b/src/or/connection_edge.c @@ -7,6 +7,51 @@ /** * \file connection_edge.c * \brief Handle edge streams. + * + * An edge_connection_t is a subtype of a connection_t, and represents two + * critical concepts in Tor: a stream, and an edge connection. From the Tor + * protocol's point of view, a stream is a bi-directional channel that is + * multiplexed on a single circuit. Each stream on a circuit is identified + * with a separate 16-bit stream ID, local to the (circuit,exit) pair. + * Streams are created in response to client requests. + * + * An edge connection is one thing that can implement a stream: it is either a + * TCP application socket that has arrived via (e.g.) a SOCKS request, or an + * exit connection. + * + * Not every instance of edge_connection_t truly represents an edge connection, + * however. (Sorry!) We also create edge_connection_t objects for streams that + * we will not be handling with TCP. 
The types of these streams are: + * <ul> + * <li>DNS lookup streams, created on the client side in response to + * a UDP DNS request received on a DNSPort, or a RESOLVE command + * on a controller. + * <li>DNS lookup streams, created on the exit side in response to + * a RELAY_RESOLVE cell from a client. + * <li>Tunneled directory streams, created on the directory cache side + * in response to a RELAY_BEGINDIR cell. These streams attach directly + * to a dir_connection_t object without ever using TCP. + * </ul> + * + * This module handles general-purpose functionality having to do with + * edge_connection_t. On the client side, it accepts various types of + * application requests on SocksPorts, TransPorts, and NATDPorts, and + * creates streams appropriately. + * + * This module is also responsible for implementing stream isolation: + * ensuring that streams that should not be linkable to one another are + * kept to different circuits. + * + * On the exit side, this module handles the various stream-creating + * types of RELAY cells by launching appropriate outgoing connections, + * DNS requests, or directory connection objects. + * + * And for all edge connections, this module is responsible for handling + * incoming and outgoing data as it arrives or leaves in the relay.c + * module. (Outgoing data will be packaged in + * connection_edge_process_inbuf() as it calls + * connection_edge_package_raw_inbuf(); incoming data from RELAY_DATA + * cells is applied in connection_edge_process_relay_cell().) **/ #define CONNECTION_EDGE_PRIVATE diff --git a/src/or/control.c b/src/or/control.c index 8962075e1e..a4f8e3713b 100644 --- a/src/or/control.c +++ b/src/or/control.c @@ -5,7 +5,31 @@ /** * \file control.c * \brief Implementation for Tor's control-socket interface. - * See doc/spec/control-spec.txt for full details on protocol. + * + * A "controller" is an external program that monitors and controls a Tor + * instance via a text-based protocol. 
It connects to Tor via a connection + * to a local socket. + * + * The protocol is line-driven. The controller sends commands terminated by a + * CRLF. Tor sends lines that are either <em>replies</em> to what the + * controller has said, or <em>events</em> that Tor sends to the controller + * asynchronously based on occurrences in the Tor network model. + * + * See the control-spec.txt file in the torspec.git repository for full + * details on protocol. + * + * This module generally has two kinds of entry points: those based on having + * received a command on a controller socket, which are handled in + * connection_control_process_inbuf(), and dispatched to individual functions + * with names like control_handle_COMMANDNAME(); and those based on events + * that occur elsewhere in Tor, which are handled by functions with names like + * control_event_EVENTTYPE(). + * + * Controller events are not sent immediately; rather, they are inserted into + * the queued_control_events array, and flushed later from + * flush_queued_events_cb(). Doing this simplifies our callgraph greatly, + * by limiting the number of places in Tor that can call back into the network + * stack. **/ #define CONTROL_PRIVATE diff --git a/src/or/cpuworker.c b/src/or/cpuworker.c index 2e76ea5b78..26bc54b55c 100644 --- a/src/or/cpuworker.c +++ b/src/or/cpuworker.c @@ -8,7 +8,11 @@ * \brief Uses the workqueue/threadpool code to farm CPU-intensive activities * out to subprocesses. * - * Right now, we only use this for processing onionskins. + * The multithreading backend for this module is in workqueue.c; this module + * specializes workqueue.c. + * + * Right now, we only use this for processing onionskins, and invoke it mostly + * from onion.c. 
**/ #include "or.h" #include "channel.h" diff --git a/src/or/dircollate.c b/src/or/dircollate.c index c6afd95926..033a7afe0f 100644 --- a/src/or/dircollate.c +++ b/src/or/dircollate.c @@ -8,6 +8,17 @@ * * \brief Collation code for figuring out which identities to vote for in * the directory voting process. + * + * During the consensus calculation, when an authority is looking at the vote + * documents from all the authorities, it needs to compute the consensus for + * each relay listed by at least one authority. But the notion of "each + * relay" can be tricky: some relays have Ed25519 keys, and others don't. + * + * Moreover, older consensus methods did RSA-based ID collation alone, and + * ignored Ed25519 keys. We need to support those too until we're completely + * sure that authorities will never downgrade. + * + * This module is invoked exclusively from dirvote.c. */ #define DIRCOLLATE_PRIVATE @@ -21,6 +32,9 @@ static void dircollator_collate_by_ed25519(dircollator_t *dc); * RSA SHA1 digest) to an array of vote_routerstatus_t. */ typedef struct ddmap_entry_s { HT_ENTRY(ddmap_entry_s) node; + /** A SHA1-RSA1024 identity digest and Ed25519 identity key, + * concatenated. (If there is no ed25519 identity key, there is no + * entry in this table.) */ uint8_t d[DIGEST_LEN + DIGEST256_LEN]; /* The nth member of this array corresponds to the vote_routerstatus_t (if * any) received for this digest pair from the nth voter. */ @@ -43,12 +57,16 @@ ddmap_entry_new(int n_votes) sizeof(vote_routerstatus_t *) * n_votes); } +/** Helper: compute a hash of a single ddmap_entry_t's identity (or + * identities) */ static unsigned ddmap_entry_hash(const ddmap_entry_t *ent) { return (unsigned) siphash24g(ent->d, sizeof(ent->d)); } +/** Helper: return true if <b>a</b> and <b>b</b> have the same + * identity/identities. 
*/ static unsigned ddmap_entry_eq(const ddmap_entry_t *a, const ddmap_entry_t *b) { @@ -56,7 +74,7 @@ ddmap_entry_eq(const ddmap_entry_t *a, const ddmap_entry_t *b) } /** Record the RSA identity of <b>ent</b> as <b>rsa_sha1</b>, and the - * ed25519 identity as <b>ed25519</b>. */ + * ed25519 identity as <b>ed25519</b>. Both must be provided. */ static void ddmap_entry_set_digests(ddmap_entry_t *ent, const uint8_t *rsa_sha1, @@ -72,8 +90,12 @@ HT_GENERATE2(double_digest_map, ddmap_entry_s, node, ddmap_entry_hash, ddmap_entry_eq, 0.6, tor_reallocarray, tor_free_) /** Helper: add a single vote_routerstatus_t <b>vrs</b> to the collator - * <b>dc</b>, indexing it by its RSA key digest, and by the 2-tuple of - * its RSA key digest and Ed25519 key. */ + * <b>dc</b>, indexing it by its RSA key digest, and by the 2-tuple of its RSA + * key digest and Ed25519 key. It must come from the <b>vote_num</b>th + * vote. + * + * Requires that the vote is well-formed -- that is, that it has no duplicate + * routerstatus entries. We already checked for that when parsing the vote. */ static void dircollator_add_routerstatus(dircollator_t *dc, int vote_num, @@ -82,9 +104,12 @@ dircollator_add_routerstatus(dircollator_t *dc, { const char *id = vrs->status.identity_digest; + /* Clear this flag; we might set it later during the voting process */ vrs->ed25519_reflects_consensus = 0; - (void) vote; + (void) vote; // We don't currently need this. + + /* First, add this item to the appropriate RSA-SHA-Id array. */ vote_routerstatus_t **vrs_lst = digestmap_get(dc->by_rsa_sha1, id); if (NULL == vrs_lst) { vrs_lst = tor_calloc(dc->n_votes, sizeof(vote_routerstatus_t *)); @@ -98,6 +123,7 @@ dircollator_add_routerstatus(dircollator_t *dc, if (! vrs->has_ed25519_listing) return; + /* Now add it to the appropriate <Ed,RSA-SHA-Id> array. 
*/ ddmap_entry_t search, *found; memset(&search, 0, sizeof(search)); ddmap_entry_set_digests(&search, (const uint8_t *)id, ed); diff --git a/src/or/dirserv.c b/src/or/dirserv.c index e8d60d0db8..41945fe1d1 100644 --- a/src/or/dirserv.c +++ b/src/or/dirserv.c @@ -36,6 +36,24 @@ * \file dirserv.c * \brief Directory server core implementation. Manages directory * contents and generates directories. + * + * This module implements most of directory cache functionality, and some of + * the directory authority functionality. The directory.c module delegates + * here in order to handle incoming requests from clients, via + * connection_dirserv_flushed_some() and its kin. In order to save RAM, this + * module is responsible for spooling directory objects (in whole or in part) + * onto buf_t instances, and then closing the dir_connection_t once the + * objects are totally flushed. + * + * The directory.c module also delegates here for handling descriptor uploads + * via dirserv_add_multiple_descriptors(). + * + * Additionally, this module handles some aspects of voting, including: + * deciding how to vote on individual flags (based on decisions reached in + * rephist.c), formatting routerstatus lines, and deciding what relays to + * include in an authority's vote. (TODO: Those functions could profitably be + * split off. They only live in this file because historically they were + * shared among the v1, v2, and v3 directory code.) */ /** How far in the future do we allow a router to get? (seconds) */ diff --git a/src/or/dns.c b/src/or/dns.c index aaffad77fc..5f9813b912 100644 --- a/src/or/dns.c +++ b/src/or/dns.c @@ -9,6 +9,42 @@ * This is implemented as a wrapper around Adam Langley's eventdns.c code. * (We can't just use gethostbyname() and friends because we really need to * be nonblocking.) + * + * There are three main cases when a Tor relay uses dns.c to launch a DNS + * request: + * <ol> + * <li>To check whether the DNS server is working more or less correctly. 
+ * This happens via dns_launch_correctness_checks(). The answer is + * reported in the return value from later calls to + * dns_seems_to_be_broken(). + * <li>When a client has asked the relay, in a RELAY_BEGIN cell, to connect + * to a given server by hostname. This happens via dns_resolve(). + * <li>When a client has asked the relay, in a RELAY_RESOLVE cell, to look + * up a given server's IP address(es) by hostname. This also happens via + * dns_resolve(). + * </ol> + * + * Each of these gets handled a little differently. + * + * To check for correctness, we look up some hostname we expect to exist and + * have real entries, some hostnames which we expect to definitely not exist, + * and some hostnames that we expect to probably not exist. If too many of + * the hostnames that shouldn't exist do exist, that's a DNS hijacking + * attempt. If too many of the hostnames that should exist have the same + * addresses as the ones that shouldn't exist, that's a very bad DNS hijacking + * attempt, or a very naughty captive portal. And if the hostnames that + * should exist simply don't exist, we probably have a broken nameserver. + * + * To handle client requests, we first check our cache for answers. If there + * isn't something up-to-date, we've got to launch A or AAAA requests as + * appropriate. How we handle responses to those in particular is a bit + * complex; see dns_lookup() and set_exitconn_info_from_resolve(). + * + * When a lookup is finally complete, the inform_pending_connections() + * function will tell all of the streams that have been waiting for the + * resolve, by calling connection_exit_connect() if the client sent a + * RELAY_BEGIN cell, and by calling send_resolved_cell() or + * send_hostname_cell() if the client sent a RELAY_RESOLVE cell. 
**/ #define DNS_PRIVATE @@ -793,8 +829,14 @@ dns_resolve_impl,(edge_connection_t *exitconn, int is_resolve, } /** Given an exit connection <b>exitconn</b>, and a cached_resolve_t - * <b>resolve</b> whose DNS lookups have all succeeded or failed, update the - * appropriate fields (address_ttl and addr) of <b>exitconn</b>. + * <b>resolve</b> whose DNS lookups have all either succeeded or failed, + * update the appropriate fields (address_ttl and addr) of <b>exitconn</b>. + * + * The logic can be complicated here, since we might have launched both + * an A lookup and an AAAA lookup, and since either of those might have + * succeeded or failed, and since we want to answer a RESOLVE cell with + * a full answer but answer a BEGIN cell with whatever answer the client + * would accept <i>and</i> we could still connect to. * * If this is a reverse lookup, set *<b>hostname_out</b> to a newly allocated * copy of the name resulting hostname. @@ -1137,7 +1179,12 @@ dns_found_answer(const char *address, uint8_t query_type, /** Given a pending cached_resolve_t that we just finished resolving, * inform every connection that was waiting for the outcome of that - * resolution. */ + * resolution. + * + * Do this by sending a RELAY_RESOLVED cell (if the pending stream had sent us + * a RELAY_RESOLVE cell), or by launching an exit connection (if the pending + * stream had sent us a RELAY_BEGIN cell). + */ static void inform_pending_connections(cached_resolve_t *resolve) { diff --git a/src/or/dns_structs.h b/src/or/dns_structs.h index bb67459d7b..bc6067213d 100644 --- a/src/or/dns_structs.h +++ b/src/or/dns_structs.h @@ -1,3 +1,15 @@ +/* Copyright (c) 2003-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2016, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file dns_structs.h + * + * \brief Structures used in dns.c. Exposed to dns.c, and to the unit tests + * that declare DNS_PRIVATE. 
+ */ + #ifndef TOR_DNS_STRUCTS_H #define TOR_DNS_STRUCTS_H diff --git a/src/or/fp_pair.c b/src/or/fp_pair.c index 53b311e580..eeeb0f1de3 100644 --- a/src/or/fp_pair.c +++ b/src/or/fp_pair.c @@ -7,6 +7,14 @@ * \brief Manages data structures for associating pairs of fingerprints. Used * to handle combinations of identity/signing-key fingerprints for * authorities. + * + * This is a nice, simple, compact data structure module that handles a map + * from (signing key fingerprint, identity key fingerprint) to void *. The + * fingerprints here are SHA1 digests of RSA keys. + * + * This structure is used in directory.c and in routerlist.c for handling + * authority certificates, since we never want more than a single + * certificate for any (ID key, signing key) pair. **/ #include "or.h" diff --git a/src/or/geoip.c b/src/or/geoip.c index 6eb0ce7669..ba65dfe56c 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -7,6 +7,24 @@ * to summarizing client connections by country to entry guards, bridges, * and directory servers; and for statistics on answering network status * requests. + * + * There are two main kinds of functions in this module: geoip functions, + * which map groups of IPv4 and IPv6 addresses to country codes, and + * statistical functions, which collect statistics about different kinds of + * per-country usage. + * + * The geoip lookup tables are implemented as sorted lists of disjoint address + * ranges, each mapping to a singleton geoip_country_t. These country objects + * are also indexed by their names in a hashtable. + * + * The tables are populated from disk at startup by the geoip_load_file() + * function. For more information on the file format they read, see that + * function. See the scripts and the README file in src/config for more + * information about how those files are generated. 
+ * + * Tor uses GeoIP information in order to implement user requests (such as + * ExcludeNodes {cc}), and to keep track of how much usage relays are getting + * for each country. */ #define GEOIP_PRIVATE diff --git a/src/or/hibernate.c b/src/or/hibernate.c index 7e25306234..aaf5c4bdcd 100644 --- a/src/or/hibernate.c +++ b/src/or/hibernate.c @@ -8,6 +8,12 @@ * etc in preparation for closing down or going dormant; and to track * bandwidth and time intervals to know when to hibernate and when to * stop hibernating. + * + * Ordinarily a Tor relay is "Live". + * + * A live relay can stop accepting connections for one of two reasons: either + * it is trying to conserve bandwidth because of bandwidth accounting rules + * ("soft hibernation"), or it is about to shut down ("exiting"). **/ /* @@ -49,8 +55,10 @@ typedef enum { UNIT_MONTH=1, UNIT_WEEK=2, UNIT_DAY=3, } time_unit_t; -/* Fields for accounting logic. Accounting overview: +/* + * @file hibernate.c * + * <h4>Accounting</h4> * Accounting is designed to ensure that no more than N bytes are sent in * either direction over a given interval (currently, one month, one week, or * one day) We could @@ -64,17 +72,21 @@ typedef enum { * * Each interval runs as follows: * - * 1. We guess our bandwidth usage, based on how much we used + * <ol> + * <li>We guess our bandwidth usage, based on how much we used * last time. We choose a "wakeup time" within the interval to come up. - * 2. Until the chosen wakeup time, we hibernate. - * 3. We come up at the wakeup time, and provide bandwidth until we are + * <li>Until the chosen wakeup time, we hibernate. + * <li> We come up at the wakeup time, and provide bandwidth until we are * "very close" to running out. - * 4. Then we go into low-bandwidth mode, and stop accepting new + * <li> Then we go into low-bandwidth mode, and stop accepting new * connections, but provide bandwidth until we run out. - * 5. Then we hibernate until the end of the interval. 
+ * <li> Then we hibernate until the end of the interval. * * If the interval ends before we run out of bandwidth, we go back to * step one. + * + * Accounting is controlled by the AccountingMax, AccountingRule, and + * AccountingStart options. */ /** How many bytes have we read in this accounting interval? */ diff --git a/src/or/keypin.c b/src/or/keypin.c index 335c793cd9..2d4c4e92d2 100644 --- a/src/or/keypin.c +++ b/src/or/keypin.c @@ -39,16 +39,28 @@ * @brief Key-pinning for RSA and Ed25519 identity keys at directory * authorities. * + * Many older clients, and many internal interfaces, still refer to relays by + * their RSA1024 identity keys. We can make this more secure, however: + * authorities use this module to track which RSA keys have been used along + * with which Ed25519 keys, and force such associations to be permanent. + * * This module implements a key-pinning mechanism to ensure that it's safe * to use RSA keys as identitifers even as we migrate to Ed25519 keys. It * remembers, for every Ed25519 key we've seen, what the associated Ed25519 * key is. This way, if we see a different Ed25519 key with that RSA key, * we'll know that there's a mismatch. * + * (As of this writing, these key associations are advisory only, mostly + * because some relay operators kept mishandling their Ed25519 keys during + * the initial Ed25519 rollout. We should fix this problem, and then toggle + * the AuthDirPinKeys option.) + * * We persist these entries to disk using a simple format, where each line * has a base64-encoded RSA SHA1 hash, then a base64-endoded Ed25519 key. * Empty lines, misformed lines, and lines beginning with # are * ignored. Lines beginning with @ are reserved for future extensions. + * + * The dirserv.c module is the main user of these functions. 
*/ static int keypin_journal_append_entry(const uint8_t *rsa_id_digest, diff --git a/src/or/ntmain.c b/src/or/ntmain.c index a1b886bb5a..4c65805b32 100644 --- a/src/or/ntmain.c +++ b/src/or/ntmain.c @@ -6,7 +6,15 @@ /** * \file ntmain.c * - * \brief Entry points for running/configuring Tor as Windows Service. + * \brief Entry points for running/configuring Tor as a Windows Service. + * + * Windows Services expect to be registered with the operating system, and to + * have entry points for starting, stopping, and monitoring them. This module + * implements those entry points so that a tor relay or client or hidden + * service can run as a Windows service. Therefore, this module + * is only compiled when building for Windows. + * + * Warning: this module is not very well tested or very well maintained. */ #ifdef _WIN32 diff --git a/src/or/onion.c b/src/or/onion.c index 8a566af766..a987883802 100644 --- a/src/or/onion.c +++ b/src/or/onion.c @@ -8,6 +8,58 @@ * \file onion.c * \brief Functions to queue create cells, wrap the various onionskin types, * and parse and create the CREATE cell and its allies. + * + * This module has a few functions, all related to the CREATE/CREATED + * handshake that we use on links in order to create a circuit, and the + * related EXTEND/EXTENDED handshake that we use over circuits in order to + * extend them an additional hop. + * + * In this module, we provide a set of abstractions to create a uniform + * interface over the three circuit extension handshakes that Tor has used + * over the years (TAP, CREATE_FAST, and ntor). These handshakes are + * implemented in onion_tap.c, onion_fast.c, and onion_ntor.c respectively. + * + * All[*] of these handshakes follow a similar pattern: a client, knowing + * some key from the relay it wants to extend through, generates the + * first part of a handshake. A relay receives that handshake, and sends + * a reply. 
Once the client handles the reply, it knows that it is + * talking to the right relay, and it shares some freshly negotiated key + * material with that relay. + * + * We sometimes call the client's part of the handshake an "onionskin". + * We do this because historically, Onion Routing used a multi-layer + * structure called an "onion" to construct circuits. Each layer of the + * onion contained key material chosen by the client, the identity of + * the next relay in the circuit, and a smaller onion, encrypted with + * the key of the next relay. When we changed Tor to use a telescoping + * circuit extension design, it corresponded to sending each layer of the + * onion separately -- as a series of onionskins. + * + * Clients invoke these functions when creating or extending a circuit, + * from circuitbuild.c. + * + * Relays invoke these functions when they receive a CREATE or EXTEND + * cell in command.c or relay.c, in order to queue the pending request. + * They also invoke them from cpuworker.c, which handles dispatching + * onionskin requests to different worker threads. + * + * <br> + * + * This module also handles: + * <ul> + * <li> Queueing incoming onionskins on the relay side before passing + * them to worker threads. + * <li>Expiring onionskins on the relay side if they have waited for + * too long. + * <li>Packaging private keys on the server side in order to pass + * them to worker threads. + * <li>Encoding and decoding CREATE, CREATED, CREATE2, and CREATED2 cells. + * <li>Encoding and decoding EXTEND, EXTENDED, EXTEND2, and EXTENDED2 + * relay cells. + * </ul> + * + * [*] The CREATE_FAST handshake is weaker than described here; see + * onion_fast.c for more information. **/ #include "or.h" diff --git a/src/or/onion_fast.c b/src/or/onion_fast.c index 6b5d12e407..8dcbfe22d8 100644 --- a/src/or/onion_fast.c +++ b/src/or/onion_fast.c @@ -7,6 +7,24 @@ /** * \file onion_fast.c * \brief Functions implement the CREATE_FAST circuit handshake.
+ * + * The "CREATE_FAST" handshake is an unauthenticated, non-forward-secure + * key derivation mechanism based on SHA1. We used to use it for the + * first hop of each circuit, since the TAP handshake provided no + * additional security beyond the security already provided by the TLS + * handshake [*]. + * + * When we switched to ntor, we deprecated CREATE_FAST, since ntor is + * stronger than our TLS handshake was, and fast enough to not be worrisome. + * + * This handshake, like the other circuit-extension handshakes, is + * invoked from onion.c. + * + * [*] Actually, it's possible that TAP _was_ a little better than TLS with + * RSA1024 certificates and EDH1024 for forward secrecy, if you + * hypothesize an adversary who can compute discrete logarithms on a + * small number of targeted DH1024 fields, but who can't break all that + * many RSA1024 keys. **/ #include "or.h" diff --git a/src/or/onion_ntor.c b/src/or/onion_ntor.c index d1a268f4cd..ded97ee73d 100644 --- a/src/or/onion_ntor.c +++ b/src/or/onion_ntor.c @@ -5,6 +5,17 @@ * \file onion_ntor.c * * \brief Implementation for the ntor handshake. + * + * The ntor circuit-extension handshake was developed as a replacement + * for the old TAP handshake. It uses Elliptic-curve Diffie-Hellman and + * a hash function in order to perform a one-way authenticated key + * exchange. The ntor handshake is meant to replace the old "TAP" + * handshake. + * + * We instantiate ntor with curve25519, HMAC-SHA256, and HKDF. + * + * This handshake, like the other circuit-extension handshakes, is + * invoked from onion.c. */ #include "orconfig.h" diff --git a/src/or/onion_tap.c b/src/or/onion_tap.c index abe779351f..2769300945 100644 --- a/src/or/onion_tap.c +++ b/src/or/onion_tap.c @@ -9,10 +9,22 @@ * \brief Functions to implement the original Tor circuit extension handshake * (a.k.a TAP).
* + * The "TAP" handshake is the first one that was widely used in Tor: It + * combines RSA1024-OAEP and AES128-CTR to perform a hybrid encryption over + * the first message DH1024 key exchange. (The RSA-encrypted part of the + * encryption is authenticated; the AES-encrypted part isn't. This was + * not a smart choice.) + * * We didn't call it "TAP" ourselves -- Ian Goldberg named it in "On the * Security of the Tor Authentication Protocol". (Spoiler: it's secure, but * its security is kind of fragile and implementation dependent. Never modify * this implementation without reading and understanding that paper at least.) + * + * We have deprecated TAP since the ntor handshake came into general use. It + * is still used for hidden service IP and RP connections, however. + * + * This handshake, like the other circuit-extension handshakes, is + * invoked from onion.c. **/ #include "or.h" diff --git a/src/or/periodic.c b/src/or/periodic.c index 0bccc6ec20..d02d4a7bbb 100644 --- a/src/or/periodic.c +++ b/src/or/periodic.c @@ -5,6 +5,10 @@ * \file periodic.c * * \brief Generic backend for handling periodic events. + * + * The events in this module are used by main.c to track items that need + * to fire once every N seconds, possibly picking a new interval each time + * that they fire. See periodic_events[] in main.c for examples. */ #include "or.h" diff --git a/src/or/protover.c b/src/or/protover.c index 9e0391a410..a6a5d18f36 100644 --- a/src/or/protover.c +++ b/src/or/protover.c @@ -1,3 +1,24 @@ +/* Copyright (c) 2016, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file protover.c + * \brief Versioning information for different pieces of the Tor protocol. + * + * Starting in version 0.2.9.3-alpha, Tor places separate version numbers on + * each of the different components of its protocol. 
Relays use these numbers + * to advertise what versions of the protocols they can support, and clients + * use them to find what they can ask a given relay to do. Authorities vote + * on the supported protocol versions for each relay, and also vote on + * which protocols you should have to support in order to be on the Tor + * network. All Tor instances use these required/recommended protocol versions + * to tell what level of support for recent protocols each relay has, and + * to decide whether they should be running given their current protocols. + * + * The main advantage of these protocol version numbers over using Tor + * version numbers is that they allow different implementations of the Tor + * protocols to develop independently, without having to claim compatibility + * with specific versions of Tor. + **/ #define PROTOVER_PRIVATE @@ -699,6 +720,9 @@ protover_compute_for_old_tor(const char *version) } } +/** + * Release all storage held by static fields in protover.c + */ void protover_free_all(void) { diff --git a/src/or/protover.h b/src/or/protover.h index 075405e7ca..5c658931ea 100644 --- a/src/or/protover.h +++ b/src/or/protover.h @@ -1,3 +1,10 @@ +/* Copyright (c) 2016, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file protover.h + * \brief Headers and type declarations for protover.c + **/ #ifndef TOR_PROTOVER_H #define TOR_PROTOVER_H diff --git a/src/or/reasons.c b/src/or/reasons.c index 36921cafcd..a1566e2299 100644 --- a/src/or/reasons.c +++ b/src/or/reasons.c @@ -6,6 +6,12 @@ * \file reasons.c * \brief Convert circuit, stream, and orconn error reasons to and/or from * strings and errno values. + * + * This module is just a bunch of functions full of case statements that + * convert from one representation of our error codes to another. These are + * mainly used in generating log messages, in sending messages to the + * controller in control.c, and in converting errors from one protocol layer + * to another.
**/ #include "or.h" diff --git a/src/or/rephist.c b/src/or/rephist.c index 6a54904746..8bcd7396aa 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -4,10 +4,74 @@ /** * \file rephist.c - * \brief Basic history and "reputation" functionality to remember + * \brief Basic history and performance-tracking functionality. + * + * Basic history and performance-tracking functionality to remember * which servers have worked in the past, how much bandwidth we've * been using, which ports we tend to want, and so on; further, * exit port statistics, cell statistics, and connection statistics. + * + * The history and information tracked in this module could sensibly be + * divided into several categories: + * + * <ul><li>Statistics used by authorities to remember the uptime and + * stability information about various relays, including "uptime", + * "weighted fractional uptime" and "mean time between failures". + * + * <li>Bandwidth usage history, used by relays to self-report how much + * bandwidth they've used for different purposes over the last day or so, + * in order to generate the {dirreq-,}{read,write}-history lines + * that they publish. + * + * <li>Predicted ports, used by clients to remember how long it's been + * since they opened an exit connection to each given target + * port. Clients use this information in order to try to keep circuits + * open to exit nodes that can connect to the ports that they care + * about. (The predicted ports mechanism also handles predicted circuit + * usage that _isn't_ port-specific, such as resolves, internal circuits, + * and so on.) + * + * <li>Public key operation counters, for tracking how many times we've + * done each public key operation. (This is unmaintained and we should + * remove it.)
+ * + * <li>Exit statistics by port, used by exits to keep track of the + * number of streams and bytes they've served at each exit port, so they + * can generate their exit-kibibytes-{read,written} and + * exit-streams-opened statistics. + * + * <li>Circuit stats, used by relay instances to track circuit + * queue fullness and delay over time, and generate cell-processed-cells, + * cell-queued-cells, cell-time-in-queue, and cell-circuits-per-decile + * statistics. + * + * <li>Descriptor serving statistics, used by directory caches to track + * how many descriptors they've served. + * + * <li>Connection statistics, used by relays to track one-way and + * bidirectional connections. + * + * <li>Onion handshake statistics, used by relays to count how many + * TAP and ntor handshakes they've handled. + * + * <li>Hidden service statistics, used by relays to count rendezvous + * traffic and HSDir-stored descriptors. + * + * <li>Link protocol statistics, used by relays to count how many times + * each link protocol has been used. + * + * </ul> + * + * The entry points for this module are scattered throughout the + * codebase. Sending data, receiving data, connecting to a relay, + * losing a connection to a relay, and so on can all trigger a change in + * our current stats. Relays also invoke this module in order to + * extract their statistics when building routerinfo and extrainfo + * objects in router.c. + * + * TODO: This module should be broken up. + * + * (The "rephist" name originally stood for "reputation and history".) **/ #include "or.h" @@ -2650,7 +2714,9 @@ rep_hist_desc_stats_write(time_t now) return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL; } -/* DOCDOC rep_hist_note_desc_served */ +/** Called to note that we've served a given descriptor (by + * digest). Increments the count of descriptors served, and the number + * of times we've served this descriptor.
*/ void rep_hist_note_desc_served(const char * desc) { diff --git a/src/or/replaycache.c b/src/or/replaycache.c index 23a1737b18..c17cba5f87 100644 --- a/src/or/replaycache.c +++ b/src/or/replaycache.c @@ -5,6 +5,18 @@ * \file replaycache.c * * \brief Self-scrubbing replay cache for rendservice.c + * + * To prevent replay attacks, hidden services need to recognize INTRODUCE2 + * cells that they've already seen, and drop them. If they didn't, then + * sending the same INTRODUCE2 cell over and over would force the hidden + * service to make a huge number of circuits to the same rendezvous + * point, aiding traffic analysis. + * + * (It's not that simple, actually. We only check for replays in the + * RSA-encrypted portion of the handshake, since the rest of the handshake is + * malleable.) + * + * This module is used from rendservice.c. */ #define REPLAYCACHE_PRIVATE diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 0e637f4833..85eb999ad6 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -9,6 +9,85 @@ * \brief Code to * maintain and access the global list of routerinfos for known * servers. + * + * A "routerinfo_t" object represents a single self-signed router + * descriptor, as generated by a Tor relay in order to tell the rest of + * the world about its keys, address, and capabilities. An + * "extrainfo_t" object represents an adjunct "extra-info" object, + * certified by a corresponding router descriptor, reporting more + * information about the relay that nearly all users will not need. + * + * Most users will not use router descriptors for most relays. Instead, + * they use the information in microdescriptors and in the consensus + * networkstatus. + * + * Right now, routerinfo_t objects are used in these ways: + * <ul> + * <li>By clients, in order to learn about bridge keys and capabilities. + * (Bridges aren't listed in the consensus networkstatus, so they + * can't have microdescriptors.) 
+ * <li>By relays, since relays want more information about other relays + * than they can learn from microdescriptors. (TODO: Is this still true?) + * <li>By authorities, which receive them and use them to generate the + * consensus and the microdescriptors. + * <li>By all directory caches, which download them in case somebody + * else wants them. + * </ul> + * + * Routerinfos are mostly created by parsing them from a string, in + * routerparse.c. We store them to disk on receiving them, and + * periodically discard the ones we don't need. On restarting, we + * re-read them from disk. (This also applies to extrainfo documents, if + * we are configured to fetch them.) + * + * In order to keep our list of routerinfos up-to-date, we periodically + * check whether there are any listed in the latest consensus (or in the + * votes from other authorities, if we are an authority) that we don't + * have. (This also applies to extrainfo documents, if we are + * configured to fetch them.) + * + * Almost nothing in Tor should use a routerinfo_t to refer directly to + * a relay; instead, almost everything should use node_t (implemented in + * nodelist.c), which provides a common interface to routerinfo_t, + * routerstatus_t, and microdescriptor_t. + * + * <br> + * + * This module also has some of the functions used for choosing random + * nodes according to different rules and weights. Historically, they + * were all in this module. Now, they are spread across this module, + * nodelist.c, and networkstatus.c. (TODO: Fix that.) + * + * <br> + * + * (For historical reasons) this module also contains code for handling + * the list of fallback directories, the list of directory authorities, + * and the list of authority certificates. + * + * For the directory authorities, we have a list containing the public + * identity key, and contact points, for each authority. The + * authorities receive descriptors from relays, and publish consensuses, + * descriptors, and microdescriptors. 
This list is pre-configured. + * + * Fallback directories are well-known, stable, but untrusted directory + * caches that clients which have not yet bootstrapped can use to get + * their first networkstatus consensus, in order to find out where the + * Tor network really is. This list is pre-configured in + * fallback_dirs.inc. Every authority also serves as a fallback. + * + * Both fallback directories and directory authorities are + * represented by a dir_server_t. + * + * Authority certificates are signed with authority identity keys; they + * are used to authenticate shorter-term authority signing keys. We + * fetch them when we find a consensus or a vote that has been signed + * with a signing key we don't recognize. We cache them on disk and + * load them on startup. Authority operators generate them with the + * "tor-gencert" utility. + * + * TODO: Authority certificates should be a separate module. + * + * TODO: dir_server_t stuff should be in a separate module. **/ #define ROUTERLIST_PRIVATE @@ -46,6 +125,9 @@ /****************************************************************************/ +/* Typed wrappers for different digestmap types; used to avoid type + * confusion. */ + DECLARE_TYPED_DIGESTMAP_FNS(sdmap_, digest_sd_map_t, signed_descriptor_t) DECLARE_TYPED_DIGESTMAP_FNS(rimap_, digest_ri_map_t, routerinfo_t) DECLARE_TYPED_DIGESTMAP_FNS(eimap_, digest_ei_map_t, extrainfo_t) @@ -800,7 +882,9 @@ static const char *BAD_SIGNING_KEYS[] = { NULL, }; -/* DOCDOC */ +/** Return true iff <b>cert</b> authenticates some authority signing key + * which, because of the old openssl heartbleed vulnerability, should + * never be trusted. */ int authority_cert_is_blacklisted(const authority_cert_t *cert) { diff --git a/src/or/routerparse.c b/src/or/routerparse.c index df9b76800d..93484660d2 100644 --- a/src/or/routerparse.c +++ b/src/or/routerparse.c @@ -6,7 +6,51 @@ /** * \file routerparse.c - * \brief Code to parse and validate router descriptors and directories.
+ * \brief Code to parse and validate router descriptors, consensus directories, + * and similar objects. + * + * The objects parsed by this module use a common text-based metaformat, + * documented in dir-spec.txt in torspec.git. This module is itself divided + * into two major kinds of function: code to handle the metaformat, and code + * to convert from particular instances of the metaformat into the + * objects that Tor uses. + * + * The generic parsing code works by calling a table-based tokenizer on the + * input string. Each token corresponds to a single line with a token, plus + * optional arguments on that line, plus an optional base-64 encoded object + * after that line. Each token has a definition in a table of token_rule_t + * entries that describes how many arguments it can take, whether it takes an + * object, how many times it may appear, whether it must appear first, and so + * on. + * + * The tokenizer function tokenize_string() converts its string input into a + * smartlist full of instances of directory_token_t, according to a provided + * table of token_rule_t. + * + * The generic parts of this module additionally include functions for + * finding the start and end of signed information inside a signed object, and + * computing the digest that will be signed. + * + * There are also functions for saving objects to disk that have caused + * parsing to fail. + * + * The specific parts of this module describe conversions between + * particular lists of directory_token_t and particular objects.
The + * kinds of objects that can be parsed here are: + * <ul> + * <li>router descriptors (managed from routerlist.c) + * <li>extra-info documents (managed from routerlist.c) + * <li>microdescriptors (managed from microdesc.c) + * <li>vote and consensus networkstatus documents, and the routerstatus_t + * objects that they comprise (managed from networkstatus.c) + * <li>detached-signature objects used by authorities for gathering + * signatures on the networkstatus consensus (managed from dirvote.c) + * <li>authority key certificates (managed from routerlist.c) + * <li>hidden service descriptors (managed from rendcommon.c and rendcache.c) + * </ul> + * + * For no terribly good reason, the functions to <i>generate</i> signatures on + * the above directory objects are also in this module. **/ #define ROUTERPARSE_PRIVATE @@ -258,12 +302,14 @@ typedef struct token_rule_t { int is_annotation; } token_rule_t; -/* +/** + * @name macros for defining token rules + * * Helper macros to define token tables. 's' is a string, 't' is a * directory_keyword, 'a' is a trio of argument multiplicities, and 'o' is an * object syntax. - * */ +/**@{*/ /** Appears to indicate the end of a table. */ #define END_OF_TABLE { NULL, NIL_, 0,0,0, NO_OBJ, 0, INT_MAX, 0, 0 } @@ -284,16 +330,17 @@ typedef struct token_rule_t { /** An annotation that must appear no more than once */ #define A01(s,t,a,o) { s, t, a, o, 0, 1, 0, 1 } -/* Argument multiplicity: any number of arguments. */ +/** Argument multiplicity: any number of arguments. */ #define ARGS 0,INT_MAX,0 -/* Argument multiplicity: no arguments. */ +/** Argument multiplicity: no arguments. */ #define NO_ARGS 0,0,0 -/* Argument multiplicity: concatenate all arguments. */ +/** Argument multiplicity: concatenate all arguments. */ #define CONCAT_ARGS 1,1,1 -/* Argument multiplicity: at least <b>n</b> arguments. */ +/** Argument multiplicity: at least <b>n</b> arguments. 
*/ #define GE(n) n,INT_MAX,0 -/* Argument multiplicity: exactly <b>n</b> arguments. */ +/** Argument multiplicity: exactly <b>n</b> arguments. */ #define EQ(n) n,n,0 +/**@}*/ /** List of tokens recognized in router descriptors */ static token_rule_t routerdesc_token_table[] = { diff --git a/src/or/routerset.c b/src/or/routerset.c index f260914f4b..58b66ea777 100644 --- a/src/or/routerset.c +++ b/src/or/routerset.c @@ -9,6 +9,20 @@ * * \brief Functions and structures to handle set-type selection of routers * by name, ID, address, etc. + * + * This module implements the routerset_t data structure, whose purpose + * is to specify a set of relays based on a list of their identities or + * properties. Routersets can restrict relays by IP address mask, + * identity fingerprint, country codes, and nicknames (deprecated). + * + * Routersets are typically used for user-specified restrictions, and + * are created by invoking routerset_new and routerset_parse from + * config.c and confparse.c. To use a routerset, invoke one of the + * routerset_contains_...() functions, or use + * routerset_get_all_nodes() / routerset_subtract_nodes() to + * manipulate a smartlist of node_t pointers. + * + * Country-code restrictions are implemented in geoip.c. */ #define ROUTERSET_PRIVATE diff --git a/src/or/statefile.c b/src/or/statefile.c index adf9d9f038..8fa4324b25 100644 --- a/src/or/statefile.c +++ b/src/or/statefile.c @@ -9,6 +9,23 @@ * * \brief Handles parsing and encoding the persistent 'state' file that carries * miscellaneous persistent state between Tor invocations. + * + * This 'state' file is a typed key-value store that allows multiple + * entries for the same key. It follows the same metaformat as described + * in confparse.c, and uses the same code to read and write itself. + * + * The state file is most suitable for small values that don't change too + * frequently.
For values that become very large, we typically use a separate + * file -- for example, see how we handle microdescriptors, by storing them in + * a separate file with a journal. + * + * The current state is accessed via get_or_state(), which returns a singleton + * or_state_t object. Functions that change it should call + * or_state_mark_dirty() to ensure that it will get written to disk. + * + * The or_state_save() function additionally calls various functions + * throughout Tor that might want to flush more state to the disk, + * including some in rephist.c, entrynodes.c, circuitstats.c, and hibernate.c. */ #define STATEFILE_PRIVATE diff --git a/src/or/status.c b/src/or/status.c index 749cee4edf..fce6a10157 100644 --- a/src/or/status.c +++ b/src/or/status.c @@ -3,7 +3,13 @@ /** * \file status.c - * \brief Keep status information and log the heartbeat messages. + * \brief Collect status information and log heartbeat messages. + * + * This module is responsible for implementing the heartbeat log messages, + * which periodically inform users and operators about basic facts to + * do with their Tor instance. The log_heartbeat() function, invoked from + * main.c, is the principal entry point. It collects data from elsewhere + * in Tor, and logs it in a human-readable format. **/ #define STATUS_PRIVATE diff --git a/src/or/tor_main.c b/src/or/tor_main.c index 21fbe3efb5..d67eda2ac9 100644 --- a/src/or/tor_main.c +++ b/src/or/tor_main.c @@ -17,8 +17,10 @@ const char tor_git_revision[] = /** * \file tor_main.c - * \brief Stub module containing a main() function. Allows unit - * test binary to link against main.c. + * \brief Stub module containing a main() function. + * + * We keep the main function in a separate module so that the unit + * tests, which have their own main()s, can link against main.c. **/ int tor_main(int argc, char *argv[]); |