diff options
author | Nick Mathewson <nickm@torproject.org> | 2019-11-05 08:05:49 -0500 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2019-11-05 08:05:49 -0500 |
commit | 8933789fef0e2014852df6ae55b5435b9d3256b2 (patch) | |
tree | 03cba4960d76656fd248e60d4d749152bdc62340 /src/lib | |
parent | 60d5ff303d65bb7caf5c064675c661faac4cecf1 (diff) | |
parent | aac80a004f91a453733fed0ba62c00d7d1e2b76d (diff) | |
download | tor-8933789fef0e2014852df6ae55b5435b9d3256b2.tar.gz tor-8933789fef0e2014852df6ae55b5435b9d3256b2.zip |
Merge branch 'doxygen_libs'
Diffstat (limited to 'src/lib')
42 files changed, 703 insertions, 87 deletions
diff --git a/src/lib/arch/lib_arch.dox b/src/lib/arch/lib_arch.dox index 60b5fafeb4..edb0cbbf1d 100644 --- a/src/lib/arch/lib_arch.dox +++ b/src/lib/arch/lib_arch.dox @@ -1,4 +1,4 @@ /** -@dir lib/arch -@brief lib/arch +@dir /lib/arch +@brief lib/arch: Compatibility code for handling different CPU architectures. **/ diff --git a/src/lib/buf/lib_buf.dox b/src/lib/buf/lib_buf.dox index f21c4b1b72..a2ac23ee4c 100644 --- a/src/lib/buf/lib_buf.dox +++ b/src/lib/buf/lib_buf.dox @@ -1,4 +1,15 @@ /** -@dir lib/buf -@brief lib/buf +@dir /lib/buf +@brief lib/buf: An efficient byte queue. + +This module defines the buf_t type, which is used throughout our networking +code. The implementation is a singly-linked queue of buffer chunks, similar +to the BSD kernel's +["mbuf"](https://www.freebsd.org/cgi/man.cgi?query=mbuf&sektion=9) structure. + +The buf_t type is also reasonable for use in constructing long strings. + +See \refdir{lib/net} for networking code that uses buf_t, and +\refdir{lib/tls} for cryptographic code that uses buf_t. + **/ diff --git a/src/lib/cc/lib_cc.dox b/src/lib/cc/lib_cc.dox index 804260cb29..06f4e775bf 100644 --- a/src/lib/cc/lib_cc.dox +++ b/src/lib/cc/lib_cc.dox @@ -1,4 +1,4 @@ /** -@dir lib/cc -@brief lib/cc +@dir /lib/cc +@brief lib/cc: Macros for managing the C compiler and language. **/ diff --git a/src/lib/compress/lib_compress.dox b/src/lib/compress/lib_compress.dox index ac60794565..599126901a 100644 --- a/src/lib/compress/lib_compress.dox +++ b/src/lib/compress/lib_compress.dox @@ -1,4 +1,8 @@ /** -@dir lib/compress -@brief lib/compress +@dir /lib/compress +@brief lib/compress: Wraps several compression libraries + +Currently supported are zlib (mandatory), zstd (optional), and lzma +(optional). 
+ **/ diff --git a/src/lib/conf/lib_conf.dox b/src/lib/conf/lib_conf.dox index 40a1d9f90f..be58fe5b55 100644 --- a/src/lib/conf/lib_conf.dox +++ b/src/lib/conf/lib_conf.dox @@ -1,4 +1,5 @@ /** -@dir lib/conf -@brief lib/conf +@dir /lib/conf +@brief lib/conf: Types and macros for declaring configuration options. + **/ diff --git a/src/lib/confmgt/lib_confmgt.dox b/src/lib/confmgt/lib_confmgt.dox index 964fe1d074..d18fa304ca 100644 --- a/src/lib/confmgt/lib_confmgt.dox +++ b/src/lib/confmgt/lib_confmgt.dox @@ -1,4 +1,9 @@ /** -@dir lib/confmgt -@brief lib/confmgt +@dir /lib/confmgt +@brief lib/confmgt: Parse, encode, manipulate configuration files. + +This logic is used in common by our state files (statefile.c) and +configuration files (config.c) to manage a set of named, typed fields, +reading and writing them to disk and to the controller. + **/ diff --git a/src/lib/container/lib_container.dox b/src/lib/container/lib_container.dox index 6ee719f47e..675aaeef3f 100644 --- a/src/lib/container/lib_container.dox +++ b/src/lib/container/lib_container.dox @@ -1,4 +1,51 @@ /** -@dir lib/container -@brief lib/container +@dir /lib/container +@brief lib/container: Hash tables, dynamic arrays, bit arrays, etc. + +### Smartlists: Neither lists, nor especially smart. + +For historical reasons, we call our dynamic-allocated array type +`smartlist_t`. It can grow or shrink as elements are added and removed. + +All smartlists hold an array of `void *`. Whenever you expose a smartlist +in an API you *must* document which types its pointers actually hold. + +<!-- It would be neat to fix that, wouldn't it? -NM --> + +Smartlists are created empty with `smartlist_new()` and freed with +`smartlist_free()`. See the `containers.h` header documentation for more +information; there are many convenience functions for commonly needed +operations. + +For low-level operations on smartlists, see also +\refdir{lib/smartlist_core}. + +<!-- TODO: WRITE more about what you can do with smartlists. 
--> + +### Digest maps, string maps, and more. + +Tor makes frequent use of maps from 160-bit digests, 256-bit digests, +or nul-terminated strings to `void *`. These types are `digestmap_t`, +`digest256map_t`, and `strmap_t` respectively. See the containers.h +module documentation for more information. + +### Intrusive lists and hashtables + +For performance-sensitive cases, we sometimes want to use "intrusive" +collections: ones where the bookkeeping pointers are stuck inside the +structures that belong to the collection. If you've used the +BSD-style sys/queue.h macros, you'll be familiar with these. + +Unfortunately, the `sys/queue.h` macros vary significantly between the +platforms that have them, so we provide our own variants in +`ext/tor_queue.h`. + +We also provide an intrusive hashtable implementation in `ext/ht.h`. +When you're using it, you'll need to define your own hash +functions. If attacker-induced collisions are a worry here, use the +cryptographic siphash24g function to extract hashes. + +<!-- TODO: WRITE about bloom filters, namemaps, bit-arrays, order functions. +--> + **/ diff --git a/src/lib/crypt_ops/lib_crypt_ops.dox b/src/lib/crypt_ops/lib_crypt_ops.dox index 1ea0b67d59..515c67f1c0 100644 --- a/src/lib/crypt_ops/lib_crypt_ops.dox +++ b/src/lib/crypt_ops/lib_crypt_ops.dox @@ -1,4 +1,139 @@ /** -@dir lib/crypt_ops -@brief lib/crypt_ops +@dir /lib/crypt_ops +@brief lib/crypt_ops: Cryptographic operations. + +This module contains wrappers around the cryptographic libraries that we +support, and implementations for some higher-level cryptographic +constructions that we use. + +It wraps our two major cryptographic backends (OpenSSL or NSS, as configured +by the user), and also wraps other cryptographic code in src/ext. + +Generally speaking, Tor code shouldn't be calling OpenSSL or NSS +(or any other crypto library) directly. Instead, we should indirect through +one of the functions in this directory, or through \refdir{lib/tls}. 
+ +Cryptography functionality that's available is described below. + +### RNG facilities ### + +The most basic RNG capability in Tor is the crypto_rand() family of +functions. These currently use OpenSSL's RAND_() backend, but may use +something faster in the future. + +In addition to crypto_rand(), which fills in a buffer with random +bytes, we also have functions to produce random integers in certain +ranges; to produce random hostnames; to produce random doubles, etc. + +When you're creating a long-term cryptographic secret, you might want +to use crypto_strongest_rand() instead of crypto_rand(). It takes the +operating system's entropy source and combines it with output from +crypto_rand(). This is a pure paranoia measure, but it might help us +someday. + +You can use smartlist_choose() to pick a random element from a smartlist +and smartlist_shuffle() to randomize the order of a smartlist. Both are +potentially a bit slow. + +### Cryptographic digests and related functions ### + +We treat digests as separate types based on the length of their +outputs. We support one 160-bit digest (SHA1), two 256-bit digests +(SHA256 and SHA3-256), and two 512-bit digests (SHA512 and SHA3-512). + +You should not use SHA1 for anything new. + +The crypto_digest\*() family of functions manipulates digests. You +can either compute a digest of a chunk of memory all at once using +crypto_digest(), crypto_digest256(), or crypto_digest512(). Or you +can create a crypto_digest_t object with +crypto_digest{,256,512}_new(), feed information to it in chunks using +crypto_digest_add_bytes(), and then extract the final digest using +crypto_digest_get_digest(). You can copy the state of one of these +objects using crypto_digest_dup() or crypto_digest_assign(). + +We support the HMAC hash-based message authentication code +instantiated using SHA256. See crypto_hmac_sha256. (You should not +add any HMAC users with SHA1, and HMAC is not necessary with SHA3.) 
+ +We also support the SHA3 cousins, SHAKE128 and SHAKE256. Unlike +digests, these are extendable output functions (or XOFs) where you can +get any amount of output. Use the crypto_xof_\*() functions to access +these. + +We have several ways to derive keys from cryptographically strong secret +inputs (like diffie-hellman outputs). The old +crypto_expand_key_material_TAP() performs an ad-hoc KDF based on SHA1 -- you +shouldn't use it for implementing anything but old versions of the Tor +protocol. You can use HKDF-SHA256 (as defined in RFC5869) for more modern +protocols. Also consider SHAKE256. + +If your input is potentially weak, like a password or passphrase, use a salt +along with the secret_to_key() functions as defined in crypto_s2k.c. Prefer +scrypt over other hashing methods when possible. If you're using a password +to encrypt something, see the "boxed file storage" section below. + +Finally, in order to store objects in hash tables, Tor includes the +randomized SipHash 2-4 function. Call it via the siphash24g() function in +src/ext/siphash.h whenever you're creating a hashtable whose keys may be +manipulated by an attacker in order to DoS you with collisions. + + +### Stream ciphers ### + +You can create instances of a stream cipher using crypto_cipher_new(). +These are stateful objects of type crypto_cipher_t. Note that these +objects only support AES-128 right now; a future version should add +support for AES-128 and/or ChaCha20. + +You can encrypt/decrypt with crypto_cipher_encrypt or +crypto_cipher_decrypt. The crypto_cipher_crypt_inplace function performs +an encryption without a copy. + +Note that sensible people should not use raw stream ciphers; they should +probably be using some kind of AEAD. Sorry. + +### Public key functionality ### + +We support four public key algorithms: DH1024, RSA, Curve25519, and +Ed25519. + +We support DH1024 over two prime groups. You access these via the +crypto_dh_\*() family of functions. 
+ +We support RSA in many bit sizes for signing and encryption. You access +it via the crypto_pk_*() family of functions. Note that a crypto_pk_t +may or may not include a private key. See the crypto_pk_* functions in +crypto.c for a full list of functions here. + +For Curve25519 functionality, see the functions and types in +crypto_curve25519.c. Curve25519 is generally suitable for when you need +a secure fast elliptic-curve diffie hellman implementation. When +designing new protocols, prefer it over DH in Z_p. + +For Ed25519 functionality, see the functions and types in +crypto_ed25519.c. Ed25519 is generally suitable as a secure fast +elliptic curve signature method. For new protocols, prefer it over RSA +signatures. + +### Metaformats for storage ### + +When OpenSSL manages the storage of some object, we use whatever format +OpenSSL provides -- typically, some kind of PEM-wrapped base 64 encoding +that starts with "----- BEGIN CRYPTOGRAPHIC OBJECT ----". + +When we manage the storage of some cryptographic object, we prefix the +object with a 32-byte NUL-padded prefix in order to avoid accidental +object confusion; see the crypto_read_tagged_contents_from_file() and +crypto_write_tagged_contents_to_file() functions for manipulating +these. The prefix is "== type: tag ==", where type describes the object +and its encoding, and tag indicates which one it is. + +### Boxed-file storage ### + +When managing keys, you frequently want to have some way to write a +secret object to disk, encrypted with a passphrase. The crypto_pwbox +and crypto_unpwbox functions do so in a way that's likely to be +readable by future versions of Tor. + **/ diff --git a/src/lib/ctime/lib_ctime.dox b/src/lib/ctime/lib_ctime.dox index 476c95991c..2bcd0f036a 100644 --- a/src/lib/ctime/lib_ctime.dox +++ b/src/lib/ctime/lib_ctime.dox @@ -1,4 +1,16 @@ /** -@dir lib/ctime -@brief lib/ctime +@dir /lib/ctime +@brief lib/ctime: Constant-time code to avoid side-channels. 
+ +This module contains constant-time implementations of various +data comparison and table lookup functions. We use these in preference to +memcmp() and so forth, since memcmp() can leak information about its inputs +based on how fast it returns. In general, your code should call tor_memeq() +and tor_memneq(), not memcmp(). + +We also define some _non_-constant-time wrappers for memcmp() here: Since we +consider calls to memcmp() to be in error, we require that code that actually +doesn't need to be constant-time use the fast_memeq() / fast_memneq() / +fast_memcmp() aliases instead. + **/ diff --git a/src/lib/defs/lib_defs.dox b/src/lib/defs/lib_defs.dox index 5adb527fc7..8ed4d7a0af 100644 --- a/src/lib/defs/lib_defs.dox +++ b/src/lib/defs/lib_defs.dox @@ -1,4 +1,4 @@ /** -@dir lib/defs -@brief lib/defs +@dir /lib/defs +@brief lib/defs: Lowest-level constants, used in many places. **/ diff --git a/src/lib/dispatch/lib_dispatch.dox b/src/lib/dispatch/lib_dispatch.dox index f194eff481..955b7df64f 100644 --- a/src/lib/dispatch/lib_dispatch.dox +++ b/src/lib/dispatch/lib_dispatch.dox @@ -1,4 +1,16 @@ /** -@dir lib/dispatch -@brief lib/dispatch +@dir /lib/dispatch +@brief lib/dispatch: In-process message delivery. + +This module provides a general in-process "message dispatch" system in which +typed messages are sent on channels. The dispatch.h header has far more +information. + +It is used by \refdir{lib/pubsub} to implement our general +inter-module publish/subscribe system. + +This is not a fancy multi-threaded many-to-many dispatcher as you may be used +to from more sophisticated architectures: this dispatcher is intended only +for use in improving Tor's architecture. 
+ **/ diff --git a/src/lib/encoding/lib_encoding.dox b/src/lib/encoding/lib_encoding.dox index 4a5fad9271..ca698cb183 100644 --- a/src/lib/encoding/lib_encoding.dox +++ b/src/lib/encoding/lib_encoding.dox @@ -1,4 +1,8 @@ /** -@dir lib/encoding -@brief lib/encoding +@dir /lib/encoding +@brief lib/encoding: Encoding data in various forms, types, and transformations + +Here we have time formats (timefmt.c), quoted strings (qstring.c), C strings +(string.c), base-16/32/64 (binascii.c), and more. + **/ diff --git a/src/lib/err/lib_err.dox b/src/lib/err/lib_err.dox index 8994fa5fd8..d1479b1140 100644 --- a/src/lib/err/lib_err.dox +++ b/src/lib/err/lib_err.dox @@ -1,4 +1,15 @@ /** -@dir lib/err -@brief lib/err +@dir /lib/err +@brief lib/err: Lowest-level error handling code. + +This module is responsible for generating stack traces, handling raw +assertion failures, and otherwise reporting problems that might not be +safe to report via the regular logging module. + +There are three kinds of users for the functions in this module: + * Code that needs a way to assert(), but which cannot use the regular + `tor_assert()` macros in the logging module. + * Code that needs signal-safe error reporting. + * Higher-level error handling code. + **/ diff --git a/src/lib/evloop/lib_evloop.dox b/src/lib/evloop/lib_evloop.dox index 86b60e3cd5..52fcf67755 100644 --- a/src/lib/evloop/lib_evloop.dox +++ b/src/lib/evloop/lib_evloop.dox @@ -1,4 +1,9 @@ /** -@dir lib/evloop -@brief lib/evloop +@dir /lib/evloop +@brief lib/evloop: Low-level event loop. + +This module has tools to manage the [libevent](https://libevent.org/) event +loop and related functionality, in order to implement asynchronous +networking, timers, periodic events, and other scheduling tasks. 
+ **/ diff --git a/src/lib/fdio/lib_fdio.dox b/src/lib/fdio/lib_fdio.dox index b868d28aab..9e2fda617a 100644 --- a/src/lib/fdio/lib_fdio.dox +++ b/src/lib/fdio/lib_fdio.dox @@ -1,4 +1,7 @@ /** -@dir lib/fdio -@brief lib/fdio +@dir /lib/fdio +@brief lib/fdio: Code to read/write on file descriptors. + +(This module also handles sockets, on platforms where a socket is not a kind +of fd.) **/ diff --git a/src/lib/fs/lib_fs.dox b/src/lib/fs/lib_fs.dox index ad775ba553..4466250bb8 100644 --- a/src/lib/fs/lib_fs.dox +++ b/src/lib/fs/lib_fs.dox @@ -1,4 +1,11 @@ /** -@dir lib/fs -@brief lib/fs +@dir /lib/fs +@brief lib/fs: Files, filenames, directories, etc. + +This module is mostly a set of compatibility wrappers around +operating-system-specific filesystem access. + +It also contains a set of convenience functions for safely writing to files, +creating directories, and so on. + **/ diff --git a/src/lib/geoip/lib_geoip.dox b/src/lib/geoip/lib_geoip.dox index 7ad99e8f55..da1123640b 100644 --- a/src/lib/geoip/lib_geoip.dox +++ b/src/lib/geoip/lib_geoip.dox @@ -1,4 +1,5 @@ /** -@dir lib/geoip -@brief lib/geoip +@dir /lib/geoip +@brief lib/geoip: IP-to-country mapping + **/ diff --git a/src/lib/intmath/lib_intmath.dox b/src/lib/intmath/lib_intmath.dox index ce71e455d1..e9b7044706 100644 --- a/src/lib/intmath/lib_intmath.dox +++ b/src/lib/intmath/lib_intmath.dox @@ -1,4 +1,4 @@ /** -@dir lib/intmath -@brief lib/intmath +@dir /lib/intmath +@brief lib/intmath: Integer mathematics. **/ diff --git a/src/lib/lib.dox b/src/lib/lib.dox index f1b2291c76..fdf2c47687 100644 --- a/src/lib/lib.dox +++ b/src/lib/lib.dox @@ -1,8 +1,133 @@ /** -@dir lib +@dir /lib @brief lib: low-level functionality. -The "lib" directory contains low-level functionality, most of it not -necessarily Tor-specific. +The "lib" directory contains low-level functionality. In general, this +code is not necessarily Tor-specific, but is instead possibly useful for +other applications. 
+ +The modules in `lib` are currently well-factored: each one depends +only on lower-level modules. You can see an up-to-date list of the +modules, sorted from lowest to highest level, by running +`./scripts/maint/practracker/includes.py --toposort`. + +As of this writing, the library modules are (from lowest to highest +level): + + - \refdir{lib/cc} -- Macros for managing the C compiler and + language. + + - \refdir{lib/version} -- Holds the current version of Tor. + + - \refdir{lib/testsupport} -- Helpers for making + test-only code, and test mocking support. + + - \refdir{lib/defs} -- Lowest-level constants. + + - \refdir{lib/subsys} -- Types used for declaring a + "subsystem". (_A subsystem is a module with support for initialization, + shutdown, configuration, and so on._) + + - \refdir{lib/conf} -- For declaring configuration options. + + - \refdir{lib/arch} -- For handling differences in CPU + architecture. + + - \refdir{lib/err} -- Lowest-level error handling code. + + - \refdir{lib/malloc} -- Memory management. + + - \refdir{lib/intmath} -- Integer mathematics. + + - \refdir{lib/fdio} -- For + reading and writing on file descriptors. + + - \refdir{lib/lock} -- Simple locking support. + (_Lower-level than the rest of the threading code._) + + - \refdir{lib/ctime} -- Constant-time code to avoid + side-channels. + + - \refdir{lib/string} -- Low-level string manipulation. + + - \refdir{lib/wallclock} -- + For inspecting and manipulating the current (UTC) time. + + - \refdir{lib/osinfo} -- For inspecting the OS version + and capabilities. + + - \refdir{lib/smartlist_core} -- The bare-bones + pieces of our dynamic array ("smartlist") implementation. + + - \refdir{lib/log} -- Log messages to files, syslogs, etc. + + - \refdir{lib/container} -- General purpose containers, + including dynamic arrays ("smartlists"), hashtables, bit arrays, + etc. + + - \refdir{lib/trace} -- A general-purpose API for + function-tracing functionality in Tor. 
(_Currently not much used._) + + - \refdir{lib/thread} -- Mid-level Threading. + + - \refdir{lib/term} -- Terminal manipulation + (like reading a password from the user). + + - \refdir{lib/memarea} -- A fast + "arena" style allocator, where the data is freed all at once. + + - \refdir{lib/encoding} -- Encoding + data in various formats, datatypes, and transformations. + + - \refdir{lib/dispatch} -- A general-purpose in-process + message delivery system. + + - \refdir{lib/sandbox} -- Our Linux seccomp2 sandbox + implementation. + + - \refdir{lib/pubsub} -- A publish/subscribe message passing system. + + - \refdir{lib/fs} -- Files, filenames, directories, etc. + + - \refdir{lib/confmgt} -- Parse, encode, and manipulate configuration files. + + - \refdir{lib/crypt_ops} -- Cryptographic operations. + + - \refdir{lib/meminfo} -- Functions for inspecting our + memory usage, if the malloc implementation exposes that to us. + + - \refdir{lib/time} -- Higher level time functions, including + fine-grained and monotonic timers. + + - \refdir{lib/math} -- Floating-point mathematical utilities. + + - \refdir{lib/buf} -- An efficient byte queue. + + - \refdir{lib/net} -- Networking code, including address + manipulation, compatibility wrappers, etc. + + - \refdir{lib/compress} -- Wraps several compression libraries. + + - \refdir{lib/geoip} -- IP-to-country mapping. + + - \refdir{lib/tls} -- TLS library wrappers. + + - \refdir{lib/evloop} -- Low-level event-loop. + + - \refdir{lib/process} -- Launch and manage subprocesses. + +### What belongs in lib? + +In general, if you can imagine some program wanting the functionality +you're writing, even if that program had nothing to do with Tor, your +functionality belongs in lib. + +If it falls into one of the existing "lib" categories, your +functionality belongs in lib. + +If you are using platform-specific `ifdef`s to manage compatibility +issues among platforms, you should probably consider whether you can +put your code into lib. 
**/ diff --git a/src/lib/lock/lib_lock.dox b/src/lib/lock/lib_lock.dox index 44693e7a69..868b5ba7d4 100644 --- a/src/lib/lock/lib_lock.dox +++ b/src/lib/lock/lib_lock.dox @@ -1,4 +1,8 @@ /** -@dir lib/lock -@brief lib/lock +@dir /lib/lock +@brief lib/lock: Simple locking support. + +This module is more low-level than the rest of the threading code, since it +is needed by more intermediate-level modules. + **/ diff --git a/src/lib/log/lib_log.dox b/src/lib/log/lib_log.dox index 915d652407..a772dc3207 100644 --- a/src/lib/log/lib_log.dox +++ b/src/lib/log/lib_log.dox @@ -1,4 +1,12 @@ /** -@dir lib/log -@brief lib/log +@dir /lib/log +@brief lib/log: Log messages to files, syslogs, etc. + +You can think of this as the logical "midpoint" of the +\refdir{lib} code: much of the higher-level code is higher-level +_because_ it uses the logging module, and much of the lower-level code is +specifically written to avoid having to log, because the logging module +depends on it. + + **/ diff --git a/src/lib/malloc/lib_malloc.dox b/src/lib/malloc/lib_malloc.dox index 4923f14463..c05e4c6473 100644 --- a/src/lib/malloc/lib_malloc.dox +++ b/src/lib/malloc/lib_malloc.dox @@ -1,4 +1,78 @@ /** -@dir lib/malloc -@brief lib/malloc +@dir /lib/malloc +@brief lib/malloc: Wrappers and utilities for memory management. + + +Tor imposes a few light wrappers over C's native malloc and free +functions, to improve convenience, and to allow wholesale replacement +of malloc and free as needed. + +You should never use 'malloc', 'calloc', 'realloc', or 'free' on their +own; always use the variants prefixed with 'tor_'. +They are the same as the standard C functions, with the following +exceptions: + + * `tor_free(NULL)` is a no-op. + * `tor_free()` is a macro that takes an lvalue as an argument and sets it to + NULL after freeing it. To avoid this behavior, you can use `tor_free_()` + instead. 
+ * tor_malloc() and friends fail with an assertion if they are asked to + allocate a value so large that it is probably an underflow. + * It is always safe to `tor_malloc(0)`, regardless of whether your libc + allows it. + * `tor_malloc()`, `tor_realloc()`, and friends are never allowed to fail. + Instead, Tor will die with an assertion. This means that you never + need to check their return values. See the next subsection for + information on why we think this is a good idea. + +We define additional general-purpose memory allocation functions as well: + + * `tor_malloc_zero(x)` behaves as `calloc(1, x)`, except that it makes clear + the intent to allocate a single zeroed-out value. + * `tor_reallocarray(x,y)` behaves as the OpenBSD reallocarray function. + Use it for cases when you need to realloc() in a multiplication-safe + way. + +And specific-purpose functions as well: + + * `tor_strdup()` and `tor_strndup()` behave as the underlying libc + functions, but use `tor_malloc()` instead of the underlying function. + * `tor_memdup()` copies a chunk of memory of a given size. + * `tor_memdup_nulterm()` copies a chunk of memory of a given size, then + NUL-terminates it just to be safe. + +#### Why assert on allocation failure? + +Why don't we allow `tor_malloc()` and its allies to return NULL? + +First, it's error-prone. Many programmers forget to check for NULL return +values, and testing for `malloc()` failures is a major pain. + +Second, it's not necessarily a great way to handle OOM conditions. It's +probably better (we think) to have a memory target where we dynamically free +things ahead of time in order to stay under the target. Trying to respond to +an OOM at the point of `tor_malloc()` failure, on the other hand, would involve +a rare operation invoked from deep in the call stack. (Again, that's +error-prone and hard to debug.) 
+ +Third, thanks to the rise of Linux and other operating systems that allow +memory to be overcommitted, you can't actually ever rely on getting a NULL +from `malloc()` when you're out of memory; instead you have to use an approach +closer to tracking the total memory usage. + +#### Conventions for your own allocation functions. + +Whenever you create a new type, the convention is to give it a pair of +`x_new()` and `x_free_()` functions, named after the type. + +Calling `x_free(NULL)` should always be a no-op. + +There should additionally be an `x_free()` macro, defined in terms of +`x_free_()`. This macro should set its lvalue to NULL. You can define it +using the FREE_AND_NULL macro, as follows: + +``` +#define x_free(ptr) FREE_AND_NULL(x_t, x_free_, (ptr)) +``` + **/ diff --git a/src/lib/math/lib_math.dox b/src/lib/math/lib_math.dox index c2e121dc8c..f20d7092b3 100644 --- a/src/lib/math/lib_math.dox +++ b/src/lib/math/lib_math.dox @@ -1,4 +1,8 @@ /** -@dir lib/math -@brief lib/math +@dir /lib/math +@brief lib/math: Floating-point math utilities. + +This module includes a bunch of floating-point compatibility code, and +implementations for several probability distributions. + **/ diff --git a/src/lib/memarea/lib_memarea.dox b/src/lib/memarea/lib_memarea.dox index dbd98de5ec..041191482d 100644 --- a/src/lib/memarea/lib_memarea.dox +++ b/src/lib/memarea/lib_memarea.dox @@ -1,4 +1,30 @@ /** -@dir lib/memarea -@brief lib/memarea +@dir /lib/memarea +@brief lib/memarea: A fast arena-style allocator. + +This module has a fast "arena" style allocator, where memory is freed all at +once. This kind of allocation is very fast and avoids fragmentation, at the +expense of requiring all the data to be freed at the same time. We use this +for parsing and diff calculations. + +It's often handy to allocate a large number of tiny objects, all of which +need to disappear at the same time. 
You can do this in tor using the +memarea.c abstraction, which uses a set of grow-only buffers for allocation, +and only supports a single "free" operation at the end. + +Using memareas also helps you avoid memory fragmentation. You see, some libc +malloc implementations perform badly on the case where a large number of +small temporary objects are allocated at the same time as a few long-lived +objects of similar size. But if you use tor_malloc() for the long-lived ones +and a memarea for the temporary object, the malloc implementation is likelier +to do better. + +To create a new memarea, use `memarea_new()`. To drop all the storage from a +memarea, and invalidate its pointers, use `memarea_drop_all()`. + +The allocation functions `memarea_alloc()`, `memarea_alloc_zero()`, +`memarea_memdup()`, `memarea_strdup()`, and `memarea_strndup()` are analogous +to the similarly-named malloc() functions. There is intentionally no +`memarea_free()` or `memarea_realloc()`. + **/ diff --git a/src/lib/meminfo/lib_meminfo.dox b/src/lib/meminfo/lib_meminfo.dox index c8def7e2f9..b57e60525e 100644 --- a/src/lib/meminfo/lib_meminfo.dox +++ b/src/lib/meminfo/lib_meminfo.dox @@ -1,4 +1,7 @@ /** -@dir lib/meminfo -@brief lib/meminfo +@dir /lib/meminfo +@brief lib/meminfo: Inspecting malloc() usage. + +Only available when malloc() provides mallinfo() or something similar. + **/ diff --git a/src/lib/net/lib_net.dox b/src/lib/net/lib_net.dox index 03783c12aa..b4c00405d7 100644 --- a/src/lib/net/lib_net.dox +++ b/src/lib/net/lib_net.dox @@ -1,4 +1,8 @@ /** -@dir lib/net -@brief lib/net +@dir /lib/net +@brief lib/net: Low-level network-related code. + +This module includes address manipulation, compatibility wrappers, +convenience functions, and so on. 
+ **/ diff --git a/src/lib/osinfo/lib_osinfo.dox b/src/lib/osinfo/lib_osinfo.dox index 7733755f20..4d9b1a6d76 100644 --- a/src/lib/osinfo/lib_osinfo.dox +++ b/src/lib/osinfo/lib_osinfo.dox @@ -1,4 +1,10 @@ /** -@dir lib/osinfo -@brief lib/osinfo +@dir /lib/osinfo +@brief lib/osinfo: For inspecting the OS version and capabilities. + +In general, we use this module when we're telling the user what operating +system they are running. We shouldn't make decisions based on the output of +these checks: instead, we should have more specific checks, either at compile +time or run time, based on the observed system behavior. + **/ diff --git a/src/lib/process/lib_process.dox b/src/lib/process/lib_process.dox index efb1adc091..723c9f193d 100644 --- a/src/lib/process/lib_process.dox +++ b/src/lib/process/lib_process.dox @@ -1,4 +1,4 @@ /** -@dir lib/process -@brief lib/process +@dir /lib/process +@brief lib/process: Launch and manage subprocesses. **/ diff --git a/src/lib/pubsub/lib_pubsub.dox b/src/lib/pubsub/lib_pubsub.dox index 9a3fc6dfac..c033660121 100644 --- a/src/lib/pubsub/lib_pubsub.dox +++ b/src/lib/pubsub/lib_pubsub.dox @@ -1,4 +1,16 @@ /** -@dir lib/pubsub -@brief lib/pubsub +@dir /lib/pubsub +@brief lib/pubsub: Publish-subscribe message passing. + +This module wraps the \refdir{lib/dispatch} module, to provide a more +ergonomic and type-safe approach to message passing. + +In general, we favor this mechanism for cases where higher-level modules +need to be notified when something happens in lower-level modules. (The +alternative would be calling up from the lower-level modules, which +would be error-prone; or maintaining lists of function-pointers, which +would be clumsy and tend to complicate the call graph.) + +See pubsub.c for more information. 
+ **/ diff --git a/src/lib/sandbox/lib_sandbox.dox b/src/lib/sandbox/lib_sandbox.dox index eb42d97589..48eddac685 100644 --- a/src/lib/sandbox/lib_sandbox.dox +++ b/src/lib/sandbox/lib_sandbox.dox @@ -1,4 +1,17 @@ /** -@dir lib/sandbox -@brief lib/sandbox +@dir /lib/sandbox +@brief lib/sandbox: Linux seccomp2-based sandbox. + +This module uses Linux's seccomp2 facility via the +[`libseccomp` library](https://github.com/seccomp/libseccomp), to restrict +the set of system calls that Tor is allowed to invoke while it is running. + +Because there are many libc versions that invoke different system calls, and +because handling strings is quite complex, this module is more complex and +less portable than it needs to be. + +A better architecture would put the responsibility for invoking tricky system +calls (like open()) in another, less restricted process, and give that +process responsibility for enforcing our sandbox rules. + **/ diff --git a/src/lib/smartlist_core/lib_smartlist_core.dox b/src/lib/smartlist_core/lib_smartlist_core.dox index 507d0fe92f..73c3b69056 100644 --- a/src/lib/smartlist_core/lib_smartlist_core.dox +++ b/src/lib/smartlist_core/lib_smartlist_core.dox @@ -1,4 +1,12 @@ /** -@dir lib/smartlist_core -@brief lib/smartlist_core +@dir /lib/smartlist_core +@brief lib/smartlist_core: Minimal dynamic array implementation + +A `smartlist_t` is a dynamic array type for holding `void *`. We use it +throughout the rest of the codebase. + +There are higher-level pieces in \refdir{lib/container} but +the ones in lib/smartlist_core are used by the logging code, and therefore +cannot use the logging code. 
+ **/ diff --git a/src/lib/stats/lib_stats.dox b/src/lib/stats/lib_stats.dox deleted file mode 100644 index 897c41418f..0000000000 --- a/src/lib/stats/lib_stats.dox +++ /dev/null @@ -1,4 +0,0 @@ -/** -@dir lib/stats -@brief lib/stats -**/ diff --git a/src/lib/string/lib_string.dox b/src/lib/string/lib_string.dox index 3e038ea072..c8793ddf91 100644 --- a/src/lib/string/lib_string.dox +++ b/src/lib/string/lib_string.dox @@ -1,4 +1,15 @@ /** -@dir lib/string -@brief lib/string +@dir /lib/string +@brief lib/string: Low-level string manipulation. + +We have a number of compatibility functions here: some are for handling +functionality that is not implemented (or not implemented the same) on every +platform; some are for providing locale-independent versions of libc +functions that would otherwise be defined differently for different users. + +Other functions here are for common string-manipulation operations that we do +in the rest of the codebase. + +Any string function high-level enough to need logging belongs in a +higher-level module. **/ diff --git a/src/lib/subsys/lib_subsys.dox b/src/lib/subsys/lib_subsys.dox index f9cd5eeb81..1a22a2d808 100644 --- a/src/lib/subsys/lib_subsys.dox +++ b/src/lib/subsys/lib_subsys.dox @@ -1,4 +1,34 @@ /** -@dir lib/subsys -@brief lib/subsys +@dir /lib/subsys +@brief lib/subsys: Types for declaring a "subsystem". + +## Subsystems in Tor + +A subsystem is a module with support for initialization, shutdown, +configuration, and so on. + +Many parts of Tor can be initialized, cleaned up, and configured somewhat +independently through a table-driven mechanism. Each such part is called a +"subsystem". + +To declare a subsystem, make a global `const` instance of the `subsys_fns_t` +type, filling in the function pointer fields that you require with ones +corresponding to your subsystem. Any function pointers left as "NULL" will +be a no-op. 
Each system must have a name and a "level", which corresponds to +the order in which it is initialized. (See `app/main/subsystem_list.c` for a +list of current subsystems and their levels.) + +Then, insert your subsystem in the list in `app/main/subsystem_list.c`. It +will need to occupy a position corresponding to its level. + +At this point, your subsystem will be handled like the others: it will get +initialized at startup, torn down at exit, and so on. + +Historical note: Not all of Tor's code is currently handled as +subsystems. As you work with older code, you may see some parts of the code +that are initialized from `tor_init()` or `run_tor_main_loop()` or +`tor_run_main()`; and torn down from `tor_cleanup()`. We aim to migrate +these to subsystems over time; please don't add any new code that follows +this pattern. + **/ diff --git a/src/lib/term/lib_term.dox b/src/lib/term/lib_term.dox index 2bc5125839..3bf2f960ab 100644 --- a/src/lib/term/lib_term.dox +++ b/src/lib/term/lib_term.dox @@ -1,4 +1,4 @@ /** -@dir lib/term -@brief lib/term +@dir /lib/term +@brief lib/term: Terminal operations (password input). **/ diff --git a/src/lib/testsupport/lib_testsupport.dox b/src/lib/testsupport/lib_testsupport.dox index 63ccc47d34..c09c32e478 100644 --- a/src/lib/testsupport/lib_testsupport.dox +++ b/src/lib/testsupport/lib_testsupport.dox @@ -1,4 +1,4 @@ /** -@dir lib/testsupport -@brief lib/testsupport +@dir /lib/testsupport +@brief lib/testsupport: Helpers for test-only code and for function mocking. **/ diff --git a/src/lib/thread/lib_thread.dox b/src/lib/thread/lib_thread.dox index 68937ef793..2773aa009d 100644 --- a/src/lib/thread/lib_thread.dox +++ b/src/lib/thread/lib_thread.dox @@ -1,4 +1,9 @@ /** -@dir lib/thread -@brief lib/thread +@dir /lib/thread +@brief lib/thread: Mid-level threading. 
+ +This module contains compatibility and convenience code for multithreading, +except for low-level locks (which are in \refdir{lib/lock}) and +workqueue/threadpool code (which belongs in \refdir{lib/evloop}). + **/ diff --git a/src/lib/time/lib_time.dox b/src/lib/time/lib_time.dox index 50abf072f7..b76a31fb97 100644 --- a/src/lib/time/lib_time.dox +++ b/src/lib/time/lib_time.dox @@ -1,4 +1,11 @@ /** -@dir lib/time -@brief lib/time +@dir /lib/time +@brief lib/time: Higher-level time functions + +This includes both fine-grained timers and monotonic timers, along with +wrappers for them to try to improve efficiency. + +For "what time is it" in UTC, see \refdir{lib/wallclock}. For parsing and +encoding times and dates, see \refdir{lib/encoding}. + **/ diff --git a/src/lib/tls/lib_tls.dox b/src/lib/tls/lib_tls.dox index 40b7b2c27e..f0dba269e8 100644 --- a/src/lib/tls/lib_tls.dox +++ b/src/lib/tls/lib_tls.dox @@ -1,4 +1,13 @@ /** -@dir lib/tls -@brief lib/tls +@dir /lib/tls +@brief lib/tls: TLS library wrappers + +This module has compatibility wrappers around the library (NSS or OpenSSL, +depending on configuration) that Tor uses to implement the TLS link security +protocol. + +It also implements the logic for some legacy TLS protocol usage we used to +support in old versions of Tor, involving conditional delivery of certificate +chains (v1 link protocol) and conditional renegotiation (v2 link protocol). + **/ diff --git a/src/lib/trace/lib_trace.dox b/src/lib/trace/lib_trace.dox index a1ae256506..64f762bc3e 100644 --- a/src/lib/trace/lib_trace.dox +++ b/src/lib/trace/lib_trace.dox @@ -1,4 +1,8 @@ /** -@dir lib/trace -@brief lib/trace +@dir /lib/trace +@brief lib/trace: Function-tracing functionality API. + +This module is used for adding "trace" support (low-granularity function +logging) to Tor. Right now it doesn't have many users.
+ **/ diff --git a/src/lib/version/lib_version.dox b/src/lib/version/lib_version.dox index 213e1a1ae8..93d2fb6b9b 100644 --- a/src/lib/version/lib_version.dox +++ b/src/lib/version/lib_version.dox @@ -1,4 +1,4 @@ /** -@dir lib/version -@brief lib/version +@dir /lib/version +@brief lib/version: holds the current version of Tor. **/ diff --git a/src/lib/wallclock/lib_wallclock.dox b/src/lib/wallclock/lib_wallclock.dox index 7bb2b075d1..7d43fa6129 100644 --- a/src/lib/wallclock/lib_wallclock.dox +++ b/src/lib/wallclock/lib_wallclock.dox @@ -1,4 +1,13 @@ /** -@dir lib/wallclock -@brief lib/wallclock +@dir /lib/wallclock +@brief lib/wallclock: Inspect and manipulate the current time. + +This module handles our concept of "what time is it" or "what time does the +world agree it is?" Generally, if you want something derived from UTC, this +is the module for you. + +For versions of the time that are more local, more monotonic, or more +accurate, see \refdir{lib/time}. For parsing and encoding times and dates, +see \refdir{lib/encoding}. + **/ |