12 files changed, 593 insertions, 61 deletions
diff --git a/doc/HACKING b/doc/HACKING
index ac68a35a08..486fe6d10a 100644
--- a/doc/HACKING
+++ b/doc/HACKING
@@ -50,15 +50,16 @@ When you do a commit that needs a ChangeLog entry, add a new file to
 the "changes" toplevel subdirectory.  It should have the format of a
 one-entry changelog section from the current ChangeLog file, as in
 
-   o Major bugfixes:
-      - Fix a potential buffer overflow.  Fixes bug 9999.  Bugfix on
-        Tor 0.3.1.4-beta.
+  o Major bugfixes:
+    - Fix a potential buffer overflow. Fixes bug 9999; bugfix on
+      0.3.1.4-beta.
 
 To write a changes file, first categorize the change.  Some common categories
 are: Minor bugfixes, Major bugfixes, Minor features, Major features, Code
-simplifications and refactoring.  Then say what the change does.  Then, if
-it's a bugfix, then mention what bug it fixes and when the bug was
-introduced.
+simplifications and refactoring.  Then say what the change does.  If
+it's a bugfix, mention what bug it fixes and when the bug was
+introduced.  To find out which Git tag the change was introduced in,
+you can use "git describe --contains <sha1 of commit>".
 
 If at all possible, try to create this file in the same commit where
 you are making the change.  Please give it a distinctive name that no
@@ -129,6 +130,33 @@ compiler generated  no code for that line.  '######' means that the
 line was never reached.  Lines with numbers were called that number
 of times.
 
+Profiling Tor with oprofile
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The oprofile tool runs (on Linux only!) to tell you what functions Tor is
+spending its CPU time in, so we can identify performance bottlenecks.
+
+Here are some basic instructions:
+
+ - Build tor with debugging symbols (you probably already have, unless
+   you messed with CFLAGS during the build process).
+ - Build all the libraries you care about with debugging symbols
+   (probably you only care about libssl, maybe zlib and Libevent).
+ - Copy this tor to a new directory
+ - Copy all the libraries it uses to that dir too (ldd ./tor will
+   tell you)
+ - Set LD_LIBRARY_PATH to include that dir.  ldd ./tor should now
+   show you it's using the libs in that dir
+ - Run that tor
+ - Reset oprofile's counters/start it
+   * "opcontrol --reset; opcontrol --start", if Nick remembers right.
+ - After a while, have it dump the stats on tor and all the libs
+   in that dir you created.
+   * "opcontrol --dump;"
+   * "opreport -l that_dir/*"
+ - Profit
+
+
 Coding conventions
 ------------------
 
diff --git a/doc/Makefile.am b/doc/Makefile.am
index d976f9d6fa..68747c8d2d 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -15,25 +15,27 @@
 
 if USE_ASCIIDOC
 asciidoc_files = tor tor-gencert tor-resolve torify
+html_in = $(asciidoc_files:=.html.in)
+man_in = $(asciidoc_files:=.1.in)
+txt_in = $(asciidoc_files:=.1.txt)
+nodist_man_MANS = $(asciidoc_files:=.1)
+doc_DATA = $(asciidoc_files:=.html)
 else
 asciidoc_files =
+html_in =
+man_in =
+txt_in =
+nodist_man_MANS =
+doc_DATA =
 endif
 
-html_in = ${asciidoc_files:=.html.in}
-
-man_in = ${asciidoc_files:=.1.in}
-
 EXTRA_DIST = HACKING asciidoc-helper.sh                      \
-             $(html_in) $(man_in) ${asciidoc_files:=.1.txt}  \
+             $(html_in) $(man_in) $(txt_in)                  \
              tor-osx-dmg-creation.txt tor-rpm-creation.txt   \
              tor-win32-mingw-creation.txt
 
-nodist_man_MANS = ${asciidoc_files:=.1}
-
 docdir = @docdir@
 
-doc_DATA = ${asciidoc_files:=.html}
-
 asciidoc_product = $(nodist_man_MANS) $(doc_DATA)
 
 SUBDIRS = spec
@@ -77,5 +79,5 @@ torify.html : torify.html.in
 tor-gencert.html : tor-gencert.html.in
 tor-resolve.html : tor-resolve.html.in
 
-CLEANFILES = $(asciidoc_product)
+CLEANFILES = $(asciidoc_product) config.log
 DISTCLEANFILES = $(html_in) $(man_in)
diff --git a/doc/spec/control-spec.txt b/doc/spec/control-spec.txt
index 5a68864b29..31ce35074b 100644
--- a/doc/spec/control-spec.txt
+++ b/doc/spec/control-spec.txt
@@ -92,7 +92,7 @@
 
 2.4. General-use tokens
 
-  ; CRLF means, "the ASCII Carriage Return character (decimal value value 13)
+  ; CRLF means, "the ASCII Carriage Return character (decimal value 13)
   ; followed by the ASCII Linefeed character (decimal value 10)."
   CRLF = CR LF
 
@@ -1049,7 +1049,7 @@
 
       Reason = "MISC" / "RESOLVEFAILED" / "CONNECTREFUSED" /
                "EXITPOLICY" / "DESTROY" / "DONE" / "TIMEOUT" /
-               "HIBERNATING" / "INTERNAL"/ "RESOURCELIMIT" /
+               "NOROUTE" / "HIBERNATING" / "INTERNAL"/ "RESOURCELIMIT" /
                "CONNRESET" / "TORPROTOCOL" / "NOTDIRECTORY" / "END"
 
    The "REASON" field is provided only for FAILED, CLOSED, and DETACHED
diff --git a/doc/spec/dir-spec.txt b/doc/spec/dir-spec.txt
index a5abdf04bf..bd3b8ba245 100644
--- a/doc/spec/dir-spec.txt
+++ b/doc/spec/dir-spec.txt
@@ -779,6 +779,17 @@
            had a smaller bandwidth than md, the other half had a larger
            bandwidth than md.
 
+    "dirreq-read-history" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM... NL
+        [At most once]
+    "dirreq-write-history" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM... NL
+        [At most once]
+
+        Declare how much bandwidth the OR has spent on answering directory
+        requests.  Usage is divided into intervals of NSEC seconds.  The
+        YYYY-MM-DD HH:MM:SS field defines the end of the most recent
+        interval.  The numbers are the number of bytes used in the most
+        recent intervals, ordered from oldest to newest.
+
     "entry-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
         [At most once.]
 
@@ -1351,9 +1362,9 @@
          Web - Weight for BEGIN_DIR-supporting Exit-flagged nodes
          Wdb - Weight for BEGIN_DIR-supporting Guard+Exit-flagged nodes
 
-         Wbg - Weight for Guard+Exit-flagged nodes for BEGIN_DIR requests
-         Wbm - Weight for Guard+Exit-flagged nodes for BEGIN_DIR requests
-         Wbe - Weight for Guard+Exit-flagged nodes for BEGIN_DIR requests
+         Wbg - Weight for Guard flagged nodes for BEGIN_DIR requests
+         Wbm - Weight for non-flagged nodes for BEGIN_DIR requests
+         Wbe - Weight for Exit-flagged nodes for BEGIN_DIR requests
          Wbd - Weight for Guard+Exit-flagged nodes for BEGIN_DIR requests
 
        These values are calculated as specified in Section 3.4.3.
diff --git a/doc/spec/proposals/000-index.txt b/doc/spec/proposals/000-index.txt
index 62327a1e61..f6f313e58d 100644
--- a/doc/spec/proposals/000-index.txt
+++ b/doc/spec/proposals/000-index.txt
@@ -91,6 +91,9 @@ Proposals by number:
 168  Reduce default circuit window [OPEN]
 169  Eliminate TLS renegotiation for the Tor connection handshake [DRAFT]
 170  Configuration options regarding circuit building [DRAFT]
+172  GETINFO controller option for circuit information [ACCEPTED]
+173  GETINFO Option Expansion [ACCEPTED]
+174  Optimistic Data for Tor: Server Side [OPEN]
 
 
 Proposals by status:
@@ -118,6 +121,7 @@ Proposals by status:
    164  Reporting the status of server votes [for 0.2.2]
    165  Easy migration for voting authority sets
    168  Reduce default circuit window [for 0.2.2]
+   174  Optimistic Data for Tor: Server Side
  ACCEPTED:
    110  Avoiding infinite length circuits [for 0.2.1.x] [in 0.2.1.3-alpha]
    117  IPv6 exits [for 0.2.1.x]
@@ -126,6 +130,8 @@ Proposals by status:
    147  Eliminate the need for v2 directories in generating v3 directories [for 0.2.1.x]
    157  Make certificate downloads specific [for 0.2.1.x]
    166  Including Network Statistics in Extra-Info Documents [for 0.2.2]
+   172  GETINFO controller option for circuit information
+   173  GETINFO Option Expansion
  META:
    000  Index of Tor Proposals
    001  The Tor Proposal Process
diff --git a/doc/spec/proposals/001-process.txt b/doc/spec/proposals/001-process.txt
index 636ba2c2fa..e2fe358fed 100644
--- a/doc/spec/proposals/001-process.txt
+++ b/doc/spec/proposals/001-process.txt
@@ -127,7 +127,8 @@ What should go in a proposal:
 
       Implementation: If the proposal will be tricky to implement in Tor's
         current architecture, the document can contain some discussion of how
-        to go about making it work.
+        to go about making it work.  Actual patches should go on public git
+        branches, or be uploaded to trac.
 
       Performance and scalability notes: If the feature will have an effect
         on performance (in RAM, CPU, bandwidth) or scalability, there should
diff --git a/doc/spec/proposals/172-circ-getinfo-option.txt b/doc/spec/proposals/172-circ-getinfo-option.txt
new file mode 100644
index 0000000000..b7fd79c9a8
--- /dev/null
+++ b/doc/spec/proposals/172-circ-getinfo-option.txt
@@ -0,0 +1,138 @@
+Filename: 172-circ-getinfo-option.txt
+Title: GETINFO controller option for circuit information
+Author: Damian Johnson
+Created: 03-June-2010
+Status: Accepted
+
+Overview:
+
+    This details an additional GETINFO option that would provide information
+    concerning a relay's current circuits.
+
+Motivation:
+
+    The original proposal was for connection related information, but Jake made
+    the excellent point that any information retrieved from the control port
+    is...
+    
+      1. completely ineffectual for auditing purposes since either (a) these
+      results can be fetched from netstat already or (b) the information would
+      only be provided via tor and can't be validated.
+      
+      2. The more useful uses for connection information can be achieved with
+      much less (and safer) information.
+    
+    Hence the proposal is now for circuit based rather than connection based
+    information. This would strip the most controversial and sensitive data
+    entirely (ip addresses, ports, and connection based bandwidth breakdowns)
+    while still being useful for the following purposes:
+
+    - Basic Relay Usage Questions
+    How is the bandwidth I'm contributing broken down? Is it being evenly
+    distributed or is someone hogging most of it? Do these circuits belong to
+    the hidden service I'm running or something else? Now that I'm using exit
+    policy X am I desirable as an exit, or are most people just using me as a
+    relay?
+
+    - Debugging
+    Say a relay has a restrictive firewall policy for outbound connections,
+    with the ORPort whitelisted but doesn't realize that tor needs random high
+    ports. Tor would report success ("your orport is reachable - excellent")
+    yet the relay would be nonfunctional. This proposed information would
+    reveal numerous RELAY -> YOU -> UNESTABLISHED circuits, giving a good
+    indicator of what's wrong.
+
+    - Visualization
+    A nice benefit of visualizing tor's behavior is that it becomes a helpful
+    tool in puzzling out how tor works. For instance, tor spawns numerous
+    client connections at startup (even if unused as a client). As a newcomer
+    to tor these asymmetric (outbound only) connections mystified me for quite
+    a while until Roger explained their use to me. The proposed
+    TYPE_FLAGS would let controllers clearly label them as being client
+    related, making their purpose a bit clearer.
+
+    At the moment connection data can only be retrieved via commands like
+    netstat, ss, and lsof. However, providing an alternative via the control
+    port provides several advantages:
+
+      - scrubbing for private data
+          Raw connection data has no notion of what's sensitive and what is
+          not. The relay's flags and cached consensus can be used to take
+          educated guesses concerning which connections could possibly belong
+          to client or exit traffic, but this is both difficult and inaccurate.
+          Anything provided via the control port can be scrubbed to make sure we
+          aren't providing anything we think relay operators should not see.
+     
+      - additional information
+          All connection querying commands strictly provide the ip address and
+          port of connections, and nothing else. However, for the uses listed
+          above the far more interesting attributes are the circuit's type,
+          bandwidth usage and uptime.
+     
+      - improved performance
+          Querying connection data is an expensive activity, especially for
+          busy relays or low end processors (such as mobile devices). Tor
+          already internally knows its circuits, allowing for vastly quicker
+          lookups.
+     
+      - cross platform capability
+          The connection querying utilities mentioned above not only aren't
+          available under Windows, but differ widely among different *nix
+          platforms. FreeBSD in particular takes a very unique approach,
+          dropping important options from netstat and assigning ss to a
+          spreadsheet application instead. A controller interface, however,
+          would provide a uniform means of retrieving this information.
+
+Security Implications:
+
+    This is an open question. This proposal lacks the most controversial pieces
+    of information (ip addresses and ports) and insight into potential threats
+    this would pose would be very welcome!
+
+Specification:
+
+   The following addition would be made to the control-spec's GETINFO section:
+
+  "rcirc/id/<Circuit identity>" -- Provides entry for the associated relay
+    circuit, formatted as:
+      CIRC_ID=<circuit ID> CREATED=<timestamp> UPDATED=<timestamp> TYPE=<flag>
+        READ=<bytes> WRITE=<bytes>
+
+    none of the parameters contain whitespace, and additional results must be
+    ignored to allow for future expansion. Parameters are defined as follows:
+      CIRC_ID - Unique numeric identifier for the circuit this belongs to.
+      CREATED - Unix timestamp (as seconds since the Epoch) for when the
+          circuit was created.
+      UPDATED - Unix timestamp for when this information was last updated.
+      TYPE - Single character flags indicating attributes in the circuit:
+          (E)ntry : has a connection that doesn't belong to a known Tor server,
+            indicating that this is either the first hop or bridged
+          E(X)it : has been used for at least one exit stream
+          (R)elay : has been extended
+          Rende(Z)vous : is being used for a rendezvous point
+          (I)ntroduction : is being used for a hidden service introduction
+          (N)one of the above: none of the above have happened yet.
+      READ - Total bytes transmitted toward the exit over the circuit.
+      WRITE - Total bytes transmitted toward the client over the circuit.
+
+  "rcirc/all" -- The 'rcirc/id/*' output for all current circuits, joined by
+    newlines.
+
+   The following would be included for circ info update events.
+
+4.1.X. Relay circuit status changed
+
+  The syntax is:
+     "650" SP "RCIRC" SP CircID SP Notice [SP Created SP Updated SP Type SP
+          Read SP Write] CRLF
+     
+     Notice =
+            "NEW"    / ; first information being provided for this circuit
+            "UPDATE" / ; update for a previously reported circuit
+            "CLOSED"   ; notice that the circuit no longer exists
+    
+  Notice indicating that queryable information on a relay related circuit has
+  changed. If the Notice parameter is either "NEW" or "UPDATE" then this
+  provides the same fields that would be given by calling "GETINFO rcirc/id/"
+  with the CircID.
+
diff --git a/doc/spec/proposals/173-getinfo-option-expansion.txt b/doc/spec/proposals/173-getinfo-option-expansion.txt
new file mode 100644
index 0000000000..03e18ef8d4
--- /dev/null
+++ b/doc/spec/proposals/173-getinfo-option-expansion.txt
@@ -0,0 +1,101 @@
+Filename: 173-getinfo-option-expansion.txt
+Title: GETINFO Option Expansion
+Author: Damian Johnson
+Created: 02-June-2010
+Status: Accepted
+
+Overview:
+
+    Over the course of developing arm there have been numerous hacks and
+    workarounds to glean pieces of basic, desirable information about the tor
+    process. As per Roger's request I've compiled a list of these pain points
+    to try and improve the control protocol interface.
+
+Motivation:
+
+    The purpose of this proposal is to expose additional process and relay
+    related information that is currently unavailable in a convenient,
+    dependable, and/or platform independent way. Examples of this are...
+    
+      - The relay's total contributed bandwidth. This is a highly requested
+        piece of information and, based on the following patch from pipe, looks
+        trivial to include.
+        http://www.mail-archive.com/or-talk@freehaven.net/msg13085.html
+      
+      - The process ID of the tor process. There is a high degree of guess work
+        in obtaining this. Arm for instance uses pidof, netstat, and ps yet
+        still fails on some platforms, and Orbot recently got a ticket about
+        its own attempt to fetch it with ps:
+        https://trac.torproject.org/projects/tor/ticket/1388
+    
+    This just includes the pieces of missing information I've noticed
+    (suggestions or questions of their usefulness are welcome!).
+
+Security Implications:
+
+    None that I'm aware of. From a security standpoint this seems decently
+    innocuous.
+
+Specification:
+
+    The following addition would be made to the control-spec's GETINFO section:
+    
+    "relay/bw-limit" -- Effective relayed bandwidth limit.
+    
+    "relay/burst-limit" -- Effective relayed burst limit.
+    
+    "relay/read-total" -- Total bytes relayed (download).
+    
+    "relay/write-total" -- Total bytes relayed (upload).
+    
+    "relay/flags" -- Space separated listing of flags currently held by the
+    relay as reported by the currently cached consensus.
+    
+    "process/user" -- Username under which the tor process is running,
+    providing an empty string if none exists.
+    
+    "process/pid" -- Process id belonging to the main tor process, -1 if none
+    exists for the platform.
+    
+    "process/uptime" -- Total uptime of the tor process (in seconds).
+    
+    "process/uptime-reset" -- Time since last reset (startup, sighup, or RELOAD
+    signal, in seconds).
+    
+    "process/descriptors-used" -- Count of file descriptors used.
+    
+    "process/descriptor-limit" -- File descriptor limit (getrlimit results).
+    
+    "ns/authority" -- Router status info (v2 directory style) for all
+    recognized directory authorities, joined by newlines.
+    
+    "state/names" -- A space-separated list of all the keys supported by this
+    version of Tor's state.
+    
+    "state/val/<key>" -- Provides the current state value belonging to the
+    given key. If undefined, this provides the key's default value.
+    
+    "status/ports-seen" -- A summary of which ports we've seen exiting
+    circuits connect to recently, formatted the same as the EXITS_SEEN status
+    event described in Section 4.1.XX. This GETINFO option is currently
+    available only for exit relays.
+
+4.1.XX. Per-port exit stats
+
+  The syntax is:
+     "650" SP "EXITS_SEEN" SP TimeStarted SP PortSummary CRLF
+
+  We just generated a new summary of which ports we've seen exiting circuits
+  connecting to recently. The controller could display this for the user, e.g.
+  in their "relay" configuration window, to give them a sense of how they're
+  being used (popularity of the various ports they exit to). Currently only
+  exit relays will receive this event.
+  
+  TimeStarted is a quoted string indicating when the reported summary
+  counts from (in GMT).
+
+  The PortSummary keyword has as its argument a comma-separated, possibly
+  empty set of "port=count" pairs. For example (without linebreak),
+  650-EXITS_SEEN TimeStarted="2008-12-25 23:50:43"
+  PortSummary=80=16,443=8
+
diff --git a/doc/spec/proposals/174-optimistic-data-server.txt b/doc/spec/proposals/174-optimistic-data-server.txt
new file mode 100644
index 0000000000..d97c45e909
--- /dev/null
+++ b/doc/spec/proposals/174-optimistic-data-server.txt
@@ -0,0 +1,242 @@
+Filename: 174-optimistic-data-server.txt
+Title: Optimistic Data for Tor: Server Side
+Author: Ian Goldberg
+Created: 2-Aug-2010
+Status: Open
+
+Overview:
+
+When a SOCKS client opens a TCP connection through Tor (for an HTTP
+request, for example), the query latency is about 1.5x higher than it
+needs to be.  Simply, the problem is that the sequence of data flows
+is this:
+
+1. The SOCKS client opens a TCP connection to the OP
+2. The SOCKS client sends a SOCKS CONNECT command
+3. The OP sends a BEGIN cell to the Exit
+4. The Exit opens a TCP connection to the Server
+5. The Exit returns a CONNECTED cell to the OP
+6. The OP returns a SOCKS CONNECTED notification to the SOCKS client
+7. The SOCKS client sends some data (the GET request, for example)
+8. The OP sends a DATA cell to the Exit
+9. The Exit sends the GET to the server
+10. The Server returns the HTTP result to the Exit
+11. The Exit sends the DATA cells to the OP
+12. The OP returns the HTTP result to the SOCKS client
+
+Note that the Exit node knows that the connection to the Server was
+successful at the end of step 4, but is unable to send the HTTP query to
+the server until step 9.
+
+This proposal (as well as its upcoming sibling concerning the client
+side) aims to reduce the latency by allowing:
+1. SOCKS clients to optimistically send data before they are notified
+    that the SOCKS connection has completed successfully
+2. OPs to optimistically send DATA cells on streams in the CONNECT_WAIT
+    state
+3. Exit nodes to accept and queue DATA cells while in the
+    EXIT_CONN_STATE_CONNECTING state
+
+This particular proposal deals with #3.
+
+In this way, the flow would be as follows:
+
+1. The SOCKS client opens a TCP connection to the OP
+2. The SOCKS client sends a SOCKS CONNECT command, followed immediately
+    by data (such as the GET request)
+3. The OP sends a BEGIN cell to the Exit, followed immediately by DATA
+    cells
+4. The Exit opens a TCP connection to the Server
+5. The Exit returns a CONNECTED cell to the OP, and sends the queued GET
+    request to the Server
+6. The OP returns a SOCKS CONNECTED notification to the SOCKS client,
+    and the Server returns the HTTP result to the Exit
+7. The Exit sends the DATA cells to the OP
+8. The OP returns the HTTP result to the SOCKS client
+
+Motivation:
+
+This change will save one OP<->Exit round trip (down to one from two).
+There are still two SOCKS Client<->OP round trips (negligible time) and
+two Exit<->Server round trips.  Depending on the ratio of the
+Exit<->Server (Internet) RTT to the OP<->Exit (Tor) RTT, this will
+decrease the latency by 25 to 50 percent.  Experiments validate these
+predictions. [Goldberg, PETS 2010 rump session; see
+https://thunk.cs.uwaterloo.ca/optimistic-data-pets2010-rump.pdf ]
+
+Design:
+
+The current code actually correctly handles queued data at the Exit; if
+there is queued data in an EXIT_CONN_STATE_CONNECTING stream, that data
+will be immediately sent when the connection succeeds.  If the
+connection fails, the data will be correctly ignored and freed.  The
+problem with the current server code is that the server currently
+drops DATA cells on streams in the EXIT_CONN_STATE_CONNECTING state.
+Also, if you try to queue data in the EXIT_CONN_STATE_RESOLVING state,
+bad things happen because streams in that state don't yet have
+conn->write_event set, and so some existing sanity checks (any stream
+with queued data is at least potentially writable) are no longer sound.
+
+The solution is to simply not drop received DATA cells while in the
+EXIT_CONN_STATE_CONNECTING state.  Also do not send SENDME cells in this
+state, so that the OP cannot send more than one window's worth of data
+to be queued at the Exit.  Finally, patch the sanity checks so that
+streams in the EXIT_CONN_STATE_RESOLVING state that have buffered data
+can pass.
+
+If no clients ever send such optimistic data, the new code will never be
+executed, and the behaviour of Tor will not change.  When clients begin
+to send optimistic data, the performance of those clients' streams will
+improve.
+
+After discussion with nickm, it seems best to just have the server
+version number be the indicator of whether a particular Exit supports
+optimistic data.  (If a client sends optimistic data to an Exit which
+does not support it, the data will be dropped, and the client's request
+will fail to complete.)  What do version numbers for hypothetical future
+protocol-compatible implementations look like, though?
+
+Security implications:
+
+Servers (for sure the Exit, and possibly others, by watching the
+pattern of packets) will be able to tell that a particular client
+is using optimistic data.  This will be discussed more in the sibling
+proposal.
+
+On the Exit side, servers will be queueing a little bit extra data, but
+no more than one window.  Clients today can cause Exits to queue that
+much data anyway, simply by establishing a Tor connection to a slow
+machine, and sending one window of data.
+
+Specification:
+
+tor-spec section 6.2 currently says:
+
+    The OP waits for a RELAY_CONNECTED cell before sending any data.
+    Once a connection has been established, the OP and exit node
+    package stream data in RELAY_DATA cells, and upon receiving such
+    cells, echo their contents to the corresponding TCP stream.
+    RELAY_DATA cells sent to unrecognized streams are dropped.
+
+It is not clear exactly what an "unrecognized" stream is, but this last
+sentence would be changed to say that RELAY_DATA cells received on a
+stream that has processed a RELAY_BEGIN cell and has not yet issued a
+RELAY_END or a RELAY_CONNECTED cell are queued; that queue is processed
+immediately after a RELAY_CONNECTED cell is issued for the stream, or
+freed after a RELAY_END cell is issued for the stream.
+
+The earlier part of this section will be addressed in the sibling
+proposal.
+
+Compatibility:
+
+There are compatibility issues, as mentioned above.  OPs MUST NOT send
+optimistic data to Exit nodes whose version numbers predate (something).
+OPs MAY send optimistic data to Exit nodes whose version numbers match
+or follow that value.  (But see the question about independent server
+reimplementations, above.)
+
+Implementation:
+
+Here is a simple patch.  It seems to work with both regular streams and
+hidden services, but there may be other corner cases I'm not aware of.
+(Do streams used for directory fetches, hidden services, etc. take a
+different code path?)
+
+diff --git a/src/or/connection.c b/src/or/connection.c
+index 7b1493b..f80cd6e 100644
+--- a/src/or/connection.c
++++ b/src/or/connection.c
+@@ -2845,7 +2845,13 @@ _connection_write_to_buf_impl(const char *string, size_t len,
+     return;
+   }
+ 
+-  connection_start_writing(conn);
++  /* If we receive optimistic data in the EXIT_CONN_STATE_RESOLVING
++   * state, we don't want to try to write it right away, since
++   * conn->write_event won't be set yet.  Otherwise, write data from
++   * this conn as the socket is available. */
++  if (conn->state != EXIT_CONN_STATE_RESOLVING) {
++      connection_start_writing(conn);
++  }
+   if (zlib) {
+     conn->outbuf_flushlen += buf_datalen(conn->outbuf) - old_datalen;
+   } else {
+@@ -3382,7 +3388,11 @@ assert_connection_ok(connection_t *conn, time_t now)
+     tor_assert(conn->s < 0);
+ 
+   if (conn->outbuf_flushlen > 0) {
+-    tor_assert(connection_is_writing(conn) || conn->write_blocked_on_bw ||
++    /* With optimistic data, we may have queued data in
++     * EXIT_CONN_STATE_RESOLVING while the conn is not yet marked to writing.
++     * */
++    tor_assert(conn->state == EXIT_CONN_STATE_RESOLVING ||
++	    connection_is_writing(conn) || conn->write_blocked_on_bw ||
+             (CONN_IS_EDGE(conn) && TO_EDGE_CONN(conn)->edge_blocked_on_circ));
+   }
+ 
+diff --git a/src/or/relay.c b/src/or/relay.c
+index fab2d88..e45ff70 100644
+--- a/src/or/relay.c
++++ b/src/or/relay.c
+@@ -1019,6 +1019,9 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
+   relay_header_t rh;
+   unsigned domain = layer_hint?LD_APP:LD_EXIT;
+   int reason;
++  int optimistic_data = 0;  /* Set to 1 if we receive data on a stream
++			       that's in the EXIT_CONN_STATE_RESOLVING
++			       or EXIT_CONN_STATE_CONNECTING states.*/
+ 
+   tor_assert(cell);
+   tor_assert(circ);
+@@ -1038,9 +1041,20 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
+   /* either conn is NULL, in which case we've got a control cell, or else
+    * conn points to the recognized stream. */
+ 
+-  if (conn && !connection_state_is_open(TO_CONN(conn)))
+-    return connection_edge_process_relay_cell_not_open(
+-             &rh, cell, circ, conn, layer_hint);
++  if (conn && !connection_state_is_open(TO_CONN(conn))) {
++    if ((conn->_base.state == EXIT_CONN_STATE_CONNECTING ||
++	    conn->_base.state == EXIT_CONN_STATE_RESOLVING) &&
++	rh.command == RELAY_COMMAND_DATA) {
++	/* We're going to allow DATA cells to be delivered to an exit
++	 * node in state EXIT_CONN_STATE_CONNECTING or
++	 * EXIT_CONN_STATE_RESOLVING.  This speeds up HTTP, for example. */
++	log_warn(domain, "Optimistic data received.");
++	optimistic_data = 1;
++    } else {
++	return connection_edge_process_relay_cell_not_open(
++		 &rh, cell, circ, conn, layer_hint);
++    }
++  }
+ 
+   switch (rh.command) {
+     case RELAY_COMMAND_DROP:
+@@ -1090,7 +1104,9 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
+       log_debug(domain,"circ deliver_window now %d.", layer_hint ?
+                 layer_hint->deliver_window : circ->deliver_window);
+ 
+-      circuit_consider_sending_sendme(circ, layer_hint);
++      if (!optimistic_data) {
++	  circuit_consider_sending_sendme(circ, layer_hint);
++      }
+ 
+       if (!conn) {
+         log_info(domain,"data cell dropped, unknown stream (streamid %d).",
+@@ -1107,7 +1123,9 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
+       stats_n_data_bytes_received += rh.length;
+       connection_write_to_buf(cell->payload + RELAY_HEADER_SIZE,
+                               rh.length, TO_CONN(conn));
+-      connection_edge_consider_sending_sendme(conn);
++      if (!optimistic_data) {
++	  connection_edge_consider_sending_sendme(conn);
++      }
+       return 0;
+     case RELAY_COMMAND_END:
+       reason = rh.length > 0 ?
+
+Performance and scalability notes:
+
+There may be more RAM used at Exit nodes, as mentioned above, but it is
+transient.
diff --git a/doc/spec/rend-spec.txt b/doc/spec/rend-spec.txt
index cab97097bc..3c14ebc662 100644
--- a/doc/spec/rend-spec.txt
+++ b/doc/spec/rend-spec.txt
@@ -9,7 +9,7 @@
       RFC 2119.
 
    Read
-   https://www.torproject.org/doc/design-paper/tor-design.html#sec:rendezvous
+   https://svn.torproject.org/svn/projects/design-paper/tor-design.html#sec:rendezvous
    before you read this specification. It will make more sense.
 
    Rendezvous points provide location-hidden services (server
@@ -150,7 +150,7 @@
    The first time the OP provides an advertised service, it generates
    a public/private keypair (stored locally).
 
-   The OP choses a small number of Tor servers as introduction points.
+   The OP chooses a small number of Tor servers as introduction points.
    The OP establishes a new introduction circuit to each introduction
    point.  These circuits MUST NOT be used for anything but hidden service
    introduction.  To establish the introduction, Bob sends a
@@ -238,6 +238,9 @@
 
          permanent-id = H(public-key)[:10]
 
+       Note: If Bob's OP has "stealth" authorization enabled (see Section 2.2),
+       it uses the client key in place of the public hidden service key.
+
        "H(time-period | descriptor-cookie | replica)" is the (possibly
        secret) id part that is necessary to verify that the hidden service is
        the true originator of this descriptor and that is therefore contained
@@ -591,7 +594,8 @@
         RC   Rendezvous cookie    [20 octets]
 
    The rendezvous cookie is an arbitrary 20-byte value, chosen randomly by
-   Alice's OP.
+   Alice's OP. Alice SHOULD choose a new rendezvous cookie for each new
+   connection attempt.
 
    Upon receiving a RELAY_COMMAND_ESTABLISH_RENDEZVOUS cell, the OR associates
    the RC with the circuit that sent it.  It replies to Alice with an empty
@@ -667,8 +671,8 @@
    circuit. (If the PK_ID is unrecognized, the RELAY_COMMAND_INTRODUCE1 cell is
    discarded.)
 
-   After sending the RELAY_COMMAND_INTRODUCE2 cell, the OR replies to Alice
-   with an empty RELAY_COMMAND_INTRODUCE_ACK cell.  If no
+   After sending the RELAY_COMMAND_INTRODUCE2 cell to Bob, the OR replies to
+   Alice with an empty RELAY_COMMAND_INTRODUCE_ACK cell.  If no
    RELAY_COMMAND_INTRODUCE2 cell can be sent, the OR replies to Alice with a
    non-empty cell to indicate an error.  (The semantics of the cell body may be
    determined later; the current implementation sends a single '1' byte on
@@ -758,11 +762,11 @@
 2.1. Service with large-scale client authorization
 
    The first client authorization protocol aims at performing access control
-   while consuming as few additional resources as possible. A service
-   provider should be able to permit access to a large number of clients
-   while denying access for everyone else. However, the price for
-   scalability is that the service won't be able to hide its activity from
-   unauthorized or formerly authorized clients.
+   while consuming as few additional resources as possible. This is the "basic"
+   authorization protocol. A service provider should be able to permit access
+   to a large number of clients while denying access for everyone else.
+   However, the price for scalability is that the service won't be able to hide
+   its activity from unauthorized or formerly authorized clients.
 
    The main idea of this protocol is to encrypt the introduction-point part
    in hidden service descriptors to authorized clients using symmetric keys.
@@ -821,19 +825,19 @@
 2.2. Authorization for limited number of clients
 
    A second, more sophisticated client authorization protocol goes the extra
-   mile of hiding service activity from unauthorized clients. With all else
-   being equal to the preceding authorization protocol, the second protocol
-   publishes hidden service descriptors for each user separately and gets
-   along with encrypting the introduction-point part of descriptors to a
-   single client. This allows the service to stop publishing descriptors for
-   removed clients. As long as a removed client cannot link descriptors
-   issued for other clients to the service, it cannot derive service
-   activity any more. The downside of this approach is limited scalability.
-   Even though the distributed storage of descriptors (cf. proposal 114)
-   tackles the problem of limited scalability to a certain extent, this
-   protocol should not be used for services with more than 16 clients. (In
-   fact, Tor should refuse to advertise services for more than this number
-   of clients.)
+   mile of hiding service activity from unauthorized clients. This is the
+   "stealth" authorization protocol. With all else being equal to the preceding
+   authorization protocol, the second protocol publishes hidden service
+   descriptors for each user separately and gets along with encrypting the
+   introduction-point part of descriptors to a single client. This allows the
+   service to stop publishing descriptors for removed clients. As long as a
+   removed client cannot link descriptors issued for other clients to the
+   service, it cannot derive service activity any more. The downside of this
+   approach is limited scalability. Even though the distributed storage of
+   descriptors (cf. proposal 114) tackles the problem of limited scalability to
+   a certain extent, this protocol should not be used for services with more
+   than 16 clients. (In fact, Tor should refuse to advertise services for more
+   than this number of clients.)
 
    A hidden service generates an asymmetric "client key" and a symmetric
    "descriptor cookie" for each client. The client key is used as
@@ -881,14 +885,16 @@
    A hidden service that is meant to perform client authorization adds a
    new option HiddenServiceAuthorizeClient to its hidden service
    configuration. This option contains the authorization type which is
-   either "1" for the protocol described in 2.1 or "2" for the protocol in
-   2.2 and a comma-separated list of human-readable client names, so that
-   Tor can create authorization data for these clients:
+   either "basic" for the protocol described in 2.1 or "stealth" for the
+   protocol in 2.2, and a comma-separated list of human-readable client
+   names, so that Tor can create authorization data for these clients:
 
      HiddenServiceAuthorizeClient auth-type client-name,client-name,...
 
    If this option is configured, HiddenServiceVersion is automatically
-   reconfigured to contain only version numbers of 2 or higher.
+   reconfigured to contain only version numbers of 2 or higher. There is
+   a maximum of 512 client names for basic auth and a maximum of 16 for
+   stealth auth.
 
    Tor stores all generated authorization data for the authorization
    protocols described in Sections 2.1 and 2.2 in a new file using the
diff --git a/doc/spec/tor-spec.txt b/doc/spec/tor-spec.txt
index a08005c6da..91ad561b8d 100644
--- a/doc/spec/tor-spec.txt
+++ b/doc/spec/tor-spec.txt
@@ -843,7 +843,8 @@ see tor-design.pdf.
        6 -- REASON_DONE           (Anonymized TCP connection was closed)
        7 -- REASON_TIMEOUT        (Connection timed out, or OR timed out
                                    while connecting)
-       8 -- (unallocated) [**]
+       8 -- REASON_NOROUTE        (Routing error while attempting to
+                                   contact destination)
        9 -- REASON_HIBERNATING    (OR is temporarily hibernating)
       10 -- REASON_INTERNAL       (Internal error at the OR)
       11 -- REASON_RESOURCELIMIT  (OR has no resources to fulfill request)
@@ -865,8 +866,6 @@ see tor-design.pdf.
 
    [*] Older versions of Tor also send this reason when connections are
        reset.
-   [**] Due to a bug in versions of Tor through 0095, error reason 8 must
-        remain allocated until that version is obsolete.
 
    --- [The rest of this section describes unimplemented functionality.]
 
diff --git a/doc/tor.1.txt b/doc/tor.1.txt
index 222aaf103c..3b7e30bdfb 100644
--- a/doc/tor.1.txt
+++ b/doc/tor.1.txt
@@ -64,7 +64,8 @@ OPTIONS
 Other options can be specified either on the command-line (--option
     value), or in the configuration file (option value or option "value").
     Options are case-insensitive. C-style escaped characters are allowed inside
-    quoted values.
+    quoted values. Options on the command line take precedence over
+    options found in the configuration file.
 
 **BandwidthRate** __N__ **bytes**|**KB**|**MB**|**GB**::
     A token bucket limits the average incoming bandwidth usage on this node to
@@ -964,23 +965,20 @@ is non-zero):
 
 **CellStatistics** **0**|**1**::
     When this option is enabled, Tor writes statistics on the mean time that
-    cells spend in circuit queues to disk every 24 hours. Cannot be changed
-    while Tor is running. (Default: 0)
+    cells spend in circuit queues to disk every 24 hours. (Default: 0)
 
 **DirReqStatistics** **0**|**1**::
     When this option is enabled, Tor writes statistics on the number and
-    response time of network status requests to disk every 24 hours. Cannot be
-    changed while Tor is running. (Default: 0)
+    response time of network status requests to disk every 24 hours.
+    (Default: 0)
 
 **EntryStatistics** **0**|**1**::
     When this option is enabled, Tor writes statistics on the number of
-    directly connecting clients to disk every 24 hours. Cannot be changed while
-    Tor is running. (Default: 0)
+    directly connecting clients to disk every 24 hours. (Default: 0)
 
 **ExitPortStatistics** **0**|**1**::
     When this option is enabled, Tor writes statistics on the number of relayed
-    bytes and opened stream per exit port to disk every 24 hours. Cannot be
-    changed while Tor is running. (Default: 0)
+    bytes and opened streams per exit port to disk every 24 hours. (Default: 0)
 
 **ExtraInfoStatistics** **0**|**1**::
     When this option is enabled, Tor includes previously gathered statistics in