diff options
41 files changed, 4 insertions, 19165 deletions
diff --git a/AUTHORS b/AUTHORS deleted file mode 100644 index e4e9bf65ef..0000000000 --- a/AUTHORS +++ /dev/null @@ -1,41 +0,0 @@ - This file lists the authors for Tor, - a free software project to provide anonymity on the Internet. - - For more information about Tor, see https://www.torproject.org/. - - If you got this file as a part of a larger bundle, - there are probably other authors that you should be aware of. - -Main authors: -------------- - -Roger Dingledine <arma@freehaven.net> overhauled all of the code, did -a bunch of new design work, etc. - -Nick Mathewson <nickm@freehaven.net> wrote lots of stuff too, in -particular the router and descriptor parsing, and the crypto and tls -wrappers. - -Matej Pfajfar <badbytes@freehaven.net> wrote the first version of the code -(called OR) in 2001-2002. - -Contributors: -------------- - -John Bashinski <jbash@velvet.com> contributed the initial rpm spec file. - -Christian Grothoff <grothoff@cs.purdue.edu> contributed better daemonizing -behavior. - -Steven Hazel <sah@thalassocracy.org> made 'make install' do the right -thing. - -Jason Holt <jason@lunkwill.org> contributed patches to the instructions -and the man page. - -Peter Palfrader <peter@palfrader.org> maintains everything that's -debian-specific, and has written other useful features. - -Aaron Turner <aturner@netscreen.com> contributed the first version of -the tor.sh initscripts shell script. - @@ -25,6 +25,10 @@ Changes in version 0.2.2.9-alpha - 2010-??-?? - Generate our manpage and HTML documentation using Asciidoc. This should make it easier to maintain the documentation, and produce nicer HTML. + - Removed some unnecessary files from the source distribution. The + AUTHORS file had its content merged into the people page on the + website. The roadmaps and design doc can now be found in the + projects directory in svn. 
o Removed features: - Stop shipping parts of the website and the design paper in the diff --git a/contrib/privoxy-tor-toggle b/contrib/privoxy-tor-toggle deleted file mode 100644 index 8f9cd51bd9..0000000000 --- a/contrib/privoxy-tor-toggle +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/sh -# A script to turn Tor SOCKS4a in Privoxy on or off. - -CONFFILE=/etc/privoxy/config # privoxy config file. -TOR_REG="forward.*localhost:9050" # Regular expression to find Tor in privoxy -PRIVOXY="/etc/init.d/privoxy restart" # command to reload privoxy config file. -SED="/bin/sed" # sed command, of course. -GREP="/bin/grep" # grep command. - -usage () { -echo "\ -privoxy-tor-toggle: Change Privoxy's configuration to use/not use Tor. -Usage: - privoxy.tor <-- Switch Tor on or off. - privoxy.tor [on|off] <-- Set Tor on or off. - privoxy.tor status <-- Display Tor's current status. - privoxy.tor [-h|--help|-?] <-- Print usage. -" -} - -# Find out the current status of tor. Set $tor_status -get_status () { - gret=`$GREP -l -e "^$TOR_REG" $CONFFILE` - if [ x$gret = x ] ; then - tor_status=off; - else - tor_status=on; - fi - return -} - -# Turn tor on/off according to $1 -set_tor () { - tor_gate=$1 - get_status - if [ $tor_status = $tor_gate ] ; then - echo "Tor is already $1." - return - elif [ $tor_gate = flip ] ; then - if [ $tor_status = on ] ; then - tor_gate=off - elif [ $tor_status = off ] ; then - tor_gate=on - fi - fi - echo "Turning Tor $tor_gate..." - if [ $tor_gate = on ] ; then - reg=s/^#\($TOR_REG\)/\\1/ - $SED -i.bak -r "$reg" $CONFFILE - else - reg=s/^\($TOR_REG\)/#\\1/ - $SED -i.bak -r "$reg" $CONFFILE - fi - $PRIVOXY - return 0; -} - -if [ x$1 = x ] ; then - set_tor flip -elif [ $1 = on ] ; then - set_tor on -elif [ $1 = off ] ; then - set_tor off -elif [ $1 = status ] ; then - get_status - echo "Tor is $tor_status" -elif [ $1 = --help ] || [ $1 = -h ] || [ $1 = "-?" 
] ; then - usage - exit 0 -else - echo "Unrecognized option: \"$1\"" -fi - diff --git a/contrib/proxy-some-domains b/contrib/proxy-some-domains deleted file mode 100644 index eb238a2feb..0000000000 --- a/contrib/proxy-some-domains +++ /dev/null @@ -1,52 +0,0 @@ -Subject: -Re: Anonymous/Nonymous Communication Coexisting? -From: -Kristian Köhntopp <kris@xn--khntopp-90a.de> -Date: -Fri, 10 Jun 2005 08:56:19 +0200 -To: -or-talk@freehaven.net - -On Wednesday 08 June 2005 04:20, yancm@sdf.lonestar.org wrote: - ->> Is it possible to have a single application, such as a web ->> browser or a p2p client behave normally with normal url's but ->> use tor if the url is an xyz.onion address? Or is it ->> everything or nothing? - - -This is basically a question of using your proxy or not. You can -control the behaviour of your browser in great detail writing a -proxy.pac program in Javascript and setting that program as the -proxy autoconfiguration URL in your browser. - -An example: - -kris@jordan01:~> cat /srv/www/htdocs/proxy.pac - -function FindProxyForURL(url, host) -{ - var proxy_yes = "PROXY jordan01.int.cinetic.de:3128"; - var proxy_no = "DIRECT"; - - // Redirect all accesses to mlan hosts to the mlan proxy - if (dnsDomainIs(host, ".mlan.cinetic.de")) { - return proxy_yes; - } - - // Everything else is direct - return proxy_no; -} - -So here the program checks if the destination is a mlan-Host, and -if so, uses the appropriate proxy on jordan for the access, -while all other accesses are direct. - -You could do a similar thing with .onion accesses with a trivial -modification. 
- -Docs: -http://wp.netscape.com/eng/mozilla/2.0/relnotes/demo/proxy-live.html - -Kristian - diff --git a/contrib/tor-0.1.2.17.tar.gz.metalink.in b/contrib/tor-0.1.2.17.tar.gz.metalink.in deleted file mode 100644 index 559748865d..0000000000 --- a/contrib/tor-0.1.2.17.tar.gz.metalink.in +++ /dev/null @@ -1,41 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<metalink version="3.0" generator="Metalink Editor version 1.1.0" xmlns="http://www.metalinker.org/"> - <publisher> - <name>The Tor Project</name> - <url>https://www.torproject.org</url> - </publisher> - <license> - <name>BSD</name> - <url>http://opensource.org/licenses/bsd-license.php</url> - </license> - <identity>Tor</identity> - <version>@VERSION@</version> - <copyright>2007 The Tor Project, Inc.</copyright> - <description>Anonymity Online</description> - <files> - <file name="tor-@VERSION@.tar.gz"> - <size>1251636</size> - <language>en</language> - <os>Source</os> - <verification> - <hash type="md5">ef8fc7f45d167875c337063d437c9832</hash> - <hash type="sha1">01092fb75c407b5c1d7f33db069cf7641973d94d</hash> - <hash type="sha256">fc0fb0c2891ae09854a69512c6b4988964f2eaf62ce80ed6644cb21f87f6056a</hash> - <pieces type="sha1" length="262144"> - <hash piece="0">c778dd01e05734d57f769082545f9802386e42bb</hash> - <hash piece="1">39b172ed8b9290884c7bd129db633a79e28d5ae9</hash> - <hash piece="2">28d708e7489a1e9951e757443672535aedfa3abe</hash> - <hash piece="3">a7623e07081819a37300de0511bbdda0bdc960bd</hash> - <hash piece="4">f246021e55affe320a1f86eac5b049dd0caad828</hash> - </pieces> - </verification> - <resources> - <url type="http" location="at">http://tor.cypherpunks.at/dist/</url> - <url type="http" location="ca">http://tor.depthstrike.com/dist/</url> - <url type="http" location="ca">http://tor.hermetix.org/dist/</url> - <url type="http" location="ch">http://tor.boinc.ch/dist/</url> - <url type="http" location="cn">http://tor.anonymity.cn/dist/</url> - </resources> - </file> - </files> -</metalink> diff --git 
a/doc/codecon04.mgp b/doc/codecon04.mgp deleted file mode 100644 index e9815fcb37..0000000000 --- a/doc/codecon04.mgp +++ /dev/null @@ -1,357 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%deffont "standard" xfont "comic sans ms-medium-r" -%%deffont "thick" xfont "arial black-medium-r" -%%deffont "typewriter" xfont "courier new-bold-r" -%%deffont "type2writer" xfont "arial narrow-bold-r" -%%deffont "standard" tfont "standard.ttf", tmfont "kochi-mincho.ttf" -%%deffont "thick" tfont "thick.ttf", tmfont "goth.ttf" -%%deffont "typewriter" tfont "typewriter.ttf", tmfont "goth.ttf" -%deffont "standard" xfont "helvetica-medium-r", tfont "arial.ttf", tmfont "times.ttf" -%deffont "thick" xfont "helvetica-bold-r", tfont "arialbd.ttf", tmfont "hoso6.ttf" -%deffont "italic" xfont "helvetica-italic-r", tfont "ariali.ttf", tmfont "hoso6.ttf" -%deffont "typewriter" xfont "courier-medium-r", tfont "typewriter.ttf", tmfont "hoso6.ttf" -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% -%% Default settings per each line numbers. -%% -%default 1 leftfill, size 8, fore "black", back "white", font "thick", hgap 1 -%default 2 size 8, vgap 10, prefix " ", ccolor "black" -%default 3 size 6, bar "gray70", vgap 0 -%default 4 size 6, fore "black", vgap 0, prefix " ", font "standard" -%% -%%default 1 area 90 90, leftfill, size 9, fore "yellow", back "blue", font "thick" -%%default 2 size 9, vgap 10, prefix " " -%%default 3 size 7, bar "gray70", vgap 10 -%%default 4 size 7, vgap 30, prefix " ", font "standard" -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% -%% Default settings that are applied to TAB-indented lines. 
-%% -%tab 1 size 5, vgap 40, prefix " ", icon arc "red" 50 -%tab 2 size 4, vgap 35, prefix " ", icon delta3 "blue" 40 -%tab 3 size 3, vgap 35, prefix " ", icon dia "DarkViolet" 40 -%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page -%nodefault -%center, size 9, font "thick", back "white", fore "black" - -Tor: -%size 8 -Next-generation Onion Routing - - -%size 7 -Roger Dingledine -Nick Mathewson -Paul Syverson - -The Free Haven Project -%font "typewriter", fore "blue" -http://freehaven.net/ - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Low-latency anonymity system - -%leftfill -Deployed: 20 nodes, hundreds (?) of users - -Many improvements on earlier design - -Free software -- modified BSD license - -Design is not covered by earlier onion routing -patent - -Uses SOCKS to interface with client apps - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -We have working code - -(14 kloc of C) - -and a design document, -and a byte-level specification, -and a Debian package (in Unstable) - -Works on Linux, BSD, OSX, Cygwin, ... -User-space, doesn't need kernel mods or root - -%size 9 -http://freehaven.net/tor/ - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%page -%% -%%Talk Overview -%% -%%A bit about Onion Routing -%% -%%Improvements we've made -%% -%%Some related work -%% -%%Ask me questions -%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Anonymity: Who needs it? - -Private citizens - advocacy, counseling, whistleblowing, reporting, ... -%size 6 -Higher-level protocols - voting, e-cash, auctions -%size 6 -Government applications - research, law enforcement -%size 6 -Business applications -%size 5 -(hide relationships and volumes of communication) - Who is visiting job sites? - Which groups are talking to patent lawyers? - Who are your suppliers and customers? - Is the CEO talking to a buyout partner? 
- -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Anonymity is a network effect - - Systems need traffic (many low-sensitivity users) to attract the high-sensitivity users - Most users do not value anonymity much - Weak security (fast system) can mean more users - which can mean -%cont, font "italic" -stronger -%cont, font "standard" -anonymity - High-sensitivity agents have incentive to run nodes - so they can be certain first node in their path is good - to attract traffic for their messages - There can be an optimal level of free-riding - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Onion Routing is... - -An overlay network - -Users build virtual circuits through the network - -One layer of encryption at each hop - -Fixed-size cells - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Tor's goals - -Conservative design - minimize new design work needed - -%size 6 -Support testing of future research - -Design for deployment; deploy for use - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Threat model -- what we aim for - -Protect against somebody watching Alice - -Protect against curious Bob - -Protect against `some' curious nodes in the middle - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Differences / limitations - - -We're TCP-only, not all IP (but we're user-space and very portable) - -Not as strong as high-latency systems (Mixmaster, Mixminion) - -Not peer-to-peer - -No protocol normalization - -Not unobservable (no steg, etc) - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Perfect forward secrecy - - -Telescoping circuit - - negotiates keys at each hop - no more need for replay detection - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -No mixing, padding, traffic shaping (yet) - - -Please show us they're 
worth the usability tradeoff - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%page -%% -%%Many TCP streams can share one circuit -%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Many TCP streams share a circuit - -Previous designs built a new circuit for each stream - - lots of public key ops per request - plus anonymity dangers from making so many circuits - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Leaky-pipe circuit topology - -Alice can direct cells to any node in her circuit - - So we can support long-range padding, - have multiple streams exiting at different places in the circuit - etc - -%size 6 -Unclear whether this is dangerous or useful - -More research needed - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Congestion control - - -Simple rate limiting - -Plus have to keep internal nodes from overflowing - -(Can't use global state or inter-node control) - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Directory servers - -To solve the `introduction' problem - -Approve new servers - -Tell clients who's up right now - - plus their keys, location, etc - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Variable exit policies - - -Each server allows different outgoing connections - -E.g. no servers allow outgoing mail currently - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -End-to-end integrity checking - - -In previous onion routing, an insider could change -the text being transmitted: - -"dir" => "rm *" - -Even an external adversary could do this! 
- -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Rendezvous points - -allow hidden services - -don't need (brittle) reply onions - - Access-controlled: Bob can control who he talks to - Robust: Bob's service is available even when some Tor nodes go down - Smear-resistant: Evil service can't frame a rendezvous router - Application-transparent: Don't need to modify Bob's apache - -%size 6 -(Not implemented yet) - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -How do we compare security? - -Assume adversary owns c of n nodes - can choose which -%size 6 -What's the chance for a random Alice and Bob that he wins? - -Freedom, Tor: (c/n)^2 -Peekabooty, six-four, etc: c/n -Jap (if no padding): 1 if c>1 -Anonymizer: 1 if c>0 - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Future work - -Threshold directory agreement - -Scalability: Morphmix/p2p extensions? -Restricted-route (non-clique topology) - -Non-TCP transport - -Implement rendezvous points - -Make it work better - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -We have working code - -Plus a design document, -and a byte-level specification -and a Debian package (in Unstable) - -%size 9 -http://freehaven.net/tor/ - -%size 6 -Privacy Enhancing Technologies workshop - -%size 9 -http://petworkshop.org/ - diff --git a/doc/design-paper/Makefile b/doc/design-paper/Makefile deleted file mode 100644 index 3fdbbec131..0000000000 --- a/doc/design-paper/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -cell-struct.eps: cell-struct.fig - fig2dev -L eps $< $@ -interaction.eps: interaction.fig - fig2dev -L eps $< $@ -cell-struct.pdf: cell-struct.fig - fig2dev -L pdf $< $@ -interaction.pdf: interaction.fig - fig2dev -L pdf $< $@ - -tor-design.ps: cell-struct.eps interaction.eps tor-design.bib tor-design.tex usenix.sty latex8.bst - latex tor-design.tex - bibtex tor-design - latex tor-design.tex - 
latex tor-design.tex - dvips -o $@ tor-design.dvi - -tor-design.pdf: cell-struct.pdf interaction.pdf tor-design.bib tor-design.tex usenix.sty latex8.bst - pdflatex tor-design.tex - bibtex tor-design - pdflatex tor-design.tex - pdflatex tor-design.tex
\ No newline at end of file diff --git a/doc/design-paper/blocking.html b/doc/design-paper/blocking.html deleted file mode 100644 index 6028f5dc1c..0000000000 --- a/doc/design-paper/blocking.html +++ /dev/null @@ -1,2112 +0,0 @@ -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" - "DTD/xhtml1-transitional.dtd"> -<html> -<meta name="GENERATOR" content="TtH 3.77"> -<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> - <style type="text/css"> div.p { margin-top: 7pt;}</style> - <style type="text/css"><!-- - td div.comp { margin-top: -0.6ex; margin-bottom: -1ex;} - td div.comb { margin-top: -0.6ex; margin-bottom: -.6ex;} - td div.hrcomp { line-height: 0.9; margin-top: -0.8ex; margin-bottom: -1ex;} - td div.norm {line-height:normal;} - span.roman {font-family: serif; font-style: normal; font-weight: normal;} - span.overacc2 {position: relative; left: .8em; top: -1.2ex;} - span.overacc1 {position: relative; left: .6em; top: -1.2ex;} --></style> - - -<title> Design of a blocking-resistant anonymity system\DRAFT</title> - -<h1 align="center">Design of a blocking-resistant anonymity system<br />DRAFT </h1> - -<div class="p"><!----></div> - -<h3 align="center">Roger Dingledine, Nick Mathewson </h3> - - -<div class="p"><!----></div> - -<h2> Abstract</h2> -Internet censorship is on the rise as websites around the world are -increasingly blocked by government-level firewalls. Although popular -anonymizing networks like Tor were originally designed to keep attackers from -tracing people's activities, many people are also using them to evade local -censorship. But if the censor simply denies access to the Tor network -itself, blocked users can no longer benefit from the security Tor offers. - -<div class="p"><!----></div> -Here we describe a design that builds upon the current Tor network -to provide an anonymizing network that resists blocking -by government-level attackers. 
- -<div class="p"><!----></div> - - <h2><a name="tth_sEc1"> -1</a> Introduction and Goals</h2> - -<div class="p"><!----></div> -Anonymizing networks like Tor [<a href="#tor-design" name="CITEtor-design">11</a>] bounce traffic around a -network of encrypting relays. Unlike encryption, which hides only <i>what</i> -is said, these networks also aim to hide who is communicating with whom, which -users are using which websites, and similar relations. These systems have a -broad range of users, including ordinary citizens who want to avoid being -profiled for targeted advertisements, corporations who don't want to reveal -information to their competitors, and law enforcement and government -intelligence agencies who need to do operations on the Internet without being -noticed. - -<div class="p"><!----></div> -Historical anonymity research has focused on an -attacker who monitors the user (call her Alice) and tries to discover her -activities, yet lets her reach any piece of the network. In more modern -threat models such as Tor's, the adversary is allowed to perform active -attacks such as modifying communications to trick Alice -into revealing her destination, or intercepting some connections -to run a man-in-the-middle attack. But these systems still assume that -Alice can eventually reach the anonymizing network. - -<div class="p"><!----></div> -An increasing number of users are using the Tor software -less for its anonymity properties than for its censorship -resistance properties — if they use Tor to access Internet sites like -Wikipedia -and Blogspot, they are no longer affected by local censorship -and firewall rules. In fact, an informal user study -showed China as the third largest user base -for Tor clients, with perhaps ten thousand people accessing the Tor -network from China each day. 
- -<div class="p"><!----></div> -The current Tor design is easy to block if the attacker controls Alice's -connection to the Tor network — by blocking the directory authorities, -by blocking all the server IP addresses in the directory, or by filtering -based on the fingerprint of the Tor TLS handshake. Here we describe an -extended design that builds upon the current Tor network to provide an -anonymizing -network that resists censorship as well as anonymity-breaking attacks. -In section <a href="#sec:adversary">2</a> we discuss our threat model — that is, -the assumptions we make about our adversary. Section <a href="#sec:current-tor">3</a> -describes the components of the current Tor design and how they can be -leveraged for a new blocking-resistant design. Section <a href="#sec:related">4</a> -explains the features and drawbacks of the currently deployed solutions. -In sections <a href="#sec:bridges">5</a> through <a href="#sec:discovery">7</a>, we explore the -components of our designs in detail. Section <a href="#sec:security">8</a> considers -security implications and Section <a href="#sec:reachability">9</a> presents other -issues with maintaining connectivity and sustainability for the design. -Section <a href="#sec:future">10</a> speculates about future more complex designs, -and finally Section <a href="#sec:conclusion">11</a> summarizes our next steps and -recommendations. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h2><a name="tth_sEc2"> -<a name="sec:adversary"> -2</a> Adversary assumptions</h2> -</a> - -<div class="p"><!----></div> -To design an effective anti-censorship tool, we need a good model for the -goals and resources of the censors we are evading. Otherwise, we risk -spending our effort on keeping the adversaries from doing things they have no -interest in doing, and thwarting techniques they do not use. 
-The history of blocking-resistance designs is littered with conflicting -assumptions about what adversaries to expect and what problems are -in the critical path to a solution. Here we describe our best -understanding of the current situation around the world. - -<div class="p"><!----></div> -In the traditional security style, we aim to defeat a strong -attacker — if we can defend against this attacker, we inherit protection -against weaker attackers as well. After all, we want a general design -that will work for citizens of China, Thailand, and other censored -countries; for -whistleblowers in firewalled corporate networks; and for people in -unanticipated oppressive situations. In fact, by designing with -a variety of adversaries in mind, we can take advantage of the fact that -adversaries will be in different stages of the arms race at each location, -so a server blocked in one locale can still be useful in others. - -<div class="p"><!----></div> -We assume that the attackers' goals are somewhat complex. - -<dl compact="compact"> - - <dt><b></b></dt> - <dd><li>The attacker would like to restrict the flow of certain kinds of - information, particularly when this information is seen as embarrassing to - those in power (such as information about rights violations or corruption), - or when it enables or encourages others to oppose them effectively (such as - information about opposition movements or sites that are used to organize - protests).</dd> - <dt><b></b></dt> - <dd><li>As a second-order effect, censors aim to chill citizens' behavior by - creating an impression that their online activities are monitored.</dd> - <dt><b></b></dt> - <dd><li>In some cases, censors make a token attempt to block a few sites for - obscenity, blasphemy, and so on, but their efforts here are mainly for - show. 
In other cases, they really do try hard to block such content.</dd> - <dt><b></b></dt> - <dd><li>Complete blocking (where nobody at all can ever download censored - content) is not a - goal. Attackers typically recognize that perfect censorship is not only - impossible, but unnecessary: if "undesirable" information is known only - to a small few, further censoring efforts can be focused elsewhere.</dd> - <dt><b></b></dt> - <dd><li>Similarly, the censors are not attempting to shut down or block <i> - every</i> anti-censorship tool — merely the tools that are popular and - effective (because these tools impede the censors' information restriction - goals) and those tools that are highly visible (thus making the censors - look ineffectual to their citizens and their bosses).</dd> - <dt><b></b></dt> - <dd><li>Reprisal against <i>most</i> passive consumers of <i>most</i> kinds of - blocked information is also not a goal, given the broadness of most - censorship regimes. This seems borne out by fact.<a href="#tthFtNtAAB" name="tthFrefAAB"><sup>1</sup></a></dd> - <dt><b></b></dt> - <dd><li>Producers and distributors of targeted information are in much - greater danger than consumers; the attacker would like to not only block - their work, but identify them for reprisal.</dd> - <dt><b></b></dt> - <dd><li>The censors (or their governments) would like to have a working, useful - Internet. There are economic, political, and social factors that prevent - them from "censoring" the Internet by outlawing it entirely, or by - blocking access to all but a tiny list of sites. - Nevertheless, the censors <i>are</i> willing to block innocuous content - (like the bulk of a newspaper's reporting) in order to censor other content - distributed through the same channels (like that newspaper's coverage of - the censored country). 
-</dd> -</dl> - -<div class="p"><!----></div> -We assume there are three main technical network attacks in use by censors -currently [<a href="#clayton:pet2006" name="CITEclayton:pet2006">7</a>]: - -<div class="p"><!----></div> - -<dl compact="compact"> - - <dt><b></b></dt> - <dd><li>Block a destination or type of traffic by automatically searching for - certain strings or patterns in TCP packets. Offending packets can be - dropped, or can trigger a response like closing the - connection.</dd> - <dt><b></b></dt> - <dd><li>Block a destination by listing its IP address at a - firewall or other routing control point.</dd> - <dt><b></b></dt> - <dd><li>Intercept DNS requests and give bogus responses for certain - destination hostnames. -</dd> -</dl> - -<div class="p"><!----></div> -We assume the network firewall has limited CPU and memory per -connection [<a href="#clayton:pet2006" name="CITEclayton:pet2006">7</a>]. Against an adversary who could carefully -examine the contents of every packet and correlate the packets in every -stream on the network, we would need some stronger mechanism such as -steganography, which introduces its own -problems [<a href="#active-wardens" name="CITEactive-wardens">15</a>,<a href="#tcpstego" name="CITEtcpstego">26</a>]. But we make a "weak -steganography" assumption here: to remain unblocked, it is necessary to -remain unobservable only by computational resources on par with a modern -router, firewall, proxy, or IDS. - -<div class="p"><!----></div> -We assume that while various different regimes can coordinate and share -notes, there will be a time lag between one attacker learning how to overcome -a facet of our design and other attackers picking it up. (The most common -vector of transmission seems to be commercial providers of censorship tools: -once a provider adds a feature to meet one country's needs or requests, the -feature is available to all of the provider's customers.) 
Conversely, we -assume that insider attacks become a higher risk only after the early stages -of network development, once the system has reached a certain level of -success and visibility. - -<div class="p"><!----></div> -We do not assume that government-level attackers are always uniform -across the country. For example, users of different ISPs in China -experience different censorship policies and mechanisms. - -<div class="p"><!----></div> -We assume that the attacker may be able to use political and economic -resources to secure the cooperation of extraterritorial or multinational -corporations and entities in investigating information sources. -For example, the censors can threaten the service providers of -troublesome blogs with economic reprisals if they do not reveal the -authors' identities. - -<div class="p"><!----></div> -We assume that our users have control over their hardware and -software — they don't have any spyware installed, there are no -cameras watching their screens, etc. Unfortunately, in many situations -these threats are real [<a href="#zuckerman-threatmodels" name="CITEzuckerman-threatmodels">28</a>]; yet -software-based security systems like ours are poorly equipped to handle -a user who is entirely observed and controlled by the adversary. See -Section <a href="#subsec:cafes-and-livecds">8.4</a> for more discussion of what little -we can do about this issue. - -<div class="p"><!----></div> -Similarly, we assume that the user will be able to fetch a genuine -version of Tor, rather than one supplied by the adversary; see -Section <a href="#subsec:trust-chain">8.5</a> for discussion on helping the user -confirm that he has a genuine version and that he can connect to the -real Tor network. 
- -<div class="p"><!----></div> - <h2><a name="tth_sEc3"> -<a name="sec:current-tor"> -3</a> Adapting the current Tor design to anti-censorship</h2> -</a> - -<div class="p"><!----></div> -Tor is popular and sees a lot of use — it's the largest anonymity -network of its kind, and has -attracted more than 800 volunteer-operated routers from around the -world. Tor protects each user by routing their traffic through a multiply -encrypted "circuit" built of a few randomly selected servers, each of which -can remove only a single layer of encryption. Each server sees only the step -before it and the step after it in the circuit, and so no single server can -learn the connection between a user and her chosen communication partners. -In this section, we examine some of the reasons why Tor has become popular, -with particular emphasis to how we can take advantage of these properties -for a blocking-resistance design. - -<div class="p"><!----></div> -Tor aims to provide three security properties: - -<dl compact="compact"> - - <dt><b></b></dt> - <dd>1. A local network attacker can't learn, or influence, your -destination.</dd> - <dt><b></b></dt> - <dd>2. No single router in the Tor network can link you to your -destination.</dd> - <dt><b></b></dt> - <dd>3. The destination, or somebody watching the destination, -can't learn your location. -</dd> -</dl> - -<div class="p"><!----></div> -For blocking-resistance, we care most clearly about the first -property. But as the arms race progresses, the second property -will become important — for example, to discourage an adversary -from volunteering a relay in order to learn that Alice is reading -or posting to certain websites. 
The third property helps keep users safe from -collaborating websites: consider websites and other Internet services -that have been pressured -recently into revealing the identity of bloggers -or treating clients differently depending on their network -location [<a href="#goodell-syverson06" name="CITEgoodell-syverson06">17</a>]. - -<div class="p"><!----></div> -The Tor design provides other features as well that are not typically -present in manual or ad hoc circumvention techniques. - -<div class="p"><!----></div> -First, Tor has a well-analyzed and well-understood way to distribute -information about servers. -Tor directory authorities automatically aggregate, test, -and publish signed summaries of the available Tor routers. Tor clients -can fetch these summaries to learn which routers are available and -which routers are suitable for their needs. Directory information is cached -throughout the Tor network, so once clients have bootstrapped they never -need to interact with the authorities directly. (To tolerate a minority -of compromised directory authorities, we use a threshold trust scheme — -see Section <a href="#subsec:trust-chain">8.5</a> for details.) - -<div class="p"><!----></div> -Second, the list of directory authorities is not hard-wired. -Clients use the default authorities if no others are specified, -but it's easy to start a separate (or even overlapping) Tor network just -by running a different set of authorities and convincing users to prefer -a modified client. For example, we could launch a distinct Tor network -inside China; some users could even use an aggregate network made up of -both the main network and the China network. (But we should not be too -quick to create other Tor networks — part of Tor's anonymity comes from -users behaving like other users, and there are many unsolved anonymity -questions if different users know about different pieces of the network.) 
- -<div class="p"><!----></div> -Third, in addition to automatically learning from the chosen directories -which Tor routers are available and working, Tor takes care of building -paths through the network and rebuilding them as needed. So the user -never has to know how paths are chosen, never has to manually pick -working proxies, and so on. More generally, at its core the Tor protocol -is simply a tool that can build paths given a set of routers. Tor is -quite flexible about how it learns about the routers and how it chooses -the paths. Harvard's Blossom project [<a href="#blossom-thesis" name="CITEblossom-thesis">16</a>] makes this -flexibility more concrete: Blossom makes use of Tor not for its security -properties but for its reachability properties. It runs a separate set -of directory authorities, its own set of Tor routers (called the Blossom -network), and uses Tor's flexible path-building to let users view Internet -resources from any point in the Blossom network. - -<div class="p"><!----></div> -Fourth, Tor separates the role of <em>internal relay</em> from the -role of <em>exit relay</em>. That is, some volunteers choose just to relay -traffic between Tor users and Tor routers, and others choose to also allow -connections to external Internet resources. Because we don't force all -volunteers to play both roles, we end up with more relays. This increased -diversity in turn is what gives Tor its security: the more options the -user has for her first hop, and the more options she has for her last hop, -the less likely it is that a given attacker will be watching both ends -of her circuit [<a href="#tor-design" name="CITEtor-design">11</a>]. As a bonus, because our design attracts -more internal relays that want to help out but don't want to deal with -being an exit relay, we end up providing more options for the first -hop — the one most critical to being able to reach the Tor network. - -<div class="p"><!----></div> -Fifth, Tor is sustainable. 
Zero-Knowledge Systems offered the commercial -but now defunct Freedom Network [<a href="#freedom21-security" name="CITEfreedom21-security">2</a>], a design with -security comparable to Tor's, but its funding model relied on collecting -money from users to pay relay operators. Modern commercial proxy systems -similarly -need to keep collecting money to support their infrastructure. On the -other hand, Tor has built a self-sustaining community of volunteers who -donate their time and resources. This community trust is rooted in Tor's -open design: we tell the world exactly how Tor works, and we provide all -the source code. Users can decide for themselves, or pay any security -expert to decide, whether it is safe to use. Further, Tor's modularity -as described above, along with its open license, means that its impact -will continue to grow. - -<div class="p"><!----></div> -Sixth, Tor has an established user base of hundreds of -thousands of people from around the world. This diversity of -users contributes to sustainability as above: Tor is used by -ordinary citizens, activists, corporations, law enforcement, and -even government and military users, -and they can -only achieve their security goals by blending together in the same -network [<a href="#econymics" name="CITEeconymics">1</a>,<a href="#usability:weis2006" name="CITEusability:weis2006">9</a>]. This user base also provides -something else: hundreds of thousands of different and often-changing -addresses that we can leverage for our blocking-resistance design. - -<div class="p"><!----></div> -Finally and perhaps most importantly, Tor provides anonymity and prevents any -single server from linking users to their communication partners. Despite -initial appearances, <i>distributed-trust anonymity is critical for -anti-censorship efforts</i>.
If any single server can expose dissident bloggers -or compile a list of users' behavior, the censors can profitably compromise -that server's operator, perhaps by applying economic pressure to their -employers, -breaking into their computer, pressuring their family (if they have relatives -in the censored area), or so on. Furthermore, in designs where any relay can -expose its users, the censors can spread suspicion that they are running some -of the relays and use this belief to chill use of the network. - -<div class="p"><!----></div> -We discuss and adapt these components further in -Section <a href="#sec:bridges">5</a>. But first we examine the strengths and -weaknesses of other blocking-resistance approaches, so we can expand -our repertoire of building blocks and ideas. - -<div class="p"><!----></div> - <h2><a name="tth_sEc4"> -<a name="sec:related"> -4</a> Current proxy solutions</h2> -</a> - -<div class="p"><!----></div> -Relay-based blocking-resistance schemes generally have two main -components: a relay component and a discovery component. The relay part -encompasses the process of establishing a connection, sending traffic -back and forth, and so on — everything that's done once the user knows -where she's going to connect. Discovery is the step before that: the -process of finding one or more usable relays. - -<div class="p"><!----></div> -For example, we can divide the pieces of Tor in the previous section -into the process of building paths and sending -traffic over them (relay) and the process of learning from the directory -servers about what routers are available (discovery). With this distinction -in mind, we now examine several categories of relay-based schemes. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.1"> -4.1</a> Centrally-controlled shared proxies</h3> - -<div class="p"><!----></div> -Existing commercial anonymity solutions (like Anonymizer.com) are based -on a set of single-hop proxies. 
In these systems, each user connects to -a single proxy, which then relays traffic between the user and her -destination. These public proxy -systems are typically characterized by two features: they control and -operate the proxies centrally, and many different users get assigned -to each proxy. - -<div class="p"><!----></div> -In terms of the relay component, single proxies provide weak security -compared to systems that distribute trust over multiple relays, since a -compromised proxy can trivially observe all of its users' actions, and -an eavesdropper only needs to watch a single proxy to perform timing -correlation attacks against all its users' traffic and thus learn where -everyone is connecting. Worse, all users -need to trust the proxy company to have good security itself as well as -to not reveal user activities. - -<div class="p"><!----></div> -On the other hand, single-hop proxies are easier to deploy, and they -can provide better performance than distributed-trust designs like Tor, -since traffic only goes through one relay. They're also more convenient -from the user's perspective — since users entirely trust the proxy, -they can just use their web browser directly. - -<div class="p"><!----></div> -Whether public proxy schemes are more or less scalable than Tor is -still up for debate: commercial anonymity systems can use some of their -revenue to provision more bandwidth as they grow, whereas volunteer-based -anonymity systems can attract thousands of fast relays to spread the load. - -<div class="p"><!----></div> -The discovery piece can take several forms. Most commercial anonymous -proxies have one or a handful of commonly known websites, and their users -log in to those websites and relay their traffic through them. 
When -these websites get blocked (generally soon after the company becomes -popular), if the company cares about users in the blocked areas, they -start renting lots of disparate IP addresses and rotating through them -as they get blocked. They notify their users of new addresses (by email, -for example). It's an arms race, since attackers can sign up to receive the -email too, but operators have one nice trick available to them: because they -have a list of paying subscribers, they can notify certain subscribers -about updates earlier than others. - -<div class="p"><!----></div> -Access control systems on the proxy let them provide service only to -users with certain characteristics, such as paying customers or people -from certain IP address ranges. - -<div class="p"><!----></div> -Discovery in the face of a government-level firewall is a complex and -unsolved -topic, and we're stuck in this same arms race ourselves; we explore it -in more detail in Section <a href="#sec:discovery">7</a>. But first we examine the -other end of the spectrum — getting volunteers to run the proxies, -and telling only a few people about each proxy. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.2"> -4.2</a> Independent personal proxies</h3> - -<div class="p"><!----></div> -Personal proxies such as Circumventor [<a href="#circumventor" name="CITEcircumventor">18</a>] and -CGIProxy [<a href="#cgiproxy" name="CITEcgiproxy">23</a>] use the same technology as the public ones as -far as the relay component goes, but they use a different strategy for -discovery. Rather than managing a few centralized proxies and constantly -getting new addresses for them as the old addresses are blocked, they -aim to have a large number of entirely independent proxies, each managing -its own (much smaller) set of users. - -<div class="p"><!----></div> -As the Circumventor site explains, "You don't -actually install the Circumventor <em>on</em> the computer that is blocked -from accessing Web sites. 
You, or a friend of yours, has to install the -Circumventor on some <em>other</em> machine which is not censored." - -<div class="p"><!----></div> -This tactic has great advantages in terms of blocking-resistance — recall -our assumption in Section <a href="#sec:adversary">2</a> that the attention -a system attracts from the attacker is proportional to its number of -users and level of publicity. If each proxy only has a few users, and -there is no central list of proxies, most of them will never get noticed by -the censors. - -<div class="p"><!----></div> -On the other hand, there's a huge scalability question that so far has -prevented these schemes from being widely useful: how does the fellow -in China find a person in Ohio who will run a Circumventor for him? In -some cases he may know and trust some people on the outside, but in many -cases he's just out of luck. Just as hard, how does a new volunteer in -Ohio find a person in China who needs it? - -<div class="p"><!----></div> - -<div class="p"><!----></div> -This challenge leads to a hybrid design — centrally-distributed -personal proxies — which we will investigate in more detail in -Section <a href="#sec:discovery">7</a>. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.3"> -4.3</a> Open proxies</h3> - -<div class="p"><!----></div> -Yet another currently used approach to bypassing firewalls is to locate -open and misconfigured proxies on the Internet. A quick Google search -for "open proxy list" yields a wide variety of freely available lists -of HTTP, HTTPS, and SOCKS proxies. Many small companies have sprung up -providing more refined lists to paying customers. - -<div class="p"><!----></div> -There are some downsides to using these open proxies though. First, -the proxies are of widely varying quality in terms of bandwidth and -stability, and many of them are entirely unreachable.
Second, unlike -networks of volunteers like Tor, the legality of routing traffic through -these proxies is questionable: it's widely believed that most of them -don't realize what they're offering, and probably wouldn't allow it if -they realized. Third, in many cases the connection to the proxy is -unencrypted, so firewalls that filter based on keywords in IP packets -will not be hindered. Fourth, in many countries (including China), the -firewall authorities hunt for open proxies as well, to preemptively -block them. And last, many users are suspicious that some -open proxies are a little <em>too</em> convenient: are they run by the -adversary, in which case they get to monitor all the user's requests -just as single-hop proxies can? - -<div class="p"><!----></div> -A distributed-trust design like Tor resolves each of these issues for -the relay component, but a constantly changing set of thousands of open -relays is clearly a useful idea for a discovery component. For example, -users might be able to make use of these proxies to bootstrap their -first introduction into the Tor network. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.4"> -4.4</a> Blocking resistance and JAP</h3> - -<div class="p"><!----></div> -Köpsell and Hilling's Blocking Resistance -design [<a href="#koepsell:wpes2004" name="CITEkoepsell:wpes2004">20</a>] is probably -the closest related work, and is the starting point for the design in this -paper. In this design, the JAP anonymity system [<a href="#web-mix" name="CITEweb-mix">3</a>] is used -as a base instead of Tor. Volunteers operate a large number of access -points that relay traffic to the core JAP -network, which in turn anonymizes users' traffic. The software to run these -relays is, as in our design, included in the JAP client software and enabled -only when the user decides to enable it. 
Discovery is handled with a -CAPTCHA-based mechanism; users prove that they aren't an automated process, -and are given the address of an access point. (The problem of a determined -attacker with enough manpower to launch many requests and enumerate all the -access points is not considered in depth.) There is also some suggestion -that information about access points could spread through existing social -networks. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.5"> -4.5</a> Infranet</h3> - -<div class="p"><!----></div> -The Infranet design [<a href="#infranet" name="CITEinfranet">14</a>] uses one-hop relays to deliver web -content, but disguises its communications as ordinary HTTP traffic. Requests -are split into multiple requests for URLs on the relay, which then encodes -its responses in the content it returns. The relay needs to be an actual -website with plausible content and a number of URLs which the user might want -to access — if the Infranet software produced its own cover content, it would -be far easier for censors to identify. To keep the censors from noticing -that cover content changes depending on what data is embedded, Infranet needs -the cover content to have an innocuous reason for changing frequently: the -paper recommends watermarked images and webcams. - -<div class="p"><!----></div> -The attacker and relay operators in Infranet's threat model are significantly -different than in ours. Unlike our attacker, Infranet's censor can't be -bypassed with encrypted traffic (presumably because the censor blocks -encrypted traffic, or at least considers it suspicious), and has more -computational resources to devote to each connection than ours (so it can -notice subtle patterns over time). Unlike our bridge operators, Infranet's -operators (and users) have more bandwidth to spare; the overhead in typical -steganography schemes is far higher than Tor's. - -<div class="p"><!----></div> -The Infranet design does not include a discovery element. 
Discovery, -however, is a critical point: if whatever mechanism allows users to learn -about relays also allows the censor to do so, he can trivially discover and -block their addresses, even if the steganography would prevent mere traffic -observation from revealing the relays' addresses. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.6"> -4.6</a> RST-evasion and other packet-level tricks</h3> - -<div class="p"><!----></div> -In their analysis of China's firewall's content-based blocking, Clayton, -Murdoch and Watson discovered that rather than blocking all packets in a TCP -stream once a forbidden word was noticed, the firewall was simply forging -RST packets to make the communicating parties believe that the connection was -closed [<a href="#clayton:pet2006" name="CITEclayton:pet2006">7</a>]. They proposed altering operating systems -to ignore forged RST packets. This approach might work in some cases, but -in practice it appears that many firewalls start filtering by IP address -once a sufficient number of RST packets have been sent. - -<div class="p"><!----></div> -Other packet-level responses to filtering include splitting -sensitive words across multiple TCP packets, so that the censors' -firewalls can't notice them without performing expensive stream -reconstruction [<a href="#ptacek98insertion" name="CITEptacek98insertion">27</a>]. This technique relies on the -same insight as our weak steganography assumption. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.7"> -4.7</a> Internal caching networks</h3> - -<div class="p"><!----></div> -Freenet [<a href="#freenet-pets00" name="CITEfreenet-pets00">6</a>] is an anonymous peer-to-peer data store. -Analyzing Freenet's security can be difficult, as its design is in flux as -new discovery and routing mechanisms are proposed, and no complete -specification has (to our knowledge) been written.
Freenet servers relay -requests for specific content (indexed by a digest of the content) -"toward" the server that hosts it, and then cache the content as it -follows the same path back to -the requesting user. If Freenet's routing mechanism is successful in -allowing nodes to learn about each other and route correctly even as some -node-to-node links are blocked by firewalls, then users inside censored areas -can ask a local Freenet server for a piece of content, and get an answer -without having to connect out of the country at all. Of course, operators of -servers inside the censored area can still be targeted, and the addresses of -external servers can still be blocked. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.8"> -4.8</a> Skype</h3> - -<div class="p"><!----></div> -The popular Skype voice-over-IP software uses multiple techniques to tolerate -restrictive networks, some of which allow it to continue operating in the -presence of censorship. By switching ports and using encryption, Skype -attempts to resist trivial blocking and content filtering. Even if no -encryption were used, it would still be expensive to scan all voice -traffic for sensitive words. Also, most current keyloggers are unable to -store voice traffic. Nevertheless, Skype can still be blocked, especially at -its central login server. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.9"> -4.9</a> Tor itself</h3> - -<div class="p"><!----></div> -And last, we include Tor itself in the list of current solutions -to firewalls. Tens of thousands of people use Tor from countries that -routinely filter their Internet. Tor's website has been blocked in most -of them. But why hasn't the Tor network been blocked yet? - -<div class="p"><!----></div> -We have several theories. The first is the most straightforward: tens of -thousands of people are simply too few to matter. It may help that Tor is -perceived to be for experts only, and thus not worth attention yet. 
The -more subtle variant on this theory is that we've positioned Tor in the -public eye as a tool for retaining civil liberties in more free countries, -so perhaps blocking authorities don't view it as a threat. (We revisit -this idea when we consider whether and how to publicize a Tor variant -that improves blocking-resistance — see Section <a href="#subsec:publicity">9.5</a> -for more discussion.) - -<div class="p"><!----></div> -The broader explanation is that the maintenance of most government-level -filters is aimed at stopping widespread information flow and appearing to be -in control, not at the impossible goal of blocking all possible ways to bypass -censorship. Censors realize that there will always -be ways for a few people to get around the firewall, and as long as Tor -has not publicly threatened their control, they see no urgent need to -block it yet. - -<div class="p"><!----></div> -We should recognize that we're <em>already</em> in the arms race. These -constraints can give us insight into the priorities and capabilities of -our various attackers. - -<div class="p"><!----></div> - <h2><a name="tth_sEc5"> -<a name="sec:bridges"> -5</a> The relay component of our blocking-resistant design</h2> -</a> - -<div class="p"><!----></div> -Section <a href="#sec:current-tor">3</a> describes many reasons why Tor is -well-suited as a building block in our context, but several changes will -allow the design to resist blocking better. The most critical changes are -to get more relay addresses, and to distribute them to users differently. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h3><a name="tth_sEc5.1"> -5.1</a> Bridge relays</h3> - -<div class="p"><!----></div> -Today, Tor servers operate on less than a thousand distinct IP addresses; -an adversary -could enumerate and block them all with little trouble.
To provide a -means of ingress to the network, we need a larger set of entry points, most -of which an adversary won't be able to enumerate easily. Fortunately, we -have such a set: the Tor users. - -<div class="p"><!----></div> -Hundreds of thousands of people around the world use Tor. We can leverage -our already self-selected user base to produce a list of thousands of -frequently-changing IP addresses. Specifically, we can give them a little -button in the GUI that says "Tor for Freedom", and users who click -the button will turn into <em>bridge relays</em> (or just <em>bridges</em> -for short). They can rate limit relayed connections to 10 KB/s (almost -nothing for a broadband user in a free country, but plenty for a user -who otherwise has no access at all), and since they are just relaying -bytes back and forth between blocked users and the main Tor network, they -won't need to make any external connections to Internet sites. Because -of this separation of roles, and because we're making use of software -that the volunteers have already installed for their own use, we expect -our scheme to attract and maintain more volunteers than previous schemes. - -<div class="p"><!----></div> -As usual, there are new anonymity and security implications from running a -bridge relay, particularly from letting people relay traffic through your -Tor client; but we leave this discussion for Section <a href="#sec:security">8</a>. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h3><a name="tth_sEc5.2"> -5.2</a> The bridge directory authority</h3> - -<div class="p"><!----></div> -How do the bridge relays advertise their existence to the world? We -introduce a second new component of the design: a specialized directory -authority that aggregates and tracks bridges. 
Bridge relays periodically -publish server descriptors (summaries of their keys, locations, etc, -signed by their long-term identity key), just like the relays in the -"main" Tor network, but in this case they publish them only to the -bridge directory authorities. - -<div class="p"><!----></div> -The main difference between bridge authorities and the directory -authorities for the main Tor network is that the main authorities provide -a list of every known relay, but the bridge authorities only give -out a server descriptor if you already know its identity key. That is, -you can keep up-to-date on a bridge's location and other information -once you know about it, but you can't just grab a list of all the bridges. - -<div class="p"><!----></div> -The identity key, IP address, and directory port for each bridge -authority ship by default with the Tor software, so the bridge relays -can be confident they're publishing to the right location, and the -blocked users can establish an encrypted authenticated channel. See -Section <a href="#subsec:trust-chain">8.5</a> for more discussion of the public key -infrastructure and trust chain. - -<div class="p"><!----></div> -Bridges use Tor to publish their descriptors privately and securely, -so even an attacker monitoring the bridge directory authority's network -can't make a list of all the addresses contacting the authority. -Bridges may publish to only a subset of the -authorities, to limit the potential impact of an authority compromise. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h3><a name="tth_sEc5.3"> -<a name="subsec:relay-together"> -5.3</a> Putting them together</h3> -</a> - -<div class="p"><!----></div> -If a blocked user knows the identity keys of a set of bridge relays, and -he has correct address information for at least one of them, he can use -that one to make a secure connection to the bridge authority and update -his knowledge about the other bridge relays. 
He can also use it to make -secure connections to the main Tor network and directory servers, so he -can build circuits and connect to the rest of the Internet. All of these -updates happen in the background: from the blocked user's perspective, -he just accesses the Internet via his Tor client like always. - -<div class="p"><!----></div> -So now we've reduced the problem from how to circumvent the firewall -for all transactions (and how to know that the pages you get have not -been modified by the local attacker) to how to learn about a working -bridge relay. - -<div class="p"><!----></div> -There's another catch though. We need to make sure that the network -traffic we generate by simply connecting to a bridge relay doesn't stand -out too much. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h2><a name="tth_sEc6"> -<a name="sec:network-fingerprint"> -<a name="subsec:enclave-dirs"> -6</a> Hiding Tor's network fingerprint</h2> -</a> -</a> - -<div class="p"><!----></div> -Currently, Tor uses two protocols for its network communications. The -main protocol uses TLS for encrypted and authenticated communication -between Tor instances. The second protocol is standard HTTP, used for -fetching directory information. All Tor servers listen on their "ORPort" -for TLS connections, and some of them opt to listen on their "DirPort" -as well, to serve directory information. Tor servers choose whatever port -numbers they like; the server descriptor they publish to the directory -tells users where to connect. - -<div class="p"><!----></div> -One format for communicating address information about a bridge relay is -its IP address and DirPort. From there, the user can ask the bridge's -directory cache for an up-to-date copy of its server descriptor, and -learn its current circuit keys, its ORPort, and so on. - -<div class="p"><!----></div> -However, connecting directly to the directory cache involves a plaintext -HTTP request. 
A censor could create a network fingerprint (known as a -<em>signature</em> in the intrusion detection field) for the request -and/or its response, thus preventing these connections. To resolve this -vulnerability, we've modified the Tor protocol so that users can connect -to the directory cache via the main Tor port — they establish a TLS -connection with the bridge as normal, and then send a special "begindir" -relay command to establish an internal connection to its directory cache. - -<div class="p"><!----></div> -Therefore a better way to summarize a bridge's address is by its IP -address and ORPort, so all communications between the client and the -bridge will use ordinary TLS. But there are other details that need -more investigation. - -<div class="p"><!----></div> -What port should bridges pick for their ORPort? We currently recommend -that they listen on port 443 (the default HTTPS port) if they want to -be most useful, because clients behind standard firewalls will have -the best chance to reach them. Is this the best choice in all cases, -or should we encourage some fraction of them to pick random ports, or other -ports commonly permitted through firewalls like 53 (DNS) or 110 -(POP)? Or perhaps we should use other ports where TLS traffic is -expected, like 993 (IMAPS) or 995 (POP3S). We need more research on our -potential users, and their current and anticipated firewall restrictions. - -<div class="p"><!----></div> -Furthermore, we need to look at the specifics of Tor's TLS handshake. -Right now Tor uses some predictable strings in its TLS handshakes. For -example, it sets the X.509 organizationName field to "Tor", and it puts -the Tor server's nickname in the certificate's commonName field. We -should tweak the handshake protocol so it doesn't rely on any unusual details -in the certificate, yet it remains secure; the certificate itself -should be made to resemble an ordinary HTTPS certificate.
We should also try -to make our advertised cipher-suites closer to what an ordinary web server -would support. - -<div class="p"><!----></div> -Tor's TLS handshake uses two-certificate chains: one certificate -contains the self-signed identity key for -the router, and the second contains a current TLS key, signed by the -identity key. We use these to authenticate that we're talking to the right -router, and to limit the impact of TLS-key exposure. Most (though far from -all) consumer-oriented HTTPS services provide only a single certificate. -These extra certificates may help identify Tor's TLS handshake; instead, -bridges should consider using only a single TLS key certificate signed by -their identity key, and providing the full value of the identity key in an -early handshake cell. More significantly, Tor currently has all clients -present certificates, so that clients are harder to distinguish from servers. -But in a blocking-resistance environment, clients should not present -certificates at all. - -<div class="p"><!----></div> -Last, what if the adversary starts observing the network traffic even -more closely? Even if our TLS handshake looks innocent, our traffic timing -and volume still look different than a user making a secure web connection -to his bank. The same techniques used in the growing trend to build tools -to recognize encrypted Bittorrent traffic -could be used to identify Tor communication and recognize bridge -relays. Rather than trying to look like encrypted web traffic, we may be -better off trying to blend with some other encrypted network protocol. The -first step is to compare typical network behavior for a Tor client to -typical network behavior for various other protocols. This statistical -cat-and-mouse game is made more complex by the fact that Tor transports a -variety of protocols, and we'll want to automatically handle web browsing -differently from, say, instant messaging. 
- -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h3><a name="tth_sEc6.1"> -<a name="subsec:id-address"> -6.1</a> Identity keys as part of addressing information</h3> -</a> - -<div class="p"><!----></div> -We have described a way for the blocked user to bootstrap into the -network once he knows the IP address and ORPort of a bridge. What about -local spoofing attacks? That is, since we never learned an identity -key fingerprint for the bridge, a local attacker could intercept our -connection and pretend to be the bridge we had in mind. It turns out -that giving false information isn't that bad — since the Tor client -ships with trusted keys for the bridge directory authority and the Tor -network directory authorities, the user can learn whether he's being -given a real connection to the bridge authorities or not. (After all, -if the adversary intercepts every connection the user makes and gives -him a bad connection each time, there's nothing we can do.) - -<div class="p"><!----></div> -What about anonymity-breaking attacks from observing traffic, if the -blocked user doesn't start out knowing the identity key of his intended -bridge? The vulnerabilities aren't so bad in this case either — the -adversary could do similar attacks just by monitoring the network -traffic. - -<div class="p"><!----></div> -Once the Tor client has fetched the bridge's server descriptor, it should -remember the identity key fingerprint for that bridge relay. Thus if -the bridge relay moves to a new IP address, the client can query the -bridge directory authority to look up a fresh server descriptor using -this fingerprint. - -<div class="p"><!----></div> -So we've shown that it's <em>possible</em> to bootstrap into the network -just by learning the IP address and ORPort of a bridge, but are there -situations where it's more convenient or more secure to learn the bridge's -identity fingerprint as well, or instead, while bootstrapping?
We keep -that question in mind as we next investigate bootstrapping and discovery. - -<div class="p"><!----></div> - <h2><a name="tth_sEc7"> -<a name="sec:discovery"> -7</a> Discovering working bridge relays</h2> -</a> - -<div class="p"><!----></div> -Tor's modular design means that we can develop a better relay component -independently of developing the discovery component. This modularity's -great promise is that we can pick any discovery approach we like; but the -unfortunate fact is that we have no magic bullet for discovery. We're -in the same arms race as all the other designs we described in -Section <a href="#sec:related">4</a>. - -<div class="p"><!----></div> -In this section we describe a variety of approaches to adding discovery -components for our design. - -<div class="p"><!----></div> - <h3><a name="tth_sEc7.1"> -<a name="subsec:first-bridge"> -7.1</a> Bootstrapping: finding your first bridge.</h3> -</a> - -<div class="p"><!----></div> -In Section <a href="#subsec:relay-together">5.3</a>, we showed that a user who knows -a working bridge address can use it to reach the bridge authority and -to stay connected to the Tor network. But how do new users reach the -bridge authority in the first place? After all, the bridge authority -will be one of the first addresses that a censor blocks. - -<div class="p"><!----></div> -First, we should recognize that most government firewalls are not -perfect. That is, they may allow connections to Google cache or some -open proxy servers, or they let file-sharing traffic, Skype, instant -messaging, or World-of-Warcraft connections through. Different users will -have different mechanisms for bypassing the firewall initially. Second, -we should remember that most people don't operate in a vacuum; users will -hopefully know other people who are in other situations or have other -resources available. 
In the rest of this section we develop a toolkit -of different options and mechanisms, so that we can enable users in a -diverse set of contexts to bootstrap into the system. - -<div class="p"><!----></div> -(For users who can't use any of these techniques, hopefully they know -a friend who can — for example, perhaps the friend already knows some -bridge relay addresses. If they can't get around it at all, then we -can't help them — they should go meet more people or learn more about -the technology running the firewall in their area.) - -<div class="p"><!----></div> -By deploying all the schemes in the toolkit at once, we let bridges and -blocked users employ the discovery approach that is most appropriate -for their situation. - -<div class="p"><!----></div> - <h3><a name="tth_sEc7.2"> -7.2</a> Independent bridges, no central discovery</h3> - -<div class="p"><!----></div> -The first design is simply to have no centralized discovery component at -all. Volunteers run bridges, and we assume they have some blocked users -in mind and communicate their address information to them out-of-band -(for example, through Gmail). This design allows for small personal -bridges that have only one or a handful of users in mind, but it can -also support an entire community of users. For example, Citizen Lab's -upcoming Psiphon single-hop proxy tool [<a href="#psiphon" name="CITEpsiphon">13</a>] plans to use this -<em>social network</em> approach as its discovery component. - -<div class="p"><!----></div> -There are several ways to do bootstrapping in this design. In the simple -case, the operator of the bridge informs each chosen user about his -bridge's address information and/or keys. A different approach involves -blocked users introducing new blocked users to the bridges they know. -That is, somebody in the blocked area can pass along a bridge's address to -somebody else they trust. 
This scheme brings in appealing but complex game -theoretic properties: the blocked user making the decision has an incentive -only to delegate to trustworthy people, since an adversary who learns -the bridge's address and filters it makes it unavailable for both of them. -Also, delegating known bridges to members of your social network can be -dangerous: an adversary who can learn who knows which bridges may -be able to reconstruct the social network. - -<div class="p"><!----></div> -Note that a central set of bridge directory authorities can still be -compatible with a decentralized discovery process. That is, how users -first learn about bridges is entirely up to the bridges, but the process -of fetching up-to-date descriptors for them can still proceed as described -in Section <a href="#sec:bridges">5</a>. Of course, creating a central place that -knows about all the bridges may not be smart, especially if every other -piece of the system is decentralized. Further, if a user only knows -about one bridge and he loses track of it, it may be quite a hassle to -reach the bridge authority. We address these concerns next. - -<div class="p"><!----></div> - <h3><a name="tth_sEc7.3"> -7.3</a> Families of bridges, no central discovery</h3> - -<div class="p"><!----></div> -Because the blocked users are running our software too, we have many -opportunities to improve usability or robustness. Our second design builds -on the first by encouraging volunteers to run several bridges at once -(or coordinate with other bridge volunteers), such that some -of the bridges are likely to be available at any given time. - -<div class="p"><!----></div> -The blocked user's Tor client would periodically fetch an updated set of -recommended bridges from any of the working bridges. Now the client can -learn new additions to the bridge pool, and can expire abandoned bridges -or bridges that the adversary has blocked, without the user ever needing -to care. 
To simplify maintenance of the community's bridge pool, each -community could run its own bridge directory authority — reachable via -the available bridges, and also mirrored at each bridge. - -<div class="p"><!----></div> - <h3><a name="tth_sEc7.4"> -7.4</a> Public bridges with central discovery</h3> - -<div class="p"><!----></div> -What about people who want to volunteer as bridges but don't know any -suitable blocked users? What about people who are blocked but don't -know anybody on the outside? Here we describe how to make use of these -<em>public bridges</em> in a way that still makes it hard for the attacker -to learn all of them. - -<div class="p"><!----></div> -The basic idea is to divide public bridges into a set of pools based on -identity key. Each pool corresponds to a <em>distribution strategy</em>: -an approach to distributing its bridge addresses to users. Each strategy -is designed to exercise a different scarce resource or property of -the user. - -<div class="p"><!----></div> -How do we divide bridges between these strategy pools such that they're -evenly distributed and the allocation is hard to influence or predict, -but also in a way that's amenable to creating more strategies later -on without reshuffling all the pools? We assign a given bridge -to a strategy pool by hashing the bridge's identity key along with a -secret that only the bridge authority knows: the first n bits of this -hash dictate the strategy pool number, where n is a parameter that -describes how many strategy pools we want at this point. We choose n=3 -to start, so we divide bridges between 8 pools; but as we later invent -new distribution strategies, we can increment n to split the 8 into -16. Since a bridge can't predict the next bit in its hash, it can't -anticipate which identity key will correspond to a certain new pool -when the pools are split. 
Further, since the bridge authority doesn't -provide any feedback to the bridge about which strategy pool it's in, -an adversary who signs up bridges with the goal of filling a certain -pool [<a href="#casc-rep" name="CITEcasc-rep">12</a>] will be hindered. - -<div class="p"><!----></div> - -<div class="p"><!----></div> -The first distribution strategy (used for the first pool) publishes bridge -addresses in a time-release fashion. The bridge authority divides the -available bridges into partitions, and each partition is deterministically -available only in certain time windows. That is, over the course of a -given time slot (say, an hour), each requester is given a random bridge -from within that partition. When the next time slot arrives, a new set -of bridges from the pool are available for discovery. Thus some bridge -address is always available when a new -user arrives, but to learn about all bridges the attacker needs to fetch -all new addresses at every new time slot. By varying the length of the -time slots, we can make it harder for the attacker to guess when to check -back. We expect these bridges will be the first to be blocked, but they'll -help the system bootstrap until they <em>do</em> get blocked. Further, -remember that we're dealing with different blocking regimes around the -world that will progress at different rates — so this pool will still -be useful to some users even as the arms races progress. - -<div class="p"><!----></div> -The second distribution strategy publishes bridge addresses based on the IP -address of the requesting user. Specifically, the bridge authority will -divide the available bridges in the pool into a bunch of partitions -(as in the first distribution scheme), hash the requester's IP address -with a secret of its own (as in the above allocation scheme for creating -pools), and give the requester a random bridge from the appropriate -partition. 
To raise the bar, we should discard the last octet of the -IP address before inputting it to the hash function, so an attacker -who only controls a single "/24" network only counts as one user. A -large attacker like China will still be able to control many addresses, -but the hassle of establishing connections from each network (or spoofing -TCP connections) may still slow them down. Similarly, as a special case, -we should treat IP addresses that are Tor exit nodes as all being on -the same network. - -<div class="p"><!----></div> -The third strategy combines the time-based and location-based -strategies to further constrain and rate-limit the available bridge -addresses. Specifically, the bridge address provided in a given time -slot to a given network location is deterministic within the partition, -rather than chosen randomly each time from the partition. Thus, repeated -requests during that time slot from a given network are given the same -bridge address as the first request. - -<div class="p"><!----></div> -The fourth strategy is based on Circumventor's discovery strategy. -The Circumventor project, realizing that its adoption will remain limited -if it has no central coordination mechanism, has started a mailing list to -distribute new proxy addresses every few days. From experimentation it -seems they have concluded that sending updates every three or four days -is sufficient to stay ahead of the current attackers. - -<div class="p"><!----></div> -The fifth strategy provides an alternative approach to a mailing list: -users provide an email address and receive an automated response -listing an available bridge address. We could limit one response per -email address. To further rate limit queries, we could require a CAPTCHA -solution -in each case too. 
In fact, we wouldn't need to -implement the CAPTCHA on our side: if we only deliver bridge addresses -to Yahoo or GMail addresses, we can leverage the rate-limiting schemes -that other parties already impose for account creation. - -<div class="p"><!----></div> -The sixth strategy ties in the social network design with public -bridges and a reputation system. We pick some seeds — trusted people in -blocked areas — and give them each a few dozen bridge addresses and a few -<em>delegation tokens</em>. We run a website next to the bridge authority, -where users can log in (they connect via Tor, and they don't need to -provide actual identities, just persistent pseudonyms). Users can delegate -trust to other people they know by giving them a token, which can be -exchanged for a new account on the website. Accounts in "good standing" -then accrue new bridge addresses and new tokens. As usual, reputation -schemes bring in a host of new complexities [<a href="#rep-anon" name="CITErep-anon">10</a>]: how do we -decide that an account is in good standing? We could tie reputation -to whether the bridges they're told about have been blocked — see -Section <a href="#subsec:geoip">7.7</a> below for initial thoughts on how to discover -whether bridges have been blocked. We could track reputation between -accounts (if you delegate to somebody who screws up, it impacts you too), -or we could use blinded delegation tokens [<a href="#chaum-blind" name="CITEchaum-blind">5</a>] to prevent -the website from mapping the seeds' social network. We put off deeper -discussion of the social network reputation strategy for future work. - -<div class="p"><!----></div> -Pools seven and eight are held in reserve, in case our currently deployed -tricks all fail at once and the adversary blocks all those bridges — so -we can adapt and move to new approaches quickly, and have some bridges -immediately available for the new schemes. 
New strategies might be based -on some other scarce resource, such as relaying traffic for others or -other proof of energy spent. (We might also worry about the incentives -for bridges that sign up and get allocated to the reserve pools: will they -be unhappy that they're not being used? But this is a transient problem: -if Tor users are bridges by default, nobody will mind not being used yet. -See also Section <a href="#subsec:incentives">9.4</a>.) - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h3><a name="tth_sEc7.5"> -7.5</a> Public bridges with coordinated discovery</h3> - -<div class="p"><!----></div> -We presented the above discovery strategies in the context of a single -bridge directory authority, but in practice we will want to distribute the -operations over several bridge authorities — a single point of failure -or attack is a bad move. The first answer is to run several independent -bridge directory authorities, and bridges gravitate to one based on -their identity key. The better answer would be some federation of bridge -authorities that work together to provide redundancy but don't introduce -new security issues. We could even imagine designs where the bridge -authorities have encrypted versions of the bridge's server descriptors, -and the users learn a decryption key that they keep private when they -first hear about the bridge — this way the bridge authorities would not -be able to learn the IP address of the bridges. - -<div class="p"><!----></div> -We leave this design question for future work. - -<div class="p"><!----></div> - <h3><a name="tth_sEc7.6"> -7.6</a> Assessing whether bridges are useful</h3> - -<div class="p"><!----></div> -Learning whether a bridge is useful is important in the bridge authority's -decision to include it in responses to blocked users. 
For example, if -we end up with a list of thousands of bridges and only a few dozen of -them are reachable right now, most blocked users will not end up knowing -about working bridges. - -<div class="p"><!----></div> -There are three components for assessing how useful a bridge is. First, -is it reachable from the public Internet? Second, what proportion of -the time is it available? Third, is it blocked in certain jurisdictions? - -<div class="p"><!----></div> -The first component can be tested just as we test reachability of -ordinary Tor servers. Specifically, the bridges do a self-test — connect -to themselves via the Tor network — before they are willing to -publish their descriptor, to make sure they're not obviously broken or -misconfigured. Once the bridges publish, the bridge authority also tests -reachability to make sure they're not confused or outright lying. - -<div class="p"><!----></div> -The second component can be measured and tracked by the bridge authority. -By doing periodic reachability tests, we can get a sense of how often the -bridge is available. More complex tests will involve bandwidth-intensive -checks to force the bridge to commit resources in order to be counted as -available. We need to evaluate how the relationship of uptime percentage -should weigh into our choice of which bridges to advertise. We leave -this to future work. - -<div class="p"><!----></div> -The third component is perhaps the trickiest: with many different -adversaries out there, how do we keep track of which adversaries have -blocked which bridges, and how do we learn about new blocks as they -occur? We examine this problem next. 
- -<div class="p"><!----></div> - <h3><a name="tth_sEc7.7"> -<a name="subsec:geoip"> -7.7</a> How do we know if a bridge relay has been blocked?</h3> -</a> - -<div class="p"><!----></div> -There are two main mechanisms for testing whether bridges are reachable -from inside each blocked area: active testing via users, and passive -testing via bridges. - -<div class="p"><!----></div> -In the case of active testing, certain users inside each area -sign up as testing relays. The bridge authorities can then use a -Blossom-like [<a href="#blossom-thesis" name="CITEblossom-thesis">16</a>] system to build circuits through them -to each bridge and see if it can establish the connection. But how do -we pick the users? If we ask random users to do the testing (or if we -solicit volunteers from the users), the adversary should sign up so he -can enumerate the bridges we test. Indeed, even if we hand-select our -testers, the adversary might still discover their location and monitor -their network activity to learn bridge addresses. - -<div class="p"><!----></div> -Another answer is not to measure directly, but rather let the bridges -report whether they're being used. -Specifically, bridges should install a GeoIP database such as the public -IP-To-Country list [<a href="#ip-to-country" name="CITEip-to-country">19</a>], and then periodically report to the -bridge authorities which countries they're seeing use from. This data -would help us track which countries are making use of the bridge design, -and can also let us learn about new steps the adversary has taken in -the arms race. (The compressed GeoIP database is only several hundred -kilobytes, and we could even automate the update process by serving it -from the bridge authorities.) -More analysis of this passive reachability -testing design is needed to resolve its many edge cases: for example, -if a bridge stops seeing use from a certain area, does that mean the -bridge is blocked or does that mean those users are asleep? 
- -<div class="p"><!----></div> -There are many more problems with the general concept of detecting whether -bridges are blocked. First, different zones of the Internet are blocked -in different ways, and the actual firewall jurisdictions do not match -country borders. Our bridge scheme could help us map out the topology -of the censored Internet, but this is a huge task. More generally, -if a bridge relay isn't reachable, is that because of a network block -somewhere, because of a problem at the bridge relay, or just a temporary -outage somewhere in between? And last, an attacker could poison our -bridge database by signing up already-blocked bridges. In this case, -if we're stingy giving out bridge addresses, users in that country won't -learn working bridges. - -<div class="p"><!----></div> -All of these issues are made more complex when we try to integrate this -testing into our social network reputation system above. -Since in that case we punish or reward users based on whether bridges -get blocked, the adversary has new attacks to trick or bog down the -reputation tracking. Indeed, the bridge authority doesn't even know -what zone the blocked user is in, so do we blame him for any possible -censored zone, or what? - -<div class="p"><!----></div> -Clearly more analysis is required. The eventual solution will probably -involve a combination of passive measurement via GeoIP and active -measurement from trusted testers. More generally, we can use the passive -feedback mechanism to track usage of the bridge network as a whole — which -would let us respond to attacks and adapt the design, and it would also -let the general public track the progress of the project. 
- -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h3><a name="tth_sEc7.8"> -7.8</a> Advantages of deploying all solutions at once</h3> - -<div class="p"><!----></div> -For once, we're not in the position of the defender: we don't have to -defend against every possible filtering scheme; we just have to defend -against at least one. On the flip side, the attacker is forced to guess -how to allocate his resources to defend against each of these discovery -strategies. So by deploying all of our strategies at once, we not only -increase our chances of finding one that the adversary has difficulty -blocking, but we actually make <em>all</em> of the strategies more robust -in the face of an adversary with limited resources. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h2><a name="tth_sEc8"> -<a name="sec:security"> -8</a> Security considerations</h2> -</a> - -<div class="p"><!----></div> - <h3><a name="tth_sEc8.1"> -8.1</a> Possession of Tor in oppressed areas</h3> - -<div class="p"><!----></div> -Many people speculate that installing and using a Tor client in areas with -particularly extreme firewalls is a high risk — and the risk increases -as the firewall gets more restrictive. This notion certainly has merit, but -there's -a counter pressure as well: as the firewall gets more restrictive, more -ordinary people behind it end up using Tor for more mainstream activities, -such as learning -about Wall Street prices or looking at pictures of women's ankles. 
So -as the restrictive firewall pushes up the number of Tor users, the -"typical" Tor user becomes more mainstream, and therefore mere -use or possession of the Tor software is not so surprising. - -<div class="p"><!----></div> -It's hard to say which of these pressures will ultimately win out, -but we should keep both sides of the issue in mind. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h3><a name="tth_sEc8.2"> -<a name="subsec:upload-padding"> -8.2</a> Observers can tell who is publishing and who is reading</h3> -</a> - -<div class="p"><!----></div> -Tor encrypts traffic on the local network, and it obscures the eventual -destination of the communication, but it doesn't do much to obscure the -traffic volume. In particular, a user publishing a home video will have a -different network fingerprint than a user reading an online news article. -Based on our assumption in Section <a href="#sec:adversary">2</a> that users who -publish material are in more danger, should we work to improve Tor's -security in this situation? - -<div class="p"><!----></div> -In the general case this is an extremely challenging task: -effective <em>end-to-end traffic confirmation attacks</em> -are known where the adversary observes the origin and the -destination of traffic and confirms that they are part of the -same communication [<a href="#danezis:pet2004" name="CITEdanezis:pet2004">8</a>,<a href="#e2e-traffic" name="CITEe2e-traffic">24</a>]. Related are -<em>website fingerprinting attacks</em>, where the adversary downloads -a few hundred popular websites, makes a set of "fingerprints" for each -site, and then observes the target Tor client's traffic to look for -a match [<a href="#pet05-bissias" name="CITEpet05-bissias">4</a>,<a href="#defensive-dropping" name="CITEdefensive-dropping">21</a>]. But can we do better -against a limited adversary who just does coarse-grained sweeps looking -for unusually prolific publishers? 
- -<div class="p"><!----></div> -One answer is for bridge users to automatically send bursts of padding -traffic periodically. (This traffic can be implemented in terms of -long-range drop cells, which are already part of the Tor specification.) -Of course, convincingly simulating an actual human publishing interesting -content is a difficult arms race, but it may be worthwhile to at least -start the race. More research remains. - -<div class="p"><!----></div> - <h3><a name="tth_sEc8.3"> -8.3</a> Anonymity effects from acting as a bridge relay</h3> - -<div class="p"><!----></div> -Against some attacks, relaying traffic for others can improve -anonymity. The simplest example is an attacker who owns a small number -of Tor servers. He will see a connection from the bridge, but he won't -be able to know whether the connection originated there or was relayed -from somebody else. More generally, the mere uncertainty of whether the -traffic originated from that user may be helpful. - -<div class="p"><!----></div> -There are some cases where it doesn't seem to help: if an attacker can -watch all of the bridge's incoming and outgoing traffic, then it's easy -to learn which connections were relayed and which started there. (In this -case he still doesn't know the final destinations unless he is watching -them too, but in this case bridges are no better off than if they were -an ordinary client.) - -<div class="p"><!----></div> -There are also some potential downsides to running a bridge. First, while -we try to make it hard to enumerate all bridges, it's still possible to -learn about some of them, and for some people just the fact that they're -running one might signal to an attacker that they place a higher value -on their anonymity. 
Second, there are some more esoteric attacks on Tor -relays that are not as well-understood or well-tested — for example, an -attacker may be able to "observe" whether the bridge is sending traffic -even if he can't actually watch its network, by relaying traffic through -it and noticing changes in traffic timing [<a href="#attack-tor-oak05" name="CITEattack-tor-oak05">25</a>]. On -the other hand, it may be that limiting the bandwidth the bridge is -willing to relay will allow this sort of attacker to determine if it's -being used as a bridge but not easily learn whether it is adding traffic -of its own. - -<div class="p"><!----></div> -We also need to examine how entry guards fit in. Entry guards -(a small set of nodes that are always used for the first -step in a circuit) help protect against certain attacks -where the attacker runs a few Tor servers and waits for -the user to choose these servers as the beginning and end of her -circuit<a href="#tthFtNtAAC" name="tthFrefAAC"><sup>2</sup></a>. -If the blocked user doesn't use the bridge's entry guards, then the bridge -doesn't gain as much cover benefit. On the other hand, what design changes -are needed for the blocked user to use the bridge's entry guards without -learning what they are (this seems hard), and even if we solve that, -do they then need to use the guards' guards and so on down the line? - -<div class="p"><!----></div> -It is an open research question whether the benefits of running a bridge -outweigh the risks. A lot of the decision rests on which attacks the -users are most worried about. For most users, we don't think running a -bridge relay will be that damaging, and it could help quite a bit. - -<div class="p"><!----></div> - <h3><a name="tth_sEc8.4"> -<a name="subsec:cafes-and-livecds"> -8.4</a> Trusting local hardware: Internet cafes and LiveCDs</h3> -</a> - -<div class="p"><!----></div> -Assuming that users have their own trusted hardware is not -always reasonable. 
- -<div class="p"><!----></div> -For Internet cafe Windows computers that let you attach your own USB key, -a USB-based Tor image would be smart. There's Torpark, and hopefully -there will be more thoroughly analyzed and trustworthy options down the -road. Worries remain about hardware or software keyloggers and other -spyware, as well as physical surveillance. - -<div class="p"><!----></div> -If the system lets you boot from a CD or from a USB key, you can gain -a bit more security by bringing a privacy LiveCD with you. (This -approach isn't foolproof either of course, since hardware -keyloggers and physical surveillance are still a worry). - -<div class="p"><!----></div> -In fact, LiveCDs are also useful if it's your own hardware, since it's -easier to avoid leaving private data and logs scattered around the -system. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h3><a name="tth_sEc8.5"> -<a name="subsec:trust-chain"> -8.5</a> The trust chain</h3> -</a> - -<div class="p"><!----></div> -Tor's "public key infrastructure" provides a chain of trust to -let users verify that they're actually talking to the right servers. -There are four pieces to this trust chain. - -<div class="p"><!----></div> -First, when Tor clients are establishing circuits, at each step -they demand that the next Tor server in the path prove knowledge of -its private key [<a href="#tor-design" name="CITEtor-design">11</a>]. This step prevents the first node -in the path from just spoofing the rest of the path. Second, the -Tor directory authorities provide a signed list of servers along with -their public keys — so unless the adversary can control a threshold -of directory authorities, he can't trick the Tor client into using other -Tor servers. Third, the location and keys of the directory authorities, -in turn, are hard-coded in the Tor source code — so as long as the user -got a genuine version of Tor, he can know that he is using the genuine -Tor network. 
And last, the source code and other packages are signed -with the GPG keys of the Tor developers, so users can confirm that they -did in fact download a genuine version of Tor. - -<div class="p"><!----></div> -In the case of blocked users contacting bridges and bridge directory -authorities, the same logic applies in parallel: the blocked users fetch -information from both the bridge authorities and the directory authorities -for the `main' Tor network, and they combine this information locally. - -<div class="p"><!----></div> -How can a user in an oppressed country know that he has the correct -key fingerprints for the developers? As with other security systems, it -ultimately comes down to human interaction. The keys are signed by dozens -of people around the world, and we have to hope that our users have met -enough people in the PGP web of trust -that they can learn -the correct keys. For users that aren't connected to the global security -community, though, this question remains a critical weakness. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h2><a name="tth_sEc9"> -<a name="sec:reachability"> -9</a> Maintaining reachability</h2> -</a> - -<div class="p"><!----></div> - <h3><a name="tth_sEc9.1"> -9.1</a> How many bridge relays should you know about?</h3> - -<div class="p"><!----></div> -The strategies described in Section <a href="#sec:discovery">7</a> talked about -learning one bridge address at a time. But if most bridges are ordinary -Tor users on cable modem or DSL connections, many of them will disappear -and/or move periodically. How many bridge relays should a blocked user -know about so that she is likely to have at least one reachable at any -given point? This is already a challenging problem if we only consider -natural churn: the best approach is to see what bridges we attract in -reality and measure their churn. 
We may also need to factor in a parameter -for how quickly bridges get discovered and blocked by the attacker; -we leave this for future work after we have more deployment experience. - -<div class="p"><!----></div> -A related question is: if the bridge relays change IP addresses -periodically, how often does the blocked user need to fetch updates in -order to keep from being cut out of the loop? - -<div class="p"><!----></div> -Once we have more experience and intuition, we should explore technical -solutions to this problem too. For example, if the discovery strategies -give out k bridge addresses rather than a single bridge address, perhaps -we can improve robustness from the user perspective without significantly -aiding the adversary. Rather than giving out a new random subset of k -addresses at each point, we could bind them together into <em>bridge -families</em>, so all users that learn about one member of the bridge family -are told about the rest as well. - -<div class="p"><!----></div> -This scheme may also help defend against attacks to map the set of -bridges. That is, if all blocked users learn a random subset of bridges, -the attacker should learn about a few bridges, monitor the country-level -firewall for connections to them, then watch those users to see what -other bridges they use, and repeat. By segmenting the bridge address -space, we can limit the exposure of other users. - -<div class="p"><!----></div> - <h3><a name="tth_sEc9.2"> -<a name="subsec:block-cable"> -9.2</a> Cablemodem users don't usually provide important websites</h3> -</a> - -<div class="p"><!----></div> -Another attack we might be concerned about is that the attacker could -just block all DSL and cablemodem network addresses, on the theory that -they don't run any important services anyway. If most of our bridges -are on these networks, this attack could really hurt.
- -<div class="p"><!----></div> -The first answer is to aim to get volunteers both from traditionally -"consumer" networks and also from traditionally "producer" networks. -Since bridges don't need to be Tor exit nodes, as we improve our usability -it seems quite feasible to get a lot of websites helping out. - -<div class="p"><!----></div> -The second answer (not as practical) would be to encourage more use of -consumer networks for popular and useful Internet services. - -<div class="p"><!----></div> -A related attack we might worry about is based on large countries putting -economic pressure on companies that want to expand their business. For -example, what happens if Verizon wants to sell services in China, and -China pressures Verizon to discourage its users in the free world from -running bridges? - -<div class="p"><!----></div> - <h3><a name="tth_sEc9.3"> -9.3</a> Scanning resistance: making bridges more subtle</h3> - -<div class="p"><!----></div> -If it's trivial to verify that a given address is operating as a bridge, -and most bridges run on a predictable port, then it's conceivable our -attacker could scan the whole Internet looking for bridges. (In fact, -he can just concentrate on scanning likely networks like cablemodem -and DSL services — see Section <a href="#subsec:block-cable">9.2</a> -above for -related attacks.) It would be nice to slow down this attack. It would -be even nicer to make it hard to learn whether we're a bridge without -first knowing some secret. We call this general property <em>scanning -resistance</em>, and it goes along with normalizing Tor's TLS handshake and -network fingerprint. - -<div class="p"><!----></div> -We could provide a password to the blocked user, and she (or her Tor -client) provides a nonced hash of this password when she connects. 
We'd -need to give her an ID key for the bridge too (in addition to the IP -address and port — see Section <a href="#subsec:id-address">6.1</a>), and wait to -present the password until we've finished the TLS handshake, else it -would look unusual. If Alice can authenticate the bridge before she -tries to send her password, we can resist an adversary who pretends -to be the bridge and launches a man-in-the-middle attack to learn the -password. But even if she can't, we still resist against widespread -scanning. - -<div class="p"><!----></div> -How should the bridge behave if accessed without the correct -authorization? Perhaps it should act like an unconfigured HTTPS server -("welcome to the default Apache page"), or maybe it should mirror -and act like common websites, or websites randomly chosen from Google. - -<div class="p"><!----></div> -We might assume that the attacker can recognize HTTPS connections that -use self-signed certificates. (This process would be resource-intensive -but not out of the realm of possibility.) But even in this case, many -popular websites around the Internet use self-signed or just plain broken -SSL certificates. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h3><a name="tth_sEc9.4"> -<a name="subsec:incentives"> -9.4</a> How to motivate people to run bridge relays</h3> -</a> - -<div class="p"><!----></div> -One of the traditional ways to get people to run software that benefits -others is to give them motivation to install it themselves. An often -suggested approach is to install it as a stunning screensaver so everybody -will be pleased to run it. We take a similar approach here, by leveraging -the fact that these users are already interested in protecting their -own Internet traffic, so they will install and run the software. 
- -<div class="p"><!----></div> -Eventually, we may be able to make all Tor users become bridges if they -pass their self-reachability tests — the software and installers need -more work on usability first, but we're making progress. - -<div class="p"><!----></div> -In the mean time, we can make a snazzy network graph with -Vidalia<a href="#tthFtNtAAD" name="tthFrefAAD"><sup>3</sup></a> that -emphasizes the connections the bridge user is currently relaying. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h3><a name="tth_sEc9.5"> -<a name="subsec:publicity"> -9.5</a> Publicity attracts attention</h3> -</a> - -<div class="p"><!----></div> -Many people working in this field want to publicize the existence -and extent of censorship concurrently with the deployment of their -circumvention software. The easy reason for this two-pronged push is -to attract volunteers for running proxies in their systems; but in many -cases their main goal is not to focus on actually allowing individuals -to circumvent the firewall, but rather to educate the world about the -censorship. The media also tries to do its part by broadcasting the -existence of each new circumvention system. - -<div class="p"><!----></div> -But at the same time, this publicity attracts the attention of the -censors. We can slow down the arms race by not attracting as much -attention, and just spreading by word of mouth. If our goal is to -establish a solid social network of bridges and bridge users before -the adversary gets involved, does this extra attention work to our -disadvantage?
- -<div class="p"><!----></div> - <h3><a name="tth_sEc9.6"> -9.6</a> The Tor website: how to get the software</h3> - -<div class="p"><!----></div> -One of the first censoring attacks against a system like ours is to -block the website and make the software itself hard to find. Our system -should work well once the user is running an authentic -copy of Tor and has found a working bridge, but to get to that point -we rely on their individual skills and ingenuity. - -<div class="p"><!----></div> -Right now, most countries that block access to Tor block only the main -website and leave mirrors and the network itself untouched. -Falling back on word-of-mouth is always a good last resort, but we should -also take steps to make sure it's relatively easy for users to get a copy, -such as publicizing the mirrors more and making copies available through -other media. We might also mirror the latest version of the software on -each bridge, so users who hear about an honest bridge can get a good -copy. -See Section <a href="#subsec:first-bridge">7.1</a> for more discussion. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h2><a name="tth_sEc10"> -<a name="sec:future"> -10</a> Future designs</h2> -</a> - -<div class="p"><!----></div> - <h3><a name="tth_sEc10.1"> -10.1</a> Bridges inside the blocked network too</h3> - -<div class="p"><!----></div> -Assuming actually crossing the firewall is the risky part of the -operation, can we have some bridge relays inside the blocked area too, -and more established users can use them as relays so they don't need to -communicate over the firewall directly at all? A simple example here is -to make new blocked users into internal bridges also — so they sign up -on the bridge authority as part of doing their query, and we give out -their addresses -rather than (or along with) the external bridge addresses. 
This design -is a lot trickier because it brings in the complexity of whether the -internal bridges will remain available, can maintain reachability with -the outside world, etc. - -<div class="p"><!----></div> -More complex future designs involve operating a separate Tor network -inside the blocked area, and using <em>hidden service bridges</em> — bridges -that can be accessed by users of the internal Tor network but whose -addresses are not published or findable, even by these users — to get -from inside the firewall to the rest of the Internet. But this design -requires directory authorities to run inside the blocked area too, -and they would be a fine target to take down the network. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h2><a name="tth_sEc11"> -<a name="sec:conclusion"> -11</a> Next Steps</h2> -</a> - -<div class="p"><!----></div> -Technical solutions won't solve the whole censorship problem. After all, -the firewalls in places like China are <em>socially</em> very -successful, even if technologies and tricks exist to get around them. -However, having a strong technical solution is still necessary as one -important piece of the puzzle. - -<div class="p"><!----></div> -In this paper, we have shown that Tor provides a great set of building -blocks to start from. The next steps are to deploy prototype bridges and -bridge authorities, implement some of the proposed discovery strategies, -and then observe the system in operation and get more intuition about -the actual requirements and adversaries we're up against. - -<div class="p"><!----></div> - -<h2>References</h2> - -<dl compact="compact"> - <dt><a href="#CITEeconymics" name="econymics">[1]</a></dt><dd> -Alessandro Acquisti, Roger Dingledine, and Paul Syverson. - On the economics of anonymity. - In Rebecca N. Wright, editor, <em>Financial Cryptography</em>. - Springer-Verlag, LNCS 2742, 2003. 
- -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEfreedom21-security" name="freedom21-security">[2]</a></dt><dd> -Adam Back, Ian Goldberg, and Adam Shostack. - Freedom systems 2.1 security issues and analysis. - White paper, Zero Knowledge Systems, Inc., May 2001. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEweb-mix" name="web-mix">[3]</a></dt><dd> -Oliver Berthold, Hannes Federrath, and Stefan Köpsell. - Web MIXes: A system for anonymous and unobservable Internet - access. - In H. Federrath, editor, <em>Designing Privacy Enhancing - Technologies: Workshop on Design Issue in Anonymity and Unobservability</em>. - Springer-Verlag, LNCS 2009, 2000. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEpet05-bissias" name="pet05-bissias">[4]</a></dt><dd> -George Dean Bissias, Marc Liberatore, and Brian Neil Levine. - Privacy vulnerabilities in encrypted http streams. - In <em>Proceedings of Privacy Enhancing Technologies workshop (PET - 2005)</em>, May 2005. - - <a href="http://prisms.cs.umass.edu/brian/pubs/bissias.liberatore.pet.2005.pdf"><tt>http://prisms.cs.umass.edu/brian/pubs/bissias.liberatore.pet.2005.pdf</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEchaum-blind" name="chaum-blind">[5]</a></dt><dd> -David Chaum. - Blind signatures for untraceable payments. - In D. Chaum, R.L. Rivest, and A.T. Sherman, editors, <em>Advances in - Cryptology: Proceedings of Crypto 82</em>, pages 199-203. Plenum Press, 1983. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEfreenet-pets00" name="freenet-pets00">[6]</a></dt><dd> -Ian Clarke, Oskar Sandberg, Brandon Wiley, and Theodore W. Hong. - Freenet: A distributed anonymous information storage and retrieval - system. - In H. Federrath, editor, <em>Designing Privacy Enhancing - Technologies: Workshop on Design Issue in Anonymity and Unobservability</em>, - pages 46-66. Springer-Verlag, LNCS 2009, July 2000. 
- -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEclayton:pet2006" name="clayton:pet2006">[7]</a></dt><dd> -Richard Clayton, Steven J. Murdoch, and Robert N. M. Watson. - Ignoring the great firewall of china. - In <em>Proceedings of the Sixth Workshop on Privacy Enhancing - Technologies (PET 2006)</em>, Cambridge, UK, June 2006. Springer. - <a href="http://www.cl.cam.ac.uk/~rnc1/ignoring.pdf"><tt>http://www.cl.cam.ac.uk/~rnc1/ignoring.pdf</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEdanezis:pet2004" name="danezis:pet2004">[8]</a></dt><dd> -George Danezis. - The traffic analysis of continuous-time mixes. - In David Martin and Andrei Serjantov, editors, <em>Privacy Enhancing - Technologies (PET 2004)</em>, LNCS, May 2004. - <a href="http://www.cl.cam.ac.uk/users/gd216/cmm2.pdf"><tt>http://www.cl.cam.ac.uk/users/gd216/cmm2.pdf</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEusability:weis2006" name="usability:weis2006">[9]</a></dt><dd> -Roger Dingledine and Nick Mathewson. - Anonymity loves company: Usability and the network effect. - In <em>Proceedings of the Fifth Workshop on the Economics of - Information Security (WEIS 2006)</em>, Cambridge, UK, June 2006. - <a href="http://freehaven.net/doc/wupss04/usability.pdf"><tt>http://freehaven.net/doc/wupss04/usability.pdf</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITErep-anon" name="rep-anon">[10]</a></dt><dd> -Roger Dingledine, Nick Mathewson, and Paul Syverson. - Reputation in P2P Anonymity Systems. - In <em>Proceedings of Workshop on Economics of Peer-to-Peer - Systems</em>, June 2003. - <a href="http://freehaven.net/doc/econp2p03/econp2p03.pdf"><tt>http://freehaven.net/doc/econp2p03/econp2p03.pdf</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEtor-design" name="tor-design">[11]</a></dt><dd> -Roger Dingledine, Nick Mathewson, and Paul Syverson. - Tor: The second-generation onion router. 
- In <em>Proceedings of the 13th USENIX Security Symposium</em>, August - 2004. - <a href="http://tor.eff.org/tor-design.pdf"><tt>http://tor.eff.org/tor-design.pdf</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEcasc-rep" name="casc-rep">[12]</a></dt><dd> -Roger Dingledine and Paul Syverson. - Reliable MIX Cascade Networks through Reputation. - In Matt Blaze, editor, <em>Financial Cryptography</em>. Springer-Verlag, - LNCS 2357, 2002. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEpsiphon" name="psiphon">[13]</a></dt><dd> -Ronald Deibert et al. - Psiphon. - <a href="http://psiphon.civisec.org/"><tt>http://psiphon.civisec.org/</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEinfranet" name="infranet">[14]</a></dt><dd> -Nick Feamster, Magdalena Balazinska, Greg Harfst, Hari Balakrishnan, and David - Karger. - Infranet: Circumventing web censorship and surveillance. - In <em>Proceedings of the 11th USENIX Security Symposium</em>, August - 2002. - <a href="http://nms.lcs.mit.edu/~feamster/papers/usenixsec2002.pdf"><tt>http://nms.lcs.mit.edu/~feamster/papers/usenixsec2002.pdf</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEactive-wardens" name="active-wardens">[15]</a></dt><dd> -Gina Fisk, Mike Fisk, Christos Papadopoulos, and Joshua Neil. - Eliminating steganography in internet traffic with active wardens. - In Fabien Petitcolas, editor, <em>Information Hiding Workshop (IH - 2002)</em>. Springer-Verlag, LNCS 2578, October 2002. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEblossom-thesis" name="blossom-thesis">[16]</a></dt><dd> -Geoffrey Goodell. - <em>Perspective Access Networks</em>. - PhD thesis, Harvard University, July 2006. - <a href="http://afs.eecs.harvard.edu/~goodell/thesis.pdf"><tt>http://afs.eecs.harvard.edu/~goodell/thesis.pdf</tt></a>. 
- -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEgoodell-syverson06" name="goodell-syverson06">[17]</a></dt><dd> -Geoffrey Goodell and Paul Syverson. - The right place at the right time: The use of network location in - authentication and abuse prevention, 2006. - Submitted. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEcircumventor" name="circumventor">[18]</a></dt><dd> -Bennett Haselton. - How to install the Circumventor program. - - <a href="http://www.peacefire.org/circumventor/simple-circumventor-instructions.html"><tt>http://www.peacefire.org/circumventor/simple-circumventor-instructions.html</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEip-to-country" name="ip-to-country">[19]</a></dt><dd> -Ip-to-country database. - <a href="http://ip-to-country.webhosting.info/"><tt>http://ip-to-country.webhosting.info/</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEkoepsell:wpes2004" name="koepsell:wpes2004">[20]</a></dt><dd> -Stefan Köpsell and Ulf Hilling. - How to achieve blocking resistance for existing systems enabling - anonymous web surfing. - In <em>Proceedings of the Workshop on Privacy in the Electronic - Society (WPES 2004)</em>, Washington, DC, USA, October 2004. - <a href="http://freehaven.net/anonbib/papers/p103-koepsell.pdf"><tt>http://freehaven.net/anonbib/papers/p103-koepsell.pdf</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEdefensive-dropping" name="defensive-dropping">[21]</a></dt><dd> -Brian N. Levine, Michael K. Reiter, Chenxi Wang, and Matthew Wright. - Timing analysis in low-latency mix-based systems. - In Ari Juels, editor, <em>Financial Cryptography</em>. Springer-Verlag, - LNCS (forthcoming), 2004. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEmackinnon-personal" name="mackinnon-personal">[22]</a></dt><dd> -Rebecca MacKinnon. - Private communication, 2006. 
- -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEcgiproxy" name="cgiproxy">[23]</a></dt><dd> -James Marshall. - CGIProxy: HTTP/FTP Proxy in a CGI Script. - <a href="http://www.jmarshall.com/tools/cgiproxy/"><tt>http://www.jmarshall.com/tools/cgiproxy/</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEe2e-traffic" name="e2e-traffic">[24]</a></dt><dd> -Nick Mathewson and Roger Dingledine. - Practical traffic analysis: Extending and resisting statistical - disclosure. - In David Martin and Andrei Serjantov, editors, <em>Privacy Enhancing - Technologies (PET 2004)</em>, LNCS, May 2004. - <a href="http://freehaven.net/doc/e2e-traffic/e2e-traffic.pdf"><tt>http://freehaven.net/doc/e2e-traffic/e2e-traffic.pdf</tt></a>. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEattack-tor-oak05" name="attack-tor-oak05">[25]</a></dt><dd> -Steven J. Murdoch and George Danezis. - Low-cost traffic analysis of tor. - In <em>IEEE Symposium on Security and Privacy</em>. IEEE CS, May 2005. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEtcpstego" name="tcpstego">[26]</a></dt><dd> -Steven J. Murdoch and Stephen Lewis. - Embedding covert channels into TCP/IP. - In Mauro Barni, Jordi Herrera-Joancomartí, Stefan Katzenbeisser, - and Fernando Pérez-González, editors, <em>Information Hiding: 7th - International Workshop</em>, volume 3727 of <em>LNCS</em>, pages 247-261, - Barcelona, Catalonia (Spain), June 2005. Springer-Verlag. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEptacek98insertion" name="ptacek98insertion">[27]</a></dt><dd> -Thomas H. Ptacek and Timothy N. Newsham. - Insertion, evasion, and denial of service: Eluding network intrusion - detection. - Technical report, Secure Networks, Inc., Suite 330, 1201 5th Street - S.W, Calgary, Alberta, Canada, T2R-0Y6, 1998. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEzuckerman-threatmodels" name="zuckerman-threatmodels">[28]</a></dt><dd> -Ethan Zuckerman. 
- We've got to adjust some of our threat models. - <a href="http://www.ethanzuckerman.com/blog/?p=1019"><tt>http://www.ethanzuckerman.com/blog/?p=1019</tt></a>.</dd> -</dl> - - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> -<hr /><h3>Footnotes:</h3> - -<div class="p"><!----></div> -<a name="tthFtNtAAB"></a><a href="#tthFrefAAB"><sup>1</sup></a>So far in places - like China, the authorities mainly go after people who publish materials - and coordinate organized movements [<a href="#mackinnon-personal" name="CITEmackinnon-personal">22</a>]. - If they find that a - user happens to be reading a site that should be blocked, the typical - response is simply to block the site. Of course, even with an encrypted - connection, the adversary may be able to distinguish readers from - publishers by observing whether Alice is mostly downloading bytes or mostly - uploading them — we discuss this issue more in - Section <a href="#subsec:upload-padding">8.2</a>. 
-<div class="p"><!----></div> -<a name="tthFtNtAAC"></a><a href="#tthFrefAAC"><sup>2</sup></a><a href="http://wiki.noreply.org/noreply/TheOnionRouter/TorFAQ#EntryGuards"><tt>http://wiki.noreply.org/noreply/TheOnionRouter/TorFAQ#EntryGuards</tt></a> -<div class="p"><!----></div> -<a name="tthFtNtAAD"></a><a href="#tthFrefAAD"><sup>3</sup></a><a href="http://vidalia-project.net/"><tt>http://vidalia-project.net/</tt></a> -<br /><br /><hr /><small>File translated from -T<sub><font size="-1">E</font></sub>X -by <a href="http://hutchinson.belmont.ma.us/tth/"> -T<sub><font size="-1">T</font></sub>H</a>, -version 3.77.<br />On 11 May 2007, 21:49.</small> -</html> - diff --git a/doc/design-paper/blocking.pdf b/doc/design-paper/blocking.pdf Binary files differdeleted file mode 100644 index 1ee0eb0bbd..0000000000 --- a/doc/design-paper/blocking.pdf +++ /dev/null diff --git a/doc/design-paper/blocking.tex b/doc/design-paper/blocking.tex deleted file mode 100644 index 3b7d05ca57..0000000000 --- a/doc/design-paper/blocking.tex +++ /dev/null @@ -1,1894 +0,0 @@ -%\documentclass{llncs} -\documentclass{usenixsubmit} -%\documentclass[twocolumn]{article} -%usepackage{usenix} - -\usepackage{url} -\usepackage{amsmath} -\usepackage{epsfig} - -\setlength{\textwidth}{6.0in} -\setlength{\textheight}{8.5in} -\setlength{\topmargin}{.5cm} -\setlength{\oddsidemargin}{1cm} -\setlength{\evensidemargin}{1cm} - -\newenvironment{tightlist}{\begin{list}{$\bullet$}{ - \setlength{\itemsep}{0mm} - \setlength{\parsep}{0mm} - % \setlength{\labelsep}{0mm} - % \setlength{\labelwidth}{0mm} - % \setlength{\topsep}{0mm} - }}{\end{list}} - -\newcommand{\workingnote}[1]{} % The version that hides the note. -%\newcommand{\workingnote}[1]{(**#1)} % makes the note visible.
- -\date{} -\title{Design of a blocking-resistant anonymity system\\DRAFT} - -%\author{Roger Dingledine\inst{1} \and Nick Mathewson\inst{1}} -\author{Roger Dingledine \\ The Tor Project \\ arma@torproject.org \and -Nick Mathewson \\ The Tor Project \\ nickm@torproject.org} - -\begin{document} -\maketitle -\pagestyle{plain} - -\begin{abstract} - -Internet censorship is on the rise as websites around the world are -increasingly blocked by government-level firewalls. Although popular -anonymizing networks like Tor were originally designed to keep attackers from -tracing people's activities, many people are also using them to evade local -censorship. But if the censor simply denies access to the Tor network -itself, blocked users can no longer benefit from the security Tor offers. - -Here we describe a design that builds upon the current Tor network -to provide an anonymizing network that resists blocking -by government-level attackers. We have implemented and deployed this -design, and talk briefly about early use. - -\end{abstract} - -\section{Introduction} - -Anonymizing networks like Tor~\cite{tor-design} bounce traffic around a -network of encrypting relays. Unlike encryption, which hides only {\it what} -is said, these networks also aim to hide who is communicating with whom, which -users are using which websites, and so on. These systems have a -broad range of users, including ordinary citizens who want to avoid being -profiled for targeted advertisements, corporations who don't want to reveal -information to their competitors, and law enforcement and government -intelligence agencies who need to do operations on the Internet without being -noticed. - -Historical anonymity research has focused on an -attacker who monitors the user (call her Alice) and tries to discover her -activities, yet lets her reach any piece of the network. 
In more modern -threat models such as Tor's, the adversary is allowed to perform active -attacks such as modifying communications to trick Alice -into revealing her destination, or intercepting some connections -to run a man-in-the-middle attack. But these systems still assume that -Alice can eventually reach the anonymizing network. - -An increasing number of users are using the Tor software -less for its anonymity properties than for its censorship -resistance properties---if they use Tor to access Internet sites like -Wikipedia -and Blogspot, they are no longer affected by local censorship -and firewall rules. In fact, an informal user study -%(described in Appendix~\ref{app:geoip}) -showed that a few hundred thousand people access the Tor network -each day, with about 20\% of them coming from China~\cite{something}. - -The current Tor design is easy to block if the attacker controls Alice's -connection to the Tor network---by blocking the directory authorities, -by blocking all the relay IP addresses in the directory, or by filtering -based on the network fingerprint of the Tor TLS handshake. Here we -describe an -extended design that builds upon the current Tor network to provide an -anonymizing -network that resists censorship as well as anonymity-breaking attacks. -In section~\ref{sec:adversary} we discuss our threat model---that is, -the assumptions we make about our adversary. Section~\ref{sec:current-tor} -describes the components of the current Tor design and how they can be -leveraged for a new blocking-resistant design. Section~\ref{sec:related} -explains the features and drawbacks of the currently deployed solutions. -In sections~\ref{sec:bridges} through~\ref{sec:discovery}, we explore the -components of our designs in detail. Section~\ref{sec:security} considers -security implications and Section~\ref{sec:reachability} presents other -issues with maintaining connectivity and sustainability for the design.
-%Section~\ref{sec:future} speculates about future more complex designs, -Finally section~\ref{sec:conclusion} summarizes our next steps and -recommendations. - -% The other motivation is for places where we're concerned they will -% try to enumerate a list of Tor users. So even if they're not blocking -% the Tor network, it may be smart to not be visible as connecting to it. - -%And adding more different classes of users and goals to the Tor network -%improves the anonymity for all Tor users~\cite{econymics,usability:weis2006}. - -% Adding use classes for countering blocking as well as anonymity has -% benefits too. Should add something about how providing undetected -% access to Tor would facilitate people talking to, e.g., govt. authorities -% about threats to public safety etc. in an environment where Tor use -% is not otherwise widespread and would make one stand out. - -\section{Adversary assumptions} -\label{sec:adversary} - -To design an effective anti-censorship tool, we need a good model for the -goals and resources of the censors we are evading. Otherwise, we risk -spending our effort on keeping the adversaries from doing things they have no -interest in doing, and thwarting techniques they do not use. -The history of blocking-resistance designs is littered with conflicting -assumptions about what adversaries to expect and what problems are -in the critical path to a solution. Here we describe our best -understanding of the current situation around the world. - -In the traditional security style, we aim to defeat a strong -attacker---if we can defend against this attacker, we inherit protection -against weaker attackers as well. After all, we want a general design -that will work for citizens of China, Thailand, and other censored -countries; for -whistleblowers in firewalled corporate networks; and for people in -unanticipated oppressive situations. 
In fact, by designing with -a variety of adversaries in mind, we can take advantage of the fact that -adversaries will be in different stages of the arms race at each location, -so an address blocked in one locale can still be useful in others. -We focus on an attacker with somewhat complex goals: - -\begin{tightlist} -\item The attacker would like to restrict the flow of certain kinds of - information, particularly when this information is seen as embarrassing to - those in power (such as information about rights violations or corruption), - or when it enables or encourages others to oppose them effectively (such as - information about opposition movements or sites that are used to organize - protests). -\item As a second-order effect, censors aim to chill citizens' behavior by - creating an impression that their online activities are monitored. -\item In some cases, censors make a token attempt to block a few sites for - obscenity, blasphemy, and so on, but their efforts here are mainly for - show. In other cases, they really do try hard to block such content. -\item Complete blocking (where nobody at all can ever download censored - content) is not a - goal. Attackers typically recognize that perfect censorship is not only - impossible, it is unnecessary: if ``undesirable'' information is known only - to a small few, further censoring efforts can be focused elsewhere. -\item Similarly, the censors do not attempt to shut down or block {\it - every} anti-censorship tool---merely the tools that are popular and - effective (because these tools impede the censors' information restriction - goals) and those tools that are highly visible (thus making the censors - look ineffectual to their citizens and their bosses). -\item Reprisal against {\it most} passive consumers of {\it most} kinds of - blocked information is also not a goal, given the broadness of most - censorship regimes. 
This seems borne out by fact.\footnote{So far in places - like China, the authorities mainly go after people who publish materials - and coordinate organized movements~\cite{mackinnon-personal}. - If they find that a - user happens to be reading a site that should be blocked, the typical - response is simply to block the site. Of course, even with an encrypted - connection, the adversary may be able to distinguish readers from - publishers by observing whether Alice is mostly downloading bytes or mostly - uploading them---we discuss this issue more in - Section~\ref{subsec:upload-padding}.} -\item Producers and distributors of targeted information are in much - greater danger than consumers; the attacker would like to not only block - their work, but identify them for reprisal. -\item The censors (or their governments) would like to have a working, useful - Internet. There are economic, political, and social factors that prevent - them from ``censoring'' the Internet by outlawing it entirely, or by - blocking access to all but a tiny list of sites. - Nevertheless, the censors {\it are} willing to block innocuous content - (like the bulk of a newspaper's reporting) in order to censor other content - distributed through the same channels (like that newspaper's coverage of - the censored country). -\end{tightlist} - -We assume there are three main technical network attacks in use by censors -currently~\cite{clayton:pet2006}: - -\begin{tightlist} -\item Block a destination or type of traffic by automatically searching for - certain strings or patterns in TCP packets. Offending packets can be - dropped, or can trigger a response like closing the - connection. -\item Block certain IP addresses or destination ports at a - firewall or other routing control point. -\item Intercept DNS requests and give bogus responses for certain - destination hostnames. -\end{tightlist} - -We assume the network firewall has limited CPU and memory per -connection~\cite{clayton:pet2006}. 
Against an adversary who could carefully -examine the contents of every packet and correlate the packets in every -stream on the network, we would need some stronger mechanism such as -steganography, which introduces its own -problems~\cite{active-wardens,tcpstego}. But we make a ``weak -steganography'' assumption here: to remain unblocked, it is necessary to -remain unobservable only by computational resources on par with a modern -router, firewall, proxy, or IDS. - -We assume that while various different regimes can coordinate and share -notes, there will be a time lag between one attacker learning how to overcome -a facet of our design and other attackers picking it up. (The most common -vector of transmission seems to be commercial providers of censorship tools: -once a provider adds a feature to meet one country's needs or requests, the -feature is available to all of the provider's customers.) Conversely, we -assume that insider attacks become a higher risk only after the early stages -of network development, once the system has reached a certain level of -success and visibility. - -We do not assume that government-level attackers are always uniform -across the country. For example, users of different ISPs in China -experience different censorship policies and mechanisms~\cite{china-ccs07}. -%there is no single centralized place in China -%that coordinates its specific censorship decisions and steps. - -We assume that the attacker may be able to use political and economic -resources to secure the cooperation of extraterritorial or multinational -corporations and entities in investigating information sources. -For example, the censors can threaten the service providers of -troublesome blogs with economic reprisals if they do not reveal the -authors' identities. - -We assume that our users have control over their hardware and -software---they don't have any spyware installed, there are no -cameras watching their screens, etc. 
Unfortunately, in many situations -these threats are real~\cite{zuckerman-threatmodels}; yet -software-based security systems like ours are poorly equipped to handle -a user who is entirely observed and controlled by the adversary. See -Section~\ref{subsec:cafes-and-livecds} for more discussion of what little -we can do about this issue. - -Similarly, we assume that the user will be able to fetch a genuine -version of Tor, rather than one supplied by the adversary; see -Section~\ref{subsec:trust-chain} for discussion on helping the user -confirm that he has a genuine version and that he can connect to the -real Tor network. - -\section{Adapting the current Tor design to anti-censorship} -\label{sec:current-tor} - -Tor is popular and sees a lot of use---it's the largest anonymity -network of its kind, and has -attracted more than 1500 volunteer-operated routers from around the -world. Tor protects each user by routing their traffic through a multiply -encrypted ``circuit'' built of a few randomly selected relays, each of which -can remove only a single layer of encryption. Each relay sees only the step -before it and the step after it in the circuit, and so no single relay can -learn the connection between a user and her chosen communication partners. -In this section, we examine some of the reasons why Tor has become popular, -with particular emphasis on how we can take advantage of these properties -for a blocking-resistance design. - -Tor aims to provide three security properties: -\begin{tightlist} -\item 1. A local network attacker can't learn, or influence, your -destination. -\item 2. No single router in the Tor network can link you to your -destination. -\item 3. The destination, or somebody watching the destination, -can't learn your location. -\end{tightlist} - -For blocking-resistance, we care most clearly about the first -property.
But as the arms race progresses, the second property -will become important---for example, to discourage an adversary -from volunteering a relay in order to learn that Alice is reading -or posting to certain websites. The third property helps keep users safe from -collaborating websites: consider websites and other Internet services -that have been pressured -recently into revealing the identity of bloggers -%~\cite{arrested-bloggers} -or treating clients differently depending on their network -location~\cite{netauth}. -%~\cite{google-geolocation}. - -The Tor design provides other features as well that are not typically -present in manual or ad hoc circumvention techniques. - -First, Tor has a well-analyzed and well-understood way to distribute -information about relays. -Tor directory authorities automatically aggregate, test, -and publish signed summaries of the available Tor routers. Tor clients -can fetch these summaries to learn which routers are available and -which routers are suitable for their needs. Directory information is cached -throughout the Tor network, so once clients have bootstrapped they never -need to interact with the authorities directly. (To tolerate a minority -of compromised directory authorities, we use a threshold trust scheme--- -see Section~\ref{subsec:trust-chain} for details.) - -Second, the list of directory authorities is not hard-wired. -Clients use the default authorities if no others are specified, -but it's easy to start a separate (or even overlapping) Tor network just -by running a different set of authorities and convincing users to prefer -a modified client. For example, we could launch a distinct Tor network -inside China; some users could even use an aggregate network made up of -both the main network and the China network.
(But we should not be too -quick to create other Tor networks---part of Tor's anonymity comes from -users behaving like other users, and there are many unsolved anonymity -questions if different users know about different pieces of the network.) - -Third, in addition to automatically learning from the chosen directories -which Tor routers are available and working, Tor takes care of building -paths through the network and rebuilding them as needed. So the user -never has to know how paths are chosen, never has to manually pick -working proxies, and so on. More generally, at its core the Tor protocol -is simply a tool that can build paths given a set of routers. Tor is -quite flexible about how it learns about the routers and how it chooses -the paths. Harvard's Blossom project~\cite{blossom-thesis} makes this -flexibility more concrete: Blossom makes use of Tor not for its security -properties but for its reachability properties. It runs a separate set -of directory authorities, its own set of Tor routers (called the Blossom -network), and uses Tor's flexible path-building to let users view Internet -resources from any point in the Blossom network. - -Fourth, Tor separates the role of \emph{internal relay} from the -role of \emph{exit relay}. That is, some volunteers choose just to relay -traffic between Tor users and Tor routers, and others choose to also allow -connections to external Internet resources. Because we don't force all -volunteers to play both roles, we end up with more relays. This increased -diversity in turn is what gives Tor its security: the more options the -user has for her first hop, and the more options she has for her last hop, -the less likely it is that a given attacker will be watching both ends -of her circuit~\cite{tor-design}. 
As a bonus, because our design attracts -more internal relays that want to help out but don't want to deal with -being an exit relay, we end up providing more options for the first -hop---the one most critical to being able to reach the Tor network. - -Fifth, Tor is sustainable. Zero-Knowledge Systems offered the commercial -but now defunct Freedom Network~\cite{freedom21-security}, a design with -security comparable to Tor's, but its funding model relied on collecting -money from users to pay relay operators. Modern commercial proxy systems -similarly -need to keep collecting money to support their infrastructure. On the -other hand, Tor has built a self-sustaining community of volunteers who -donate their time and resources. This community trust is rooted in Tor's -open design: we tell the world exactly how Tor works, and we provide all -the source code. Users can decide for themselves, or pay any security -expert to decide, whether it is safe to use. Further, Tor's modularity -as described above, along with its open license, mean that its impact -will continue to grow. - -Sixth, Tor has an established user base of hundreds of -thousands of people from around the world. This diversity of -users contributes to sustainability as above: Tor is used by -ordinary citizens, activists, corporations, law enforcement, and -even government and military users, -%\footnote{\url{https://www.torproject.org/overview}} -and they can -only achieve their security goals by blending together in the same -network~\cite{econymics,usability:weis2006}. This user base also provides -something else: hundreds of thousands of different and often-changing -addresses that we can leverage for our blocking-resistance design. - -Finally and perhaps most importantly, Tor provides anonymity and prevents any -single relay from linking users to their communication partners. Despite -initial appearances, {\it distributed-trust anonymity is critical for -anti-censorship efforts}. 
If any single relay can expose dissident bloggers -or compile a list of users' behavior, the censors can profitably compromise -that relay's operator, perhaps by applying economic pressure to their -employers, -breaking into their computer, pressuring their family (if they have relatives -in the censored area), or so on. Furthermore, in designs where any relay can -expose its users, the censors can spread suspicion that they are running some -of the relays and use this belief to chill use of the network. - -We discuss and adapt these components further in -Section~\ref{sec:bridges}. But first we examine the strengths and -weaknesses of other blocking-resistance approaches, so we can expand -our repertoire of building blocks and ideas. - -\section{Current proxy solutions} -\label{sec:related} - -Relay-based blocking-resistance schemes generally have two main -components: a relay component and a discovery component. The relay part -encompasses the process of establishing a connection, sending traffic -back and forth, and so on---everything that's done once the user knows -where she's going to connect. Discovery is the step before that: the -process of finding one or more usable relays. - -For example, we can divide the pieces of Tor in the previous section -into the process of building paths and sending -traffic over them (relay) and the process of learning from the directory -authorities about what routers are available (discovery). With this -distinction -in mind, we now examine several categories of relay-based schemes. - -\subsection{Centrally-controlled shared proxies} - -Existing commercial anonymity solutions (like Anonymizer.com) are based -on a set of single-hop proxies. In these systems, each user connects to -a single proxy, which then relays traffic between the user and her -destination. 
These public proxy -systems are typically characterized by two features: they control and -operate the proxies centrally, and many different users get assigned -to each proxy. - -In terms of the relay component, single proxies provide weak security -compared to systems that distribute trust over multiple relays, since a -compromised proxy can trivially observe all of its users' actions, and -an eavesdropper only needs to watch a single proxy to perform timing -correlation attacks against all its users' traffic and thus learn where -everyone is connecting. Worse, all users -need to trust the proxy company to have good security itself as well as -to not reveal user activities. - -On the other hand, single-hop proxies are easier to deploy, and they -can provide better performance than distributed-trust designs like Tor, -since traffic only goes through one relay. They're also more convenient -from the user's perspective---since users entirely trust the proxy, -they can just use their web browser directly. - -Whether public proxy schemes are more or less scalable than Tor is -still up for debate: commercial anonymity systems can use some of their -revenue to provision more bandwidth as they grow, whereas volunteer-based -anonymity systems can attract thousands of fast relays to spread the load. - -The discovery piece can take several forms. Most commercial anonymous -proxies have one or a handful of commonly known websites, and their users -log in to those websites and relay their traffic through them. When -these websites get blocked (generally soon after the company becomes -popular), if the company cares about users in the blocked areas, they -start renting lots of disparate IP addresses and rotating through them -as they get blocked. They notify their users of new addresses (by email, -for example). 
It's an arms race, since attackers can sign up to receive the -email too, but operators have one nice trick available to them: because they -have a list of paying subscribers, they can notify certain subscribers -about updates earlier than others. - -Access control systems on the proxy let them provide service only to -users with certain characteristics, such as paying customers or people -from certain IP address ranges. - -Discovery in the face of a government-level firewall is a complex and -unsolved -topic, and we're stuck in this same arms race ourselves; we explore it -in more detail in Section~\ref{sec:discovery}. But first we examine the -other end of the spectrum---getting volunteers to run the proxies, -and telling only a few people about each proxy. - -\subsection{Independent personal proxies} - -Personal proxies such as Circumventor~\cite{circumventor} and -CGIProxy~\cite{cgiproxy} use the same technology as the public ones as -far as the relay component goes, but they use a different strategy for -discovery. Rather than managing a few centralized proxies and constantly -getting new addresses for them as the old addresses are blocked, they -aim to have a large number of entirely independent proxies, each managing -its own (much smaller) set of users. - -As the Circumventor site explains, ``You don't -actually install the Circumventor \emph{on} the computer that is blocked -from accessing Web sites. You, or a friend of yours, has to install the -Circumventor on some \emph{other} machine which is not censored.'' - -This tactic has great advantages in terms of blocking-resistance---recall -our assumption in Section~\ref{sec:adversary} that the attention -a system attracts from the attacker is proportional to its number of -users and level of publicity. If each proxy only has a few users, and -there is no central list of proxies, most of them will never get noticed by -the censors. 
- -On the other hand, there's a huge scalability question that so far has -prevented these schemes from being widely useful: how does the fellow -in China find a person in Ohio who will run a Circumventor for him? In -some cases he may know and trust some people on the outside, but in many -cases he's just out of luck. Just as hard, how does a new volunteer in -Ohio find a person in China who needs it? - -% another key feature of a proxy run by your uncle is that you -% self-censor, so you're unlikely to bring abuse complaints onto -% your uncle. self-censoring clearly has a downside too, though. - -This challenge leads to a hybrid design---centrally-distributed -personal proxies---which we will investigate in more detail in -Section~\ref{sec:discovery}. - -\subsection{Open proxies} - -Yet another currently used approach to bypassing firewalls is to locate -open and misconfigured proxies on the Internet. A quick Google search -for ``open proxy list'' yields a wide variety of freely available lists -of HTTP, HTTPS, and SOCKS proxies. Many small companies have sprung up -providing more refined lists to paying customers. - -There are some downsides to using these open proxies though. First, -the proxies are of widely varying quality in terms of bandwidth and -stability, and many of them are entirely unreachable. Second, unlike -networks of volunteers like Tor, the legality of routing traffic through -these proxies is questionable: it's widely believed that most of them -don't realize what they're offering, and probably wouldn't allow it if -they realized. Third, in many cases the connection to the proxy is -unencrypted, so firewalls that filter based on keywords in IP packets -will not be hindered. Fourth, in many countries (including China), the -firewall authorities hunt for open proxies as well, to preemptively -block them. 
And last, many users are suspicious that some -open proxies are a little \emph{too} convenient: are they run by the -adversary, in which case they get to monitor all the user's requests -just as single-hop proxies can? - -A distributed-trust design like Tor resolves each of these issues for -the relay component, but a constantly changing set of thousands of open -relays is clearly a useful idea for a discovery component. For example, -users might be able to make use of these proxies to bootstrap their -first introduction into the Tor network. - -\subsection{Blocking resistance and JAP} - -K\"{o}psell and Hilling's Blocking Resistance -design~\cite{koepsell:wpes2004} is probably -the closest related work, and is the starting point for the design in this -paper. In this design, the JAP anonymity system~\cite{web-mix} is used -as a base instead of Tor. Volunteers operate a large number of access -points that relay traffic to the core JAP -network, which in turn anonymizes users' traffic. The software to run these -relays is, as in our design, included in the JAP client software and enabled -only when the user decides to enable it. Discovery is handled with a -CAPTCHA-based mechanism; users prove that they aren't an automated process, -and are given the address of an access point. (The problem of a determined -attacker with enough manpower to launch many requests and enumerate all the -access points is not considered in depth.) There is also some suggestion -that information about access points could spread through existing social -networks. - -\subsection{Infranet} - -The Infranet design~\cite{infranet} uses one-hop relays to deliver web -content, but disguises its communications as ordinary HTTP traffic. Requests -are split into multiple requests for URLs on the relay, which then encodes -its responses in the content it returns. 
The relay needs to be an actual -website with plausible content and a number of URLs which the user might want -to access---if the Infranet software produced its own cover content, it would -be far easier for censors to identify. To keep the censors from noticing -that cover content changes depending on what data is embedded, Infranet needs -the cover content to have an innocuous reason for changing frequently: the -paper recommends watermarked images and webcams. - -The attacker and relay operators in Infranet's threat model are significantly -different than in ours. Unlike our attacker, Infranet's censor can't be -bypassed with encrypted traffic (presumably because the censor blocks -encrypted traffic, or at least considers it suspicious), and has more -computational resources to devote to each connection than ours (so it can -notice subtle patterns over time). Unlike our bridge operators, Infranet's -operators (and users) have more bandwidth to spare; the overhead in typical -steganography schemes is far higher than Tor's. - -The Infranet design does not include a discovery element. Discovery, -however, is a critical point: if whatever mechanism allows users to learn -about relays also allows the censor to do so, he can trivially discover and -block their addresses, even if the steganography would prevent mere traffic -observation from revealing the relays' addresses. - -\subsection{RST-evasion and other packet-level tricks} - -In their analysis of China's firewall's content-based blocking, Clayton, -Murdoch and Watson discovered that rather than blocking all packets in a TCP -stream once a forbidden word was noticed, the firewall was simply forging -RST packets to make the communicating parties believe that the connection was -closed~\cite{clayton:pet2006}. They proposed altering operating systems -to ignore forged RST packets.
This approach might work in some cases, but -in practice it appears that many firewalls start filtering by IP address -once a sufficient number of RST packets have been sent. - -Other packet-level responses to filtering include splitting -sensitive words across multiple TCP packets, so that the censors' -firewalls can't notice them without performing expensive stream -reconstruction~\cite{ptacek98insertion}. This technique relies on the -same insight as our weak steganography assumption. - -%\subsection{Internal caching networks} - -%Freenet~\cite{freenet-pets00} is an anonymous peer-to-peer data store. -%Analyzing Freenet's security can be difficult, as its design is in flux as -%new discovery and routing mechanisms are proposed, and no complete -%specification has (to our knowledge) been written. Freenet servers relay -%requests for specific content (indexed by a digest of the content) -%``toward'' the server that hosts it, and then cache the content as it -%follows the same path back to -%the requesting user. If Freenet's routing mechanism is successful in -%allowing nodes to learn about each other and route correctly even as some -%node-to-node links are blocked by firewalls, then users inside censored areas -%can ask a local Freenet server for a piece of content, and get an answer -%without having to connect out of the country at all. Of course, operators of -%servers inside the censored area can still be targeted, and the addresses of -%external servers can still be blocked. - -%\subsection{Skype} - -%The popular Skype voice-over-IP software uses multiple techniques to tolerate -%restrictive networks, some of which allow it to continue operating in the -%presence of censorship. By switching ports and using encryption, Skype -%attempts to resist trivial blocking and content filtering. Even if no -%encryption were used, it would still be expensive to scan all voice -%traffic for sensitive words. Also, most current keyloggers are unable to -%store voice traffic. 
Nevertheless, Skype can still be blocked, especially at -%its central login server. - -%*sjmurdoch* "we consider the login server to be the only central component in -%the Skype p2p network." -%*sjmurdoch* http://www1.cs.columbia.edu/~salman/publications/skype1_4.pdf -%-> *sjmurdoch* ok. what is the login server's role? -%-> *sjmurdoch* and do you need to reach it directly to use skype? -%*sjmurdoch* It checks the username and password -%*sjmurdoch* It is necessary in the current implementation, but I don't know if -%it is a fundemental limitation of the architecture - -\subsection{Tor itself} - -And last, we include Tor itself in the list of current solutions -to firewalls. Tens of thousands of people use Tor from countries that -routinely filter their Internet. Tor's website has been blocked in most -of them. But why hasn't the Tor network been blocked yet? - -We have several theories. The first is the most straightforward: tens of -thousands of people are simply too few to matter. It may help that Tor is -perceived to be for experts only, and thus not worth attention yet. The -more subtle variant on this theory is that we've positioned Tor in the -public eye as a tool for retaining civil liberties in more free countries, -so perhaps blocking authorities don't view it as a threat. (We revisit -this idea when we consider whether and how to publicize a Tor variant -that improves blocking-resistance---see Section~\ref{subsec:publicity} -for more discussion.) - -The broader explanation is that the maintenance of most government-level -filters is aimed at stopping widespread information flow and appearing to be -in control, not at the impossible goal of blocking all possible ways to bypass -censorship. Censors realize that there will always -be ways for a few people to get around the firewall, and as long as Tor -has not publicly threatened their control, they see no urgent need to -block it yet. - -We should recognize that we're \emph{already} in the arms race.
These -constraints can give us insight into the priorities and capabilities of -our various attackers. - -\section{The relay component of our blocking-resistant design} -\label{sec:bridges} - -Section~\ref{sec:current-tor} describes many reasons why Tor is -well-suited as a building block in our context, but several changes will -allow the design to resist blocking better. The most critical changes are -to get more relay addresses, and to distribute them to users differently. - -%We need to address three problems: -%- adapting the relay component of Tor so it resists blocking better. -%- Discovery. -%- Tor's network fingerprint. - -%Here we describe the new pieces we need to add to the current Tor design. - -\subsection{Bridge relays} - -Today, Tor relays operate on a few thousand distinct IP addresses; -an adversary -could enumerate and block them all with little trouble. To provide a -means of ingress to the network, we need a larger set of entry points, most -of which an adversary won't be able to enumerate easily. Fortunately, we -have such a set: the Tor users. - -Hundreds of thousands of people around the world use Tor. We can leverage -our already self-selected user base to produce a list of thousands of -frequently-changing IP addresses. Specifically, we can give them a little -button in the GUI that says ``Tor for Freedom'', and users who click -the button will turn into \emph{bridge relays} (or just \emph{bridges} -for short). They can rate limit relayed connections to 10 KB/s (almost -nothing for a broadband user in a free country, but plenty for a user -who otherwise has no access at all), and since they are just relaying -bytes back and forth between blocked users and the main Tor network, they -won't need to make any external connections to Internet sites. 
Because -of this separation of roles, and because we're making use of software -that the volunteers have already installed for their own use, we expect -our scheme to attract and maintain more volunteers than previous schemes. - -As usual, there are new anonymity and security implications from running a -bridge relay, particularly from letting people relay traffic through your -Tor client; but we leave this discussion for Section~\ref{sec:security}. - -%...need to outline instructions for a Tor config that will publish -%to an alternate directory authority, and for controller commands -%that will do this cleanly. - -\subsection{The bridge directory authority} - -How do the bridge relays advertise their existence to the world? We -introduce a second new component of the design: a specialized directory -authority that aggregates and tracks bridges. Bridge relays periodically -publish relay descriptors (summaries of their keys, locations, etc, -signed by their long-term identity key), just like the relays in the -``main'' Tor network, but in this case they publish them only to the -bridge directory authorities. - -The main difference between bridge authorities and the directory -authorities for the main Tor network is that the main authorities provide -a list of every known relay, but the bridge authorities only give -out a relay descriptor if you already know its identity key. That is, -you can keep up-to-date on a bridge's location and other information -once you know about it, but you can't just grab a list of all the bridges. - -The identity key, IP address, and directory port for each bridge -authority ship by default with the Tor software, so the bridge relays -can be confident they're publishing to the right location, and the -blocked users can establish an encrypted authenticated channel. See -Section~\ref{subsec:trust-chain} for more discussion of the public key -infrastructure and trust chain. 
- -Bridges use Tor to publish their descriptors privately and securely, -so even an attacker monitoring the bridge directory authority's network -can't make a list of all the addresses contacting the authority. -Bridges may publish to only a subset of the -authorities, to limit the potential impact of an authority compromise. - - -%\subsection{A simple matter of engineering} -% -%Although we've described bridges and bridge authorities in simple terms -%above, some design modifications and features are needed in the Tor -%codebase to add them. We describe the four main changes here. -% -%Firstly, we need to get smarter about rate limiting: -%Bandwidth classes -% -%Secondly, while users can in fact configure which directory authorities -%they use, we need to add a new type of directory authority and teach -%bridges to fetch directory information from the main authorities while -%publishing relay descriptors to the bridge authorities. We're most of -%the way there, since we can already specify attributes for directory -%authorities: -%add a separate flag named ``blocking''. -% -%Thirdly, need to build paths using bridges as the first -%hop. One more hole in the non-clique assumption. -% -%Lastly, since bridge authorities don't answer full network statuses, -%we need to add a new way for users to learn the current status for a -%single relay or a small set of relays---to answer such questions as -%``is it running?'' or ``is it behaving correctly?'' We describe in -%Section~\ref{subsec:enclave-dirs} a way for the bridge authority to -%publish this information without resorting to signing each answer -%individually. - -\subsection{Putting them together} -\label{subsec:relay-together} - -If a blocked user knows the identity keys of a set of bridge relays, and -he has correct address information for at least one of them, he can use -that one to make a secure connection to the bridge authority and update -his knowledge about the other bridge relays. 
He can also use it to make -secure connections to the main Tor network and directory authorities, so he -can build circuits and connect to the rest of the Internet. All of these -updates happen in the background: from the blocked user's perspective, -he just accesses the Internet via his Tor client like always. - -So now we've reduced the problem from how to circumvent the firewall -for all transactions (and how to know that the pages you get have not -been modified by the local attacker) to how to learn about a working -bridge relay. - -There's another catch though. We need to make sure that the network -traffic we generate by simply connecting to a bridge relay doesn't stand -out too much. - -%The following section describes ways to bootstrap knowledge of your first -%bridge relay, and ways to maintain connectivity once you know a few -%bridge relays. - -% (See Section~\ref{subsec:first-bridge} for a discussion -%of exactly what information is sufficient to characterize a bridge relay.) - - - -\section{Hiding Tor's network fingerprint} -\label{sec:network-fingerprint} -\label{subsec:enclave-dirs} - -Currently, Tor uses two protocols for its network communications. The -main protocol uses TLS for encrypted and authenticated communication -between Tor instances. The second protocol is standard HTTP, used for -fetching directory information. All Tor relays listen on their ``ORPort'' -for TLS connections, and some of them opt to listen on their ``DirPort'' -as well, to serve directory information. Tor relays choose whatever port -numbers they like; the relay descriptor they publish to the directory -tells users where to connect. - -One format for communicating address information about a bridge relay is -its IP address and DirPort. From there, the user can ask the bridge's -directory cache for an up-to-date copy of its relay descriptor, and -learn its current circuit keys, its ORPort, and so on. 
- -However, connecting directly to the directory cache involves a plaintext -HTTP request. A censor could create a network fingerprint (known as a -\emph{signature} in the intrusion detection field) for the request -and/or its response, thus preventing these connections. To resolve this -vulnerability, we've modified the Tor protocol so that users can connect -to the directory cache via the main Tor port---they establish a TLS -connection with the bridge as normal, and then send a special ``begindir'' -relay command to establish an internal connection to its directory cache. - -Therefore a better way to summarize a bridge's address is by its IP -address and ORPort, so all communications between the client and the -bridge will use ordinary TLS. But there are other details that need -more investigation. - -What port should bridges pick for their ORPort? We currently recommend -that they listen on port 443 (the default HTTPS port) if they want to -be most useful, because clients behind standard firewalls will have -the best chance to reach them. Is this the best choice in all cases, -or should we encourage some fraction of them to pick random ports, or other -ports commonly permitted through firewalls like 53 (DNS) or 110 -(POP)? Or perhaps we should use other ports where TLS traffic is -expected, like 993 (IMAPS) or 995 (POP3S). We need more research on our -potential users, and their current and anticipated firewall restrictions. - -Furthermore, we need to look at the specifics of Tor's TLS handshake. -Right now Tor uses some predictable strings in its TLS handshakes. For -example, it sets the X.509 organizationName field to ``Tor'', and it puts -the Tor relay's nickname in the certificate's commonName field. We -should tweak the handshake protocol so it doesn't rely on any unusual details -in the certificate, yet it remains secure; the certificate itself -should be made to resemble an ordinary HTTPS certificate. 
We should also try -to make our advertised cipher-suites closer to what an ordinary web server -would support. - -Tor's TLS handshake uses two-certificate chains: one certificate -contains the self-signed identity key for -the router, and the second contains a current TLS key, signed by the -identity key. We use these to authenticate that we're talking to the right -router, and to limit the impact of TLS-key exposure. Most (though far from -all) consumer-oriented HTTPS services provide only a single certificate. -These extra certificates may help identify Tor's TLS handshake; instead, -bridges should consider using only a single TLS key certificate signed by -their identity key, and providing the full value of the identity key in an -early handshake cell. More significantly, Tor currently has all clients -present certificates, so that clients are harder to distinguish from relays. -But in a blocking-resistance environment, clients should not present -certificates at all. - -Last, what if the adversary starts observing the network traffic even -more closely? Even if our TLS handshake looks innocent, our traffic timing -and volume still look different than a user making a secure web connection -to his bank. The same techniques used in the growing trend to build tools -to recognize encrypted Bittorrent traffic -%~\cite{bt-traffic-shaping} -could be used to identify Tor communication and recognize bridge -relays. Rather than trying to look like encrypted web traffic, we may be -better off trying to blend with some other encrypted network protocol. The -first step is to compare typical network behavior for a Tor client to -typical network behavior for various other protocols. This statistical -cat-and-mouse game is made more complex by the fact that Tor transports a -variety of protocols, and we'll want to automatically handle web browsing -differently from, say, instant messaging. - -% Tor cells are 512 bytes each. 
So TLS records will be roughly -% multiples of this size? How bad is this? -RD -% Look at ``Inferring the Source of Encrypted HTTP Connections'' -% by Marc Liberatore and Brian Neil Levine (CCS 2006) -% They substantially flesh out the numbers for the web fingerprinting -% attack. -PS -% Yes, but I meant detecting the fingerprint of Tor traffic itself, not -% learning what websites we're going to. I wouldn't be surprised to -% learn that these are related problems, but it's not obvious to me. -RD - -\subsection{Identity keys as part of addressing information} -\label{subsec:id-address} - -We have described a way for the blocked user to bootstrap into the -network once he knows the IP address and ORPort of a bridge. What about -local spoofing attacks? That is, since we never learned an identity -key fingerprint for the bridge, a local attacker could intercept our -connection and pretend to be the bridge we had in mind. It turns out -that giving false information isn't that bad---since the Tor client -ships with trusted keys for the bridge directory authority and the Tor -network directory authorities, the user can learn whether he's being -given a real connection to the bridge authorities or not. (After all, -if the adversary intercepts every connection the user makes and gives -him a bad connection each time, there's nothing we can do.) - -What about anonymity-breaking attacks from observing traffic, if the -blocked user doesn't start out knowing the identity key of his intended -bridge? The vulnerabilities aren't so bad in this case either---the -adversary could do similar attacks just by monitoring the network -traffic. -% cue paper by steven and george - -Once the Tor client has fetched the bridge's relay descriptor, it should -remember the identity key fingerprint for that bridge relay. Thus if -the bridge relay moves to a new IP address, the client can query the -bridge directory authority to look up a fresh relay descriptor using -this fingerprint. 
- -So we've shown that it's \emph{possible} to bootstrap into the network -just by learning the IP address and ORPort of a bridge, but are there -situations where it's more convenient or more secure to learn the bridge's -identity fingerprint as well, or instead, while bootstrapping? We keep -that question in mind as we next investigate bootstrapping and discovery. - -\section{Discovering working bridge relays} -\label{sec:discovery} - -Tor's modular design means that we can develop a better relay component -independently of developing the discovery component. This modularity's -great promise is that we can pick any discovery approach we like; but the -unfortunate fact is that we have no magic bullet for discovery. We're -in the same arms race as all the other designs we described in -Section~\ref{sec:related}. - -In this section we describe a variety of approaches to adding discovery -components for our design. - -\subsection{Bootstrapping: finding your first bridge.} -\label{subsec:first-bridge} - -In Section~\ref{subsec:relay-together}, we showed that a user who knows -a working bridge address can use it to reach the bridge authority and -to stay connected to the Tor network. But how do new users reach the -bridge authority in the first place? After all, the bridge authority -will be one of the first addresses that a censor blocks. - -First, we should recognize that most government firewalls are not -perfect. That is, they may allow connections to Google cache or some -open proxy servers, or they let file-sharing traffic, Skype, instant -messaging, or World-of-Warcraft connections through. Different users will -have different mechanisms for bypassing the firewall initially. Second, -we should remember that most people don't operate in a vacuum; users will -hopefully know other people who are in other situations or have other -resources available. 
In the rest of this section we develop a toolkit -of different options and mechanisms, so that we can enable users in a -diverse set of contexts to bootstrap into the system. - -(For users who can't use any of these techniques, hopefully they know -a friend who can---for example, perhaps the friend already knows some -bridge relay addresses. If they can't get around it at all, then we -can't help them---they should go meet more people or learn more about -the technology running the firewall in their area.) - -By deploying all the schemes in the toolkit at once, we let bridges and -blocked users employ the discovery approach that is most appropriate -for their situation. - -\subsection{Independent bridges, no central discovery} - -The first design is simply to have no centralized discovery component at -all. Volunteers run bridges, and we assume they have some blocked users -in mind and communicate their address information to them out-of-band -(for example, through Gmail). This design allows for small personal -bridges that have only one or a handful of users in mind, but it can -also support an entire community of users. For example, Citizen Lab's -upcoming Psiphon single-hop proxy tool~\cite{psiphon} plans to use this -\emph{social network} approach as its discovery component. - -There are several ways to do bootstrapping in this design. In the simple -case, the operator of the bridge informs each chosen user about his -bridge's address information and/or keys. A different approach involves -blocked users introducing new blocked users to the bridges they know. -That is, somebody in the blocked area can pass along a bridge's address to -somebody else they trust. This scheme brings in appealing but complex game -theoretic properties: the blocked user making the decision has an incentive -only to delegate to trustworthy people, since an adversary who learns -the bridge's address and filters it makes it unavailable for both of them. 
-Also, delegating known bridges to members of your social network can be -dangerous: an adversary who can learn who knows which bridges may -be able to reconstruct the social network. - -Note that a central set of bridge directory authorities can still be -compatible with a decentralized discovery process. That is, how users -first learn about bridges is entirely up to the bridges, but the process -of fetching up-to-date descriptors for them can still proceed as described -in Section~\ref{sec:bridges}. Of course, creating a central place that -knows about all the bridges may not be smart, especially if every other -piece of the system is decentralized. Further, if a user only knows -about one bridge and he loses track of it, it may be quite a hassle to -reach the bridge authority. We address these concerns next. - -\subsection{Families of bridges, no central discovery} - -Because the blocked users are running our software too, we have many -opportunities to improve usability or robustness. Our second design builds -on the first by encouraging volunteers to run several bridges at once -(or coordinate with other bridge volunteers), such that some -of the bridges are likely to be available at any given time. - -The blocked user's Tor client would periodically fetch an updated set of -recommended bridges from any of the working bridges. Now the client can -learn new additions to the bridge pool, and can expire abandoned bridges -or bridges that the adversary has blocked, without the user ever needing -to care. To simplify maintenance of the community's bridge pool, each -community could run its own bridge directory authority---reachable via -the available bridges, and also mirrored at each bridge. - -\subsection{Public bridges with central discovery} - -What about people who want to volunteer as bridges but don't know any -suitable blocked users? What about people who are blocked but don't -know anybody on the outside? 
Here we describe how to make use of these -\emph{public bridges} in a way that still makes it hard for the attacker -to learn all of them. - -The basic idea is to divide public bridges into a set of pools based on -identity key. Each pool corresponds to a \emph{distribution strategy}: -an approach to distributing its bridge addresses to users. Each strategy -is designed to exercise a different scarce resource or property of -the user. - -How do we divide bridges between these strategy pools such that they're -evenly distributed and the allocation is hard to influence or predict, -but also in a way that's amenable to creating more strategies later -on without reshuffling all the pools? We assign a given bridge -to a strategy pool by hashing the bridge's identity key along with a -secret that only the bridge authority knows: the first $n$ bits of this -hash dictate the strategy pool number, where $n$ is a parameter that -describes how many strategy pools we want at this point. We choose $n=3$ -to start, so we divide bridges between 8 pools; but as we later invent -new distribution strategies, we can increment $n$ to split the 8 into -16. Since a bridge can't predict the next bit in its hash, it can't -anticipate which identity key will correspond to a certain new pool -when the pools are split. Further, since the bridge authority doesn't -provide any feedback to the bridge about which strategy pool it's in, -an adversary who signs up bridges with the goal of filling a certain -pool~\cite{casc-rep} will be hindered. - -% This algorithm is not ideal. When we split pools, each existing -% pool is cut in half, where half the bridges remain with the -% old distribution policy, and half will be under what the new one -% is. So the new distribution policy inherits a bunch of blocked -% bridges if the old policy was too loose, or a bunch of unblocked -% bridges if its policy was still secure. -RD -% -% I think it should be more chordlike. 
-% Bridges are allocated to wherever on the ring which is divided -% into arcs (buckets). -% If a bucket gets too full, you can just split it. -% More on this below. -PFS - -The first distribution strategy (used for the first pool) publishes bridge -addresses in a time-release fashion. The bridge authority divides the -available bridges into partitions, and each partition is deterministically -available only in certain time windows. That is, over the course of a -given time slot (say, an hour), each requester is given a random bridge -from within that partition. When the next time slot arrives, a new set -of bridges from the pool are available for discovery. Thus some bridge -address is always available when a new -user arrives, but to learn about all bridges the attacker needs to fetch -all new addresses at every new time slot. By varying the length of the -time slots, we can make it harder for the attacker to guess when to check -back. We expect these bridges will be the first to be blocked, but they'll -help the system bootstrap until they \emph{do} get blocked. Further, -remember that we're dealing with different blocking regimes around the -world that will progress at different rates---so this pool will still -be useful to some users even as the arms races progress. - -The second distribution strategy publishes bridge addresses based on the IP -address of the requesting user. Specifically, the bridge authority will -divide the available bridges in the pool into a bunch of partitions -(as in the first distribution scheme), hash the requester's IP address -with a secret of its own (as in the above allocation scheme for creating -pools), and give the requester a random bridge from the appropriate -partition. To raise the bar, we should discard the last octet of the -IP address before inputting it to the hash function, so an attacker -who only controls a single ``/24'' network only counts as one user. 
A -large attacker like China will still be able to control many addresses, -but the hassle of establishing connections from each network (or spoofing -TCP connections) may still slow them down. Similarly, as a special case, -we should treat IP addresses that are Tor exit nodes as all being on -the same network. - -The third strategy combines the time-based and location-based -strategies to further constrain and rate-limit the available bridge -addresses. Specifically, the bridge address provided in a given time -slot to a given network location is deterministic within the partition, -rather than chosen randomly each time from the partition. Thus, repeated -requests during that time slot from a given network are given the same -bridge address as the first request. - -The fourth strategy is based on Circumventor's discovery strategy. -The Circumventor project, realizing that its adoption will remain limited -if it has no central coordination mechanism, has started a mailing list to -distribute new proxy addresses every few days. From experimentation it -seems they have concluded that sending updates every three or four days -is sufficient to stay ahead of the current attackers. - -The fifth strategy provides an alternative approach to a mailing list: -users provide an email address and receive an automated response -listing an available bridge address. We could limit one response per -email address. To further rate limit queries, we could require a CAPTCHA -solution -%~\cite{captcha} -in each case too. In fact, we wouldn't need to -implement the CAPTCHA on our side: if we only deliver bridge addresses -to Yahoo or GMail addresses, we can leverage the rate-limiting schemes -that other parties already impose for account creation. - -The sixth strategy ties in the social network design with public -bridges and a reputation system. We pick some seeds---trusted people in -blocked areas---and give them each a few dozen bridge addresses and a few -\emph{delegation tokens}. 
We run a website next to the bridge authority, -where users can log in (they connect via Tor, and they don't need to -provide actual identities, just persistent pseudonyms). Users can delegate -trust to other people they know by giving them a token, which can be -exchanged for a new account on the website. Accounts in ``good standing'' -then accrue new bridge addresses and new tokens. As usual, reputation -schemes bring in a host of new complexities~\cite{rep-anon}: how do we -decide that an account is in good standing? We could tie reputation -to whether the bridges they're told about have been blocked---see -Section~\ref{subsec:geoip} below for initial thoughts on how to discover -whether bridges have been blocked. We could track reputation between -accounts (if you delegate to somebody who screws up, it impacts you too), -or we could use blinded delegation tokens~\cite{chaum-blind} to prevent -the website from mapping the seeds' social network. We put off deeper -discussion of the social network reputation strategy for future work. - -Pools seven and eight are held in reserve, in case our currently deployed -tricks all fail at once and the adversary blocks all those bridges---so -we can adapt and move to new approaches quickly, and have some bridges -immediately available for the new schemes. New strategies might be based -on some other scarce resource, such as relaying traffic for others or -other proof of energy spent. (We might also worry about the incentives -for bridges that sign up and get allocated to the reserve pools: will they -be unhappy that they're not being used? But this is a transient problem: -if Tor users are bridges by default, nobody will mind not being used yet. -See also Section~\ref{subsec:incentives}.) - -%Is it useful to load balance which bridges are handed out? The above -%pool concept makes some bridges wildly popular and others less so. -%But I guess that's the point. 
- -\subsection{Public bridges with coordinated discovery} - -We presented the above discovery strategies in the context of a single -bridge directory authority, but in practice we will want to distribute the -operations over several bridge authorities---a single point of failure -or attack is a bad move. The first answer is to run several independent -bridge directory authorities, and bridges gravitate to one based on -their identity key. The better answer would be some federation of bridge -authorities that work together to provide redundancy but don't introduce -new security issues. We could even imagine designs where the bridge -authorities have encrypted versions of the bridge's relay descriptors, -and the users learn a decryption key that they keep private when they -first hear about the bridge---this way the bridge authorities would not -be able to learn the IP address of the bridges. - -We leave this design question for future work. - -\subsection{Assessing whether bridges are useful} - -Learning whether a bridge is useful is important in the bridge authority's -decision to include it in responses to blocked users. For example, if -we end up with a list of thousands of bridges and only a few dozen of -them are reachable right now, most blocked users will not end up knowing -about working bridges. - -There are three components for assessing how useful a bridge is. First, -is it reachable from the public Internet? Second, what proportion of -the time is it available? Third, is it blocked in certain jurisdictions? - -The first component can be tested just as we test reachability of -ordinary Tor relays. Specifically, the bridges do a self-test---connect -to themselves via the Tor network---before they are willing to -publish their descriptor, to make sure they're not obviously broken or -misconfigured. Once the bridges publish, the bridge authority also tests -reachability to make sure they're not confused or outright lying. 
- -The second component can be measured and tracked by the bridge authority. -By doing periodic reachability tests, we can get a sense of how often the -bridge is available. More complex tests will involve bandwidth-intensive -checks to force the bridge to commit resources in order to be counted as -available. We need to evaluate how the relationship of uptime percentage -should weigh into our choice of which bridges to advertise. We leave -this to future work. - -The third component is perhaps the trickiest: with many different -adversaries out there, how do we keep track of which adversaries have -blocked which bridges, and how do we learn about new blocks as they -occur? We examine this problem next. - -\subsection{How do we know if a bridge relay has been blocked?} -\label{subsec:geoip} - -There are two main mechanisms for testing whether bridges are reachable -from inside each blocked area: active testing via users, and passive -testing via bridges. - -In the case of active testing, certain users inside each area -sign up as testing relays. The bridge authorities can then use a -Blossom-like~\cite{blossom-thesis} system to build circuits through them -to each bridge and see if it can establish the connection. But how do -we pick the users? If we ask random users to do the testing (or if we -solicit volunteers from the users), the adversary should sign up so he -can enumerate the bridges we test. Indeed, even if we hand-select our -testers, the adversary might still discover their location and monitor -their network activity to learn bridge addresses. - -Another answer is not to measure directly, but rather let the bridges -report whether they're being used. -%If they periodically report to their -%bridge directory authority how much use they're seeing, perhaps the -%authority can make smart decisions from there. 
-Specifically, bridges should install a GeoIP database such as the public -IP-To-Country list~\cite{ip-to-country}, and then periodically report to the -bridge authorities which countries they're seeing use from. This data -would help us track which countries are making use of the bridge design, -and can also let us learn about new steps the adversary has taken in -the arms race. (The compressed GeoIP database is only several hundred -kilobytes, and we could even automate the update process by serving it -from the bridge authorities.) -More analysis of this passive reachability -testing design is needed to resolve its many edge cases: for example, -if a bridge stops seeing use from a certain area, does that mean the -bridge is blocked or does that mean those users are asleep? - -There are many more problems with the general concept of detecting whether -bridges are blocked. First, different zones of the Internet are blocked -in different ways, and the actual firewall jurisdictions do not match -country borders. Our bridge scheme could help us map out the topology -of the censored Internet, but this is a huge task. More generally, -if a bridge relay isn't reachable, is that because of a network block -somewhere, because of a problem at the bridge relay, or just a temporary -outage somewhere in between? And last, an attacker could poison our -bridge database by signing up already-blocked bridges. In this case, -if we're stingy giving out bridge addresses, users in that country won't -learn working bridges. - -All of these issues are made more complex when we try to integrate this -testing into our social network reputation system above. -Since in that case we punish or reward users based on whether bridges -get blocked, the adversary has new attacks to trick or bog down the -reputation tracking. Indeed, the bridge authority doesn't even know -what zone the blocked user is in, so do we blame him for any possible -censored zone, or what? 
- -Clearly more analysis is required. The eventual solution will probably -involve a combination of passive measurement via GeoIP and active -measurement from trusted testers. More generally, we can use the passive -feedback mechanism to track usage of the bridge network as a whole---which -would let us respond to attacks and adapt the design, and it would also -let the general public track the progress of the project. - -%Worry: the adversary could choose not to block bridges but just record -%connections to them. So be it, I guess. - -\subsection{Advantages of deploying all solutions at once} - -For once, we're not in the position of the defender: we don't have to -defend against every possible filtering scheme; we just have to defend -against at least one. On the flip side, the attacker is forced to guess -how to allocate his resources to defend against each of these discovery -strategies. So by deploying all of our strategies at once, we not only -increase our chances of finding one that the adversary has difficulty -blocking, but we actually make \emph{all} of the strategies more robust -in the face of an adversary with limited resources. - -%\subsection{Remaining unsorted notes} - -%In the first subsection we describe how to find a first bridge. - -%Going to be an arms race. Need a bag of tricks. Hard to say -%which ones will work. Don't spend them all at once. - -%Some techniques are sufficient to get us an IP address and a port, -%and others can get us IP:port:key. Lay out some plausible options -%for how users can bootstrap into learning their first bridge. - -%\section{The account / reputation system} -%\section{Social networks with directory-side support} -%\label{sec:accounts} - -%One answer is to measure based on whether the bridge addresses -%we give it end up blocked. But how do we decide if they get blocked? - -%Perhaps each bridge should be known by a single bridge directory -%authority. 
This makes it easier to trace which users have learned about -%it, so easier to blame or reward. It also makes things more brittle, -%since loss of that authority means its bridges aren't advertised until -%they switch, and means its bridge users are sad too. -%(Need a slick hash algorithm that will map our identity key to a -%bridge authority, in a way that's sticky even when we add bridge -%directory authorities, but isn't sticky when our authority goes -%away. Does this exist?) -% [[Ian says: What about just using something like hash table chaining? -% This should work, so long as the client knows which authorities currently -% exist.]] - -%\subsection{Discovery based on social networks} - -%A token that can be exchanged at the bridge authority (assuming you -%can reach it) for a new bridge address. - -%The account server runs as a Tor controller for the bridge authority. - -%Users can establish reputations, perhaps based on social network -%connectivity, perhaps based on not getting their bridge relays blocked, - -%Probably the most critical lesson learned in past work on reputation -%systems in privacy-oriented environments~\cite{rep-anon} is the need for -%verifiable transactions. That is, the entity computing and advertising -%reputations for participants needs to actually learn in a convincing -%way that a given transaction was successful or unsuccessful. - -%(Lesson from designing reputation systems~\cite{rep-anon}: easy to -%reward good behavior, hard to punish bad behavior. - -\section{Security considerations} -\label{sec:security} - -\subsection{Possession of Tor in oppressed areas} - -Many people speculate that installing and using a Tor client in areas with -particularly extreme firewalls is a high risk---and the risk increases -as the firewall gets more restrictive. 
This notion certainly has merit, but -there's -a counter pressure as well: as the firewall gets more restrictive, more -ordinary people behind it end up using Tor for more mainstream activities, -such as learning -about Wall Street prices or looking at pictures of women's ankles. So -as the restrictive firewall pushes up the number of Tor users, the -``typical'' Tor user becomes more mainstream, and therefore mere -use or possession of the Tor software is not so surprising. - -It's hard to say which of these pressures will ultimately win out, -but we should keep both sides of the issue in mind. - -%Nick, want to rewrite/elaborate on this section? - -%Ian suggests: -% Possession of Tor: this is totally of-the-cuff, and there are lots of -% security issues to think about, but what about an ActiveX version of -% Tor? The magic you learn (as opposed to a bridge address) is a plain -% old HTTPS server, which feeds you an ActiveX applet pre-configured with -% some bridge address (possibly on the same machine). For bonus points, -% somehow arrange that (a) the applet is signed in some way the user can -% reliably check, but (b) don't end up with anything like an incriminating -% long-term cert stored on the user's computer. This may be marginally -% useful in some Internet-cafe situations as well, though (a) is even -% harder to get right there. - - -\subsection{Observers can tell who is publishing and who is reading} -\label{subsec:upload-padding} - -Tor encrypts traffic on the local network, and it obscures the eventual -destination of the communication, but it doesn't do much to obscure the -traffic volume. In particular, a user publishing a home video will have a -different network fingerprint than a user reading an online news article. -Based on our assumption in Section~\ref{sec:adversary} that users who -publish material are in more danger, should we work to improve Tor's -security in this situation? 
- -In the general case this is an extremely challenging task: -effective \emph{end-to-end traffic confirmation attacks} -are known where the adversary observes the origin and the -destination of traffic and confirms that they are part of the -same communication~\cite{danezis:pet2004,e2e-traffic}. Related are -\emph{website fingerprinting attacks}, where the adversary downloads -a few hundred popular websites, makes a set of ``fingerprints'' for each -site, and then observes the target Tor client's traffic to look for -a match~\cite{pet05-bissias,defensive-dropping}. But can we do better -against a limited adversary who just does coarse-grained sweeps looking -for unusually prolific publishers? - -One answer is for bridge users to automatically send bursts of padding -traffic periodically. (This traffic can be implemented in terms of -long-range drop cells, which are already part of the Tor specification.) -Of course, convincingly simulating an actual human publishing interesting -content is a difficult arms race, but it may be worthwhile to at least -start the race. More research remains. - -\subsection{Anonymity effects from acting as a bridge relay} - -Against some attacks, relaying traffic for others can improve -anonymity. The simplest example is an attacker who owns a small number -of Tor relays. He will see a connection from the bridge, but he won't -be able to know whether the connection originated there or was relayed -from somebody else. More generally, the mere uncertainty of whether the -traffic originated from that user may be helpful. - -There are some cases where it doesn't seem to help: if an attacker can -watch all of the bridge's incoming and outgoing traffic, then it's easy -to learn which connections were relayed and which started there. (In this -case he still doesn't know the final destinations unless he is watching -them too, but in this case bridges are no better off than if they were -an ordinary client.) 
- -There are also some potential downsides to running a bridge. First, while -we try to make it hard to enumerate all bridges, it's still possible to -learn about some of them, and for some people just the fact that they're -running one might signal to an attacker that they place a higher value -on their anonymity. Second, there are some more esoteric attacks on Tor -relays that are not as well-understood or well-tested---for example, an -attacker may be able to ``observe'' whether the bridge is sending traffic -even if he can't actually watch its network, by relaying traffic through -it and noticing changes in traffic timing~\cite{attack-tor-oak05}. On -the other hand, it may be that limiting the bandwidth the bridge is -willing to relay will allow this sort of attacker to determine if it's -being used as a bridge but not easily learn whether it is adding traffic -of its own. - -We also need to examine how entry guards fit in. Entry guards -(a small set of nodes that are always used for the first -step in a circuit) help protect against certain attacks -where the attacker runs a few Tor relays and waits for -the user to choose these relays as the beginning and end of her -circuit\footnote{\url{http://wiki.noreply.org/noreply/TheOnionRouter/TorFAQ#EntryGuards}}. -If the blocked user doesn't use the bridge's entry guards, then the bridge -doesn't gain as much cover benefit. On the other hand, what design changes -are needed for the blocked user to use the bridge's entry guards without -learning what they are (this seems hard), and even if we solve that, -do they then need to use the guards' guards and so on down the line? - -It is an open research question whether the benefits of running a bridge -outweigh the risks. A lot of the decision rests on which attacks the -users are most worried about. For most users, we don't think running a -bridge relay will be that damaging, and it could help quite a bit. 
- -\subsection{Trusting local hardware: Internet cafes and LiveCDs} -\label{subsec:cafes-and-livecds} - -Assuming that users have their own trusted hardware is not -always reasonable. - -For Internet cafe Windows computers that let you attach your own USB key, -a USB-based Tor image would be smart. There's Torpark, and hopefully -there will be more thoroughly analyzed and trustworthy options down the -road. Worries remain about hardware or software keyloggers and other -spyware, as well as physical surveillance. - -If the system lets you boot from a CD or from a USB key, you can gain -a bit more security by bringing a privacy LiveCD with you. (This -approach isn't foolproof either of course, since hardware -keyloggers and physical surveillance are still a worry). - -In fact, LiveCDs are also useful if it's your own hardware, since it's -easier to avoid leaving private data and logs scattered around the -system. - -%\subsection{Forward compatibility and retiring bridge authorities} -% -%Eventually we'll want to change the identity key and/or location -%of a bridge authority. How do we do this mostly cleanly? - -\subsection{The trust chain} -\label{subsec:trust-chain} - -Tor's ``public key infrastructure'' provides a chain of trust to -let users verify that they're actually talking to the right relays. -There are four pieces to this trust chain. - -First, when Tor clients are establishing circuits, at each step -they demand that the next Tor relay in the path prove knowledge of -its private key~\cite{tor-design}. This step prevents the first node -in the path from just spoofing the rest of the path. Second, the -Tor directory authorities provide a signed list of relays along with -their public keys---so unless the adversary can control a threshold -of directory authorities, he can't trick the Tor client into using other -Tor relays. 
Third, the location and keys of the directory authorities, -in turn, are hard-coded in the Tor source code---so as long as the user -got a genuine version of Tor, he can know that he is using the genuine -Tor network. And last, the source code and other packages are signed -with the GPG keys of the Tor developers, so users can confirm that they -did in fact download a genuine version of Tor. - -In the case of blocked users contacting bridges and bridge directory -authorities, the same logic applies in parallel: the blocked users fetch -information from both the bridge authorities and the directory authorities -for the `main' Tor network, and they combine this information locally. - -How can a user in an oppressed country know that he has the correct -key fingerprints for the developers? As with other security systems, it -ultimately comes down to human interaction. The keys are signed by dozens -of people around the world, and we have to hope that our users have met -enough people in the PGP web of trust -%~\cite{pgp-wot} -that they can learn -the correct keys. For users that aren't connected to the global security -community, though, this question remains a critical weakness. - -%\subsection{Security through obscurity: publishing our design} - -%Many other schemes like dynaweb use the typical arms race strategy of -%not publishing their plans. Our goal here is to produce a design---a -%framework---that can be public and still secure. Where's the tradeoff? - -%\section{Performance improvements} -%\label{sec:performance} -% -%\subsection{Fetch relay descriptors just-in-time} -% -%I guess we should encourage most places to do this, so blocked -%users don't stand out. -% -% -%network-status and directory optimizations. caching better. partitioning -%issues? 
- -\section{Maintaining reachability} -\label{sec:reachability} - -\subsection{How many bridge relays should you know about?} - -The strategies described in Section~\ref{sec:discovery} talked about -learning one bridge address at a time. But if most bridges are ordinary -Tor users on cable modem or DSL connections, many of them will disappear -and/or move periodically. How many bridge relays should a blocked user -know about so that she is likely to have at least one reachable at any -given point? This is already a challenging problem if we only consider -natural churn: the best approach is to see what bridges we attract in -reality and measure their churn. We may also need to factor in a parameter -for how quickly bridges get discovered and blocked by the attacker; -we leave this for future work after we have more deployment experience. - -A related question is: if the bridge relays change IP addresses -periodically, how often does the blocked user need to fetch updates in -order to keep from being cut out of the loop? - -Once we have more experience and intuition, we should explore technical -solutions to this problem too. For example, if the discovery strategies -give out $k$ bridge addresses rather than a single bridge address, perhaps -we can improve robustness from the user perspective without significantly -aiding the adversary. Rather than giving out a new random subset of $k$ -addresses at each point, we could bind them together into \emph{bridge -families}, so all users that learn about one member of the bridge family -are told about the rest as well. - -This scheme may also help defend against attacks to map the set of -bridges. That is, if all blocked users learn a random subset of bridges, -the attacker should learn about a few bridges, monitor the country-level -firewall for connections to them, then watch those users to see what -other bridges they use, and repeat. By segmenting the bridge address -space, we can limit the exposure of other users. 
- -\subsection{Cablemodem users don't usually provide important websites} -\label{subsec:block-cable} - -Another attack we might be concerned about is that the attacker could -just block all DSL and cablemodem network addresses, on the theory that -they don't run any important services anyway. If most of our bridges -are on these networks, this attack could really hurt. - -The first answer is to aim to get volunteers both from traditionally -``consumer'' networks and also from traditionally ``producer'' networks. -Since bridges don't need to be Tor exit nodes, as we improve our usability -it seems quite feasible to get a lot of websites helping out. - -The second answer (not as practical) would be to encourage more use of -consumer networks for popular and useful Internet services. -%(But P2P exists; -%minor websites exist; gaming exists; IM exists; ...) - -A related attack we might worry about is based on large countries putting -economic pressure on companies that want to expand their business. For -example, what happens if Verizon wants to sell services in China, and -China pressures Verizon to discourage its users in the free world from -running bridges? - -\subsection{Scanning resistance: making bridges more subtle} - -If it's trivial to verify that a given address is operating as a bridge, -and most bridges run on a predictable port, then it's conceivable our -attacker could scan the whole Internet looking for bridges. (In fact, -he can just concentrate on scanning likely networks like cablemodem -and DSL services---see Section~\ref{subsec:block-cable} above for -related attacks.) It would be nice to slow down this attack. It would -be even nicer to make it hard to learn whether we're a bridge without -first knowing some secret. We call this general property \emph{scanning -resistance}, and it goes along with normalizing Tor's TLS handshake and -network fingerprint. 
- -We could provide a password to the blocked user, and she (or her Tor -client) provides a nonced hash of this password when she connects. We'd -need to give her an ID key for the bridge too (in addition to the IP -address and port---see Section~\ref{subsec:id-address}), and wait to -present the password until we've finished the TLS handshake, else it -would look unusual. If Alice can authenticate the bridge before she -tries to send her password, we can resist an adversary who pretends -to be the bridge and launches a man-in-the-middle attack to learn the -password. But even if she can't, we still resist widespread -scanning. - -How should the bridge behave if accessed without the correct -authorization? Perhaps it should act like an unconfigured HTTPS server -(``welcome to the default Apache page''), or maybe it should mirror -and act like common websites, or websites randomly chosen from Google. - -We might assume that the attacker can recognize HTTPS connections that -use self-signed certificates. (This process would be resource-intensive -but not out of the realm of possibility.) But even in this case, many -popular websites around the Internet use self-signed or just plain broken -SSL certificates. - -%to unknown servers. It can then attempt to connect to them and block -%connections to servers that seem suspicious. It may be that password -%protected web sites will not be suspicious in general, in which case -%that may be the easiest way to give controlled access to the bridge. -%If such sites that have no other overt features are automatically -%blocked when detected, then we may need to be more subtle. -%Possibilities include serving an innocuous web page if a TLS encrypted -%request is received without the authorization needed to access the Tor -%network and only responding to a requested access to the Tor network -%if proper authentication is given. 
If an unauthenticated request to -%access the Tor network is sent, the bridge should respond as if -%it has received a message it does not understand (as would be the -%case were it not a bridge). - -% Ian suggests a ``socialist millionaires'' protocol here, for something. - -% Did we once mention knocking here? it's a good idea, but we should clarify -% what we mean. Ian also notes that knocking itself is very fingerprintable, -% and we should beware. - -\subsection{How to motivate people to run bridge relays} -\label{subsec:incentives} - -One of the traditional ways to get people to run software that benefits -others is to give them motivation to install it themselves. An often -suggested approach is to install it as a stunning screensaver so everybody -will be pleased to run it. We take a similar approach here, by leveraging -the fact that these users are already interested in protecting their -own Internet traffic, so they will install and run the software. - -Eventually, we may be able to make all Tor users become bridges if they -pass their self-reachability tests---the software and installers need -more work on usability first, but we're making progress. - -In the mean time, we can make a snazzy network graph with -Vidalia\footnote{\url{http://vidalia-project.net/}} that -emphasizes the connections the bridge user is currently relaying. -%(Minor -%anonymity implications, but hey.) (In many cases there won't be much -%activity, so this may backfire. Or it may be better suited to full-fledged -%Tor relay.) - -% Also consider everybody-a-relay. Many of the scalability questions -% are easier when you're talking about making everybody a bridge. - -%\subsection{What if the clients can't install software?} - -%[this section should probably move to the related work section, -%or just disappear entirely.] - -%Bridge users without Tor software - -%Bridge relays could always open their socks proxy. 
This is bad though, -%first -%because bridges learn the bridge users' destinations, and second because -%we've learned that open socks proxies tend to attract abusive users who -%have no idea they're using Tor. - -%Bridges could require passwords in the socks handshake (not supported -%by most software including Firefox). Or they could run web proxies -%that require authentication and then pass the requests into Tor. This -%approach is probably a good way to help bootstrap the Psiphon network, -%if one of its barriers to deployment is a lack of volunteers willing -%to exit directly to websites. But it clearly drops some of the nice -%anonymity and security features Tor provides. - -%A hybrid approach where the user gets his anonymity from Tor but his -%software-less use from a web proxy running on a trusted machine on the -%free side. - -\subsection{Publicity attracts attention} -\label{subsec:publicity} - -Many people working in this field want to publicize the existence -and extent of censorship concurrently with the deployment of their -circumvention software. The easy reason for this two-pronged push is -to attract volunteers for running proxies in their systems; but in many -cases their main goal is not to focus on getting more users signed up, -but rather to educate the rest of the world about the -censorship. The media also tries to do its part by broadcasting the -existence of each new circumvention system. - -But at the same time, this publicity attracts the attention of the -censors. We can slow down the arms race by not attracting as much -attention, and just spreading by word of mouth. If our goal is to -establish a solid social network of bridges and bridge users before -the adversary gets involved, does this extra attention work to our -disadvantage? - -\subsection{The Tor website: how to get the software} - -One of the first censoring attacks against a system like ours is to -block the website and make the software itself hard to find. 
Our system -should work well once the user is running an authentic -copy of Tor and has found a working bridge, but to get to that point -we rely on their individual skills and ingenuity. - -Right now, most countries that block access to Tor block only the main -website and leave mirrors and the network itself untouched. -Falling back on word-of-mouth is always a good last resort, but we should -also take steps to make sure it's relatively easy for users to get a copy, -such as publicizing the mirrors more and making copies available through -other media. We might also mirror the latest version of the software on -each bridge, so users who hear about an honest bridge can get a good -copy. -See Section~\ref{subsec:first-bridge} for more discussion. - -% Ian suggests that we have every tor relay distribute a signed copy of the -% software. - -\section{Next Steps} -\label{sec:conclusion} - -Technical solutions won't solve the whole censorship problem. After all, -the firewalls in places like China are \emph{socially} very -successful, even if technologies and tricks exist to get around them. -However, having a strong technical solution is still necessary as one -important piece of the puzzle. - -In this paper, we have shown that Tor provides a great set of building -blocks to start from. The next steps are to deploy prototype bridges and -bridge authorities, implement some of the proposed discovery strategies, -and then observe the system in operation and get more intuition about -the actual requirements and adversaries we're up against. 
- -\bibliographystyle{plain} \bibliography{tor-design} - -%\appendix - -%\section{Counting Tor users by country} -%\label{app:geoip} - -\end{document} - - - -\section{Future designs} -\label{sec:future} - -\subsection{Bridges inside the blocked network too} - -Assuming actually crossing the firewall is the risky part of the -operation, can we have some bridge relays inside the blocked area too, -and more established users can use them as relays so they don't need to -communicate over the firewall directly at all? A simple example here is -to make new blocked users into internal bridges also---so they sign up -on the bridge authority as part of doing their query, and we give out -their addresses -rather than (or along with) the external bridge addresses. This design -is a lot trickier because it brings in the complexity of whether the -internal bridges will remain available, can maintain reachability with -the outside world, etc. - -More complex future designs involve operating a separate Tor network -inside the blocked area, and using \emph{hidden service bridges}---bridges -that can be accessed by users of the internal Tor network but whose -addresses are not published or findable, even by these users---to get -from inside the firewall to the rest of the Internet. But this design -requires directory authorities to run inside the blocked area too, -and they would be a fine target to take down the network. - -% Hidden services as bridge directory authorities. - - ------------------------------------------- - -ship geoip db to bridges. they look up users who tls to them in the db, -and upload a signed list of countries and number-of-users each day. the -bridge authority aggregates them and publishes stats. - -bridge relays have buddies -they ask a user to test the reachability of their buddy. -leaks O(1) bridges, but not O(n). - -we should not be blockable by ordinary cisco censorship features. 
-that is, if they want to block our new design, they will need to -add a feature to block exactly this. -strategically speaking, this may come in handy. - -Bridges come in clumps of 4 or 8 or whatever. If you know one bridge -in a clump, the authority will tell you the rest. Now bridges can -ask users to test reachability of their buddies. - -Giving out clumps helps with dynamic IP addresses too. Whether it -should be 4 or 8 depends on our churn. - -the account server. let's call it a database, it doesn't have to -be a thing that a human interacts with. - -so how do we reward people for being good? - -\subsubsection{Public Bridges with Coordinated Discovery} - -****Pretty much this whole subsubsection will probably need to be -deferred until ``later'' and moved to after end document, but I'm leaving -it here for now in case useful.****** - -Rather than be entirely centralized, we can have a coordinated -collection of bridge authorities, analogous to how Tor network -directory authorities now work. - -Key components -``Authorities'' will distribute caches of what they know to overlapping -collections of nodes so that no one node is owned by one authority. -Also so that it is impossible to DoS info maintained by one authority -simply by making requests to it. - -Where a bridge gets assigned is not predictable by the bridge? - -If authorities don't know the IP addresses of the bridges they -are responsible for, they can't abuse that info (or be attacked for -having it). But, they also can't, e.g., control being sent massive -lists of nodes that were never good. This raises another question. -We generally decry use of IP address for location, etc. but we -need to do that to limit the introduction of functional but useless -IP addresses because, e.g., they are in China and the adversary -owns massive chunks of the IP space there. 
- -We don't want an arbitrary someone to be able to contact the -authorities and say an IP address is bad because it would be easy -for an adversary to take down all the suspicious bridges -even if they provide good cover websites, etc. Only the bridge -itself and/or the directory authority can declare a bridge blocked -from somewhere. - - -9. Bridge directories must not simply be a handful of nodes that -provide the list of bridges. They must flood or otherwise distribute -information out to other Tor nodes as mirrors. That way it becomes -difficult for censors to flood the bridge directory authorities with -requests, effectively denying access for others. But, there's lots of -churn and a much larger size than Tor directories. We are forced to -handle the directory scaling problem here much sooner than for the -network in general. Authorities can pass their bridge directories -(and policy info) to some moderate number of unidentified Tor nodes. -Anyone contacting one of those nodes can get bridge info. the nodes -must remain somewhat synched to prevent the adversary from abusing, -e.g., a timed release policy or the distribution to those nodes must -be resilient even if they are not coordinating. - -I think some kind of DHT like scheme would work here. A Tor node is -assigned a chunk of the directory. Lookups in the directory should be -via hashes of keys (fingerprints) and that should determine the Tor -nodes responsible. Ordinary directories can publish lists of Tor nodes -responsible for fingerprint ranges. Clients looking to update info on -some bridge will make a Tor connection to one of the nodes responsible -for that address. Instead of shutting down a circuit after getting -info on one address, extend it to another that is responsible for that -address (the node from which you are extending knows you are doing so -anyway). Keep going. This way you can amortize the Tor connection. - -10. 
We need some way to give new identity keys out to those who need -them without letting those get immediately blocked by authorities. One -way is to give a fingerprint that gets you more fingerprints, as -already described. These are meted out/updated periodically but allow -us to keep track of which sources are compromised: if a distribution -fingerprint repeatedly leads to quickly blocked bridges, it should be -suspect, dropped, etc. Since we're using hashes, there shouldn't be a -correlation with bridge directory mirrors, bridges, portions of the -network observed, etc. It should just be that the authorities know -about that key that leads to new addresses. - -This last point is very much like the issues in the valet nodes paper, -which is essentially about blocking resistance wrt exiting the Tor network, -while this paper is concerned with blocking the entering to the Tor network. -In fact the tickets used to connect to the IPo (Introduction Point), -could serve as an example, except that instead of authorizing -a connection to the Hidden Service, it's authorizing the downloading -of more fingerprints. - -Also, the fingerprints can follow the hash(q + '1' + cookie) scheme of -that paper (where q = hash(PK + salt) gave the q.onion address). This -allows us to control and track which fingerprint was causing problems. - -Note that, unlike many settings, the reputation problem should not be -hard here. If a bridge says it is blocked, then it might as well be. -If an adversary can say that the bridge is blocked wrt -$\mathit{censor}_i$, then it might as well be, since -$\mathit{censor}_i$ can presumably then block that bridge if it so -chooses. - -11. How much damage can the adversary do by running nodes in the Tor -network and watching for bridge nodes connecting to it? (This is -analogous to an Introduction Point watching for Valet Nodes connecting -to it.) What percentage of the network do you need to own to do how -much damage? 
Here the entry-guard design comes in helpfully. So we -need to have bridges use entry-guards, but (cf. 3 above) not use -bridges as entry-guards. Here's a serious tradeoff (again akin to the -ratio of valets to IPos) the more bridges/client the worse the -anonymity of that client. The fewer bridges/client the worse the -blocking resistance of that client. - - - diff --git a/doc/design-paper/cell-struct.eps b/doc/design-paper/cell-struct.eps deleted file mode 100644 index eb9fcb8643..0000000000 --- a/doc/design-paper/cell-struct.eps +++ /dev/null @@ -1,189 +0,0 @@ -%!PS-Adobe-2.0 EPSF-2.0 -%%Title: cell-struct.fig -%%Creator: fig2dev Version 3.2 Patchlevel 4 -%%CreationDate: Mon May 17 00:04:58 2004 -%%For: root@localhost.localdomain (root) -%%BoundingBox: 0 0 254 73 -%%Magnification: 1.0000 -%%EndComments -/$F2psDict 200 dict def -$F2psDict begin -$F2psDict /mtrx matrix put -/col-1 {0 setgray} bind def -/col0 {0.000 0.000 0.000 srgb} bind def -/col1 {0.000 0.000 1.000 srgb} bind def -/col2 {0.000 1.000 0.000 srgb} bind def -/col3 {0.000 1.000 1.000 srgb} bind def -/col4 {1.000 0.000 0.000 srgb} bind def -/col5 {1.000 0.000 1.000 srgb} bind def -/col6 {1.000 1.000 0.000 srgb} bind def -/col7 {1.000 1.000 1.000 srgb} bind def -/col8 {0.000 0.000 0.560 srgb} bind def -/col9 {0.000 0.000 0.690 srgb} bind def -/col10 {0.000 0.000 0.820 srgb} bind def -/col11 {0.530 0.810 1.000 srgb} bind def -/col12 {0.000 0.560 0.000 srgb} bind def -/col13 {0.000 0.690 0.000 srgb} bind def -/col14 {0.000 0.820 0.000 srgb} bind def -/col15 {0.000 0.560 0.560 srgb} bind def -/col16 {0.000 0.690 0.690 srgb} bind def -/col17 {0.000 0.820 0.820 srgb} bind def -/col18 {0.560 0.000 0.000 srgb} bind def -/col19 {0.690 0.000 0.000 srgb} bind def -/col20 {0.820 0.000 0.000 srgb} bind def -/col21 {0.560 0.000 0.560 srgb} bind def -/col22 {0.690 0.000 0.690 srgb} bind def -/col23 {0.820 0.000 0.820 srgb} bind def -/col24 {0.500 0.190 0.000 srgb} bind def -/col25 {0.630 0.250 0.000 srgb} bind def 
-/col26 {0.750 0.380 0.000 srgb} bind def -/col27 {1.000 0.500 0.500 srgb} bind def -/col28 {1.000 0.630 0.630 srgb} bind def -/col29 {1.000 0.750 0.750 srgb} bind def -/col30 {1.000 0.880 0.880 srgb} bind def -/col31 {1.000 0.840 0.000 srgb} bind def - -end -save -newpath 0 73 moveto 0 0 lineto 254 0 lineto 254 73 lineto closepath clip newpath --35.3 77.2 translate -1 -1 scale - -/cp {closepath} bind def -/ef {eofill} bind def -/gr {grestore} bind def -/gs {gsave} bind def -/sa {save} bind def -/rs {restore} bind def -/l {lineto} bind def -/m {moveto} bind def -/rm {rmoveto} bind def -/n {newpath} bind def -/s {stroke} bind def -/sh {show} bind def -/slc {setlinecap} bind def -/slj {setlinejoin} bind def -/slw {setlinewidth} bind def -/srgb {setrgbcolor} bind def -/rot {rotate} bind def -/sc {scale} bind def -/sd {setdash} bind def -/ff {findfont} bind def -/sf {setfont} bind def -/scf {scalefont} bind def -/sw {stringwidth} bind def -/tr {translate} bind def -/tnt {dup dup currentrgbcolor - 4 -2 roll dup 1 exch sub 3 -1 roll mul add - 4 -2 roll dup 1 exch sub 3 -1 roll mul add - 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} - bind def -/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul - 4 -2 roll mul srgb} bind def -/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def -/$F2psEnd {$F2psEnteredState restore end} def - -$F2psBegin -10 setmiterlimit -0 slj 0 slc - 0.06000 0.06000 sc -% -% Fig objects follow -% -% -% here starts figure with depth 50 -% Polyline -7.500 slw -n 1200 975 m - 1200 1275 l gs col0 s gr -% Polyline -n 1725 975 m - 1725 1275 l gs col0 s gr -% Polyline -n 600 975 m 4800 975 l 4800 1275 l 600 1275 l - cp gs col0 s gr -% Polyline -n 1200 300 m - 1200 600 l gs col0 s gr -% Polyline -n 1725 300 m - 1725 600 l gs col0 s gr -% Polyline -n 600 300 m 4800 300 l 4800 600 l 600 600 l - cp gs col0 s gr -% Polyline -n 2550 975 m - 2550 1275 l gs col0 s gr -% Polyline -n 3150 975 m - 3150 1275 l gs col0 s gr -% Polyline -n 3450 975 m - 
3450 1275 l gs col0 s gr -% Polyline -n 3900 975 m - 3900 1275 l gs col0 s gr -/Times-Roman ff 180.00 scf sf -675 1200 m -gs 1 -1 sc (CircID) col0 sh gr -/Times-Roman ff 180.00 scf sf -900 900 m -gs 1 -1 sc (2) col0 sh gr -/Times-Roman ff 180.00 scf sf -1275 1200 m -gs 1 -1 sc (Relay) col0 sh gr -/Times-Roman ff 180.00 scf sf -1800 1200 m -gs 1 -1 sc (StreamID) col0 sh gr -/Times-Roman ff 180.00 scf sf -2625 1200 m -gs 1 -1 sc (Digest) col0 sh gr -/Times-Roman ff 180.00 scf sf -3150 1200 m -gs 1 -1 sc (Len) col0 sh gr -/Times-Roman ff 180.00 scf sf -4200 1200 m -gs 1 -1 sc (DATA) col0 sh gr -/Times-Roman ff 180.00 scf sf -675 525 m -gs 1 -1 sc (CircID) col0 sh gr -/Times-Roman ff 180.00 scf sf -1275 525 m -gs 1 -1 sc (CMD) col0 sh gr -/Times-Roman ff 180.00 scf sf -900 225 m -gs 1 -1 sc (2) col0 sh gr -/Times-Roman ff 180.00 scf sf -1425 225 m -gs 1 -1 sc (1) col0 sh gr -/Times-Roman ff 180.00 scf sf -3225 525 m -gs 1 -1 sc (DATA) col0 sh gr -/Times-Roman ff 180.00 scf sf -3225 900 m -gs 1 -1 sc (2) col0 sh gr -/Times-Roman ff 180.00 scf sf -3450 1200 m -gs 1 -1 sc (CMD) col0 sh gr -/Times-Roman ff 180.00 scf sf -3600 900 m -gs 1 -1 sc (1) col0 sh gr -/Times-Roman ff 180.00 scf sf -3300 225 m -gs 1 -1 sc (509 bytes) col0 sh gr -/Times-Roman ff 180.00 scf sf -1425 900 m -gs 1 -1 sc (1) col0 sh gr -/Times-Roman ff 180.00 scf sf -2100 900 m -gs 1 -1 sc (2) col0 sh gr -/Times-Roman ff 180.00 scf sf -2850 900 m -gs 1 -1 sc (6) col0 sh gr -/Times-Roman ff 180.00 scf sf -4350 900 m -gs 1 -1 sc (498) col0 sh gr -% here ends figure; -$F2psEnd -rs -showpage diff --git a/doc/design-paper/cell-struct.fig b/doc/design-paper/cell-struct.fig deleted file mode 100644 index 3490673ca6..0000000000 --- a/doc/design-paper/cell-struct.fig +++ /dev/null @@ -1,49 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 1200 975 1200 1275 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 1725 975 1725 1275 -2 2 0 1 0 7 50 -1 -1 
0.000 0 0 -1 0 0 5 - 600 975 4800 975 4800 1275 600 1275 600 975 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 1200 300 1200 600 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 1725 300 1725 600 -2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 - 600 300 4800 300 4800 600 600 600 600 300 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 2550 975 2550 1275 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 3150 975 3150 1275 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 3450 975 3450 1275 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 3900 975 3900 1275 -4 0 0 50 -1 0 12 0.0000 4 135 510 675 1200 CircID\001 -4 0 0 50 -1 0 12 0.0000 4 135 90 900 900 2\001 -4 0 0 50 -1 0 12 0.0000 4 180 435 1275 1200 Relay\001 -4 0 0 50 -1 0 12 0.0000 4 135 735 1800 1200 StreamID\001 -4 0 0 50 -1 0 12 0.0000 4 180 510 2625 1200 Digest\001 -4 0 0 50 -1 0 12 0.0000 4 135 285 3150 1200 Len\001 -4 0 0 50 -1 0 12 0.0000 4 135 510 4200 1200 DATA\001 -4 0 0 50 -1 0 12 0.0000 4 135 510 675 525 CircID\001 -4 0 0 50 -1 0 12 0.0000 4 135 420 1275 525 CMD\001 -4 0 0 50 -1 0 12 0.0000 4 135 90 900 225 2\001 -4 0 0 50 -1 0 12 0.0000 4 135 90 1425 225 1\001 -4 0 0 50 -1 0 12 0.0000 4 135 510 3225 525 DATA\001 -4 0 0 50 -1 0 12 0.0000 4 135 90 3225 900 2\001 -4 0 0 50 -1 0 12 0.0000 4 135 420 3450 1200 CMD\001 -4 0 0 50 -1 0 12 0.0000 4 135 90 3600 900 1\001 -4 0 0 50 -1 0 12 0.0000 4 180 735 3300 225 509 bytes\001 -4 0 0 50 -1 0 12 0.0000 4 135 90 1425 900 1\001 -4 0 0 50 -1 0 12 0.0000 4 135 90 2100 900 2\001 -4 0 0 50 -1 0 12 0.0000 4 135 90 2850 900 6\001 -4 0 0 50 -1 0 12 0.0000 4 135 270 4350 900 498\001 diff --git a/doc/design-paper/cell-struct.pdf b/doc/design-paper/cell-struct.pdf Binary files differdeleted file mode 100644 index 8ca52deeb9..0000000000 --- a/doc/design-paper/cell-struct.pdf +++ /dev/null diff --git a/doc/design-paper/cell-struct.png b/doc/design-paper/cell-struct.png Binary files differdeleted file mode 100644 index 799bcc8c18..0000000000 --- a/doc/design-paper/cell-struct.png +++ /dev/null diff --git 
a/doc/design-paper/challenges.pdf b/doc/design-paper/challenges.pdf Binary files differdeleted file mode 100644 index d0a3e0923b..0000000000 --- a/doc/design-paper/challenges.pdf +++ /dev/null diff --git a/doc/design-paper/challenges.tex b/doc/design-paper/challenges.tex deleted file mode 100644 index 6949693bf0..0000000000 --- a/doc/design-paper/challenges.tex +++ /dev/null @@ -1,1505 +0,0 @@ -\documentclass{llncs} - -\usepackage{url} -\usepackage{amsmath} -\usepackage{epsfig} - -\setlength{\textwidth}{5.9in} -\setlength{\textheight}{8.4in} -\setlength{\topmargin}{.5cm} -\setlength{\oddsidemargin}{1cm} -\setlength{\evensidemargin}{1cm} - -\newenvironment{tightlist}{\begin{list}{$\bullet$}{ - \setlength{\itemsep}{0mm} - \setlength{\parsep}{0mm} - % \setlength{\labelsep}{0mm} - % \setlength{\labelwidth}{0mm} - % \setlength{\topsep}{0mm} - }}{\end{list}} - -\begin{document} - -\title{Challenges in deploying low-latency anonymity (DRAFT)} - -\author{Roger Dingledine\inst{1} \and -Nick Mathewson\inst{1} \and -Paul Syverson\inst{2}} -\institute{The Free Haven Project \email{<\{arma,nickm\}@freehaven.net>} \and -Naval Research Laboratory \email{<syverson@itd.nrl.navy.mil>}} - -\maketitle -\pagestyle{plain} - -\begin{abstract} - There are many unexpected or unexpectedly difficult obstacles to - deploying anonymous communications. Drawing on our experiences deploying - Tor (the second-generation onion routing network), we describe social - challenges and technical issues that must be faced - in building, deploying, and sustaining a scalable, distributed, low-latency - anonymity network. -\end{abstract} - -\section{Introduction} -% Your network is not practical unless it is sustainable and distributed. -Anonymous communication is full of surprises. This paper discusses some -unexpected challenges arising from our experiences deploying Tor, a -low-latency general-purpose anonymous communication system. 
We will discuss -some of the difficulties we have experienced and how we have met them (or how -we plan to meet them, if we know). We also discuss some less -troublesome open problems that we must nevertheless eventually address. -%We will describe both those future challenges that we intend to explore and -%those that we have decided not to explore and why. - -Tor is an overlay network for anonymizing TCP streams over the -Internet~\cite{tor-design}. It addresses limitations in earlier Onion -Routing designs~\cite{or-ih96,or-jsac98,or-discex00,or-pet00} by adding -perfect forward secrecy, congestion control, directory servers, data -integrity, configurable exit policies, and location-hidden services using -rendezvous points. Tor works on the real-world Internet, requires no special -privileges or kernel modifications, requires little synchronization or -coordination between nodes, and provides a reasonable trade-off between -anonymity, usability, and efficiency. - -We deployed the public Tor network in October 2003; since then it has -grown to over a hundred volunteer-operated nodes -and as much as 80 megabits of -average traffic per second. Tor's research strategy has focused on deploying -a network to as many users as possible; thus, we have resisted designs that -would compromise deployability by imposing high resource demands on node -operators, and designs that would compromise usability by imposing -unacceptable restrictions on which applications we support. Although this -strategy has -drawbacks (including a weakened threat model, as discussed below), it has -made it possible for Tor to serve many thousands of users and attract -funding from diverse sources whose goals range from security on a -national scale down to individual liberties. - -In~\cite{tor-design} we gave an overall view of Tor's -design and goals. Here we describe some policy, social, and technical -issues that we face as we continue deployment. 
-Rather than providing complete solutions to every problem, we -instead lay out the challenges and constraints that we have observed while -deploying Tor. In doing so, we aim to provide a research agenda -of general interest to projects attempting to build -and deploy practical, usable anonymity networks in the wild. - -%While the Tor design paper~\cite{tor-design} gives an overall view of its -%design and goals, -%this paper describes the policy and technical issues that Tor faces as -%we continue deployment. Rather than trying to provide complete solutions -%to every problem here, we lay out the assumptions and constraints -%that we have observed through deploying Tor in the wild. In doing so, we -%aim to create a research agenda for others to -%help in addressing these issues. -% Section~\ref{sec:what-is-tor} gives an -%overview of the Tor -%design and our goals. Sections~\ref{sec:crossroads-policy} -%and~\ref{sec:crossroads-design} go on to describe the practical challenges, -%both policy and technical respectively, -%that stand in the way of moving -%from a practical useful network to a practical useful anonymous network. - -%\section{What Is Tor} -\section{Background} -Here we give a basic overview of the Tor design and its properties, and -compare Tor to other low-latency anonymity designs. - -\subsection{Tor, threat models, and distributed trust} -\label{sec:what-is-tor} - -%Here we give a basic overview of the Tor design and its properties. For -%details on the design, assumptions, and security arguments, we refer -%the reader to the Tor design paper~\cite{tor-design}. - -Tor provides \emph{forward privacy}, so that users can connect to -Internet sites without revealing their logical or physical locations -to those sites or to observers. It also provides \emph{location-hidden -services}, so that servers can support authorized users without -giving an effective vector for physical or online attackers. 
-Tor provides these protections even when a portion of its -infrastructure is compromised. - -To connect to a remote server via Tor, the client software learns a signed -list of Tor nodes from one of several central \emph{directory servers}, and -incrementally creates a private pathway or \emph{circuit} of encrypted -connections through authenticated Tor nodes on the network, negotiating a -separate set of encryption keys for each hop along the circuit. The circuit -is extended one node at a time, and each node along the way knows only the -immediately previous and following nodes in the circuit, so no individual Tor -node knows the complete path that each fixed-sized data packet (or -\emph{cell}) will take. -%Because each node sees no more than one hop in the -%circuit, -Thus, neither an eavesdropper nor a compromised node can -see both the connection's source and destination. Later requests use a new -circuit, to complicate long-term linkability between different actions by -a single user. - -Tor also helps servers hide their locations while -providing services such as web publishing or instant -messaging. Using ``rendezvous points'', other Tor users can -connect to these authenticated hidden services, neither one learning the -other's network identity. - -Tor attempts to anonymize the transport layer, not the application layer. -This approach is useful for applications such as SSH -where authenticated communication is desired. However, when anonymity from -those with whom we communicate is desired, -application protocols that include personally identifying information need -additional application-level scrubbing proxies, such as -Privoxy~\cite{privoxy} for HTTP\@. Furthermore, Tor does not relay arbitrary -IP packets; it only anonymizes TCP streams and DNS requests -%, and only supports -%connections via SOCKS -(but see Section~\ref{subsec:tcp-vs-ip}). - -Most node operators do not want to allow arbitrary TCP traffic. % to leave -%their server. 
-To address this, Tor provides \emph{exit policies} so -each exit node can block the IP addresses and ports it is unwilling to allow. -Tor nodes advertise their exit policies to the directory servers, so that -clients can tell which nodes will support their connections. - -As of January 2005, the Tor network has grown to around a hundred nodes -on four continents, with a total capacity exceeding 1Gbit/s. Appendix A -shows a graph of the number of working nodes over time, as well as a -graph of the number of bytes being handled by the network over time. -The network is now sufficiently diverse for further development -and testing; but of course we always encourage new nodes -to join. - -Tor research and development has been funded by ONR and DARPA -for use in securing government -communications, and by the Electronic Frontier Foundation for use -in maintaining civil liberties for ordinary citizens online. The Tor -protocol is one of the leading choices -for the anonymizing layer in the European Union's PRIME directive to -help maintain privacy in Europe. -The AN.ON project in Germany -has integrated an independent implementation of the Tor protocol into -their popular Java Anon Proxy anonymizing client. -% This wide variety of -%interests helps maintain both the stability and the security of the -%network. - -\medskip -\noindent -{\bf Threat models and design philosophy.} -The ideal Tor network would be practical, useful and anonymous. When -trade-offs arise between these properties, Tor's research strategy has been -to remain useful enough to attract many users, -and practical enough to support them. Only subject to these -constraints do we try to maximize -anonymity.\footnote{This is not the only possible -direction in anonymity research: designs exist that provide more anonymity -than Tor at the expense of significantly increased resource requirements, or -decreased flexibility in application support (typically because of increased -latency). 
Such research does not typically abandon aspirations toward -deployability or utility, but instead tries to maximize deployability and -utility subject to a certain degree of structural anonymity (structural because -usability and practicality affect usage which affects the actual anonymity -provided by the network \cite{econymics,back01}).} -%{We believe that these -%approaches can be promising and useful, but that by focusing on deploying a -%usable system in the wild, Tor helps us experiment with the actual parameters -%of what makes a system ``practical'' for volunteer operators and ``useful'' -%for home users, and helps illuminate undernoticed issues which any deployed -%volunteer anonymity network will need to address.} -Because of our strategy, Tor has a weaker threat model than many designs in -the literature. In particular, because we -support interactive communications without impractically expensive padding, -we fall prey to a variety -of intra-network~\cite{back01,attack-tor-oak05,flow-correlation04} and -end-to-end~\cite{danezis:pet2004,SS03} anonymity-breaking attacks. - -Tor does not attempt to defend against a global observer. In general, an -attacker who can measure both ends of a connection through the Tor network -% I say 'measure' rather than 'observe', to encompass murdoch-danezis -% style attacks. -RD -can correlate the timing and volume of data on that connection as it enters -and leaves the network, and so link communication partners. -Known solutions to this attack would seem to require introducing a -prohibitive degree of traffic padding between the user and the network, or -introducing an unacceptable degree of latency (but see Section -\ref{subsec:mid-latency}). Also, it is not clear that these methods would -work at all against a minimally active adversary who could introduce timing -patterns or additional traffic. Thus, Tor only attempts to defend against -external observers who cannot observe both sides of a user's connections. 
- - -Against internal attackers who sign up Tor nodes, the situation is more -complicated. In the simplest case, if an adversary has compromised $c$ of -$n$ nodes on the Tor network, then the adversary will be able to compromise -a random circuit with probability $\frac{c^2}{n^2}$ (since the circuit -initiator chooses hops randomly). But there are -complicating factors: -(1)~If the user continues to build random circuits over time, an adversary - is pretty certain to see a statistical sample of the user's traffic, and - thereby can build an increasingly accurate profile of her behavior. (See - Section~\ref{subsec:helper-nodes} for possible solutions.) -(2)~An adversary who controls a popular service outside the Tor network - can be certain to observe all connections to that service; he - can therefore trace connections to that service with probability - $\frac{c}{n}$. -(3)~Users do not in fact choose nodes with uniform probability; they - favor nodes with high bandwidth or uptime, and exit nodes that - permit connections to their favorite services. -(See Section~\ref{subsec:routing-zones} for discussion of larger -adversaries and our dispersal goals.) - -% I'm trying to make this paragraph work without reference to the -% analysis/confirmation distinction, which we haven't actually introduced -% yet, and which we realize isn't very stable anyway. Also, I don't want to -% deprecate these attacks if we can't demonstrate that they don't work, since -% in case they *do* turn out to work well against Tor, we'll look pretty -% foolish. -NM -More powerful attacks may exist. In \cite{hintz-pet02} it was -shown that an attacker who can catalog data volumes of popular -responder destinations (say, websites with consistent data volumes) may not -need to -observe both ends of a stream to learn source-destination links for those -responders. -Similarly, latencies of going through various routes can be -cataloged~\cite{back01} to connect endpoints. 
-% Also, \cite{kesdogan:pet2002} takes the -% attack another level further, to narrow down where you could be -% based on an intersection attack on subpages in a website. -RD -It has not yet been shown whether these attacks will succeed or fail -in the presence of the variability and volume quantization introduced by the -Tor network, but it seems likely that these factors will at best delay -rather than halt the attacks in the cases where they succeed. -Along similar lines, the same paper suggests a ``clogging -attack'' in which the throughput on a circuit is observed to slow -down when an adversary clogs the right nodes with his own traffic. -To determine the nodes in a circuit this attack requires the ability -to continuously monitor the traffic exiting the network on a circuit -that is up long enough to probe all network nodes in binary fashion. -% Though somewhat related, clogging and interference are really different -% attacks with different assumptions about adversary distribution and -% capabilities as well as different techniques. -pfs -Murdoch and Danezis~\cite{attack-tor-oak05} show a practical -interference attack against portions of -the fifty node Tor network as deployed in mid 2004. -An outside attacker can actively trace a circuit through the Tor network -by observing changes in the latency of his -own traffic sent through various Tor nodes. This can be done -simultaneously at multiple nodes; however, like clogging, -this attack only reveals -the Tor nodes in the circuit, not initiator and responder addresses, -so it is still necessary to discover the endpoints to complete an -effective attack. Increasing the size and diversity of the Tor network may -help counter these attacks. - -%discuss $\frac{c^2}{n^2}$, except how in practice the chance of owning -%the last hop is not $c/n$ since that doesn't take the destination (website) -%into account. 
so in cases where the adversary does not also control the -%final destination we're in good shape, but if he *does* then we'd be better -%off with a system that lets each hop choose a path. -% -%Isn't it more accurate to say ``If the adversary _always_ controls the final -% dest, we would be just as well off with such as system.'' ? If not, why -% not? -nm -% Sure. In fact, better off, since they seem to scale more easily. -rd - -%Murdoch and Danezis describe an attack -%\cite{attack-tor-oak05} that lets an attacker determine the nodes used -%in a circuit; yet s/he cannot identify the initiator or responder, -%e.g., client or web server, through this attack. So the endpoints -%remain secure, which is the goal. It is conceivable that an -%adversary could attack or set up observation of all connections -%to an arbitrary Tor node in only a few minutes. If such an adversary -%were to exist, s/he could use this probing to remotely identify a node -%for further attack. Of more likely immediate practical concern -%an adversary with active access to the responder traffic -%wants to keep a circuit alive long enough to attack an identified -%node. Thus it is important to prevent the responding end of the circuit -%from keeping it open indefinitely. -%Also, someone could identify nodes in this way and if in their -%jurisdiction, immediately get a subpoena (if they even need one) -%telling the node operator(s) that she must retain all the active -%circuit data she now has. -%Further, the enclave model, which had previously looked to be the most -%generally secure, seems particularly threatened by this attack, since -%it identifies endpoints when they're also nodes in the Tor network: -%see Section~\ref{subsec:helper-nodes} for discussion of some ways to -%address this issue. - -\medskip -\noindent -{\bf Distributed trust.} -In practice Tor's threat model is based on -dispersal and diversity. 
-Our defense lies in having a diverse enough set of nodes -to prevent most real-world -adversaries from being in the right places to attack users, -by distributing each transaction -over several nodes in the network. This ``distributed trust'' approach -means the Tor network can be safely operated and used by a wide variety -of mutually distrustful users, providing sustainability and security. -%than some previous attempts at anonymizing networks. - -No organization can achieve this security on its own. If a single -corporation or government agency were to build a private network to -protect its operations, any connections entering or leaving that network -would be obviously linkable to the controlling organization. The members -and operations of that agency would be easier, not harder, to distinguish. - -Instead, to protect our networks from traffic analysis, we must -collaboratively blend the traffic from many organizations and private -citizens, so that an eavesdropper can't tell which users are which, -and who is looking for what information. %By bringing more users onto -%the network, all users become more secure~\cite{econymics}. -%[XXX I feel uncomfortable saying this last sentence now. -RD] -%[So, I took it out. I think we can do without it. -PFS] -The Tor network has a broad range of users, including ordinary citizens -concerned about their privacy, corporations -who don't want to reveal information to their competitors, and law -enforcement and government intelligence agencies who need -to do operations on the Internet without being noticed. -Naturally, organizations will not want to depend on others for their -security. If most participating providers are reliable, Tor tolerates -some hostile infiltration of the network. For maximum protection, -the Tor design includes an enclave approach that lets data be encrypted -(and authenticated) end-to-end, so high-sensitivity users can be sure it -hasn't been read or modified. 
This even works for Internet services that -don't have built-in encryption and authentication, such as unencrypted -HTTP or chat, and it requires no modification of those services. - -\subsection{Related work} -Tor differs from other deployed systems for traffic analysis resistance -in its security and flexibility. Mix networks such as -Mixmaster~\cite{mixmaster-spec} or its successor Mixminion~\cite{minion-design} -gain the highest degrees of anonymity at the expense of introducing highly -variable delays, making them unsuitable for applications such as web -browsing. Commercial single-hop -proxies~\cite{anonymizer} can provide good performance, but -a single compromise can expose all users' traffic, and a single-point -eavesdropper can perform traffic analysis on the entire network. -%Also, their proprietary implementations place any infrastructure that -%depends on these single-hop solutions at the mercy of their providers' -%financial health as well as network security. -The Java -Anon Proxy~\cite{web-mix} provides similar functionality to Tor but -handles only web browsing rather than all TCP\@. -%Some peer-to-peer file-sharing overlay networks such as -%Freenet~\cite{freenet} and Mute~\cite{mute} -The Freedom -network from Zero-Knowledge Systems~\cite{freedom21-security} -was even more flexible than Tor in -transporting arbitrary IP packets, and also supported -pseudonymity in addition to anonymity; but it had -a different approach to sustainability (collecting money from users -and paying ISPs to run Tor nodes), and was eventually shut down due to financial -load. Finally, %potentially more scalable -% [I had added 'potentially' because the scalability of these designs -% is not established, and I am uncomfortable making the -% bolder unmodified assertion. Roger took 'potentially' out. 
-% Here's an attempt at more neutral wording -pfs] -peer-to-peer designs that are intended to be more scalable, -for example Tarzan~\cite{tarzan:ccs02} and -MorphMix~\cite{morphmix:fc04}, have been proposed in the literature but -have not been fielded. These systems differ somewhat -in threat model and presumably practical resistance to threats. -Note that MorphMix differs from Tor only in -node discovery and circuit setup; so Tor's architecture is flexible -enough to contain a MorphMix experiment. -We direct the interested reader -to~\cite{tor-design} for a more in-depth review of related work. - -%XXXX six-four. crowds. i2p. - -%XXXX -%have a serious discussion of morphmix's assumptions, since they would -%seem to be the direct competition. in fact tor is a flexible architecture -%that would encompass morphmix, and they're nearly identical except for -%path selection and node discovery. and the trust system morphmix has -%seems overkill (and/or insecure) based on the threat model we've picked. -% this para should probably move to the scalability / directory system. -RD -% Nope. Cut for space, except for small comment added above -PFS - -\section{Social challenges} - -Many of the issues the Tor project needs to address extend beyond -system design and technology development. In particular, the -Tor project's \emph{image} with respect to its users and the rest of -the Internet impacts the security it can provide. -With this image issue in mind, this section discusses the Tor user base and -Tor's interaction with other services on the Internet. - -\subsection{Communicating security} - -Usability for anonymity systems -contributes to their security, because usability -affects the possible anonymity set~\cite{econymics,back01}. -Conversely, an unusable system attracts few users and thus can't provide -much anonymity. 
- -This phenomenon has a second-order effect: knowing this, users should -choose which anonymity system to use based in part on how usable -and secure -\emph{others} will find it, in order to get the protection of a larger -anonymity set. Thus we might supplement the adage ``usability is a security -parameter''~\cite{back01} with a new one: ``perceived usability is a -security parameter.'' From here we can better understand the effects -of publicity on security: the more convincing your -advertising, the more likely people will believe you have users, and thus -the more users you will attract. Perversely, over-hyped systems (if they -are not too broken) may be a better choice than modestly promoted ones, -if the hype attracts more users~\cite{usability-network-effect}. - -So it follows that we should come up with ways to accurately communicate -the available security levels to the user, so she can make informed -decisions. JAP aims to do this by including a -comforting `anonymity meter' dial in the software's graphical interface, -giving the user an impression of the level of protection for her current -traffic. - -However, there's a catch. For users to share the same anonymity set, -they need to act like each other. An attacker who can distinguish -a given user's traffic from the rest of the traffic will not be -distracted by anonymity set size. For high-latency systems like -Mixminion, where the threat model is based on mixing messages with each -other, there's an arms race between end-to-end statistical attacks and -counter-strategies~\cite{statistical-disclosure,minion-design,e2e-traffic,trickle02}. -But for low-latency systems like Tor, end-to-end \emph{traffic -correlation} attacks~\cite{danezis:pet2004,defensive-dropping,SS03} -allow an attacker who can observe both ends of a communication -to correlate packet timing and volume, quickly linking -the initiator to her destination. 
- -Like Tor, the current JAP implementation does not pad connections -apart from using small fixed-size cells for transport. In fact, -JAP's cascade-based network topology may be more vulnerable to these -attacks, because its network has fewer edges. JAP was born out of -the ISDN mix design~\cite{isdn-mixes}, where padding made sense because -every user had a fixed bandwidth allocation and altering the timing -pattern of packets could be immediately detected. But in its current context -as an Internet web anonymizer, adding sufficient padding to JAP -would probably be prohibitively expensive and ineffective against a -minimally active attacker.\footnote{Even if JAP could -fund higher-capacity nodes indefinitely, our experience -suggests that many users would not accept the increased per-user -bandwidth requirements, leading to an overall much smaller user base. But -see Section~\ref{subsec:mid-latency}.} Therefore, since under this threat -model the number of concurrent users does not seem to have much impact -on the anonymity provided, we suggest that JAP's anonymity meter is not -accurately communicating security levels to its users. - -On the other hand, while the number of active concurrent users may not -matter as much as we'd like, it still helps to have some other users -on the network. We investigate this issue next. - -\subsection{Reputability and perceived social value} -Another factor impacting the network's security is its reputability: -the perception of its social value based on its current user base. If Alice is -the only user who has ever downloaded the software, it might be socially -accepted, but she's not getting much anonymity. Add a thousand -activists, and she's anonymous, but everyone thinks she's an activist too. -Add a thousand -diverse citizens (cancer survivors, privacy enthusiasts, and so on) -and now she's harder to profile. 
- -Furthermore, the network's reputability affects its operator base: more people -are willing to run a service if they believe it will be used by human rights -workers than if they believe it will be used exclusively for disreputable -ends. This effect becomes stronger if node operators themselves think they -will be associated with their users' disreputable ends. - -So the more cancer survivors on Tor, the better for the human rights -activists. The more malicious hackers, the worse for the normal users. Thus, -reputability is an anonymity issue for two reasons. First, it impacts -the sustainability of the network: a network that's always about to be -shut down has difficulty attracting and keeping adequate nodes. -Second, a disreputable network is more vulnerable to legal and -political attacks, since it will attract fewer supporters. - -While people therefore have an incentive for the network to be used for -``more reputable'' activities than their own, there are still trade-offs -involved when it comes to anonymity. To follow the above example, a -network used entirely by cancer survivors might welcome file sharers -onto the network, though of course they'd prefer a wider -variety of users. - -Reputability becomes even more tricky in the case of privacy networks, -since the good uses of the network (such as publishing by journalists in -dangerous countries) are typically kept private, whereas network abuses -or other problems tend to be more widely publicized. - -The impact of public perception on security is especially important -during the bootstrapping phase of the network, where the first few -widely publicized uses of the network can dictate the types of users it -attracts next. -As an example, some U.S.~Department of Energy -penetration testing engineers are tasked with compromising DoE computers -from the outside. 
They only have a limited number of ISPs from which to -launch their attacks, and they found that the defenders were recognizing -attacks because they came from the same IP space. These engineers wanted -to use Tor to hide their tracks. First, from a technical standpoint, -Tor does not support the variety of IP packets one would like to use in -such attacks (see Section~\ref{subsec:tcp-vs-ip}). But aside from this, -we also decided that it would probably be poor precedent to encourage -such use---even legal use that improves national security---and managed -to dissuade them. - -%% "outside of academia, jap has just lost, permanently". (That is, -%% even though the crime detection issues are resolved and are unlikely -%% to go down the same way again, public perception has not been kind.) - -\subsection{Sustainability and incentives} -One of the unsolved problems in low-latency anonymity designs is -how to keep the nodes running. ZKS's Freedom network -depended on paying third parties to run its servers; the JAP project -depends on grants to pay for its bandwidth and -administrative expenses. In Tor, bandwidth and administrative costs are -distributed across the volunteers who run Tor nodes, so we at least have -reason to think that the Tor network could survive without continued research -funding.\footnote{It also helps that Tor is implemented with free and open - source software that can be maintained by anybody with the ability and - inclination.} But why are these volunteers running nodes, and what can we -do to encourage more volunteers to do so? - -We have not formally surveyed Tor node operators to learn why they are -running nodes, but -from the information they have provided, it seems that many of them run Tor -nodes for reasons of personal interest in privacy issues. It is possible -that others are running Tor nodes to protect their own -anonymity, but of course they are -hardly likely to tell us specifics if they are. 
-%Significantly, Tor's threat model changes the anonymity incentives for running -%a node. In a high-latency mix network, users can receive additional -%anonymity by running their own node, since doing so obscures when they are -%injecting messages into the network. But, anybody observing all I/O to a Tor -%node can tell when the node is generating traffic that corresponds to -%none of its incoming traffic. -% -%I didn't buy the above for reasons subtle enough that I just cut it -PFS -Tor exit node operators do attain a degree of -``deniability'' for traffic that originates at that exit node. For - example, it is likely in practice that HTTP requests from a Tor node's IP - will be assumed to be from the Tor network. - More significantly, people and organizations who use Tor for - anonymity depend on the - continued existence of the Tor network to do so; running a node helps to - keep the network operational. -%\item Local Tor entry and exit nodes allow users on a network to run in an -% `enclave' configuration. [XXXX need to resolve this. They would do this -% for E2E encryption + auth?] - - -%We must try to make the costs of running a Tor node easily minimized. -Since Tor is run by volunteers, the most crucial software usability issue is -usability by operators: when an operator leaves, the network becomes less -usable by everybody. To keep operators pleased, we must try to keep Tor's -resource and administrative demands as low as possible. - -Because of ISP billing structures, many Tor operators have underused capacity -that they are willing to donate to the network, at no additional monetary -cost to them. Features to limit bandwidth have been essential to adoption. -Also useful has been a ``hibernation'' feature that allows a Tor node that -wants to provide high bandwidth, but no more than a certain amount in a -given billing cycle, to become dormant once its bandwidth is exhausted, and -to reawaken at a random offset into the next billing cycle. 
This feature has -interesting policy implications, however; see -the next section below. -Exit policies help to limit administrative costs by limiting the frequency of -abuse complaints (see Section~\ref{subsec:tor-and-blacklists}). We discuss -technical incentive mechanisms in Section~\ref{subsec:incentives-by-design}. - -%[XXXX say more. Why else would you run a node? What else can we do/do we -% already do to make running a node more attractive?] -%[We can enforce incentives; see Section 6.1. We can rate-limit clients. -% We can put "top bandwidth nodes lists" up a la seti@home.] - -\subsection{Bandwidth and file-sharing} -\label{subsec:bandwidth-and-file-sharing} -%One potentially problematical area with deploying Tor has been our response -%to file-sharing applications. -Once users have configured their applications to work with Tor, the largest -remaining usability issue is performance. Users begin to suffer -when websites ``feel slow.'' -Clients currently try to build their connections through nodes that they -guess will have enough bandwidth. But even if capacity is allocated -optimally, it seems unlikely that the current network architecture will have -enough capacity to provide every user with as much bandwidth as she would -receive if she weren't using Tor, unless far more nodes join the network. - -%Limited capacity does not destroy the network, however. Instead, usage tends -%towards an equilibrium: when performance suffers, users who value performance -%over anonymity tend to leave the system, thus freeing capacity until the -%remaining users on the network are exactly those willing to use that capacity -%there is. - -Much of Tor's recent bandwidth difficulties have come from file-sharing -applications. These applications provide two challenges to -any anonymizing network: their intensive bandwidth requirement, and the -degree to which they are associated (correctly or not) with copyright -infringement. 
- -High-bandwidth protocols can make the network unresponsive, -but tend to be somewhat self-correcting as lack of bandwidth drives away -users who need it. Issues of copyright violation, -however, are more interesting. Typical exit node operators want to help -people achieve private and anonymous speech, not to help people (say) host -Vin Diesel movies for download; and typical ISPs would rather not -deal with customers who draw menacing letters -from the MPAA\@. While it is quite likely that the operators are doing nothing -illegal, many ISPs have policies of dropping users who get repeated legal -threats regardless of the merits of those threats, and many operators would -prefer to avoid receiving even meritless legal threats. -So when letters arrive, operators are likely to face -pressure to block file-sharing applications entirely, in order to avoid the -hassle. - -But blocking file-sharing is not easy: popular -protocols have evolved to run on non-standard ports to -get around other port-based bans. Thus, exit node operators who want to -block file-sharing would have to find some way to integrate Tor with a -protocol-aware exit filter. This could be a technically expensive -undertaking, and one with poor prospects: it is unlikely that Tor exit nodes -would succeed where so many institutional firewalls have failed. Another -possibility for sensitive operators is to run a restrictive node that -only permits exit connections to a restricted range of ports that are -not frequently associated with file sharing. There are increasingly few such -ports. - -Other possible approaches might include rate-limiting connections, especially -long-lived connections or connections to file-sharing ports, so that -high-bandwidth connections do not flood the network. We might also want to -give priority to cells on low-bandwidth connections to keep them interactive, -but this could have negative anonymity implications. 
- -For the moment, it seems that Tor's bandwidth issues have rendered it -unattractive for bulk file-sharing traffic; this may continue to be so in the -future. Nevertheless, Tor will likely remain attractive for limited use in -file-sharing protocols that have separate control and data channels. - -%[We should say more -- but what? That we'll see a similar -% equilibriating effect as with bandwidth, where sensitive ops switch to -% middleman, and we become less useful for file-sharing, so the file-sharing -% people back off, so we get more ops since there's less file-sharing, so the -% file-sharers come back, etc.] - -%XXXX -%in practice, plausible deniability is hypothetical and doesn't seem very -%convincing. if ISPs find the activity antisocial, they don't care *why* -%your computer is doing that behavior. - -\subsection{Tor and blacklists} -\label{subsec:tor-and-blacklists} - -It was long expected that, alongside legitimate users, Tor would also -attract troublemakers who exploit Tor to abuse services on the -Internet with vandalism, rude mail, and so on. -Our initial answer to this situation was to use ``exit policies'' -to allow individual Tor nodes to block access to specific IP/port ranges. -This approach aims to make operators more willing to run Tor by allowing -them to prevent their nodes from being used for abusing particular -services. For example, all Tor nodes currently block SMTP (port 25), -to avoid being used for spam. - -Exit policies are useful, but they are insufficient: if not all nodes -block a given service, that service may try to block Tor instead. -While being blockable is important to being good netizens, we would like -to encourage services to allow anonymous access. Services should not -need to decide between blocking legitimate anonymous use and allowing -unlimited abuse. - -This is potentially a bigger problem than it may appear. -On the one hand, services should be allowed to refuse connections from -sources of possible abuse. 
-But when a Tor node administrator decides whether he prefers to be able -to post to Wikipedia from his IP address, or to allow people to read -Wikipedia anonymously through his Tor node, he is making the decision -for others as well. (For a while, Wikipedia -blocked all posting from all Tor nodes based on IP addresses.) If -the Tor node shares an address with a campus or corporate NAT, -then the decision can prevent the entire population from posting. -This is a loss for both Tor -and Wikipedia: we don't want to compete for (or divvy up) the -NAT-protected entities of the world. - -Worse, many IP blacklists are coarse-grained: they ignore Tor's exit -policies, partly because it's easier to implement and partly -so they can punish -all Tor nodes. One IP blacklist even bans -every class C network that contains a Tor node, and recommends banning SMTP -from these networks even though Tor does not allow SMTP at all. This -strategic decision aims to discourage the -operation of anything resembling an open proxy by encouraging its neighbors -to shut it down to get unblocked themselves. This pressure even -affects Tor nodes running in middleman mode (disallowing all exits) when -those nodes are blacklisted too. - -Problems of abuse occur mainly with services such as IRC networks and -Wikipedia, which rely on IP blocking to ban abusive users. While at first -blush this practice might seem to depend on the anachronistic assumption that -each IP is an identifier for a single user, it is actually more reasonable in -practice: it assumes that non-proxy IPs are a costly resource, and that an -abuser cannot change IPs at will. By blocking IPs which are used by Tor -nodes, open proxies, and service abusers, these systems hope to make -ongoing abuse difficult. Although the system is imperfect, it works -tolerably well for them in practice. - -Of course, we would prefer that legitimate anonymous users be able to -access abuse-prone services. 
One conceivable approach would require -would-be IRC users, for instance, to register accounts if they want to -access the IRC network from Tor. In practice this would not -significantly impede abuse if creating new accounts were easily automatable; -this is why services use IP blocking. To deter abuse, pseudonymous -identities need to require a significant switching cost in resources or human -time. Some popular webmail applications -impose cost with Reverse Turing Tests, but this step may not deter all -abusers. Freedom used blind signatures to limit -the number of pseudonyms for each paying account, but Tor has neither the -ability nor the desire to collect payment. - -We stress that as far as we can tell, most Tor uses are not -abusive. Most services have not complained, and others are actively -working to find ways besides banning to cope with the abuse. For example, -the Freenode IRC network had a problem with a coordinated group of -abusers joining channels and subtly taking over the conversation; but -when they labelled all users coming from Tor IPs as ``anonymous users,'' -removing the ability of the abusers to blend in, the abuse stopped. - -%The use of squishy IP-based ``authentication'' and ``authorization'' -%has not broken down even to the level that SSNs used for these -%purposes have in commercial and public record contexts. Externalities -%and misplaced incentives cause a continued focus on fighting identity -%theft by protecting SSNs rather than developing better authentication -%and incentive schemes \cite{price-privacy}. Similarly we can expect a -%continued use of identification by IP number as long as there is no -%workable alternative. - -%[XXX Mention correct DNS-RBL implementation. -NM] - -\section{Design choices} - -In addition to social issues, Tor also faces some design trade-offs that must -be investigated as the network develops. 
- -\subsection{Transporting the stream vs transporting the packets} -\label{subsec:stream-vs-packet} -\label{subsec:tcp-vs-ip} - -Tor transports streams; it does not tunnel packets. -It has often been suggested that like the old Freedom -network~\cite{freedom21-security}, Tor should -``obviously'' anonymize IP traffic -at the IP layer. Before this could be done, many issues need to be resolved: - -\begin{enumerate} -\setlength{\itemsep}{0mm} -\setlength{\parsep}{0mm} -\item \emph{IP packets reveal OS characteristics.} We would still need to do -IP-level packet normalization, to stop things like TCP fingerprinting -attacks. %There likely exist libraries that can help with this. -This is unlikely to be a trivial task, given the diversity and complexity of -TCP stacks. -\item \emph{Application-level streams still need scrubbing.} We still need -Tor to be easy to integrate with user-level application-specific proxies -such as Privoxy. So it's not just a matter of capturing packets and -anonymizing them at the IP layer. -\item \emph{Certain protocols will still leak information.} For example, we -must rewrite DNS requests so they are delivered to an unlinkable DNS server -rather than the DNS server at a user's ISP; thus, we must understand the -protocols we are transporting. -\item \emph{The crypto is unspecified.} First we need a block-level encryption -approach that can provide security despite -packet loss and out-of-order delivery. Freedom allegedly had one, but it was -never publicly specified. -Also, TLS over UDP is not yet implemented or -specified, though some early work has begun~\cite{dtls}. -\item \emph{We'll still need to tune network parameters.} Since the above -encryption system will likely need sequence numbers (and maybe more) to do -replay detection, handle duplicate frames, and so on, we will be reimplementing -a subset of TCP anyway---a notoriously tricky path. 
-\item \emph{Exit policies for arbitrary IP packets mean building a secure -IDS\@.} Our node operators tell us that exit policies are one of -the main reasons they're willing to run Tor. -Adding an Intrusion Detection System to handle exit policies would -increase the security complexity of Tor, and would likely not work anyway, -as evidenced by the entire field of IDS and counter-IDS papers. Many -potential abuse issues are resolved by the fact that Tor only transports -valid TCP streams (as opposed to arbitrary IP including malformed packets -and IP floods), so exit policies become even \emph{more} important as -we become able to transport IP packets. We also need to compactly -describe exit policies so clients can predict -which nodes will allow which packets to exit. -\item \emph{The Tor-internal name spaces would need to be redesigned.} We -support hidden service {\tt{.onion}} addresses (and other special addresses, -like {\tt{.exit}} which lets the user request a particular exit node), -by intercepting the addresses when they are passed to the Tor client. -Doing so at the IP level would require a more complex interface between -Tor and the local DNS resolver. -\end{enumerate} - -This list is discouragingly long, but being able to transport more -protocols obviously has some advantages. It would be good to learn which -items are actual roadblocks and which are easier to resolve than we think. - -To be fair, Tor's stream-based approach has run into -stumbling blocks as well. While Tor supports the SOCKS protocol, -which provides a standardized interface for generic TCP proxies, many -applications do not support SOCKS\@. For them we already need to -replace the networking system calls with SOCKS-aware -versions, or run a SOCKS tunnel locally, neither of which is -easy for the average user. %---even with good instructions. 
-Even when applications can use SOCKS, they often make DNS requests -themselves before handing an IP address to Tor, which advertises -where the user is about to connect. -We are still working on more usable solutions. - -%So to actually provide good anonymity, we need to make sure that -%users have a practical way to use Tor anonymously. Possibilities include -%writing wrappers for applications to anonymize them automatically; improving -%the applications' support for SOCKS; writing libraries to help application -%writers use Tor properly; and implementing a local DNS proxy to reroute DNS -%requests to Tor so that applications can simply point their DNS resolvers at -%localhost and continue to use SOCKS for data only. - -\subsection{Mid-latency} -\label{subsec:mid-latency} - -Some users need to resist traffic correlation attacks. Higher-latency -mix-networks introduce variability into message -arrival times: as timing variance increases, timing correlation attacks -require increasingly more data~\cite{e2e-traffic}. Can we improve Tor's -resistance without losing too much usability? - -We need to learn whether we can trade a small increase in latency -for a large anonymity increase, or if we'd end up trading a lot of -latency for only a minimal security gain. A trade-off might be worthwhile -even if we -could only protect certain use cases, such as infrequent short-duration -transactions. % To answer this question -We might adapt the techniques of~\cite{e2e-traffic} to a lower-latency mix -network, where the messages are batches of cells in temporally clustered -connections. These large fixed-size batches can also help resist volume -signature attacks~\cite{hintz-pet02}. We could also experiment with traffic -shaping to get a good balance of throughput and security. 
-%Other padding regimens might supplement the -%mid-latency option; however, we should continue the caution with which -%we have always approached padding lest the overhead cost us too much -%performance or too many volunteers. - -We must keep usability in mind too. How much can latency increase -before we drive users away? We've already been forced to increase -latency slightly, as our growing network incorporates more DSL and -cable-modem nodes and more nodes in distant continents. Perhaps we can -harness this increased latency to improve anonymity rather than just -reduce usability. Further, if we let clients label certain circuits as -mid-latency as they are constructed, we could handle both types of traffic -on the same network, giving users a choice between speed and security---and -giving researchers a chance to experiment with parameters to improve the -quality of those choices. - -\subsection{Enclaves and helper nodes} -\label{subsec:helper-nodes} - -It has long been thought that users can improve their anonymity by -running their own node~\cite{tor-design,or-ih96,or-pet00}, and using -it in an \emph{enclave} configuration, where all their circuits begin -at the node under their control. Running Tor clients or servers at -the enclave perimeter is useful when policy or other requirements -prevent individual machines within the enclave from running Tor -clients~\cite{or-jsac98,or-discex00}. - -Of course, Tor's default path length of -three is insufficient for these enclaves, since the entry and/or exit -% [edit war: without the ``and/'' the natural reading here -% is aut rather than vel. And the use of the plural verb does not work -pfs] -themselves are sensitive. Tor thus increments path length by one -for each sensitive endpoint in the circuit. -Enclaves also help to protect against end-to-end attacks, since it's -possible that traffic coming from the node has simply been relayed from -elsewhere. 
However, if the node has recognizable behavior patterns, -an attacker who runs nodes in the network can triangulate over time to -gain confidence that it is in fact originating the traffic. Wright et -al.~\cite{wright03} introduce the notion of a \emph{helper node}---a -single fixed entry node for each user---to combat this \emph{predecessor -attack}. - -However, the attack in~\cite{attack-tor-oak05} shows that simply adding -to the path length, or using a helper node, may not protect an enclave -node. A hostile web server can send constant interference traffic to -all nodes in the network, and learn which nodes are involved in the -circuit (though at least in the current attack, he can't learn their -order). Using randomized path lengths may help some, since the attacker -will never be certain he has identified all nodes in the path unless -he probes the entire network, but as -long as the network remains small this attack will still be feasible. - -Helper nodes also aim to help Tor clients, because choosing entry and exit -points -randomly and changing them frequently allows an attacker who controls -even a few nodes to eventually link some of their destinations. The goal -is to take the risk once and for all about choosing a bad entry node, -rather than taking a new risk for each new circuit. (Choosing fixed -exit nodes is less useful, since even an honest exit node still doesn't -protect against a hostile website.) But obstacles remain before -we can implement helper nodes. -For one, the literature does not describe how to choose helpers from a list -of nodes that changes over time. If Alice is forced to choose a new entry -helper every $d$ days and $c$ of the $n$ nodes are bad, she can expect -to choose a compromised node around -every $dc/n$ days. Statistically over time this approach only helps -if she is better at choosing honest helper nodes than at choosing -honest nodes. 
Worse, an attacker with the ability to DoS nodes could -force users to switch helper nodes more frequently, or remove -other candidate helpers. - -%Do general DoS attacks have anonymity implications? See e.g. Adam -%Back's IH paper, but I think there's more to be pointed out here. -RD -% Not sure what you want to say here. -NM - -%Game theory for helper nodes: if Alice offers a hidden service on a -%server (enclave model), and nobody ever uses helper nodes, then against -%George+Steven's attack she's totally nailed. If only Alice uses a helper -%node, then she's still identified as the source of the data. If everybody -%uses a helper node (including Alice), then the attack identifies the -%helper node and also Alice, and knows which one is which. If everybody -%uses a helper node (but not Alice), then the attacker figures the real -%source was a client that is using Alice as a helper node. [How's my -%logic here?] -RD -% -% Not sure about the logic. For the attack to work with helper nodes, the -%attacker needs to guess that Alice is running the hidden service, right? -%Otherwise, how can he know to measure her traffic specifically? -NM -% -% In the Murdoch-Danezis attack, the adversary measures all servers. -RD - -%point to routing-zones section re: helper nodes to defend against -%big stuff. - -\subsection{Location-hidden services} -\label{subsec:hidden-services} - -% This section is first up against the wall when the revolution comes. - -Tor's \emph{rendezvous points} -let users provide TCP services to other Tor users without revealing -the service's location. Since this feature is relatively recent, we describe -here -a couple of our early observations from its deployment. - -First, our implementation of hidden services seems less hidden than we'd -like, since they build a different rendezvous circuit for each user, -and an external adversary can induce them to -produce traffic. 
This insecurity means that they may not be suitable as -a building block for Free Haven~\cite{freehaven-berk} or other anonymous -publishing systems that aim to provide long-term security, though helper -nodes, as discussed above, would seem to help. - -\emph{Hot-swap} hidden services, where more than one location can -provide the service and loss of any one location does not imply a -change in service, would help foil intersection and observation attacks -where an adversary monitors availability of a hidden service and also -monitors whether certain users or servers are online. The design -challenges in providing such services without otherwise compromising -the hidden service's anonymity remain an open problem; -however, see~\cite{move-ndss05}. - -In practice, hidden services are used for more than just providing private -access to a web server or IRC server. People are using hidden services -as a poor man's VPN and firewall-buster. Many people want to be able -to connect to the computers in their private network via secure shell, -and rather than playing with dyndns and trying to pierce holes in their -firewall, they run a hidden service on the inside and then rendezvous -with that hidden service externally. - -News sites like Bloggers Without Borders (www.b19s.org) are advertising -a hidden-service address on their front page. Doing this can provide -increased robustness if they use the dual-IP approach we describe -in~\cite{tor-design}, -but in practice they do it to increase visibility -of the Tor project and their support for privacy, and to offer -a way for their users, using unmodified software, to get end-to-end -encryption and authentication to their website. 
- -\subsection{Location diversity and ISP-class adversaries} -\label{subsec:routing-zones} - -Anonymity networks have long relied on diversity of node location for -protection against attacks---typically an adversary who can observe a -larger fraction of the network can launch a more effective attack. One -way to achieve dispersal involves growing the network so a given adversary -sees less. Alternately, we can arrange the topology so traffic can enter -or exit at many places (for example, by using a free-route network -like Tor rather than a cascade network like JAP). Lastly, we can use -distributed trust to spread each transaction over multiple jurisdictions. -But how do we decide whether two nodes are in related locations? - -Feamster and Dingledine defined a \emph{location diversity} metric -in~\cite{feamster:wpes2004}, and began investigating a variant of location -diversity based on the fact that the Internet is divided into thousands of -independently operated networks called {\em autonomous systems} (ASes). -The key insight from their paper is that while we typically think of a -connection as going directly from the Tor client to the first Tor node, -actually it traverses many different ASes on each hop. An adversary at -any of these ASes can monitor or influence traffic. Specifically, given -plausible initiators and recipients, and given random path selection, -some ASes in the simulation were able to observe 10\% to 30\% of the -transactions (that is, learn both the origin and the destination) on -the deployed Tor network (33 nodes as of June 2004). - -The paper concludes that for best protection against the AS-level -adversary, nodes should be in ASes that have the most links to other ASes: -Tier-1 ISPs such as AT\&T and Abovenet. Further, a given transaction -is safest when it starts or ends in a Tier-1 ISP\@. 
Therefore, assuming -initiator and responder are both in the U.S., it actually \emph{hurts} -our location diversity to use far-flung nodes in -continents like Asia or South America. -% it's not just entering or exiting from them. using them as the middle -% hop reduces your effective path length, which you presumably don't -% want because you chose that path length for a reason. -% -% Not sure I buy that argument. Two end nodes in the right ASs to -% discourage linking are still not known to each other. If some -% adversary in a single AS can bridge the middle node, it shouldn't -% therefore be able to identify initiator or responder; although it could -% contribute to further attacks given more assumptions. -% Nonetheless, no change to the actual text for now. - -Many open questions remain. First, it will be an immense engineering -challenge to get an entire BGP routing table to each Tor client, or to -summarize it sufficiently. Without a local copy, clients won't be -able to safely predict what ASes will be traversed on the various paths -through the Tor network to the final destination. Tarzan~\cite{tarzan:ccs02} -and MorphMix~\cite{morphmix:fc04} suggest that we compare IP prefixes to -determine location diversity; but the above paper showed that in practice -many of the Mixmaster nodes that share a single AS have entirely different -IP prefixes. When the network has scaled to thousands of nodes, does IP -prefix comparison become a more useful approximation? % Alternatively, can -%relevant parts of the routing tables be summarized centrally and delivered to -%clients in a less verbose format? -%% i already said "or to summarize is sufficiently" above. is that not -%% enough? -RD -% -Second, we can take advantage of caching certain content at the -exit nodes, to limit the number of requests that need to leave the -network at all. What about taking advantage of caches like Akamai or -Google~\cite{shsm03}? 
(Note that they're also well-positioned as global -adversaries.) -% -Third, if we follow the recommendations in~\cite{feamster:wpes2004} - and tailor path selection -to avoid choosing endpoints in similar locations, how much are we hurting -anonymity against larger real-world adversaries who can take advantage -of knowing our algorithm? -% -Fourth, can we use this knowledge to figure out which gaps in our network -most affect our robustness to this class of attack, and go recruit -new nodes with those ASes in mind? - -%Tor's security relies in large part on the dispersal properties of its -%network. We need to be more aware of the anonymity properties of various -%approaches so we can make better design decisions in the future. - -\subsection{The Anti-censorship problem} -\label{subsec:china} - -Citizens in a variety of countries, such as most recently China and -Iran, are blocked from accessing various sites outside -their country. These users try to find any tools available to allow -them to get around these firewalls. Some anonymity networks, such as -Six-Four~\cite{six-four}, are designed specifically with this goal in -mind; others like the Anonymizer~\cite{anonymizer} are paid by sponsors -such as Voice of America to encourage Internet -freedom. Even though Tor wasn't -designed with ubiquitous access to the network in mind, thousands of -users across the world are now using it for exactly this purpose. -% Academic and NGO organizations, peacefire, \cite{berkman}, etc - -Anti-censorship networks hoping to bridge country-level blocks face -a variety of challenges. One of these is that they need to find enough -exit nodes---servers on the `free' side that are willing to relay -traffic from users to their final destinations. Anonymizing -networks like Tor are well-suited to this task since we have -already gathered a set of exit nodes that are willing to tolerate some -political heat. 
- -The other main challenge is to distribute a list of reachable relays -to the users inside the country, and give them software to use those relays, -without letting the censors also enumerate this list and block each -relay. Anonymizer solves this by buying lots of seemingly-unrelated IP -addresses (or having them donated), abandoning old addresses as they are -`used up,' and telling a few users about the new ones. Distributed -anonymizing networks again have an advantage here, in that we already -have tens of thousands of separate IP addresses whose users might -volunteer to provide this service since they've already installed and use -the software for their own privacy~\cite{koepsell:wpes2004}. Because -the Tor protocol separates routing from network discovery \cite{tor-design}, -volunteers could configure their Tor clients -to generate node descriptors and send them to a special directory -server that gives them out to dissidents who need to get around blocks. - -Of course, this still doesn't prevent the adversary -from enumerating and preemptively blocking the volunteer relays. -Perhaps a tiered-trust system could be built where a few individuals are -given relays' locations. They could then recommend other individuals -by telling them -those addresses, thus providing a built-in incentive to avoid letting the -adversary intercept them. Max-flow trust algorithms~\cite{advogato} -might help to bound the number of IP addresses leaked to the adversary. Groups -like the W3C are looking into using Tor as a component in an overall system to -help address censorship; we wish them success. - -%\cite{infranet} - -\section{Scaling} -\label{sec:scaling} - -Tor is running today with hundreds of nodes and tens of thousands of -users, but it will certainly not scale to millions. -Scaling Tor involves four main challenges. First, to get a -large set of nodes, we must address incentives for -users to carry traffic for others. 
Next is safe node discovery, both -while bootstrapping (Tor clients must robustly find an initial -node list) and later (Tor clients must learn about a fair sample -of honest nodes and not let the adversary control circuits). -We must also detect and handle node speed and reliability as the network -becomes increasingly heterogeneous: since the speed and reliability -of a circuit are limited by its worst link, we must learn to track and -predict performance. Finally, we must stop assuming that all points on -the network can connect to all other points. - -\subsection{Incentives by Design} -\label{subsec:incentives-by-design} - -There are three behaviors we need to encourage for each Tor node: relaying -traffic; providing good throughput and reliability while doing it; -and allowing traffic to exit the network from that node. - -We encourage these behaviors through \emph{indirect} incentives: that -is, by designing the system and educating users in such a way that users -with certain goals will choose to relay traffic. One -main incentive for running a Tor node is social: volunteers -altruistically donate their bandwidth and time. We encourage this with -public rankings of the throughput and reliability of nodes, much like -seti@home. We further explain to users that they can get -deniability for any traffic emerging from the same address as a Tor -exit node, and they can use their own Tor node -as an entry or exit point with confidence that it's not run by an adversary. -Further, users may run a node simply because they need such a network -to be persistently available and usable, and the value of supporting this -exceeds any countervailing costs. 
-Finally, we can encourage operators by improving the usability and feature -set of the software: -rate limiting support and easy packaging decrease the hassle of -maintaining a node, and our configurable exit policies allow each -operator to advertise a policy describing the hosts and ports to which -he feels comfortable connecting. - -To date these incentives appear to have been adequate. As the system scales -or as new issues emerge, however, we may also need to provide - \emph{direct} incentives: -providing payment or other resources in return for high-quality service. -Paying actual money is problematic: decentralized e-cash systems are -not yet practical, and a centralized collection system not only reduces -robustness, but also has failed in the past (the history of commercial -anonymizing networks is littered with failed attempts). A more promising -option is to use a tit-for-tat incentive scheme, where nodes provide better -service to nodes that have provided good service for them. - -Unfortunately, such an approach introduces new anonymity problems. -There are many surprising ways for nodes to game the incentive and -reputation system to undermine anonymity---such systems are typically -designed to encourage fairness in storage or bandwidth usage, not -fairness of provided anonymity. An adversary can attract more traffic -by performing well or can target individual users by selectively -performing, to undermine their anonymity. Typically a user who -chooses evenly from all nodes is most resistant to an adversary -targeting him, but that approach hampers the efficient use -of heterogeneous nodes. - -%When a node (call him Steve) performs well for Alice, does Steve gain -%reputation with the entire system, or just with Alice? If the entire -%system, how does Alice tell everybody about her experience in a way that -%prevents her from lying about it yet still protects her identity? 
If -%Steve's behavior only affects Alice's behavior, does this allow Steve to -%selectively perform only for Alice, and then break her anonymity later -%when somebody (presumably Alice) routes through his node? - -A possible solution is a simplified approach to the tit-for-tat -incentive scheme based on two rules: (1) each node should measure the -service it receives from adjacent nodes, and provide service relative -to the received service, but (2) when a node is making decisions that -affect its own security (such as building a circuit for its own -application connections), it should choose evenly from a sufficiently -large set of nodes that meet some minimum service -threshold~\cite{casc-rep}. This approach allows us to discourage -bad service -without opening Alice up as much to attacks. All of this requires -further study. - -\subsection{Trust and discovery} -\label{subsec:trust-and-discovery} - -The published Tor design is deliberately simplistic in how -new nodes are authorized and how clients are informed about Tor -nodes and their status. -All nodes periodically upload a signed description -of their locations, keys, and capabilities to each of several well-known {\it - directory servers}. These directory servers construct a signed summary -of all known Tor nodes (a ``directory''), and a signed statement of which -nodes they -believe to be operational then (a ``network status''). Clients -periodically download a directory to learn the latest nodes and -keys, and more frequently download a network status to learn which nodes are -likely to be running. Tor nodes also operate as directory caches, to -lighten the bandwidth on the directory servers. - -To prevent Sybil attacks (wherein an adversary signs up many -purportedly independent nodes to increase her network view), -this design -requires the directory server operators to manually -approve new nodes. 
Unapproved nodes are included in the directory, -but clients -do not use them at the start or end of their circuits. In practice, -directory administrators perform little actual verification, and tend to -approve any Tor node whose operator can compose a coherent email. -This procedure -may prevent trivial automated Sybil attacks, but will do little -against a clever and determined attacker. - -There are a number of flaws in this system that need to be addressed as we -move forward. First, -each directory server represents an independent point of failure: any -compromised directory server could start recommending only compromised -nodes. -Second, as more nodes join the network, %the more unreasonable it -%becomes to expect clients to know about them all. -directories -become infeasibly large, and downloading the list of nodes becomes -burdensome. -Third, the validation scheme may do as much harm as it does good. It -does not prevent clever attackers from mounting Sybil attacks, -and it may deter node operators from joining the network---if -they expect the validation process to be difficult, or they do not share -any languages in common with the directory server operators. - -We could try to move the system in several directions, depending on our -choice of threat model and requirements. If we did not need to increase -network capacity to support more users, we could simply - adopt even stricter validation requirements, and reduce the number of -nodes in the network to a trusted minimum. -But, we can only do that if we can simultaneously make node capacity -scale much more than we anticipate to be feasible soon, and if we can find -entities willing to run such nodes, an equally daunting prospect. - -In order to address the first two issues, it seems wise to move to a system -including a number of semi-trusted directory servers, no one of which can -compromise a user on its own. 
Ultimately, of course, we cannot escape the -problem of a first introducer: since most users will run Tor in whatever -configuration the software ships with, the Tor distribution itself will -remain a single point of failure so long as it includes the seed -keys for directory servers, a list of directory servers, or any other means -to learn which nodes are on the network. But omitting this information -from the Tor distribution would only delegate the trust problem to each -individual user. %, most of whom are presumably less informed about how to make -%trust decisions than the Tor developers. -A well publicized, widely available, authoritatively and independently -endorsed and signed list of initial directory servers and their keys -is a possible solution. But, setting that up properly is itself a large -bootstrapping task. - -%Network discovery, sybil, node admission, scaling. It seems that the code -%will ship with something and that's our trust root. We could try to get -%people to build a web of trust, but no. Where we go from here depends -%on what threats we have in mind. Really decentralized if your threat is -%RIAA; less so if threat is to application data or individuals or... - -\subsection{Measuring performance and capacity} -\label{subsec:performance} - -One of the paradoxes with engineering an anonymity network is that we'd like -to learn as much as we can about how traffic flows so we can improve the -network, but we want to prevent others from learning how traffic flows in -order to trace users' connections through the network. Furthermore, many -mechanisms that help Tor run efficiently -require measurements about the network. - -Currently, nodes try to deduce their own available bandwidth (based on how -much traffic they have been able to transfer recently) and include this -information in the descriptors they upload to the directory. 
Clients -choose servers weighted by their bandwidth, neglecting really slow -servers and capping the influence of really fast ones. -% -This is, of course, eminently cheatable. A malicious node can get a -disproportionate amount of traffic simply by claiming to have more bandwidth -than it does. But better mechanisms have their problems. If bandwidth data -is to be measured rather than self-reported, it is usually possible for -nodes to selectively provide better service for the measuring party, or -sabotage the measured value of other nodes. Complex solutions for -mix networks have been proposed, but do not address the issues -completely~\cite{mix-acc,casc-rep}. - -Even with no cheating, network measurement is complex. It is common -for views of a node's latency and/or bandwidth to vary wildly between -observers. Further, it is unclear whether total bandwidth is really -the right measure; perhaps clients should instead be considering nodes -based on unused bandwidth or observed throughput. -%How to measure performance without letting people selectively deny service -%by distinguishing pings. Heck, just how to measure performance at all. In -%practice people have funny firewalls that don't match up to their exit -%policies and Tor doesn't deal. -% -%Network investigation: Is all this bandwidth publishing thing a good idea? -%How can we collect stats better? Note weasel's smokeping, at -%http://seppia.noreply.org/cgi-bin/smokeping.cgi?target=Tor -%which probably gives george and steven enough info to break tor? -% -And even if we can collect and use this network information effectively, -we must ensure -that it is not more useful to attackers than to us. While it -seems plausible that bandwidth data alone is not enough to reveal -sender-recipient connections under most circumstances, it could certainly -reveal the path taken by large traffic flows under low-usage circumstances. 
- -\subsection{Non-clique topologies} - -Tor's comparatively weak threat model may allow easier scaling than -other -designs. High-latency mix networks need to avoid partitioning attacks, where -network splits let an attacker distinguish users in different partitions. -Since Tor assumes the adversary cannot cheaply observe nodes at will, -a network split may not decrease protection much. -Thus, one option when the scale of a Tor network -exceeds some size is simply to split it. Nodes could be allocated into -partitions while hampering collaborating hostile nodes from taking over -a single partition~\cite{casc-rep}. -Clients could switch between -networks, even on a per-circuit basis. -%Future analysis may uncover -%other dangers beyond those affecting mix-nets. - -More conservatively, we can try to scale a single Tor network. Likely -problems with adding more servers to a single Tor network include an -explosion in the number of sockets needed on each server as more servers -join, and increased coordination overhead to keep each user's view of -the network consistent. As we grow, we will also have more instances of -servers that can't reach each other simply due to Internet topology or -routing problems. - -%include restricting the number of sockets and the amount of bandwidth -%used by each node. The number of sockets is determined by the network's -%connectivity and the number of users, while bandwidth capacity is determined -%by the total bandwidth of nodes on the network. The simplest solution to -%bandwidth capacity is to add more nodes, since adding a Tor node of any -%feasible bandwidth will increase the traffic capacity of the network. So as -%a first step to scaling, we should focus on making the network tolerate more -%nodes, by reducing the interconnectivity of the nodes; later we can reduce -%overhead associated with directories, discovery, and so on. - -We can address these points by reducing the network's connectivity. 
-Danezis~\cite{danezis:pet2003} considers -the anonymity implications of restricting routes on mix networks and -recommends an approach based on expander graphs (where any subgraph is likely -to have many neighbors). It is not immediately clear that this approach will -extend to Tor, which has a weaker threat model but higher performance -requirements: instead of analyzing the -probability of an attacker's viewing whole paths, we will need to examine the -attacker's likelihood of compromising the endpoints. -% -Tor may not need an expander graph per se: it -may be enough to have a single central subnet that is highly connected, like -an Internet backbone. % As an -%example, assume fifty nodes of relatively high traffic capacity. This -%\emph{center} forms a clique. Assume each center node can -%handle 200 connections to other nodes (including the other ones in the -%center). Assume every noncenter node connects to three nodes in the -%center and anyone out of the center that they want to. Then the -%network easily scales to c. 2500 nodes with commensurate increase in -%bandwidth. -There are many open questions: how to distribute connectivity information -(presumably nodes will learn about the central nodes -when they download Tor), whether central nodes -will need to function as a `backbone', and so on. As above, -this could reduce the amount of anonymity available from a mix-net, -but for a low-latency network where anonymity derives largely from -the edges, it may be feasible. - -%In a sense, Tor already has a non-clique topology. -%Individuals can set up and run Tor nodes without informing the -%directory servers. This allows groups to run a -%local Tor network of private nodes that connects to the public Tor -%network. This network is hidden behind the Tor network, and its -%only visible connection to Tor is at those points where it connects. -%As far as the public network, or anyone observing it, is concerned, -%they are running clients. 
- -\section{The Future} -\label{sec:conclusion} - -Tor is the largest and most diverse low-latency anonymity network -available, but we are still in the beginning stages of deployment. Several -major questions remain. - -First, will our volunteer-based approach to sustainability work in the -long term? As we add more features and destabilize the network, the -developers spend a lot of time keeping the server operators happy. Even -though Tor is free software, the network would likely stagnate and die at -this stage if the developers stopped actively working on it. We may get -an unexpected boon from the fact that we're a general-purpose overlay -network: as Tor grows more popular, other groups who need an overlay -network on the Internet are starting to adapt Tor to their needs. -% -Second, Tor is only one of many components that preserve privacy online. -For applications where it is desirable to -keep identifying information out of application traffic, someone must build -more and better protocol-aware proxies that are usable by ordinary people. -% -Third, we need to gain a reputation for social good, and learn how to -coexist with the variety of Internet services and their established -authentication mechanisms. We can't just keep escalating the blacklist -standoff forever. -% -Fourth, the current Tor -architecture does not scale even to handle current user demand. We must -find designs and incentives to let some clients relay traffic too, without -sacrificing too much anonymity. - -These are difficult and open questions. Yet choosing not to solve them -means leaving most users to a less secure network or no anonymizing -network at all. 
- -\bibliographystyle{plain} \bibliography{tor-design} - -\clearpage -\appendix - -\begin{figure}[t] -%\unitlength=1in -\centering -%\begin{picture}(6.0,2.0) -%\put(3,1){\makebox(0,0)[c]{\epsfig{figure=graphnodes,width=6in}}} -%\end{picture} -\mbox{\epsfig{figure=graphnodes,width=5in}} -\caption{Number of Tor nodes over time, through January 2005. Lowest -line is number of exit -nodes that allow connections to port 80. Middle line is total number of -verified (registered) Tor nodes. The line above that represents nodes -that are running but not yet registered.} -\label{fig:graphnodes} -\end{figure} - -\begin{figure}[t] -\centering -\mbox{\epsfig{figure=graphtraffic,width=5in}} -\caption{The sum of traffic reported by each node over time, through -January 2005. The bottom -pair show average throughput, and the top pair represent the largest 15 -minute burst in each 4 hour period.} -\label{fig:graphtraffic} -\end{figure} - -\end{document} - -%Making use of nodes with little bandwidth, or high latency/packet loss. - -%Running Tor nodes behind NATs, behind great-firewalls-of-China, etc. -%Restricted routes. How to propagate to everybody the topology? BGP -%style doesn't work because we don't want just *one* path. Point to -%Geoff's stuff. 
- diff --git a/doc/design-paper/challenges2.tex b/doc/design-paper/challenges2.tex deleted file mode 100644 index a39b66cf7d..0000000000 --- a/doc/design-paper/challenges2.tex +++ /dev/null @@ -1,1612 +0,0 @@ -\documentclass{llncs} - -\usepackage{url} -\usepackage{amsmath} -\usepackage{epsfig} - -\setlength{\textwidth}{5.9in} -\setlength{\textheight}{8.4in} -\setlength{\topmargin}{.5cm} -\setlength{\oddsidemargin}{1cm} -\setlength{\evensidemargin}{1cm} - -\newenvironment{tightlist}{\begin{list}{$\bullet$}{ - \setlength{\itemsep}{0mm} - \setlength{\parsep}{0mm} - % \setlength{\labelsep}{0mm} - % \setlength{\labelwidth}{0mm} - % \setlength{\topsep}{0mm} - }}{\end{list}} - - -\newcommand{\workingnote}[1]{} % The version that hides the note. -%\newcommand{\workingnote}[1]{(**#1)} % The version that makes the note visible. - - -\begin{document} - -\title{Design challenges and social factors in deploying low-latency anonymity} - -\author{Roger Dingledine\inst{1} \and -Nick Mathewson\inst{1} \and -Paul Syverson\inst{2}} -\institute{The Free Haven Project \email{<\{arma,nickm\}@freehaven.net>} \and -Naval Research Laboratory \email{<syverson@itd.nrl.navy.mil>}} - -\maketitle -\pagestyle{plain} - -\begin{abstract} - There are many unexpected or unexpectedly difficult obstacles to - deploying anonymous communications. We describe the design - philosophy of Tor (the third-generation onion routing network), and, - drawing on our experiences deploying Tor, we describe social - challenges and related technical issues that must be faced in - building, deploying, and sustaining a scalable, distributed, - low-latency anonymity network. -\end{abstract} - -\section{Introduction} -% Your network is not practical unless it is sustainable and distributed. -Anonymous communication is full of surprises. This article describes -Tor, a low-latency general-purpose anonymous communication system, and -discusses some unexpected challenges arising from our experiences -deploying Tor. 
We will discuss -some of the difficulties we have experienced and how we have met them (or how -we plan to meet them, if we know). -% We also discuss some less -% troublesome open problems that we must nevertheless eventually address. -%We will describe both those future challenges that we intend to explore and -%those that we have decided not to explore and why. - -Tor is an overlay network for anonymizing TCP streams over the -Internet~\cite{tor-design}. It addresses limitations in earlier Onion -Routing designs~\cite{or-ih96,or-jsac98,or-discex00,or-pet00} by adding -perfect forward secrecy, congestion control, directory servers, data -integrity, -%configurable exit policies, Huh? That was part of the gen. 1 design -PFS -and a revised design for location-hidden services using -rendezvous points. Tor works on the real-world Internet, requires no special -privileges or kernel modifications, requires little synchronization or -coordination between nodes, and provides a reasonable trade-off between -anonymity, usability, and efficiency. - -We deployed the public Tor network in October 2003; since then it has -grown to over nine hundred volunteer-operated nodes worldwide -and over 100 megabytes average traffic per second from hundreds of -thousands of concurrent users. -Tor's research strategy has focused on deploying -a network to as many users as possible; thus, we have resisted designs that -would compromise deployability by imposing high resource demands on node -operators, and designs that would compromise usability by imposing -unacceptable restrictions on which applications we support. Although this -strategy has drawbacks (including a weakened threat model, as -discussed below), it has made it possible for Tor to serve many -hundreds of thousands of users and attract funding from diverse -sources whose goals range from security on a national scale down to -individual liberties. - -In~\cite{tor-design} we gave an overall view of Tor's design and -goals. 
Here we review that design at a higher level and describe -some policy and social issues that we face as -we continue deployment. Though we will discuss technical responses to -these, we do not in this article discuss purely technical challenges -facing Tor (e.g., transport protocol, resource scaling issues, moving -to non-clique topologies, performance, etc.), nor do we even cover -all of the social issues: we simply touch on some of the most salient of these. -Also, rather than providing complete solutions to every problem, we -instead lay out the challenges and constraints that we have observed while -deploying Tor. In doing so, we aim to provide a research agenda -of general interest to projects attempting to build -and deploy practical, usable anonymity networks in the wild. - -%While the Tor design paper~\cite{tor-design} gives an overall view its -%design and goals, -%this paper describes the policy and technical issues that Tor faces as -%we continue deployment. Rather than trying to provide complete solutions -%to every problem here, we lay out the assumptions and constraints -%that we have observed through deploying Tor in the wild. In doing so, we -%aim to create a research agenda for others to -%help in addressing these issues. -% Section~\ref{sec:what-is-tor} gives an -%overview of the Tor -%design and ours goals. Sections~\ref{sec:crossroads-policy} -%and~\ref{sec:crossroads-design} go on to describe the practical challenges, -%both policy and technical respectively, -%that stand in the way of moving -%from a practical useful network to a practical useful anonymous network. - -%\section{What Is Tor} -\section{Background} -Here we give a basic overview of the Tor design and its properties, and -compare Tor to other low-latency anonymity designs. - -\subsection{Tor, threat models, and distributed trust} -\label{sec:what-is-tor} - -%Here we give a basic overview of the Tor design and its properties. 
For -%details on the design, assumptions, and security arguments, we refer -%the reader to the Tor design paper~\cite{tor-design}. - -Tor provides \emph{forward privacy}, so that users can connect to -Internet sites without revealing their logical or physical locations -to those sites or to observers. It also provides \emph{location-hidden -services}, so that servers can support authorized users without -giving an effective vector for physical or online attackers. -Tor provides these protections even when a portion of its -infrastructure is compromised. - -To connect to a remote server via Tor, the client software learns a signed -list of Tor nodes from one of several central \emph{directory servers}, and -incrementally creates a private pathway or \emph{circuit} of encrypted -connections through authenticated Tor nodes on the network, negotiating a -separate set of encryption keys for each hop along the circuit. The circuit -is extended one node at a time, and each node along the way knows only the -immediately previous and following nodes in the circuit, so no individual Tor -node knows the complete path that each fixed-sized data packet (or -\emph{cell}) will take. -%Because each node sees no more than one hop in the -%circuit, -Thus, neither an eavesdropper nor a compromised node can -see both the connection's source and destination. Later requests use a new -circuit, to complicate long-term linkability between different actions by -a single user. - -%Tor also helps servers hide their locations while -%providing services such as web publishing or instant -%messaging. Using ``rendezvous points'', other Tor users can -%connect to these authenticated hidden services, neither one learning the -%other's network identity. - -Tor attempts to anonymize the transport layer, not the application layer. -This approach is useful for applications such as SSH -where authenticated communication is desired. 
However, when anonymity from -those with whom we communicate is desired, -application protocols that include personally identifying information need -additional application-level scrubbing proxies, such as -Privoxy~\cite{privoxy} for HTTP\@. Furthermore, Tor does not relay arbitrary -IP packets; it only anonymizes TCP streams and DNS requests. -%, and only supports -%connections via SOCKS -%(but see Section~\ref{subsec:tcp-vs-ip}). - -%Most node operators do not want to allow arbitrary TCP traffic. % to leave -%their server. -%To address this, Tor provides \emph{exit policies} so -%each exit node can block the IP addresses and ports it is unwilling to allow. -%Tor nodes advertise their exit policies to the directory servers, so that -%client can tell which nodes will support their connections. -% -%***Covered in 3.4*** Matt Edman via -PFS -% -%As of this writing, the Tor network has grown to around nine hundred nodes -%on four continents, with a total average load exceeding 100 MB/s and -%a total capacity exceeding %1Gbit/s. -%\\***What's the current capacity? -PFS***\\ -% -%***Covered in intro*** Matt Edman via -PFS -% -%Appendix A -%shows a graph of the number of working nodes over time, as well as a -%graph of the number of bytes being handled by the network over time. -%The network is now sufficiently diverse for further development -%and testing; but of course we always encourage new nodes -%to join. - -Building from earlier versions of onion routing developed at NRL, -Tor was researched and developed by NRL and FreeHaven under -funding by ONR and DARPA for use in securing government -communications. Continuing development and deployment has also been -funded by the Omidyar Network, the Electronic Frontier Foundation for use -in maintaining civil liberties for ordinary citizens online, and the -International Broadcasting Bureau and Reporters without Borders to combat -blocking and censorship on the Internet. 
As we will see below, -this wide variety of interests helps maintain both the stability and -the security of the network. - -% The Tor -%protocol was chosen -%for the anonymizing layer in the European Union's PRIME directive to -%help maintain privacy in Europe. -%The AN.ON project in Germany -%has integrated an independent implementation of the Tor protocol into -%their popular Java Anon Proxy anonymizing client. - -\medskip -\noindent -{\bf Threat models and design philosophy.} -The ideal Tor network would be practical, useful and anonymous. When -trade-offs arise between these properties, Tor's research strategy has been -to remain useful enough to attract many users, -and practical enough to support them. Only subject to these -constraints do we try to maximize -anonymity.\footnote{This is not the only possible -direction in anonymity research: designs exist that provide more anonymity -than Tor at the expense of significantly increased resource requirements, or -decreased flexibility in application support (typically because of increased -latency). Such research does not typically abandon aspirations toward -deployability or utility, but instead tries to maximize deployability and -utility subject to a certain degree of structural anonymity (structural because -usability and practicality affect usage which affects the actual anonymity -provided by the network \cite{econymics,back01}).} -%{We believe that these -%approaches can be promising and useful, but that by focusing on deploying a -%usable system in the wild, Tor helps us experiment with the actual parameters -%of what makes a system ``practical'' for volunteer operators and ``useful'' -%for home users, and helps illuminate undernoticed issues which any deployed -%volunteer anonymity network will need to address.} -Because of our strategy, Tor has a weaker threat model than many designs in -the literature. 
In particular, because we -support interactive communications without impractically expensive padding, -we fall prey to a variety -of intra-network~\cite{back01,attack-tor-oak05,flow-correlation04,hs-attack} -and -end-to-end~\cite{danezis:pet2004,SS03} anonymity-breaking attacks. - -Tor does not attempt to defend against a global observer. In general, an -attacker who can measure both ends of a connection through the Tor network -% I say 'measure' rather than 'observe', to encompass murdoch-danezis -% style attacks. -RD -can correlate the timing and volume of data on that connection as it enters -and leaves the network, and so link communication partners. -Known solutions to this attack would seem to require introducing a -prohibitive degree of traffic padding between the user and the network, or -introducing an unacceptable degree of latency. -Also, it is not clear that these methods would -work at all against a minimally active adversary who could introduce timing -patterns or additional traffic. Thus, Tor only attempts to defend against -external observers who cannot observe both sides of a user's connections. - -Against internal attackers who sign up Tor nodes, the situation is more -complicated. In the simplest case, if an adversary has compromised $c$ of -$n$ nodes on the Tor network, then the adversary will be able to compromise -a random circuit with probability $\frac{c^2}{n^2}$~\cite{or-pet00} -(since the circuit -initiator chooses hops randomly). But there are -complicating factors: -(1)~If the user continues to build random circuits over time, an adversary - is pretty certain to see a statistical sample of the user's traffic, and - thereby can build an increasingly accurate profile of her behavior. -(2)~An adversary who controls a popular service outside the Tor network - can be certain to observe all connections to that service; he - can therefore trace connections to that service with probability - $\frac{c}{n}$. 
-(3)~Users do not in fact choose nodes with uniform probability; they - favor nodes with high bandwidth or uptime, and exit nodes that - permit connections to their favorite services. -We demonstrated the severity of these problems in experiments on the -live Tor network in 2006~\cite{hs-attack} and introduced \emph{entry - guards} as a means to curtail them. By choosing entry guards from -a small persistent subset, it becomes difficult for an adversary to -increase the number of circuits observed entering the network from any -given client simply by causing -numerous connections or by watching compromised nodes over time.% (See -%also Section~\ref{subsec:routing-zones} for discussion of larger -%adversaries and our dispersal goals.) - - -% I'm trying to make this paragraph work without reference to the -% analysis/confirmation distinction, which we haven't actually introduced -% yet, and which we realize isn't very stable anyway. Also, I don't want to -% deprecate these attacks if we can't demonstrate that they don't work, since -% in case they *do* turn out to work well against Tor, we'll look pretty -% foolish. -NM -% -% Matt suggests maybe cutting the following paragraph -PFS -% -More powerful attacks may exist. In \cite{hintz-pet02} it was -shown that an attacker who can catalog data volumes of popular -responder destinations (say, websites with consistent data volumes) may not -need to -observe both ends of a stream to learn source-destination links for those -responders. Entry guards should complicate such attacks as well. -Similarly, latencies of going through various routes can be -cataloged~\cite{back01} to connect endpoints. -% Also, \cite{kesdogan:pet2002} takes the -% attack another level further, to narrow down where you could be -% based on an intersection attack on subpages in a website. 
-RD -It has not yet been shown whether these attacks will succeed or fail -in the presence of the variability and volume quantization introduced by the -Tor network, but it seems likely that these factors will at best delay -the time and data needed for success -rather than prevent the attacks completely. - -\workingnote{ -Along similar lines, the same paper suggests a ``clogging -attack'' in which the throughput on a circuit is observed to slow -down when an adversary clogs the right nodes with his own traffic. -To determine the nodes in a circuit this attack requires the ability -to continuously monitor the traffic exiting the network on a circuit -that is up long enough to probe all network nodes in binary fashion. -% Though somewhat related, clogging and interference are really different -% attacks with different assumptions about adversary distribution and -% capabilities as well as different techniques. -pfs -Murdoch and Danezis~\cite{attack-tor-oak05} show a practical -interference attack against portions of -the fifty-node Tor network as deployed in mid-2004. -An outside attacker can actively trace a circuit through the Tor network -by observing changes in the latency of his -own traffic sent through various Tor nodes. This can be done -simultaneously at multiple nodes; however, like clogging, -this attack only reveals -the Tor nodes in the circuit, not initiator and responder addresses, -so it is still necessary to discover the endpoints to complete an -effective attack. The size and diversity of the Tor network have -increased manyfold since then, and it is unknown if the attacks -can scale to the current Tor network. -} - - -%discuss $\frac{c^2}{n^2}$, except how in practice the chance of owning -%the last hop is not $c/n$ since that doesn't take the destination (website) -%into account. 
so in cases where the adversary does not also control the -%final destination we're in good shape, but if he *does* then we'd be better -%off with a system that lets each hop choose a path. -% -%Isn't it more accurate to say ``If the adversary _always_ controls the final -% dest, we would be just as well off with such as system.'' ? If not, why -% not? -nm -% Sure. In fact, better off, since they seem to scale more easily. -rd - -%Murdoch and Danezis describe an attack -%\cite{attack-tor-oak05} that lets an attacker determine the nodes used -%in a circuit; yet s/he cannot identify the initiator or responder, -%e.g., client or web server, through this attack. So the endpoints -%remain secure, which is the goal. It is conceivable that an -%adversary could attack or set up observation of all connections -%to an arbitrary Tor node in only a few minutes. If such an adversary -%were to exist, s/he could use this probing to remotely identify a node -%for further attack. Of more likely immediate practical concern -%an adversary with active access to the responder traffic -%wants to keep a circuit alive long enough to attack an identified -%node. Thus it is important to prevent the responding end of the circuit -%from keeping it open indefinitely. -%Also, someone could identify nodes in this way and if in their -%jurisdiction, immediately get a subpoena (if they even need one) -%telling the node operator(s) that she must retain all the active -%circuit data she now has. -%Further, the enclave model, which had previously looked to be the most -%generally secure, seems particularly threatened by this attack, since -%it identifies endpoints when they're also nodes in the Tor network: -%see Section~\ref{subsec:helper-nodes} for discussion of some ways to -%address this issue. - -\medskip -\noindent -{\bf Distributed trust.} -In practice Tor's threat model is based on -dispersal and diversity. 
-Our defense lies in having a diverse enough set of nodes -to prevent most real-world -adversaries from being in the right places to attack users, -by distributing each transaction -over several nodes in the network. This ``distributed trust'' approach -means the Tor network can be safely operated and used by a wide variety -of mutually distrustful users, providing sustainability and security. -%than some previous attempts at anonymizing networks. - -%No organization can achieve this security on its own. If a single -%corporation or government agency were to build a private network to -%protect its operations, any connections entering or leaving that network -%would be obviously linkable to the controlling organization. The members -%and operations of that agency would be easier, not harder, to distinguish. - -To protect our networks from traffic analysis, we must -collaboratively blend the traffic from many organizations and private -citizens, so that an eavesdropper can't tell which users are which, -and who is looking for what information. %By bringing more users onto -%the network, all users become more secure~\cite{econymics}. -%[XXX I feel uncomfortable saying this last sentence now. -RD] -%[So, I took it out. I think we can do without it. -PFS] -The Tor network has a broad range of users, including ordinary citizens -concerned about their privacy, corporations -who don't want to reveal information to their competitors, and law -enforcement and government intelligence agencies who need -to do operations on the Internet without being noticed. -Naturally, organizations will not want to depend on others for their -security. If most participating providers are reliable, Tor tolerates -some hostile infiltration of the network. For maximum protection, -the Tor design includes an enclave approach that lets data be encrypted -(and authenticated) end-to-end, so high-sensitivity users can be sure it -hasn't been read or modified. 
This even works for Internet services that -don't have built-in encryption and authentication, such as unencrypted -HTTP or chat, and it requires no modification of those services. - -%\subsection{Related work} -Tor differs from other deployed systems for traffic analysis resistance -in its security and flexibility. Mix networks such as -Mixmaster~\cite{mixmaster-spec} or its successor Mixminion~\cite{minion-design} -gain the highest degrees of anonymity at the expense of introducing highly -variable delays, making them unsuitable for applications such as web -browsing. Commercial single-hop -proxies~\cite{anonymizer} can provide good performance, but -a single compromise can expose all users' traffic, and a single-point -eavesdropper can perform traffic analysis on the entire network. -%Also, their proprietary implementations place any infrastructure that -%depends on these single-hop solutions at the mercy of their providers' -%financial health as well as network security. -The Java -Anon Proxy (JAP)~\cite{web-mix} provides similar functionality to Tor but -handles only web browsing rather than all TCP\@. Because all traffic -passes through fixed ``cascades'' for which the endpoints are predictable, -an adversary can know where to watch for traffic analysis from particular -clients or to particular web servers. The design calls for padding to -complicate this, although it does not appear to be implemented. -%Some peer-to-peer file-sharing overlay networks such as -%Freenet~\cite{freenet} and Mute~\cite{mute} -The Freedom -network from Zero-Knowledge Systems~\cite{freedom21-security} -was even more flexible than Tor in -transporting arbitrary IP packets, and also supported -pseudonymity in addition to anonymity; but it had -a different approach to sustainability (collecting money from users -and paying ISPs to run Tor nodes), and was eventually shut down due to financial -load. 
Finally, %potentially more scalable -% [I had added 'potentially' because the scalability of these designs -% is not established, and I am uncomfortable making the -% bolder unmodified assertion. Roger took 'potentially' out. -% Here's an attempt at more neutral wording -pfs] -peer-to-peer designs that are intended to be more scalable, -for example Tarzan~\cite{tarzan:ccs02} and -MorphMix~\cite{morphmix:fc04}, have been proposed in the literature but -have not been fielded. These systems differ somewhat -in threat model and presumably practical resistance to threats. -% -% Matt suggests cutting some or all of the rest of this paragraph. -PFS -% -Note that MorphMix differs from Tor only in -node discovery and circuit setup; so Tor's architecture is flexible -enough to contain a MorphMix experiment. Recently, -Tor has adopted from MorphMix the approach of making it harder to -own both ends of a circuit by requiring that nodes be chosen from -different /16 subnets. This requires -an adversary to own nodes in multiple address ranges to even have the -possibility of observing both ends of a circuit. We direct the -interested reader to~\cite{tor-design} for a more in-depth review of -related work. - -%XXXX six-four. crowds. i2p. - -%XXXX -%have a serious discussion of morphmix's assumptions, since they would -%seem to be the direct competition. in fact tor is a flexible architecture -%that would encompass morphmix, and they're nearly identical except for -%path selection and node discovery. and the trust system morphmix has -%seems overkill (and/or insecure) based on the threat model we've picked. -% this para should probably move to the scalability / directory system. -RD -% Nope. Cut for space, except for small comment added above -PFS - -\section{Social challenges} - -Many of the issues the Tor project needs to address extend beyond -system design and technology development. 
In particular, the -Tor project's \emph{image} with respect to its users and the rest of -the Internet impacts the security it can provide. -With this image issue in mind, this section discusses the Tor user base and -Tor's interaction with other services on the Internet. - -\subsection{Communicating security} - -Usability for anonymity systems -contributes to their security, because usability -affects the possible anonymity set~\cite{econymics,back01}. -Conversely, an unusable system attracts few users and thus can't provide -much anonymity. - -This phenomenon has a second-order effect: knowing this, users should -choose which anonymity system to use based in part on how usable -and secure -\emph{others} will find it, in order to get the protection of a larger -anonymity set. Thus we might supplement the adage ``usability is a security -parameter''~\cite{back01} with a new one: ``perceived usability is a -security parameter''~\cite{usability-network-effect}. -% From here we can better understand the effects -%of publicity on security: the more convincing your -%advertising, the more likely people will believe you have users, and thus -%the more users you will attract. Perversely, over-hyped systems (if they -%are not too broken) may be a better choice than modestly promoted ones, -%if the hype attracts more users~\cite{usability-network-effect}. - -%So it follows that we should come up with ways to accurately communicate -%the available security levels to the user, so she can make informed -%decisions. -%JAP aims to do this by including a -%comforting `anonymity meter' dial in the software's graphical interface, -%giving the user an impression of the level of protection for her current -%traffic. - -However, there's a catch. For users to share the same anonymity set, -they need to act like each other. An attacker who can distinguish -a given user's traffic from the rest of the traffic will not be -distracted by anonymity set size.
For high-latency systems like -Mixminion, where the threat model is based on mixing messages with each -other, there's an arms race between end-to-end statistical attacks and -counter-strategies~\cite{statistical-disclosure,minion-design,e2e-traffic,trickle02}. -But for low-latency systems like Tor, end-to-end \emph{traffic -correlation} attacks~\cite{danezis:pet2004,defensive-dropping,SS03,hs-attack} -allow an attacker who can observe both ends of a communication -to correlate packet timing and volume, quickly linking -the initiator to her destination. - -\workingnote{ -Like Tor, the current JAP implementation does not pad connections -apart from using small fixed-size cells for transport. In fact, -JAP's cascade-based network topology may be more vulnerable to these -attacks, because its network has fewer edges. JAP was born out of -the ISDN mix design~\cite{isdn-mixes}, where padding made sense because -every user had a fixed bandwidth allocation and altering the timing -pattern of packets could be immediately detected. But in its current context -as an Internet web anonymizer, adding sufficient padding to JAP -would probably be prohibitively expensive and ineffective against a -minimally active attacker.\footnote{Even if JAP could -fund higher-capacity nodes indefinitely, our experience -suggests that many users would not accept the increased per-user -bandwidth requirements, leading to an overall much smaller user base.} -Therefore, since under this threat -model the number of concurrent users does not seem to have much impact -on the anonymity provided, we suggest that JAP's anonymity meter is not -accurately communicating security levels to its users. - -On the other hand, while the number of active concurrent users may not -matter as much as we'd like, it still helps to have some other users -on the network, in particular different types of users. -We investigate this issue next. 
-} -\subsection{Reputability and perceived social value} -Another factor impacting the network's security is its reputability: -the perception of its social value based on its current user base. If Alice is -the only user who has ever downloaded the software, it might be socially -accepted, but she's not getting much anonymity. Add a thousand -activists, and she's anonymous, but everyone thinks she's an activist too. -Add a thousand -diverse citizens (cancer survivors, privacy enthusiasts, and so on) -and now she's harder to profile. - -Furthermore, the network's reputability affects its operator base: more people -are willing to run a service if they believe it will be used by human rights -workers than if they believe it will be used exclusively for disreputable -ends. This effect becomes stronger if node operators themselves think they -will be associated with their users' disreputable ends. - -So the more cancer survivors on Tor, the better for the human rights -activists. The more malicious hackers, the worse for the normal users. Thus, -reputability is an anonymity issue for two reasons. First, it impacts -the sustainability of the network: a network that's always about to be -shut down has difficulty attracting and keeping adequate nodes. -Second, a disreputable network is more vulnerable to legal and -political attacks, since it will attract fewer supporters. - -\workingnote{ -While people therefore have an incentive for the network to be used for -``more reputable'' activities than their own, there are still trade-offs -involved when it comes to anonymity. To follow the above example, a -network used entirely by cancer survivors might welcome file sharers -onto the network, though of course they'd prefer a wider -variety of users. 
-} -Reputability becomes even more tricky in the case of privacy networks, -since the good uses of the network (such as publishing by journalists in -dangerous countries) are typically kept private, whereas network abuses -or other problems tend to be more widely publicized. - -\workingnote{ -The impact of public perception on security is especially important -during the bootstrapping phase of the network, where the first few -widely publicized uses of the network can dictate the types of users it -attracts next. -As an example, some U.S.~Department of Energy -penetration testing engineers are tasked with compromising DoE computers -from the outside. They only have a limited number of ISPs from which to -launch their attacks, and they found that the defenders were recognizing -attacks because they came from the same IP space. These engineers wanted -to use Tor to hide their tracks. First, from a technical standpoint, -Tor does not support the variety of IP packets one would like to use in -such attacks.% (see Section~\ref{subsec:tcp-vs-ip}). -But aside from this, we also decided that it would probably be poor -precedent to encourage such use---even legal use that improves -national security---and managed to dissuade them. -} -%% "outside of academia, jap has just lost, permanently". (That is, -%% even though the crime detection issues are resolved and are unlikely -%% to go down the same way again, public perception has not been kind.) - -\subsection{Sustainability and incentives} -One of the unsolved problems in low-latency anonymity designs is -how to keep the nodes running. ZKS's Freedom network -depended on paying third parties to run its servers; the JAP project -depends on grants to pay for its bandwidth and -administrative expenses.
In Tor, bandwidth and administrative costs are -distributed across the volunteers who run Tor nodes, so we at least have -reason to think that the Tor network could survive without continued research -funding.\footnote{It also helps that Tor is implemented with free and open - source software that can be maintained by anybody with the ability and - inclination.} But why are these volunteers running nodes, and what can we -do to encourage more volunteers to do so? - -We have not formally surveyed Tor node operators to learn why they are -running nodes, but -from the information they have provided, it seems that many of them run Tor -nodes for reasons of personal interest in privacy issues. It is possible -that others are running Tor nodes to protect their own -anonymity, but of course they are -hardly likely to tell us specifics if they are. -%Significantly, Tor's threat model changes the anonymity incentives for running -%a node. In a high-latency mix network, users can receive additional -%anonymity by running their own node, since doing so obscures when they are -%injecting messages into the network. But, anybody observing all I/O to a Tor -%node can tell when the node is generating traffic that corresponds to -%none of its incoming traffic. -% -%I didn't buy the above for reason's subtle enough that I just cut it -PFS -Tor exit node operators do attain a degree of -``deniability'' for traffic that originates at that exit node. For - example, it is likely in practice that HTTP requests from a Tor node's IP - will be assumed to be from the Tor network. - More significantly, people and organizations who use Tor for - anonymity depend on the - continued existence of the Tor network to do so; running a node helps to - keep the network operational. -%\item Local Tor entry and exit nodes allow users on a network to run in an -% `enclave' configuration. [XXXX need to resolve this. They would do this -% for E2E encryption + auth?] 
- - -%We must try to make the costs of running a Tor node easily minimized. -Since Tor is run by volunteers, the most crucial software usability issue is -usability by operators: when an operator leaves, the network becomes less -usable by everybody. To keep operators pleased, we must try to keep Tor's -resource and administrative demands as low as possible. - -Because of ISP billing structures, many Tor operators have underused capacity -that they are willing to donate to the network, at no additional monetary -cost to them. Features to limit bandwidth have been essential to adoption. -Also useful has been a ``hibernation'' feature that allows a Tor node that -wants to provide high bandwidth, but no more than a certain amount in a -given billing cycle, to become dormant once its bandwidth is exhausted, and -to reawaken at a random offset into the next billing cycle. -Exit policies help to limit administrative costs by limiting the frequency of -abuse complaints (see Section~\ref{subsec:tor-and-blacklists}). -% We discuss -%technical incentive mechanisms in Section~\ref{subsec:incentives-by-design}. - -%[XXXX say more. Why else would you run a node? What else can we do/do we -% already do to make running a node more attractive?] -%[We can enforce incentives; see Section 6.1. We can rate-limit clients. -% We can put "top bandwidth nodes lists" up a la seti@home.] - -\workingnote{ -\subsection{Bandwidth and file-sharing} -\label{subsec:bandwidth-and-file-sharing} -%One potentially problematical area with deploying Tor has been our response -%to file-sharing applications. -Once users have configured their applications to work with Tor, the largest -remaining usability issue is performance. Users begin to suffer -when websites ``feel slow.'' -Clients currently try to build their connections through nodes that they -guess will have enough bandwidth. 
But even if capacity is allocated -optimally, it seems unlikely that the current network architecture will have -enough capacity to provide every user with as much bandwidth as she would -receive if she weren't using Tor, unless far more nodes join the network. - -%Limited capacity does not destroy the network, however. Instead, usage tends -%towards an equilibrium: when performance suffers, users who value performance -%over anonymity tend to leave the system, thus freeing capacity until the -%remaining users on the network are exactly those willing to use that capacity -%there is. - -Many of Tor's recent bandwidth difficulties have come from file-sharing -applications. These applications provide two challenges to -any anonymizing network: their intensive bandwidth requirement, and the -degree to which they are associated (correctly or not) with copyright -infringement. - -High-bandwidth protocols can make the network unresponsive, -but tend to be somewhat self-correcting as lack of bandwidth drives away -users who need it. Issues of copyright violation, -however, are more interesting. Typical exit node operators want to help -people achieve private and anonymous speech, not to help people (say) host -Vin Diesel movies for download; and typical ISPs would rather not -deal with customers who draw menacing letters -from the MPAA\@. While it is quite likely that the operators are doing nothing -illegal, many ISPs have policies of dropping users who get repeated legal -threats regardless of the merits of those threats, and many operators would -prefer to avoid receiving even meritless legal threats. -So when letters arrive, operators are likely to face -pressure to block file-sharing applications entirely, in order to avoid the -hassle. - -But blocking file-sharing is not easy: popular -protocols have evolved to run on non-standard ports to -get around other port-based bans.
Thus, exit node operators who want to -block file-sharing would have to find some way to integrate Tor with a -protocol-aware exit filter. This could be a technically expensive -undertaking, and one with poor prospects: it is unlikely that Tor exit nodes -would succeed where so many institutional firewalls have failed. Another -possibility for sensitive operators is to run a restrictive node that -only permits exit connections to a restricted range of ports that are -not frequently associated with file sharing. There are increasingly few such -ports. - -Other possible approaches might include rate-limiting connections, especially -long-lived connections or connections to file-sharing ports, so that -high-bandwidth connections do not flood the network. We might also want to -give priority to cells on low-bandwidth connections to keep them interactive, -but this could have negative anonymity implications. - -For the moment, it seems that Tor's bandwidth issues have rendered it -unattractive for bulk file-sharing traffic; this may continue to be so in the -future. Nevertheless, Tor will likely remain attractive for limited use in -file-sharing protocols that have separate control and data channels. - -%[We should say more -- but what? That we'll see a similar -% equilibriating effect as with bandwidth, where sensitive ops switch to -% middleman, and we become less useful for file-sharing, so the file-sharing -% people back off, so we get more ops since there's less file-sharing, so the -% file-sharers come back, etc.] - -%XXXX -%in practice, plausible deniability is hypothetical and doesn't seem very -%convincing. if ISPs find the activity antisocial, they don't care *why* -%your computer is doing that behavior. -} - -\subsection{Tor and blacklists} -\label{subsec:tor-and-blacklists} - -It was long expected that, alongside legitimate users, Tor would also -attract troublemakers who exploit Tor to abuse services on the -Internet with vandalism, rude mail, and so on. 
-Our initial answer to this situation was to use ``exit policies'' -to allow individual Tor nodes to block access to specific IP/port ranges. -This approach aims to make operators more willing to run Tor by allowing -them to prevent their nodes from being used for abusing particular -services. For example, by default Tor nodes block SMTP (port 25), -to avoid the issue of spam. -\workingnote{ -Note that for spammers, Tor would be -a step back, a much less effective means of distributing spam than -those currently available. This is thus primarily an unmistakable -answer to those confused about Internet communication who might raise -spam as an issue. -} - -Exit policies are useful, but they are insufficient: if not all nodes -block a given service, that service may try to block Tor instead. -While being blockable is important to being good netizens, we would like -to encourage services to allow anonymous access. Services should not -need to decide between blocking legitimate anonymous use and allowing -unlimited abuse. For the time being, blocking by IP address is -an expedient strategy, even if it undermines Internet stability and -functionality in the long run~\cite{netauth}. - -This is potentially a bigger problem than it may appear. -On the one hand, services should be allowed to refuse connections from -sources of possible abuse. -But when a Tor node administrator decides whether he prefers to be able -to post to Wikipedia from his IP address, or to allow people to read -Wikipedia anonymously through his Tor node, he is making the decision -for others as well. (For a while, Wikipedia -blocked all posting from all Tor nodes based on IP addresses.) If -the Tor node shares an address with a campus or corporate NAT, -then the decision can prevent the entire population from posting. -Similarly, whether intended or not, such blocking supports -repression of free speech.
In many locations where Internet access -of various kinds is censored or even punished by imprisonment, -Tor is a path both to the outside world and to others inside. -Blocking posts from Tor makes the job of censoring authorities easier. -This is a loss for both Tor -and Wikipedia: we don't want to compete for (or divvy up) the -NAT-protected entities of the world. -This is also unfortunate because there are relatively simple technical -solutions. -Various schemes for escrowing anonymous posts until they are reviewed -by editors would both prevent abuse and remove incentives for attempts -to abuse. Further, pseudonymous reputation tracking of posters through Tor -would allow those who establish adequate reputation to post without -escrow. -\workingnote{ -Software to support pseudonymous access via Tor designed precisely -to interact with Wikipedia's access mechanism has even been developed -and proposed to Wikimedia by Jason Holt~\cite{nym}, but has not been taken up. - - -Perhaps worse, many IP blacklists are coarse-grained: they ignore Tor's exit -policies, partly because it's easier to implement and partly -so they can punish -all Tor nodes. One IP blacklist even bans -every class C network that contains a Tor node, and recommends banning SMTP -from these networks even though Tor does not allow SMTP at all. This -strategic decision aims to discourage the -operation of anything resembling an open proxy by encouraging its neighbors -to shut it down to get unblocked themselves. This pressure even -affects Tor nodes running in middleman mode (disallowing all exits) when -those nodes are blacklisted too. -% Perception of Tor as an abuse vector -%is also partly driven by multiple base-rate fallacies~\cite{axelsson00}. -} - -Problems of abuse occur mainly with services such as IRC networks and -Wikipedia, which rely on IP blocking to ban abusive users. 
While at first -blush this practice might seem to depend on the anachronistic assumption that -each IP is an identifier for a single user, it is actually more reasonable in -practice: it assumes that non-proxy IPs are a costly resource, and that an -abuser cannot change IPs at will. By blocking IPs which are used by Tor -nodes, open proxies, and service abusers, these systems hope to make -ongoing abuse difficult. Although the system is imperfect, it works -tolerably well for them in practice. - -Of course, we would prefer that legitimate anonymous users be able to -access abuse-prone services. -\workingnote{ - One conceivable approach would require -would-be IRC users, for instance, to register accounts if they want to -access the IRC network from Tor. In practice this would not -significantly impede abuse if creating new accounts were easily automatable; -this is why services use IP blocking. To deter abuse, pseudonymous -identities need to require a significant switching cost in resources or human -time. Some popular webmail applications -impose cost with Reverse Turing Tests, but this step may not deter all -abusers. Freedom used blind signatures to limit -the number of pseudonyms for each paying account, but Tor has neither the -ability nor the desire to collect payment. -} -We stress that as far as we can tell, most Tor uses are not -abusive. Most services have not complained, and others are actively -working to find ways besides banning to cope with the abuse. For example, -the Freenode IRC network had a problem with a coordinated group of -abusers joining channels and subtly taking over the conversation; but -when they labelled all users coming from Tor IPs as ``anonymous users,'' -removing the ability of the abusers to blend in, the abuse stopped.
-This is an illustration of how simple technical mechanisms can remove -the ability to abuse anonymously without undermining the ability -to communicate anonymously and can thus remove the incentive to attempt -abusing in this way. - -%The use of squishy IP-based ``authentication'' and ``authorization'' -%has not broken down even to the level that SSNs used for these -%purposes have in commercial and public record contexts. Externalities -%and misplaced incentives cause a continued focus on fighting identity -%theft by protecting SSNs rather than developing better authentication -%and incentive schemes \cite{price-privacy}. Similarly we can expect a -%continued use of identification by IP number as long as there is no -%workable alternative. - -%[XXX Mention correct DNS-RBL implementation. -NM] - -\workingnote{ -\section{Design choices} - -In addition to social issues, Tor also faces some design trade-offs that must -be investigated as the network develops. - -\subsection{Transporting the stream vs transporting the packets} -\label{subsec:stream-vs-packet} -\label{subsec:tcp-vs-ip} - -Tor transports streams; it does not tunnel packets. -It has often been suggested that like the old Freedom -network~\cite{freedom21-security}, Tor should -``obviously'' anonymize IP traffic -at the IP layer. Before this could be done, many issues need to be resolved: - -\begin{enumerate} -\setlength{\itemsep}{0mm} -\setlength{\parsep}{0mm} -\item \emph{IP packets reveal OS characteristics.} We would still need to do -IP-level packet normalization, to stop things like TCP fingerprinting -attacks. %There likely exist libraries that can help with this. -This is unlikely to be a trivial task, given the diversity and complexity of -TCP stacks. -\item \emph{Application-level streams still need scrubbing.} We still need -Tor to be easy to integrate with user-level application-specific proxies -such as Privoxy. 
So it's not just a matter of capturing packets and -anonymizing them at the IP layer. -\item \emph{Certain protocols will still leak information.} For example, we -must rewrite DNS requests so they are delivered to an unlinkable DNS server -rather than the DNS server at a user's ISP; thus, we must understand the -protocols we are transporting. -\item \emph{The crypto is unspecified.} First we need a block-level encryption -approach that can provide security despite -packet loss and out-of-order delivery. Freedom allegedly had one, but it was -never publicly specified. -Also, TLS over UDP is not yet implemented or -specified, though some early work has begun~\cite{dtls}. -\item \emph{We'll still need to tune network parameters.} Since the above -encryption system will likely need sequence numbers (and maybe more) to do -replay detection, handle duplicate frames, and so on, we will be reimplementing -a subset of TCP anyway---a notoriously tricky path. -\item \emph{Exit policies for arbitrary IP packets mean building a secure -IDS\@.} Our node operators tell us that exit policies are one of -the main reasons they're willing to run Tor. -Adding an Intrusion Detection System to handle exit policies would -increase the security complexity of Tor, and would likely not work anyway, -as evidenced by the entire field of IDS and counter-IDS papers. Many -potential abuse issues are resolved by the fact that Tor only transports -valid TCP streams (as opposed to arbitrary IP including malformed packets -and IP floods), so exit policies become even \emph{more} important as -we become able to transport IP packets. We also need to compactly -describe exit policies so clients can predict -which nodes will allow which packets to exit. 
-\item \emph{The Tor-internal name spaces would need to be redesigned.} We -support hidden service {\tt{.onion}} addresses (and other special addresses, -like {\tt{.exit}} which lets the user request a particular exit node), -by intercepting the addresses when they are passed to the Tor client. -Doing so at the IP level would require a more complex interface between -Tor and the local DNS resolver. -\end{enumerate} - -This list is discouragingly long, but being able to transport more -protocols obviously has some advantages. It would be good to learn which -items are actual roadblocks and which are easier to resolve than we think. - -To be fair, Tor's stream-based approach has run into -stumbling blocks as well. While Tor supports the SOCKS protocol, -which provides a standardized interface for generic TCP proxies, many -applications do not support SOCKS\@. For them we already need to -replace the networking system calls with SOCKS-aware -versions, or run a SOCKS tunnel locally, neither of which is -easy for the average user. %---even with good instructions. -Even when applications can use SOCKS, they often make DNS requests -themselves before handing an IP address to Tor, which advertises -where the user is about to connect. -We are still working on more usable solutions. - -%So to actually provide good anonymity, we need to make sure that -%users have a practical way to use Tor anonymously. Possibilities include -%writing wrappers for applications to anonymize them automatically; improving -%the applications' support for SOCKS; writing libraries to help application -%writers use Tor properly; and implementing a local DNS proxy to reroute DNS -%requests to Tor so that applications can simply point their DNS resolvers at -%localhost and continue to use SOCKS for data only. - -\subsection{Mid-latency} -\label{subsec:mid-latency} - -Some users need to resist traffic correlation attacks. 
Higher-latency -mix-networks introduce variability into message -arrival times: as timing variance increases, timing correlation attacks -require increasingly more data~\cite{e2e-traffic}. Can we improve Tor's -resistance without losing too much usability? - -We need to learn whether we can trade a small increase in latency -for a large anonymity increase, or if we'd end up trading a lot of -latency for only a minimal security gain. A trade-off might be worthwhile -even if we -could only protect certain use cases, such as infrequent short-duration -transactions. % To answer this question -We might adapt the techniques of~\cite{e2e-traffic} to a lower-latency mix -network, where the messages are batches of cells in temporally clustered -connections. These large fixed-size batches can also help resist volume -signature attacks~\cite{hintz-pet02}. We could also experiment with traffic -shaping to get a good balance of throughput and security. -%Other padding regimens might supplement the -%mid-latency option; however, we should continue the caution with which -%we have always approached padding lest the overhead cost us too much -%performance or too many volunteers. - -We must keep usability in mind too. How much can latency increase -before we drive users away? We've already been forced to increase -latency slightly, as our growing network incorporates more DSL and -cable-modem nodes and more nodes in distant continents. Perhaps we can -harness this increased latency to improve anonymity rather than just -reduce usability. Further, if we let clients label certain circuits as -mid-latency as they are constructed, we could handle both types of traffic -on the same network, giving users a choice between speed and security---and -giving researchers a chance to experiment with parameters to improve the -quality of those choices. 
- -\subsection{Enclaves and helper nodes} -\label{subsec:helper-nodes} - -It has long been thought that users can improve their anonymity by -running their own node~\cite{tor-design,or-ih96,or-pet00}, and using -it in an \emph{enclave} configuration, where all their circuits begin -at the node under their control. Running Tor clients or servers at -the enclave perimeter is useful when policy or other requirements -prevent individual machines within the enclave from running Tor -clients~\cite{or-jsac98,or-discex00}. - -Of course, Tor's default path length of -three is insufficient for these enclaves, since the entry and/or exit -% [edit war: without the ``and/'' the natural reading here -% is aut rather than vel. And the use of the plural verb does not work -pfs] -themselves are sensitive. Tor thus increments path length by one -for each sensitive endpoint in the circuit. -Enclaves also help to protect against end-to-end attacks, since it's -possible that traffic coming from the node has simply been relayed from -elsewhere. However, if the node has recognizable behavior patterns, -an attacker who runs nodes in the network can triangulate over time to -gain confidence that it is in fact originating the traffic. Wright et -al.~\cite{wright03} introduce the notion of a \emph{helper node}---a -single fixed entry node for each user---to combat this \emph{predecessor -attack}. - -However, the attack in~\cite{attack-tor-oak05} shows that simply adding -to the path length, or using a helper node, may not protect an enclave -node. A hostile web server can send constant interference traffic to -all nodes in the network, and learn which nodes are involved in the -circuit (though at least in the current attack, he can't learn their -order). 
Using randomized path lengths may help some, since the attacker -will never be certain he has identified all nodes in the path unless -he probes the entire network, but as -long as the network remains small this attack will still be feasible. - -Helper nodes also aim to help Tor clients, because choosing entry and exit -points -randomly and changing them frequently allows an attacker who controls -even a few nodes to eventually link some of their destinations. The goal -is to take the risk once and for all about choosing a bad entry node, -rather than taking a new risk for each new circuit. (Choosing fixed -exit nodes is less useful, since even an honest exit node still doesn't -protect against a hostile website.) But obstacles remain before -we can implement helper nodes. -For one, the literature does not describe how to choose helpers from a list -of nodes that changes over time. If Alice is forced to choose a new entry -helper every $d$ days and $c$ of the $n$ nodes are bad, she can expect -to choose a compromised node around -every $dc/n$ days. Statistically over time this approach only helps -if she is better at choosing honest helper nodes than at choosing -honest nodes. Worse, an attacker with the ability to DoS nodes could -force users to switch helper nodes more frequently, or remove -other candidate helpers. - -%Do general DoS attacks have anonymity implications? See e.g. Adam -%Back's IH paper, but I think there's more to be pointed out here. -RD -% Not sure what you want to say here. -NM - -%Game theory for helper nodes: if Alice offers a hidden service on a -%server (enclave model), and nobody ever uses helper nodes, then against -%George+Steven's attack she's totally nailed. If only Alice uses a helper -%node, then she's still identified as the source of the data. If everybody -%uses a helper node (including Alice), then the attack identifies the -%helper node and also Alice, and knows which one is which. 
If everybody -%uses a helper node (but not Alice), then the attacker figures the real -%source was a client that is using Alice as a helper node. [How's my -%logic here?] -RD -% -% Not sure about the logic. For the attack to work with helper nodes, the -%attacker needs to guess that Alice is running the hidden service, right? -%Otherwise, how can he know to measure her traffic specifically? -NM -% -% In the Murdoch-Danezis attack, the adversary measures all servers. -RD - -%point to routing-zones section re: helper nodes to defend against -%big stuff. - -\subsection{Location-hidden services} -\label{subsec:hidden-services} - -% This section is first up against the wall when the revolution comes. - -Tor's \emph{rendezvous points} -let users provide TCP services to other Tor users without revealing -the service's location. Since this feature is relatively recent, we describe -here -a couple of our early observations from its deployment. - -First, our implementation of hidden services seems less hidden than we'd -like, since they build a different rendezvous circuit for each user, -and an external adversary can induce them to -produce traffic. This insecurity means that they may not be suitable as -a building block for Free Haven~\cite{freehaven-berk} or other anonymous -publishing systems that aim to provide long-term security, though helper -nodes, as discussed above, would seem to help. - -\emph{Hot-swap} hidden services, where more than one location can -provide the service and loss of any one location does not imply a -change in service, would help foil intersection and observation attacks -where an adversary monitors availability of a hidden service and also -monitors whether certain users or servers are online. The design -challenges in providing such services without otherwise compromising -the hidden service's anonymity remain an open problem; -however, see~\cite{move-ndss05}. 
- -In practice, hidden services are used for more than just providing private -access to a web server or IRC server. People are using hidden services -as a poor man's VPN and firewall-buster. Many people want to be able -to connect to the computers in their private network via secure shell, -and rather than playing with dyndns and trying to pierce holes in their -firewall, they run a hidden service on the inside and then rendezvous -with that hidden service externally. - -News sites like Bloggers Without Borders (www.b19s.org) are advertising -a hidden-service address on their front page. Doing this can provide -increased robustness if they use the dual-IP approach we describe -in~\cite{tor-design}, -but in practice they do it to increase visibility -of the Tor project and their support for privacy, and to offer -a way for their users, using unmodified software, to get end-to-end -encryption and authentication to their website. - -\subsection{Location diversity and ISP-class adversaries} -\label{subsec:routing-zones} - -Anonymity networks have long relied on diversity of node location for -protection against attacks---typically an adversary who can observe a -larger fraction of the network can launch a more effective attack. One -way to achieve dispersal involves growing the network so a given adversary -sees less. Alternately, we can arrange the topology so traffic can enter -or exit at many places (for example, by using a free-route network -like Tor rather than a cascade network like JAP). Lastly, we can use -distributed trust to spread each transaction over multiple jurisdictions. -But how do we decide whether two nodes are in related locations? - -Feamster and Dingledine defined a \emph{location diversity} metric -in~\cite{feamster:wpes2004}, and began investigating a variant of location -diversity based on the fact that the Internet is divided into thousands of -independently operated networks called {\em autonomous systems} (ASes). 
-The key insight from their paper is that while we typically think of a -connection as going directly from the Tor client to the first Tor node, -actually it traverses many different ASes on each hop. An adversary at -any of these ASes can monitor or influence traffic. Specifically, given -plausible initiators and recipients, and given random path selection, -some ASes in the simulation were able to observe 10\% to 30\% of the -transactions (that is, learn both the origin and the destination) on -the deployed Tor network (33 nodes as of June 2004). - -The paper concludes that for best protection against the AS-level -adversary, nodes should be in ASes that have the most links to other ASes: -Tier-1 ISPs such as AT\&T and Abovenet. Further, a given transaction -is safest when it starts or ends in a Tier-1 ISP\@. Therefore, assuming -initiator and responder are both in the U.S., it actually \emph{hurts} -our location diversity to use far-flung nodes in -continents like Asia or South America. -% it's not just entering or exiting from them. using them as the middle -% hop reduces your effective path length, which you presumably don't -% want because you chose that path length for a reason. -% -% Not sure I buy that argument. Two end nodes in the right ASs to -% discourage linking are still not known to each other. If some -% adversary in a single AS can bridge the middle node, it shouldn't -% therefore be able to identify initiator or responder; although it could -% contribute to further attacks given more assumptions. -% Nonetheless, no change to the actual text for now. - -Many open questions remain. First, it will be an immense engineering -challenge to get an entire BGP routing table to each Tor client, or to -summarize it sufficiently. Without a local copy, clients won't be -able to safely predict what ASes will be traversed on the various paths -through the Tor network to the final destination. 
Tarzan~\cite{tarzan:ccs02} -and MorphMix~\cite{morphmix:fc04} suggest that we compare IP prefixes to -determine location diversity; but the above paper showed that in practice -many of the Mixmaster nodes that share a single AS have entirely different -IP prefixes. When the network has scaled to thousands of nodes, does IP -prefix comparison become a more useful approximation? % Alternatively, can -%relevant parts of the routing tables be summarized centrally and delivered to -%clients in a less verbose format? -%% i already said "or to summarize is sufficiently" above. is that not -%% enough? -RD -% -Second, we can take advantage of caching certain content at the -exit nodes, to limit the number of requests that need to leave the -network at all. What about taking advantage of caches like Akamai or -Google~\cite{shsm03}? (Note that they're also well-positioned as global -adversaries.) -% -Third, if we follow the recommendations in~\cite{feamster:wpes2004} - and tailor path selection -to avoid choosing endpoints in similar locations, how much are we hurting -anonymity against larger real-world adversaries who can take advantage -of knowing our algorithm? -% -Fourth, can we use this knowledge to figure out which gaps in our network -most affect our robustness to this class of attack, and go recruit -new nodes with those ASes in mind? - -%Tor's security relies in large part on the dispersal properties of its -%network. We need to be more aware of the anonymity properties of various -%approaches so we can make better design decisions in the future. - -\subsection{The Anti-censorship problem} -\label{subsec:china} - -Citizens in a variety of countries, such as most recently China and -Iran, are blocked from accessing various sites outside -their country. These users try to find any tools available to allow -them to get around these firewalls.
Some anonymity networks, such as -Six-Four~\cite{six-four}, are designed specifically with this goal in -mind; others like the Anonymizer~\cite{anonymizer} are paid by sponsors -such as Voice of America to encourage Internet -freedom. Even though Tor wasn't -designed with ubiquitous access to the network in mind, thousands of -users across the world are now using it for exactly this purpose. -% Academic and NGO organizations, peacefire, \cite{berkman}, etc - -Anti-censorship networks hoping to bridge country-level blocks face -a variety of challenges. One of these is that they need to find enough -exit nodes---servers on the `free' side that are willing to relay -traffic from users to their final destinations. Anonymizing -networks like Tor are well-suited to this task since we have -already gathered a set of exit nodes that are willing to tolerate some -political heat. - -The other main challenge is to distribute a list of reachable relays -to the users inside the country, and give them software to use those relays, -without letting the censors also enumerate this list and block each -relay. Anonymizer solves this by buying lots of seemingly-unrelated IP -addresses (or having them donated), abandoning old addresses as they are -`used up,' and telling a few users about the new ones. Distributed -anonymizing networks again have an advantage here, in that we already -have tens of thousands of separate IP addresses whose users might -volunteer to provide this service since they've already installed and use -the software for their own privacy~\cite{koepsell:wpes2004}. Because -the Tor protocol separates routing from network discovery \cite{tor-design}, -volunteers could configure their Tor clients -to generate node descriptors and send them to a special directory -server that gives them out to dissidents who need to get around blocks. - -Of course, this still doesn't prevent the adversary -from enumerating and preemptively blocking the volunteer relays. 
-Perhaps a tiered-trust system could be built where a few individuals are -given relays' locations. They could then recommend other individuals -by telling them -those addresses, thus providing a built-in incentive to avoid letting the -adversary intercept them. Max-flow trust algorithms~\cite{advogato} -might help to bound the number of IP addresses leaked to the adversary. Groups -like the W3C are looking into using Tor as a component in an overall system to -help address censorship; we wish them success. - -%\cite{infranet} - - -\section{Scaling} -\label{sec:scaling} - -Tor is running today with hundreds of nodes and hundreds of thousands of -users, but it will certainly not scale to millions. -Scaling Tor involves four main challenges. First, to get a -large set of nodes, we must address incentives for -users to carry traffic for others. Next is safe node discovery, both -while bootstrapping (Tor clients must robustly find an initial -node list) and later (Tor clients must learn about a fair sample -of honest nodes and not let the adversary control circuits). -We must also detect and handle node speed and reliability as the network -becomes increasingly heterogeneous: since the speed and reliability -of a circuit is limited by its worst link, we must learn to track and -predict performance. Finally, we must stop assuming that all points on -the network can connect to all other points. - -\subsection{Incentives by Design} -\label{subsec:incentives-by-design} - -There are three behaviors we need to encourage for each Tor node: relaying -traffic; providing good throughput and reliability while doing it; -and allowing traffic to exit the network from that node. - -We encourage these behaviors through \emph{indirect} incentives: that -is, by designing the system and educating users in such a way that users -with certain goals will choose to relay traffic. 
One -main incentive for running a Tor node is social: volunteers -altruistically donate their bandwidth and time. We encourage this with -public rankings of the throughput and reliability of nodes, much like -seti@home. We further explain to users that they can get -deniability for any traffic emerging from the same address as a Tor -exit node, and they can use their own Tor node -as an entry or exit point with confidence that it's not run by an adversary. -Further, users may run a node simply because they need such a network -to be persistently available and usable, and the value of supporting this -exceeds any countervailing costs. -Finally, we can encourage operators by improving the usability and feature -set of the software: -rate limiting support and easy packaging decrease the hassle of -maintaining a node, and our configurable exit policies allow each -operator to advertise a policy describing the hosts and ports to which -he feels comfortable connecting. - -To date these incentives appear to have been adequate. As the system scales -or as new issues emerge, however, we may also need to provide - \emph{direct} incentives: -providing payment or other resources in return for high-quality service. -Paying actual money is problematic: decentralized e-cash systems are -not yet practical, and a centralized collection system not only reduces -robustness, but also has failed in the past (the history of commercial -anonymizing networks is littered with failed attempts). A more promising -option is to use a tit-for-tat incentive scheme, where nodes provide better -service to nodes that have provided good service for them. - -Unfortunately, such an approach introduces new anonymity problems. -There are many surprising ways for nodes to game the incentive and -reputation system to undermine anonymity---such systems are typically -designed to encourage fairness in storage or bandwidth usage, not -fairness of provided anonymity.
An adversary can attract more traffic -by performing well or can target individual users by selectively -performing, to undermine their anonymity. Typically a user who -chooses evenly from all nodes is most resistant to an adversary -targeting him, but that approach hampers the efficient use -of heterogeneous nodes. - -%When a node (call him Steve) performs well for Alice, does Steve gain -%reputation with the entire system, or just with Alice? If the entire -%system, how does Alice tell everybody about her experience in a way that -%prevents her from lying about it yet still protects her identity? If -%Steve's behavior only affects Alice's behavior, does this allow Steve to -%selectively perform only for Alice, and then break her anonymity later -%when somebody (presumably Alice) routes through his node? - -A possible solution is a simplified approach to the tit-for-tat -incentive scheme based on two rules: (1) each node should measure the -service it receives from adjacent nodes, and provide service relative -to the received service, but (2) when a node is making decisions that -affect its own security (such as building a circuit for its own -application connections), it should choose evenly from a sufficiently -large set of nodes that meet some minimum service -threshold~\cite{casc-rep}. This approach allows us to discourage -bad service -without opening Alice up as much to attacks. All of this requires -further study. - - -\subsection{Trust and discovery} -\label{subsec:trust-and-discovery} - -The published Tor design is deliberately simplistic in how -new nodes are authorized and how clients are informed about Tor -nodes and their status. -All nodes periodically upload a signed description -of their locations, keys, and capabilities to each of several well-known {\it - directory servers}. 
These directory servers construct a signed summary -of all known Tor nodes (a ``directory''), and a signed statement of which -nodes they -believe to be operational then (a ``network status''). Clients -periodically download a directory to learn the latest nodes and -keys, and more frequently download a network status to learn which nodes are -likely to be running. Tor nodes also operate as directory caches, to -lighten the bandwidth on the directory servers. - -To prevent Sybil attacks (wherein an adversary signs up many -purportedly independent nodes to increase her network view), -this design -requires the directory server operators to manually -approve new nodes. Unapproved nodes are included in the directory, -but clients -do not use them at the start or end of their circuits. In practice, -directory administrators perform little actual verification, and tend to -approve any Tor node whose operator can compose a coherent email. -This procedure -may prevent trivial automated Sybil attacks, but will do little -against a clever and determined attacker. - -There are a number of flaws in this system that need to be addressed as we -move forward. First, -each directory server represents an independent point of failure: any -compromised directory server could start recommending only compromised -nodes. -Second, as more nodes join the network, %the more unreasonable it -%becomes to expect clients to know about them all. -directories -become infeasibly large, and downloading the list of nodes becomes -burdensome. -Third, the validation scheme may do as much harm as it does good. It -does not prevent clever attackers from mounting Sybil attacks, -and it may deter node operators from joining the network---if -they expect the validation process to be difficult, or they do not share -any languages in common with the directory server operators. - -We could try to move the system in several directions, depending on our -choice of threat model and requirements. 
If we did not need to increase -network capacity to support more users, we could simply - adopt even stricter validation requirements, and reduce the number of -nodes in the network to a trusted minimum. -But, we can only do that if we can simultaneously make node capacity -scale much more than we anticipate to be feasible soon, and if we can find -entities willing to run such nodes, an equally daunting prospect. - -In order to address the first two issues, it seems wise to move to a system -including a number of semi-trusted directory servers, no one of which can -compromise a user on its own. Ultimately, of course, we cannot escape the -problem of a first introducer: since most users will run Tor in whatever -configuration the software ships with, the Tor distribution itself will -remain a single point of failure so long as it includes the seed -keys for directory servers, a list of directory servers, or any other means -to learn which nodes are on the network. But omitting this information -from the Tor distribution would only delegate the trust problem to each -individual user. %, most of whom are presumably less informed about how to make -%trust decisions than the Tor developers. -A well publicized, widely available, authoritatively and independently -endorsed and signed list of initial directory servers and their keys -is a possible solution. But, setting that up properly is itself a large -bootstrapping task. - -%Network discovery, sybil, node admission, scaling. It seems that the code -%will ship with something and that's our trust root. We could try to get -%people to build a web of trust, but no. Where we go from here depends -%on what threats we have in mind. Really decentralized if your threat is -%RIAA; less so if threat is to application data or individuals or...
- - -\subsection{Measuring performance and capacity} -\label{subsec:performance} - -One of the paradoxes with engineering an anonymity network is that we'd like -to learn as much as we can about how traffic flows so we can improve the -network, but we want to prevent others from learning how traffic flows in -order to trace users' connections through the network. Furthermore, many -mechanisms that help Tor run efficiently -require measurements about the network. - -Currently, nodes try to deduce their own available bandwidth (based on how -much traffic they have been able to transfer recently) and include this -information in the descriptors they upload to the directory. Clients -choose servers weighted by their bandwidth, neglecting really slow -servers and capping the influence of really fast ones. -% -This is, of course, eminently cheatable. A malicious node can get a -disproportionate amount of traffic simply by claiming to have more bandwidth -than it does. But better mechanisms have their problems. If bandwidth data -is to be measured rather than self-reported, it is usually possible for -nodes to selectively provide better service for the measuring party, or -sabotage the measured value of other nodes. Complex solutions for -mix networks have been proposed, but do not address the issues -completely~\cite{mix-acc,casc-rep}. - -Even with no cheating, network measurement is complex. It is common -for views of a node's latency and/or bandwidth to vary wildly between -observers. Further, it is unclear whether total bandwidth is really -the right measure; perhaps clients should instead be considering nodes -based on unused bandwidth or observed throughput. -%How to measure performance without letting people selectively deny service -%by distinguishing pings. Heck, just how to measure performance at all. In -%practice people have funny firewalls that don't match up to their exit -%policies and Tor doesn't deal. 
-% -%Network investigation: Is all this bandwidth publishing thing a good idea? -%How can we collect stats better? Note weasel's smokeping, at -%http://seppia.noreply.org/cgi-bin/smokeping.cgi?target=Tor -%which probably gives george and steven enough info to break tor? -% -And even if we can collect and use this network information effectively, -we must ensure -that it is not more useful to attackers than to us. While it -seems plausible that bandwidth data alone is not enough to reveal -sender-recipient connections under most circumstances, it could certainly -reveal the path taken by large traffic flows under low-usage circumstances. - -\subsection{Non-clique topologies} - -Tor's comparatively weak threat model may allow easier scaling than -other -designs. High-latency mix networks need to avoid partitioning attacks, where -network splits let an attacker distinguish users in different partitions. -Since Tor assumes the adversary cannot cheaply observe nodes at will, -a network split may not decrease protection much. -Thus, one option when the scale of a Tor network -exceeds some size is simply to split it. Nodes could be allocated into -partitions while hampering collaborating hostile nodes from taking over -a single partition~\cite{casc-rep}. -Clients could switch between -networks, even on a per-circuit basis. -%Future analysis may uncover -%other dangers beyond those affecting mix-nets. - -More conservatively, we can try to scale a single Tor network. Likely -problems with adding more servers to a single Tor network include an -explosion in the number of sockets needed on each server as more servers -join, and increased coordination overhead to keep each user's view of -the network consistent. As we grow, we will also have more instances of -servers that can't reach each other simply due to Internet topology or -routing problems. - -%include restricting the number of sockets and the amount of bandwidth -%used by each node.
The number of sockets is determined by the network's -%connectivity and the number of users, while bandwidth capacity is determined -%by the total bandwidth of nodes on the network. The simplest solution to -%bandwidth capacity is to add more nodes, since adding a Tor node of any -%feasible bandwidth will increase the traffic capacity of the network. So as -%a first step to scaling, we should focus on making the network tolerate more -%nodes, by reducing the interconnectivity of the nodes; later we can reduce -%overhead associated with directories, discovery, and so on. - -We can address these points by reducing the network's connectivity. -Danezis~\cite{danezis:pet2003} considers -the anonymity implications of restricting routes on mix networks and -recommends an approach based on expander graphs (where any subgraph is likely -to have many neighbors). It is not immediately clear that this approach will -extend to Tor, which has a weaker threat model but higher performance -requirements: instead of analyzing the -probability of an attacker's viewing whole paths, we will need to examine the -attacker's likelihood of compromising the endpoints. -% -Tor may not need an expander graph per se: it -may be enough to have a single central subnet that is highly connected, like -an Internet backbone. % As an -%example, assume fifty nodes of relatively high traffic capacity. This -%\emph{center} forms a clique. Assume each center node can -%handle 200 connections to other nodes (including the other ones in the -%center). Assume every noncenter node connects to three nodes in the -%center and anyone out of the center that they want to. Then the -%network easily scales to c. 2500 nodes with commensurate increase in -%bandwidth. -There are many open questions: how to distribute connectivity information -(presumably nodes will learn about the central nodes -when they download Tor), whether central nodes -will need to function as a `backbone', and so on. 
As above, -this could reduce the amount of anonymity available from a mix-net, -but for a low-latency network where anonymity derives largely from -the edges, it may be feasible. - -%In a sense, Tor already has a non-clique topology. -%Individuals can set up and run Tor nodes without informing the -%directory servers. This allows groups to run a -%local Tor network of private nodes that connects to the public Tor -%network. This network is hidden behind the Tor network, and its -%only visible connection to Tor is at those points where it connects. -%As far as the public network, or anyone observing it, is concerned, -%they are running clients. -} - -\section{The Future} -\label{sec:conclusion} - -Tor is the largest and most diverse low-latency anonymity network -available, but we are still in the beginning stages of deployment. Several -major questions remain. - -First, will our volunteer-based approach to sustainability work in the -long term? As we add more features and destabilize the network, the -developers spend a lot of time keeping the server operators happy. Even -though Tor is free software, the network would likely stagnate and die at -this stage if the developers stopped actively working on it. We may get -an unexpected boon from the fact that we're a general-purpose overlay -network: as Tor grows more popular, other groups who need an overlay -network on the Internet are starting to adapt Tor to their needs. -% -Second, Tor is only one of many components that preserve privacy online. -For applications where it is desirable to -keep identifying information out of application traffic, someone must build -more and better protocol-aware proxies that are usable by ordinary people. -% -Third, we need to gain a reputation for social good, and learn how to -coexist with the variety of Internet services and their established -authentication mechanisms. We can't just keep escalating the blacklist -standoff forever. 
-% -Fourth, the current Tor -architecture does not scale even to handle current user demand. We must -find designs and incentives to let some clients relay traffic too, without -sacrificing too much anonymity. - -These are difficult and open questions. Yet choosing not to solve them -means leaving most users to a less secure network or no anonymizing -network at all. - -\bibliographystyle{plain} \bibliography{tor-design} - -\end{document} - -\clearpage -\appendix - -\begin{figure}[t] -%\unitlength=1in -\centering -%\begin{picture}(6.0,2.0) -%\put(3,1){\makebox(0,0)[c]{\epsfig{figure=graphnodes,width=6in}}} -%\end{picture} -\mbox{\epsfig{figure=graphnodes,width=5in}} -\caption{Number of Tor nodes over time, through January 2005. Lowest -line is number of exit -nodes that allow connections to port 80. Middle line is total number of -verified (registered) Tor nodes. The line above that represents nodes -that are running but not yet registered.} -\label{fig:graphnodes} -\end{figure} - -\begin{figure}[t] -\centering -\mbox{\epsfig{figure=graphtraffic,width=5in}} -\caption{The sum of traffic reported by each node over time, through -January 2005. The bottom -pair show average throughput, and the top pair represent the largest 15 -minute burst in each 4 hour period.} -\label{fig:graphtraffic} -\end{figure} - - - -%Making use of nodes with little bandwidth, or high latency/packet loss. - -%Running Tor nodes behind NATs, behind great-firewalls-of-China, etc. -%Restricted routes. How to propagate to everybody the topology? BGP -%style doesn't work because we don't want just *one* path. Point to -%Geoff's stuff. 
- diff --git a/doc/design-paper/graphnodes.eps b/doc/design-paper/graphnodes.eps deleted file mode 100644 index c1ccf44ffd..0000000000 --- a/doc/design-paper/graphnodes.eps +++ /dev/null @@ -1,168 +0,0 @@ -%!PS-Adobe-3.0 EPSF-3.0 -%%BoundingBox: 0 0 594 282 -% -% created by bmeps 1.1.0 (SCCS=1.73) -% -/pstr - 1782 string -def -/inputf - currentfile - /ASCII85Decode filter - /FlateDecode filter - /RunLengthDecode filter -def -gsave -0 282 translate -594 282 scale -594 282 8 [594 0 0 -282 0 0] -{ inputf pstr readstring pop } -false -3 -colorimage -GhVOjDi>]<FajlqSJie3"Aig\TXkI[MM+kbD;&*L=t/GMV-hs)5>n9_WC$R38uPNS -:\/!h(->g0&o'd+8jO"#,u#\]-qJ1e";8Kq`509MM1b+?CGo=.n_,W;mpme-e![aj -B("Hbs6'BNh5d)*o60!7giMmDpRL,(pWU;Y7i/+RPG?h0oNsh*1qc+:\e*?imYf?h -re5U2P]Er&[l@U7MGMBC<a&dq32GfDGWpC0c6`qhbLO?+4%u@8Z/Vdnq"treSqVDa -\U#EJUY%e5Ph#d,:B[]`lVll3`Q9BVa#HgAoB(Bt*4*aZo7("N(W4Q'r4i65/`js/ -061>D:!%,B]fqbFk0:H<3\DZWUJuk]n`SCZd8H)<CDVC+qoL]1fjpXph0Q![-O?Oq -n2%`,NU$PO:5qJ*XDRK*P<t":(jqY4cZFDZM5+?Fc#ZM$-E"'\bBrOCFfhh@jSBjM -f:%bZ^h\g@?;f;.19A;qa6-IE5bGYGqlO2:J$;Gj6N""i>pE_P!2XOs^VlZn5=9gq -SE9TkJS0@]8s0%XFBHtSSdhZ7mZ"DR\ETm=>(Yd+lps3F+2>#X8RI>kQHh'^kE]G/ -a-5/<G.p/S\TAfTXlOXK5G)$?o9\G+,^,uSYE+&$4BjjYqN7SgY7t'?'`N!Q^[T)g -/]C$ZS1Y)G<UX2C0U&MVY3<'G=.rD^)@r<3<ggnVGFjRkYJXjJq:Y)V,*4CW$M@nY -NL*V(Qd.j5TJKEVZ`/?XB$ZD&"QGh-OD_k&OP;`"JT]DO9<1iMLL>?j@AaZ!H*6[I -W$g(]g^4*1cPI-WK.qdDQoH*_jR`3(kp3I@FTFUH.,QhO4Pd\`)8Xm2MW:P73+qe\ -l(RWFgm5I>8e&2oY10M02D"Bgl>$6gA8)eEL"!-DKD@0"<oXfT1(9!LgNJujgbDB8 -lc6]'n#IO/CtkdTgkJ%iQsJ=e777##4S;<q4[KHA&kX:>#m>K/.3VuT5IN3;!3&q6 -9.PR*SN*mGlIajjqQ8^eneFI#aI<?j/=R07$/0'ElO9To55d+AlhXG%89U!ZlcAtO -S^+cHqB;RVg</;?YrJof2t`YLp8K6JL$7'B=mp27ff([#\Wm,[0\m!:a1-7_X">h) -%BW3%i[Ym_R]4`UTdSXfJ3)6W4TfuNFO]o*I_.]iijHIdht#$U)$0f3eA.uk[0r'b -<A"ZaLu8ql1t,nPYn04AUs2kUP*SgT5[l%shgKb2=^:R=o.U-77ntMWVsgE(;dCQ4 -'qE2$X</F>#Aj=';Rad""$Q7lM$q$i%F(T&OXpc=%`R%j9CL[no72iYSO_=(6@q]E -J.C(`=q/trfLN`&@=,6Ocg0I1TGQL%CC&iZ8+6Rqj*#<^Q;eI=.Y2TO+Q_EAcn?Wa 
-D`X6FY=JerlZAa_r)+9m)OZ)5&nU[lbp5nB+FEJ'*7hr1Vt+RqGW-&sK7DXdY\rA. -'@"E3Fso6;ktS/,81ZP$Ch'):.7*M!9j[YpCV#.'e9%,qS"bg%l<IZ1%_jaOckeR2 -F!CA@j9?J!%DX7aLW-KB&G]IqK+S7pmr+T0\AHnIN=TM_haXFkc8VpA*cjH6oRT1R -W1!q;(eP6bqGjNuGV]RF$erJQ2pbF?Q(;q*7rI^=CRR[[m@s#RH)V]9i8ALDoJrab -U(6UuF1M;D7$&]1@qm4$=.4CHWM4($&XXX%_1PuaeN.338X0\">3;Y@#Fe)[1)cT^ -`W@OtV$9,#QRWFc7:7PH<%8!uAtNHE*VKHXBJ>-B?pFj<+>3,,V&^Q/bcCQL_^]X4 -bg"PN1V4F<Qeta">h%#hd1CBhK/Y<d"f0UX.ttb9Ot@,t\E<1SL3^WGX4.65Z+/%6 ->Rs%7SBu)&b[oN_ERW$FaNtClNe,n\X+=I)**h430XhM!i&c6kAcE#+[h7e2>D>?0 -:uV.oZ`qRK\fpLsKWeTG4SfYLl+;^NC8O9Z^=5h>Q1P[)*+(FKTT)tKb&s^-s7``i -c%(Q68^3oO+_G6h/*VT(D&iLWh^,rI5G]u>F2HJY`_6:tF?Yp-q"-$shiEhNke'kW -?15Lfc3(-;jGpX*W7<ihq>/'WjnVs:NM6oShm"ppNbghEDn2Oh6Mt+Xq&1!Np>/HE -a(AjV,f?ssgoh)WR@=u%!BsirW&BkYpp=-Q'udI76+k%AbLNn65G6ZLjjlWDnjA@R -#D_*9#PS'?X9(kKN)=MlV$]sthLR%c=V_H=H'1s\:"BorK7OP%g>'r;Zp?h\ecr6+ -KI(MQe&(6N]\Y(0cBC/q-G&`k-V#S!@2;3cFXT-:klfY]6;&!-[]gF(4Z-WZ0bt"M ->!pLt&ksVXea3>AW*0WX`WL?;DD5D9=<+rK)=5e#C8&qCf8Xbrk(;fL>lFk.*84PT -Pc?uRaumCeIj/2&fEH,Z^'mU".9EH[X_,,FaK$GM(mQMO'"lZF`OflCE5K;^M+3pJ -TMKOa+ST^jX;!s>c``QXaL\O'kFdTYfS=n-]:T+-4bN[0RgNB$\GoD"P^$WT*;%h] -a&ih_/Z/le''^/Krf>WE+ZMnnm"(k)7DVB.8L-uOfPEb)f,Rc0>KligeqWmJ'[Rq+ -#8kT>ZqA\8e5`d]+n6/\QP#Q!R:ZOcnV:ddH4A8R6Q4fN*Ms-2i%+Jj3sTq#crqmi -RD(mbp`cS0J^/m8H=+2Ola/7],0-c)E)XnZd3sd91#00]bu09lpk6t2r^[HGTL0:G -Ta=p`4_c^is3:9]ac?s6k<9-:dM302pL3'#"'TMb%LhL)-gGp`?V:/T\9RK$P4qW= -T.M2s_>mEl=M[TMaJm0<n?jX%m>(N1Unl\'lA!?J7@\#]Bsg?7;&b;cYhlT<:QtJc -Q:>a-,<\\*?hq%f[7(2\>1bW9>B2e!1g!_G[u,K;F75r4,c?gtaL7\L2Q`t>/!/`\ -qp4jE9$Slnh=1\33IF]rI<YAjMm=tm@pt$d:7^j-[=>(Mmmj30032FeK0R#7IuHDT -W31!/?O_U>:WUXK[S+\Y%5qe)%-%NQ?ubB`Kb'42F_(B]Tp5DfXO\iNLXRe/EP#1r -QOiu)T=%iK+H'dl*,]1-E!S)tFOQac@e^.YWJZsh_qWS<'VC-!DbG<^G/.33fu/lJ -5i\eM/]De)!Dpg@6#Dn%Hk1=WYJh[6T(:kXhY)9pf=[-)0!AWII(:PD)j9]sp=(DQ -GrF=X6aHC`2`?l)g@5e^"8t"=j>YVrfm[5H>Ab5iY?*qqmaXG)='VfSUX`)B\(;!9 -$)?lDAdn(AYuN5EdV\LW7G3oXM-P1c9g&;7qmh&i>Hh/Z>Li(%[8clI0dEpQ;3IQY 
-GPk*_Md2IK"&Fa;iMd]haBnt*]aqRbS,#8s^$AA(=DD#f$JBQ5?KPuGYmCij_Z&[o -$ebL%T/>93"`*K<;+%dJ_'hu[=/nc.UIohOEsXt_*8>_Fi./Vf#i1QmIFLnd5qiq[ -#YHVTJkBBa`Jn]"^PfU4`b*[dq_c&U<F83o;V#-WN+b-][EaehDA2@(I=1(2.IpX> -`+S0GR2Ye1=mWTAc>DC"Pg+[8;33K<F%rV.,"YLA]L8hOEp^!&5&4?n602DW:45oP -Vr!!Y.HK[cRW%A58]dU5<ff_epn_$fs3kZuBf0C=Y0o58Mt@P5D*qjU,dP=Y1$<kL -*<_T!"J9Z(.ZE2mG!dsI'u\Q4.'2j2"o'i(4@T^CWeVDte#=0jAfS!5B+hH_cl?.L -A3Eb:]@Xr.n$$>lZfH[9,Ui7=q@)WJMhZ2AAsVoS9eUsl+2B.Fc_&>ZT"`4+dH*X" -!\gu-rk(WQ59f,^Cpb,M\rh,Q1GtdaY_)0']Ig<a`P<bY0r'VDs"]aR,lMjK`VV54 -g&,=.G^^M`k_`WdmtZ;$dM?jB5)N1l]=35oe^.V8RKAZ.NeTC"BS`s-=AWk3@h('O -?>ei"NO@L^nSW54&'ND\_h&dP2YMDpa/nuQfHR[h`n4K1-%AeG`&:&*S"`IB[V:U8 -@Hcf(IoHWI5]8SaK0I`)'oI[hoZ^%,cmC!NZeJ)jHNfo^)uJ"#TrAM#fn?V2T?&52 -[!#:WS6f%44N+.-r38gZ)s!LPcqCQBZ8%(,9q.I\9N*)281j0&.(2a7gKqF@n101$ -O>Ud.5U6;eNnP-U)P[VCQ/Y+d7^/d2'N"Lr%)ut]e"akJ6[jKb+cOo_M+edEp%mDo ->EGpE4&?$'L*qBjc8L3u1UKGe2l>=Z]%nrY[GXRp;'.FTTD2`-N;YZYonrD_c[i1> -R6YfR#T>DODc/Argc&WeSF-c)g.W'^b&kEg)`;iOgX+Gr9fq(Y]RktkJTiLeo!2tN -hgI<GS;la?+Lre.1U55lTnXSU=N!X;]9^2/I(R$_Qj,20=VG0PRJL[^4u-8k^l$Pr -%F]g+mr&u:?"DdX*@3[rrQN?X7AjR8c2A+L"j+S;?n((;=OVf&3!ZCEYsm'Y`jI!` -@#q`:&/_^#Q+X2[kuHh]FCKpESJdp#=IORAG],fVCnKM6_;@]=g9ui'J<@:BCHEXM -`/n]fUZoikC`^!$#919lc\87e$pNH*+&6S6(\'2\AS5mJ7p+itN0Pm@i$3#>M7gN' -,Z4#UAEMInh;cT00<+g8aH>j6CRc\1E[M=OhBq$nl-hAZL9GrTfZrfCf3t-$Z'^b! 
-N$u.t(5$J5aBX-j"i:Mr^7/ASRst,m_L%?1G$,mukB#e8G(3?ZL5VbG*ZVl9Msb4] -`SnPXjHnliZ)0=TDX&IX$[Y5QnEY*hQ<F?T>bg<>RY&WC!!m#F0tc!(aa<mph?!A# -n)aH*"4t]P"Z<)mh=&b=c^Q"a'u#`jeX#)-MGj1q>!,_?d%?1(N3NZe,OHo%oFfB\ -MMWYuQ\A8'mQGUCS"f2S[J7kI9$Ul7AYItiZVHaf[7-$rU-t:o^t+jfIf63"cJC7b -4LnS56phbs;DYb1gAu)ZN#f^=6./Ht8pT)]Y;PXLE%bW?6q!_DmS&hE#d/%P;J2J+ -s3d]t>_,i*35GqB"!JjZO^Dg7JjsWga>A]&k0.PXjk$BXo,TuFbt^?>*[pGJg*08$ -ppf_6a1/h4>2F[>I'jDE_1N$?i_!Bk4lOHnAe:fV/U3Aa%6/gXaKqTQ'oHen\:0W@ -_i^n<PuWqFHL+[<Mo])Tjo02af5M/P^M`db9>/*V:]WlQ#pr$Par%"/!@R^[8&N`+ -o"t,GKG3H.\)gDTnG"GacVY06Ll7jC1o,4aS>O#PgFsNsO#^.0P[Sg:RV(skjjL\O -V?rj!3rlYR,GW'%Md->._2@hE3mH)r^:1OlT-d&bBnh:X*lS<=S=,5dN2c(T[/D;q -#tE-\b_r_O>=>::F<n?'-/)3%2n"Xr^Hqot4ANiFK?,%4*+VT&6Qor7,>h/KE)jX! -!'>&Z<SAS.pg*jiiWK1'eq6tLZ5qZC,m1EKYDYL9'1DIe[WFFhUJ<9N:G]O=QZ]lH -m,5E)Eku<Q&B?q$I9hU/W4>^mqp!-+;o/R"Ocb3#1\'3EDQq#,Lsqb),T?rT.^-+n -NEGl(!ZUUGA^<fiKZKc`r(JqhB`l;eO*=#WPk98ZT;VV<2*%.<64adS0+r0.m&)W@ -=:uJjC',j;Ark;:]XZYB('G3U+A09iWg=X8",l50ijF]^om94g9%b0aEu+B%<F+&u -'QN:<-8(Xi6]o[d9T`-URcD6s\"8C&S'[RB`Tja^`g%PHPq6)`(JP;K;NehVBFJ^M -+JPjLpn5cQ]BYq4luTcg*KSV66$p=!g%!;YGnIld(:c.9=s&lb]2_=j`:)2bM'DQ9 -I,^\+,(j0@%Q#A>f7<%E!B4eL6f2XA!?r2+;8Og-/Gd5t:<eMT)E$VU!JLuVm*%"> -9nJbA_l4TaKA[>BJODXt#\R;_B-!qZGA4$DXn\hQ9%tt7F[q&OIAJ*/+F$PTpN8ge -ehJr_Ns'=n#UcQJ9@`_K-[eP7K+p$R>$"uCqm>[Gb0'.ASQ%l]`R+?#1J^q^_]ki7 -H"W/LDqImO9'=au>Jmc8'hAe!(u\dnD?&*^Xc>V/8g9CqGE(/&V]j/J4)^u2)L(n= -B^DY[Qq2OlNbUbKSZU3NBi@Oe,n'r=pY?8Dmoc0sNf4T_jp12igW[OU%=MJ:P0ESq --.H6KOW+Dk%,grA#GnG%3osd^\*&H^Ir;DuqZ4)4!_/'M5gPfhEtJnE7[B3pUlCVL -kTPk2UX_VY3G"@LN(TPh_&IrfdQS*-bOEMY)1=P+](St7[s^P]cZE@9BiKIA8B[,0 -8HX:iE#U,*4@4M$G;*fI?sY7qrAKG<LdXYbk;HjlJe&$F_h+<!';$0.d.h7imUM"I -'K!S8IMn/]?IIns\1biuVrjE/P9'(P=^A#J@Zig0Le(=(2!j]0aPP34K<4I8QEf)o -RhW'<[D+\Q5r<ih-YU@6)44qViie5RGL;*?M_$o%;C5SCJ,3<I\OOVIi68m_(HI.` -[LCs53'gRfr6P;)g2l>omojQi]:2EI:ZoAl]A'gk_YnHFVt-9DOM/9n*XcPP9<*UZ -Q;_=ZjmSEg[RVggGBB3+A@35u7gE8tWjGJ?3,'WG&ro.eb*R:F5\MDe@+i$3,fh!i 
-:"6?O7_.]FCY'p4!8><</oJVZQ*]El>?2V2RJf!n5HRusha^CDCAVU^6h0"rEYT0X -&VQ<94-0Bps,QQC@C7t;5BC#@?ekB%8s:c"&&uilDNk%>%2OJ2-_L]h*[9Ul"0<5n -pa?P@^62jXoboK0K9G>&KbkW4P^^Hkh?rl29aBVm*_"5"U8cd=0QPrQeq;%m?c6NR -&0po_:>/n[Q=#/S&iZ(?bP1aKqn-n-%5DNRGkRf>JKSa(*'_e*MBAR.,NGBmL=p6P -l/1=&GASg*(+(g)R*)I\nuUq317rQp6CEF>M]*t-kQ9#OHEO5N:YC\TNXiV:RZSPJ -@BB^<$&u<6E7>?\b]:X+]Cd.3Y'oWh-.e:4mLE97+9PV6o4o8hfuO[0=I)H*dt5aQ -*iV%!8K0]J1g+TZ+f"(hS%^e@7BLdCm(KWso*U#6q+f8uDb'I+25,,mpIZ6\8r^/Q -]/+E+q'&['C?#bm!(P!:7V*D8$XbfA;(ON;%mYEo[9;Wm4X%8NTWr&./*!%^P\bYT -5m&)E*VUK%2osk3p.:$NduA%2It0[INLn8GeBeR<-p9jl&i=IPTQAhBPi8*_Fb3L9 -2sA1l,@_;MV1H%O#Ea^8J9M4++kI5,.ln+bR)1Eo\G#'8ok.Q)+c#R&R>,a[:?iYh -;e+9$#0dJOWTYUj-kK/F42b!3mfEKE:K*Qh[-%h#7sm84)TWKElE-C]F"WFPNCOD: -\R8R&qZ)V"&N%L%DO01ZOLBgGl/Q0+]#[/j+5Pd@.3=>4!$[sOh@TS=8_iX:]g;A( -0(/Vq+Z?9KrObJ1mZR\^d(M8FM^(1!ERiDG#)Xrs9Rhi$NLM%"F8P=ZBr/I]5L*T$ -grD+i+.3U@+I@2n96l3(_<';d;h2-V`],*8^jY-ikAi4iN/%f_:T'O0I(W5=qV/?& -2`%"KJB.a8:W(DW!@%CIOoKZ^<M#@c1H:Rgifd>'cukK(%fue=g^!6LpG1LlkQrFD -29:2U(0<J@hSmSHL<E!CV`EeMqqrhoDLe>[I_(-'1RlXj[iKRQGcW&KZBN/[9h4?I -_sTm<VF[G(8=9n`YRc(RaiJt+.fdtFaEBH,=lHJ\Y`rqAJ>uY6JCT1Y0.:$:!B<JN -"h>lB!#n7VOjP5oG_.#TJG.3-J$uuFWB$73DN3%QifIP?H<\9oJ^G5h<@0js.KfEr -qq]<k>t-TAn'R&3Z"Psrjs98P.s`R')SjoZ!G:C$=S^B[*A5DVa1ZX_,L\fB;-@_i -H8)\YbUcn5ZgUTX#8M[*!8:.j5X9nt^jY.Tfl$?3A-7q<K%HU6f.<c'\;d]F;d;)g -4uQVl0Q:V/AUA!q>&qYlZ;#%c:F;R%^c*<Qlat7K81SX$\.:"BLf`+\QU*mi!G)M# -*DlI48(ka^frd*$V^%Vd2b`HTp0f':cEac._="QSO4%Vsfhokd!Hn_#*FO'sDXWr8 -QfIgf6@^k2+1MitD$RoEn;:73f`>XhPT<SD5hQ0R"RZBdJbsu'"3M*%4.tqjZ3/8C -#A#:Q<V^0G)B/#kj(u"6N-?u4_0?4F*K7K-`qu2O0k)#hn>?U,Se[mJ@)dg:%q[qk -nR0AWJ\fh1C1,.bTdB_V@E"Gi'?B@u\q3n'b9g:^mN]NKTfq%1f0gsD(YI&e.5eAk -bZ1_YpSR[qEda(`JVl<6HVNSH*)q8XY)fltV[^,W<#9<*$U52(DIF"G"q0tmEG<f: -ig8Wo,^,uSYE*V+h2RErqV@>.l/]U\'8NSL0sn!B,Om8;2>(>[mI.dOpT91Y+'8?+ -X6rniI!ZNu9oAmFMu@D`s'*b@5E4\pa`fXG6CsJY[&9ImXYfd;>b*<-Mp4%aL#:J> -2/+3Pn8+>hV%0mNr1jKjBli*7eJ[iu\>#d.:Q1eJ(KRI!WtP2t6cF$I5-MWj#kD2' 
-n9G:;L6fB+>a&-poO`6CIEGWJFO-)TEOnFbL\O9ha-*Y0]7$[!+=_o#n<?Aae%o</ -aKluW.8F#/)-%(ONL`)+YS?m:ARn/[M)fB:(TT>DH%"]"B;Cg4#W'h/U4r@L,%<S8 -C"5aXfjq=C"M#H_NtXil+K]/VT",-q9!NE&Yk%uUlH<s+Y)/i&<SA/.B..1"7oH@o -\[,9*)#dn"E<J:'[pSbk+HD&eUsgG&iW&#ALChSHF`#(IcUiP/^YkF-k8Ns(LmZ,` -8`"-`I^+2EpoDJ>M#WN^1!aHehs%nZ`2aOo*cQK0*o"G*_s=VM]?#EK8M/fboBE'[ -D8XD3:Yn[1$"7&LYckl4<pJW-ij0-&Bt;YM.M8:]hF8`M<S2e#h/-Pu^0,h)W6X"S -U\n?T1@m29h8cO."3\Mhj:Lu")ABJErjhfO#D.d;+RhWj&n#16bJ_'Rr.3s#@&8Fs -9</"JV#1oe<s+$fFJe4<;G"828ZO[R9W]KE#WS!W\gD-_R1o^YiNYP42Oqe;q6&m6 -s42i,P@8th5r:O:4PDh"0qeND5+WB!=PDu&=]0Kr1fa*-enX3iF1^52_;>knBlLX] -BLYm2@F(WF!\al%Rs4(m6,N3LP/lTl/J5a?Gd2W>%PMD$3$o1eOVFHAW'86@Lq/ae -PoG]<!?0ND3p&uD,_aDaI4)seP\M_3-6QSfXhedD_F\16HC/)@KkRTj!)>$O["7%9 -D6B5)D40SQUj"t>'sP,GBNHcO_@C;#b4Y/s#/"Q=JK#Z6aoL"/UN22;/ICc%Ue"uB -]?c8;r^8O2Gn1@s^['02cR?WTXYaq/ld"*4j3n6hI:ur.qDG(lF-1K[aj&iIf8Zb6 -Or[i<?g[<aqu7aROUer -~> -grestore -currentdict /inputf undef -currentdict /pstr undef diff --git a/doc/design-paper/graphnodes.pdf b/doc/design-paper/graphnodes.pdf Binary files differdeleted file mode 100644 index 68e98dc8dc..0000000000 --- a/doc/design-paper/graphnodes.pdf +++ /dev/null diff --git a/doc/design-paper/graphtraffic.eps b/doc/design-paper/graphtraffic.eps deleted file mode 100644 index a84383cf2a..0000000000 --- a/doc/design-paper/graphtraffic.eps +++ /dev/null @@ -1,143 +0,0 @@ -%!PS-Adobe-3.0 EPSF-3.0 -%%BoundingBox: 0 0 594 282 -% -% created by bmeps 1.1.0 (SCCS=1.73) -% -/pstr - 1782 string -def -/inputf - currentfile - /ASCII85Decode filter - /FlateDecode filter - /RunLengthDecode filter -def -gsave -0 282 translate -594 282 scale -594 282 8 [594 0 0 -282 0 0] -{ inputf pstr readstring pop } -false -3 -colorimage -GhVOJCMt3EF[JG%)$pqO7S(n%G(K^F;`(G8(XOELP.48L(.b+8Ob*4)ImDCW62R>Y -5HGO<Fr1H]>/u'-DC`Yb)[DB4)jR6jITs'/g5KaoH;L?XMWU?Y2nS3s=$,-(cJ7Tn -YBfKkRG!i0]/-cTO*_rBhK-t*kM//,dl$1+3UP^>^A=DLhS%e1RVVcTmH/coI^_2p --/A)fY!:s%S!R6CL[OJmLso/T9kVT@SOM-<O.4GdDT?Ibc$/qg%i@"6-bO49Co>.P 
-"#a4/jnkLEcag/sj%jS#:OH52=nG\2FV0B#(d/l[r=F$/4Qhlj2FFL=Hhc&;&J`$g -e/#mQQ%%cA4[bE&c?USiST&@qHQeW6Q<1<?3l70B^)I">&A7>i*.RhB&kZqmpiuqT -;(;:1\^[8F`<fe<)dDm.L73akH"7:CV3Kg[r._&+4FZ(9HW8l3<+74k&%JL^0ok15 -[,a]$bn\sQcP$*%^M7Lam>q*%ek]FmNu1&pA5S"U,JuZG85EA52-4j+Fa)>Q]WpUj --c\0AUZjm(rgK(!Q'CJUfn"L/+)^cpr0ER@>F\KtL:]+M%t+>D*CI0*(eUl!(XEHC -$I9(6CE)"1Ous8io0`ntb7#Cg$&"&X9J(K:5Un!<:QB?a8(2a0*")gtAsRsn%p7_Y -jHp%:/!9qS_<='I%A-l7LCc&1FrQqV[ZM!=1G^WceO%6YV*a`XLa9B7m^LNMqdY3' -Q4<UR>^+0us6$rO^61F>EZnab:b1so.b)bf>Uo!B#Y'gi7FN_'.$fHM3n_"O`l(oW -(Ph(J+H_ZdYbETV;*h>J]RD]U)MoC-)+9`C6jcNt=$"H,Yfb*#M/"S=ag+N;<)?)& -/9*T#H2)>tq!dT0(po?TZI>;oH"-D60R!510A=+28p4MPmT29j@+ZGDPQChf'`4_7 -=89;sc_'deo<oChZ)I/0[+cegDPUat-1,bme6"/p$>N#m[0q+I,atN[9.Q94$\_&3 -g,5;nh9*RoFNa_4=gkG/akK[bc9hf.L^N%j_+MVk($!c^@@GEhD5&>b2'm@d[B;nR -%]d"=L=Bg0GjGk))]]eG0VW]DA4A/7>a9V=6G]9H!<d(q')["DlRGP3ED_BA->`q6 -1*$1JTu:\fnZ8ssn>ri7:2H]'6LgW:%,TmWcA.k.S@0X)P)*g&E>k`Lc'B-oD;ASY -\eH:KVUM`<Q;^gr))B8ii/#ab0\*N+iNOB"GcJ0%LpC%*TY,;f<5MddGepP%O+%Y9 -AjN/MkB<YK*'Ljm]G`d5$q;`&[Nf;5#^_2U>Y=D7fEPr[<7W\W<onQqh6!I-?lObi -Y]k,^2CKMYm4=J!Z'tQIOdNT8UZjP"TO4$g7Ba!ZqVP7IOU>hlk=@qY%Z_W$65f7> ->bumq/d5R@&L^V)(kp'eRQC%t"`ZMg$nP/:[me8R6oURfnU^pdf*J<%JP-E.(n'\G -&T6V'%gFZ8,#.H4JlulYG"<7("BC^WH]j3$J(>cAdkn-JkXrB*N2'8P&Tc6U9!),j -.lrFf.T_4J\7QJEm0ZgoF>BOm/j;ipaTXksiF>E+>bPYX[9Q-!s"\QRfG?fmX?D@O -$U*EpT?\k`J#-&`7pJ2>9]dJEI8:*F+]#k+^("B5?%lIB$$MG(=I<++YF&4(1YTjR -IY1'5c,%T(]:3sl,,Ptq-Q2],6>cB_d(G(@ppOkK!Lb?Sl_&malpO"Ur>r<LlqCbE -Y,t<i=&:D.LPi]7!L('\JhlJF!'p(4GJoN0ZY$U.`C,KS33a#@!XF%gT=1&SqgY>0 -`I@cjN53uiGuEuEIF?[28aa$G3<"j_WhreS'6pH%:mVSSN\6D?VPEA<enH"s"<et. 
-`O(hZVPZVYBNJ-pZJ-9o7o/`rh%])C0XqtXX%5H>AL*-kX<o3N4+`p@,+l!eEWrkF -dj!:`Wf\]:KA+jCf;[[ZMV+_lQ\d!7$'ulaoWfTm`d1=%P-_3`Zsi,>"p9EB_gWuC -"9mEX22$/=k[#nS+-W!M_FduS+a6&ciEji'TPX%VI>F0$3:N%,KSVuOL,^1T8WeiS -Eq.?kX!_aE]YOp$pN7\l/!Op+0=+>"F7uQ"5_hT+WO.%D-h.d#R>AM:XF%Bf+hN8g -H^:'D-Ai+9b@(JuGR:>-,Xtj=FK;"KKsS-21jJhh#:2]rHL\m\3Na&hE`G+o<UNk@ -D8k%@&DMUa#>ga1QmU]E/%/S(_Yq;!=4%>MIF;'6-+R]2iFo(s8#V<^h/g[75cqOP -_-WdW[-#A\QLk&eQ)G&,lO6Y2];anIYbMt#i&a"U@OHYclJ\>1$@(,1/;ip=,lhea -rY<T\=,YQ\>X]#uraYLc@4LLXW)YUtb`6@l$"'pt9q'U_%<jH;_^l::>_(n6^=\mO -^aMLr5t>ff79.tWd"ot@R>OP`OC97]Xk"G7em4.\YfVn#Xb%?gW4$HQei'Z!8b#mT -'->:W8(Bp&^nJNnX1E\Aeq!ae=_6G3RoB/=]&=>Z>_qW2`=O"X2m_f`!XhS'"ThQ: -mGNVkdDWb)#&q]OjM6@dpS*NB%*5:(#+0eS3O2upb1PR*B$)/X:j\1rc)*T/*#Sl> -PAg#j*b7l<m#u1WB@/Xg9uY2jN]ll(=M=?!01\0Ij$>Ei3?;TnTNlJ9aEF)DF!ggJ -R:F&'IIpSi1K(6Tg9,i[Vf:jOE?go3\ngpJ*lo9WnR!ZA/e]m4Gki4(N?hB/Ms?Le -l)kEO,RsL@$&'9S,d]?(A+>W]nA1;K&im@`E+JF1\AmWZUgTKnmU-=]`%68V`W`[V -i`7;mVI^:ke=D\U-nR5jn:'T+\?LoHne/(&j4d;fWH<R*#;[&DPH3E;XZr"A]q[PC -/C(>[pZanm"(s1<R8r=Qfoj3?<k@&*8O,tL4WiGj0=8Ag*1Q&X?E1uh8=EK?,;ka3 -_]6kg#.GodeMLWji]/jmpm$CIX='YmVIsi*A8f\%]/5n^mMs#mqV$J38&FZ=i6P:N -XXpuaTerLh^cVfMJp\mZ>_T?dkDF+rB$U\=OE%To=WP.=b1sKmKW,D$SjIXsJ*mUQ -B`R:NmJrLqfPt;-+b;+`p).=o!nVR.,Mc72>s(A+4"e$kna]1,+tp#ugD$MOcEQij -R!mceFd;q6Rf^_ZR^D$aqQ@+,Y:sPe\5Y'4FM5Yn@&i4]eQ,hLl(9Y@iQ&l+NE9rL -Z+eUh9s^1alX^fNbd_soRSMH:9;\G"CbSNHIu%c"A4;3L\%*Pfka]S9ZIo^nY`_,f -qp++X>?X3#`N1$aVUgZek:EO'Rc^e.8he$G4@a"QO_0ZW7.-\#DP\CY2?4[u_F4"I -KIIloF1bT8I)1C?a]:pP6peqn!V6r-Xt;Vf3^\o\$/#DtFEaK*MIWZhNP!;]g3;0c -cR`Y.-*G,YVPEW9^S+.+\DaVe6N$psR,S98OWCQtGH[M>&J`sHA?`Iog[gLb!qG_- -MfFZIVn"coLZU(T9(SuRhAL%O&CIa8dDfWaIpm?H'LSS,`cnkk4oo5Y31l\a%O2Y9 -3%J+2XFY7oiC^hCPR6o?r&?H(Mih*<C1$@`I4C5%.SBR!Zj3!APAS03QH:fMq[`;H -![kG;.JZa#Ic:4?Hg0k1!VE(7Sk`J&pa0:Tie4i(F-D;_J^:o#o%Th&\%n8?P9mq/ -:&'2.9<T]j5l0JWG*Sl).PJe:iq]j`(Ai*6)g!Oo%EOOLc$Z2Pb2#Bh>-J=e.PLcN -([EM%1X`_!GF:CRc.Rl7Xr$'DVhsFQ]>5n)Rs>@..p-XfB;J"36e^UmJ9AEN)>[+f 
-1pR'Y)$a9O_%o$69ANMFJL(]4Tr7#"^:D6X;dH"IP&YCj$g_MEM+fgQC`5JVlE%3u -SIj:Ui@Z>BZYsX5iWXUhf*jiRif9(#l6D6NW4g@^7nNr(p&:?@a+G%5q'<6GYdF19 -:Lom#ZBENTHrO_+-.T9jdA5_E(fbuGn+LDEO!$<uS]L*\Bt0'8/(?B'![QPs8R%oG -C\3d%`b%=-GW`Hs)*u`^L4chPL=m?pU[X9qLoIOr_Kkkq=2f$AJND]4a&=['\/<k2 -e(dS4&iK0F]rQghWrfk^Qi#7d`5A4=cSl*nA*]r(rdBm#9;r8m<ng.M3_a=3]%'!N -VWGd%Ol!?/rauRiMUOUp;I3TV<1YM,SBTUbrV#s`L0rhlG3L&+H:!d_XTdP2Y2&10 -QSkJ%dYU@V-`frU-b3=.ER\X-$?'_gs'>H;>A%gB`EUC56"r)IO&;3,Oc>22+HgbD ->/%M_hi*AbnX_Qc!4DrBZ27ZkV?U:09J.TEgnuL$+a8e`NmdEB.RT\8NRd%_At,pq -^H#Bi!kSp)?W(*&a&H_@\+)kIAHOm%EOuiWQI9.)pQe_30/en\?FIt-3BMrEMmi97 -7XXnOd(T&A^pg5&*3M)(pWN!@)Xl;K(U[%r^1/9pf+j%g\,.np")7LRE?T[3(:(Jj -G-LGeaqVeoW(VI%.M^;G(P6*f*?BT`Y8;M</Y8e3mU2<R>r#o<@$*5:c2:Qj1VP@L -RK;"n;!ubUIGH=[$\>Db[0@H0lUK!Ur9L0s*#E>-l86`K?^KZodfuor^4E[D/G=&o -$0Wg`4<O_O7?-Z.CK"B_T*)IpY?1.QC)`!llng@<M0CHM7n6&C6f&^8nhL_I$UX'D -Le+,d3'-r<D@e/SWSr8kqa9(R11CdUUUmbWH:97+f*"cnE8[l,Ii\k($i"O*X]pCE -s(5]"P#H=+,tJu?>U-sno8knk:U#AW7i$c3bJMh^8"d_Q_3Ni.')k9[::#iF9%?kI -C%UTM]49^WmlkccB4VHs'RAf&'B;DE#_[ceHF^[umAE8<5Rrg2*emIfXp<<4/_dZN -qq8FcG"A2;k5tB[ieJ02Cl<5&&o4KL\lgW[V)fARTVBsErZDT@BbcsrmeG@03'Bhd -_h%fir'JA>E.YYdBeB$d4SBD[90p`);^QV"%-Hu2Im;D;YQGqEX0n>/UQpuBZNOod -81QBU(W;HYE8cmaXJb:LG/'r1CoX5UpW)W:IdB22\!J"@pig/8F=h`E&.eqr.kf?J -`h`CSMWb(kabRo>58.d/e'W'@dQqoQ`Cq3;p-Ljp0*pmY:QCJD1]3`:5F>/\jI&gp ->bCD7Nqb"cpS1<WSaUrK88E$pqV>"d$Q^\c$utg#f7XXQ=4\#Kb>WH>Qia>mShr-B -C-b3AoaQnN<_l;0'7M;:$RG#O+/tcRUZW2p^AsS``%irLO<I+Bq;.jt9m''60J)'P -=7+HtocBFK`>I,UMf?0VAs@4"B2VZg#e(6A4m,'*cNPU+;j;gPhl24@(@q4N*,W>9 -iDO"j;PNZ8o!W"9.SEQ>RAp:iJeN/5mL5LPV(;sn[cB!).AoT*,IbhGGMboXX(d-Q -E\J_MK=a0P5?F-to@[5l>pq(PZPs=fY`l$oQNRo`1l]OeK3_OV^P.OFX"C(WF:T*C -b^5X'R?4kn"q?oq&KcqRmpR@?oQOPBp8oj!o!u4uXQ0o4+M/f?M0AC#`k0c'5'ptk -qBCHb,[:hE2"#aCL%Q43U55so`)f0q'iu^clK0sGqd"4u=J6m%6-S[og4T;aQ#3Mf -Bg([7(%`"^2lraoO4T;+H@^sD\pimLfhN-#\'><mYO,^>p_P'8FOBjfUidOC8rsuK -hBbt<mAYZa5776%8GNG>7SKc:<h;PC(nOt;gmgH[309@U3%cO!q^;i9q:(5;jQG[) 
-WbemcS"dr`89+;#)ThXeD6(h-Z$kd[ED=mi51c\;<fAgMR(Ao"PKeu&+\``%Ia,lO -+-\)bW/[*5ba<A2hYY@(iH>g9L7om2s+r&^b'+=uQYLf"7gJ]@(\RaMdcVC:_J^n> -P,DFkJr2fUV$KZ,$)!imU(ZmHYD\rF5Up,lEcYRS00f9!9D_AADDCO^$BP\n.a9D/ -'/=8^%ms_kQi1]kg<=Vmq5Vd+Uus5s:k`"^I7`V<S>><!)KiLI,jIERs$ME+$opeg -d$FdH+J2=Z@(2t@a$ITnCrYJ`/;aba9U=lM.<@n9d@Db25X$t^UE,6))-MAP@VOg2 -[7t*Dkt;,o:2hFbn%Os*7iu;@s7nb^2WCOM-,[Tfb*!C_(B!tgdh%8G\$`:9IARMs -U,!K54=u%r>%=i5!s;J?"5PWnj)-o=UH#>Hs!7A<HT^Tt?VI@#b4$cY')fD:rX4!5 -b@YaG`ZJeDIN>F&*+`-_[!QAjl%\u)U^O2OaD?N(Aa#\18DhP%D=#[7(549JWT$EQ -_Y:*V`DS>JR7*Z<3tdr2]]-n[T<8l3V9l7?UguVM(]87eA'/CZl8m6#8L!-f9cc@O -AA_jAMUtpuFjXJl7!JD0;4aoKVY'i!j$jn4k=@7Ub4V=UR%D$J*.T<3>XBXM<,(cT -IN#V27&Pq/6,Tm3$Z635V92_PC\;gTr`Z",dd<Z<Y=BLK\LFTUTf(d)7's$*L=)Pc -GrJBF.uj9Jj75a#k`GGKP#UaH3@8LgrQ]r]?Pgk%C@ZX&8Ut.m9&KmDSgi:aS13^! -H8o'%\G]CT9f\t+Bs"mo&=k;Bi<$SnW->>Wb;%^HAUjNN<?ro4TIE$u;`Eh?4u;jl -0K)_l/YP7/&?U8#s7XH!_=P-DUKJlq;@@V=)q<YE?/u$*c]dB.da<0.*oDrY2`:Z7 -,8sn"ro4B(%m'\Eh)u"u,GogdUX\qHRpGJ<gtkHroAMmo9).]Z3SOl7Dh\'Go`$DQ -s"2iF,.`28#*ajLq$p&%A^<eE/VVq9/)tLG3_=FS(KLtt)e(ZL?qXk!l\.<Dm(TOa -%/u%NLp._;_.e9A3^rN&OE0bS04`,b78MbDY'D1]G7_t@b\;#l#BAu83l=U97#Q6s -^m=N1;!pkDRN<]l3n!%E$A#D_Hqc'=L=5Tj%8cb.9#HM<iu>%m@j7g^!9Sh_OH;p9 -,$LfBr"bj^kc)Y@1-F&q;A5]]U71s#W+%O`*pP#`DL;M[".mNRbD>?gn5.s!18i5m -Sci[q3j0]ZhG2f[7E4,]49dL-TM>o9\jked-_i$VP5k02]>-G\J)PIY3^WBu6AM'X -s.`*Q!&`q;DRenNLH_gAk,=k[icc;s6r>3gf\Hq68Gkbei>4TADbcJKeTLKV<Heml -[7QCFaRNp(Ld)E3H[F',BSN?H;SlH$<M,N1F$9R>b$\SKJFf=a@%A&3oeW8g$6/'L -jAj`mid;ea-F@GF>=7F_hjG.L187EmeAG>_0IifNMHC>fcc;+jK:CCRO3(X!,SQM$ -S!hdF/7`cDOh'unOb/XJ>8f75ag:KICueG\TgNA5-IAeq<-%NX,,W;+/.6i6o2*OL -;G')Pl&"biOff@qN"9`q%O22D>;"YnAsfr'YW7`6[/o2tX!T=]><jiZ&k52>-^t+_ -e6G[MD7k1":h"T]M:JOLW_SG&L+*;<$HsODi9CmGge:_J-e/D2BrfiS`[.un2`4'a -MseCj-YdUAWI*PMUTKfejB.g$,:g+C6Tr<t8"%"+Lb.QPcI2;,)2#^uhs4\1KM:-" -N*KV.*F,6*UO"WnaX,"$.ML5cb91TkX"(1P#D"C+f,Et3*p])_+qFE/hJ6=6Ts]^! 
-_la#r2go;KYO]p4l0?%d68`)T^q,?BHcZm?aSTJ!iq7tu)7:;"9Zchp:#s$5OPt`i -]B#Q0_K-7,^oRWLlUdkF9dHIkFn)pIT="^BKY51IZKDf)^W1fFT@;lX=3/j'^fZPF -VQYR0O+4`aDh"C[p\mCpac"F -~> -grestore -currentdict /inputf undef -currentdict /pstr undef diff --git a/doc/design-paper/graphtraffic.pdf b/doc/design-paper/graphtraffic.pdf Binary files differdeleted file mode 100644 index e37382f6e6..0000000000 --- a/doc/design-paper/graphtraffic.pdf +++ /dev/null diff --git a/doc/design-paper/interaction.eps b/doc/design-paper/interaction.eps deleted file mode 100644 index 9b4e3db619..0000000000 --- a/doc/design-paper/interaction.eps +++ /dev/null @@ -1,463 +0,0 @@ -%!PS-Adobe-2.0 EPSF-2.0 -%%Title: interaction.fig -%%Creator: fig2dev Version 3.2 Patchlevel 3d -%%CreationDate: Sat Jan 31 05:25:23 2004 -%%For: nickm@totoro.wangafu.net () -%%BoundingBox: 0 0 449 235 -%%Magnification: 1.0000 -%%EndComments -/$F2psDict 200 dict def -$F2psDict begin -$F2psDict /mtrx matrix put -/col-1 {0 setgray} bind def -/col0 {0.000 0.000 0.000 srgb} bind def -/col1 {0.000 0.000 1.000 srgb} bind def -/col2 {0.000 1.000 0.000 srgb} bind def -/col3 {0.000 1.000 1.000 srgb} bind def -/col4 {1.000 0.000 0.000 srgb} bind def -/col5 {1.000 0.000 1.000 srgb} bind def -/col6 {1.000 1.000 0.000 srgb} bind def -/col7 {1.000 1.000 1.000 srgb} bind def -/col8 {0.000 0.000 0.560 srgb} bind def -/col9 {0.000 0.000 0.690 srgb} bind def -/col10 {0.000 0.000 0.820 srgb} bind def -/col11 {0.530 0.810 1.000 srgb} bind def -/col12 {0.000 0.560 0.000 srgb} bind def -/col13 {0.000 0.690 0.000 srgb} bind def -/col14 {0.000 0.820 0.000 srgb} bind def -/col15 {0.000 0.560 0.560 srgb} bind def -/col16 {0.000 0.690 0.690 srgb} bind def -/col17 {0.000 0.820 0.820 srgb} bind def -/col18 {0.560 0.000 0.000 srgb} bind def -/col19 {0.690 0.000 0.000 srgb} bind def -/col20 {0.820 0.000 0.000 srgb} bind def -/col21 {0.560 0.000 0.560 srgb} bind def -/col22 {0.690 0.000 0.690 srgb} bind def -/col23 {0.820 0.000 0.820 srgb} bind def -/col24 {0.500 
0.190 0.000 srgb} bind def -/col25 {0.630 0.250 0.000 srgb} bind def -/col26 {0.750 0.380 0.000 srgb} bind def -/col27 {1.000 0.500 0.500 srgb} bind def -/col28 {1.000 0.630 0.630 srgb} bind def -/col29 {1.000 0.750 0.750 srgb} bind def -/col30 {1.000 0.880 0.880 srgb} bind def -/col31 {1.000 0.840 0.000 srgb} bind def - -end -save -newpath 0 235 moveto 0 0 lineto 449 0 lineto 449 235 lineto closepath clip newpath --62.3 239.8 translate -1 -1 scale - -/cp {closepath} bind def -/ef {eofill} bind def -/gr {grestore} bind def -/gs {gsave} bind def -/sa {save} bind def -/rs {restore} bind def -/l {lineto} bind def -/m {moveto} bind def -/rm {rmoveto} bind def -/n {newpath} bind def -/s {stroke} bind def -/sh {show} bind def -/slc {setlinecap} bind def -/slj {setlinejoin} bind def -/slw {setlinewidth} bind def -/srgb {setrgbcolor} bind def -/rot {rotate} bind def -/sc {scale} bind def -/sd {setdash} bind def -/ff {findfont} bind def -/sf {setfont} bind def -/scf {scalefont} bind def -/sw {stringwidth} bind def -/tr {translate} bind def -/tnt {dup dup currentrgbcolor - 4 -2 roll dup 1 exch sub 3 -1 roll mul add - 4 -2 roll dup 1 exch sub 3 -1 roll mul add - 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} - bind def -/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul - 4 -2 roll mul srgb} bind def -/reencdict 12 dict def /ReEncode { reencdict begin -/newcodesandnames exch def /newfontname exch def /basefontname exch def -/basefontdict basefontname findfont def /newfont basefontdict maxlength dict def -basefontdict { exch dup /FID ne { dup /Encoding eq -{ exch dup length array copy newfont 3 1 roll put } -{ exch newfont 3 1 roll put } ifelse } { pop pop } ifelse } forall -newfont /FontName newfontname put newcodesandnames aload pop -128 1 255 { newfont /Encoding get exch /.notdef put } for -newcodesandnames length 2 idiv { newfont /Encoding get 3 1 roll put } repeat -newfontname newfont definefont pop end } def -/isovec [ -8#055 /minus 8#200 /grave 8#201 /acute 
8#202 /circumflex 8#203 /tilde -8#204 /macron 8#205 /breve 8#206 /dotaccent 8#207 /dieresis -8#210 /ring 8#211 /cedilla 8#212 /hungarumlaut 8#213 /ogonek 8#214 /caron -8#220 /dotlessi 8#230 /oe 8#231 /OE -8#240 /space 8#241 /exclamdown 8#242 /cent 8#243 /sterling -8#244 /currency 8#245 /yen 8#246 /brokenbar 8#247 /section 8#250 /dieresis -8#251 /copyright 8#252 /ordfeminine 8#253 /guillemotleft 8#254 /logicalnot -8#255 /hyphen 8#256 /registered 8#257 /macron 8#260 /degree 8#261 /plusminus -8#262 /twosuperior 8#263 /threesuperior 8#264 /acute 8#265 /mu 8#266 /paragraph -8#267 /periodcentered 8#270 /cedilla 8#271 /onesuperior 8#272 /ordmasculine -8#273 /guillemotright 8#274 /onequarter 8#275 /onehalf -8#276 /threequarters 8#277 /questiondown 8#300 /Agrave 8#301 /Aacute -8#302 /Acircumflex 8#303 /Atilde 8#304 /Adieresis 8#305 /Aring -8#306 /AE 8#307 /Ccedilla 8#310 /Egrave 8#311 /Eacute -8#312 /Ecircumflex 8#313 /Edieresis 8#314 /Igrave 8#315 /Iacute -8#316 /Icircumflex 8#317 /Idieresis 8#320 /Eth 8#321 /Ntilde 8#322 /Ograve -8#323 /Oacute 8#324 /Ocircumflex 8#325 /Otilde 8#326 /Odieresis 8#327 /multiply -8#330 /Oslash 8#331 /Ugrave 8#332 /Uacute 8#333 /Ucircumflex -8#334 /Udieresis 8#335 /Yacute 8#336 /Thorn 8#337 /germandbls 8#340 /agrave -8#341 /aacute 8#342 /acircumflex 8#343 /atilde 8#344 /adieresis 8#345 /aring -8#346 /ae 8#347 /ccedilla 8#350 /egrave 8#351 /eacute -8#352 /ecircumflex 8#353 /edieresis 8#354 /igrave 8#355 /iacute -8#356 /icircumflex 8#357 /idieresis 8#360 /eth 8#361 /ntilde 8#362 /ograve -8#363 /oacute 8#364 /ocircumflex 8#365 /otilde 8#366 /odieresis 8#367 /divide -8#370 /oslash 8#371 /ugrave 8#372 /uacute 8#373 /ucircumflex -8#374 /udieresis 8#375 /yacute 8#376 /thorn 8#377 /ydieresis] def -/Times-Bold /Times-Bold-iso isovec ReEncode -/Times-Roman /Times-Roman-iso isovec ReEncode -/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def -/$F2psEnd {$F2psEnteredState restore end} def - -$F2psBegin -10 setmiterlimit - 0.06000 0.06000 sc -% 
-% Fig objects follow -% -% Polyline -15.000 slw -n 6000 300 m - 6000 3975 l gs col0 s gr -% Polyline -7.500 slw -gs clippath -3615 555 m 3615 495 l 3464 495 l 3584 525 l 3464 555 l cp -eoclip -n 1200 525 m - 3600 525 l gs col0 s gr gr - -% arrowhead -n 3464 555 m 3584 525 l 3464 495 l 3464 555 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -1185 795 m 1185 855 l 1336 855 l 1216 825 l 1336 795 l cp -eoclip -n 3600 825 m - 1200 825 l gs col0 s gr gr - -% arrowhead -n 1336 795 m 1216 825 l 1336 855 l 1336 795 l cp gs 0.00 setgray ef gr col0 s -% Polyline -15.000 slw -n 1200 300 m - 1200 3975 l gs col0 s gr -% Polyline -7.500 slw -gs clippath -3615 1155 m 3615 1095 l 3464 1095 l 3584 1125 l 3464 1155 l cp -eoclip -n 1200 1125 m - 3600 1125 l gs col0 s gr gr - -% arrowhead -n 3464 1155 m 3584 1125 l 3464 1095 l 3464 1155 l cp gs 0.00 setgray ef gr col0 s -% Polyline -15.000 slw -n 3600 300 m - 3600 3975 l gs col0 s gr -% Polyline -7.500 slw -gs clippath -6015 1230 m 6015 1170 l 5864 1170 l 5984 1200 l 5864 1230 l cp -eoclip -n 3600 1200 m - 6000 1200 l gs col0 s gr gr - -% arrowhead -n 5864 1230 m 5984 1200 l 5864 1170 l 5864 1230 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -3585 1470 m 3585 1530 l 3736 1530 l 3616 1500 l 3736 1470 l cp -eoclip -n 6000 1500 m - 3600 1500 l gs col0 s gr gr - -% arrowhead -n 3736 1470 m 3616 1500 l 3736 1530 l 3736 1470 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -1185 1545 m 1185 1605 l 1336 1605 l 1216 1575 l 1336 1545 l cp -eoclip -n 3600 1575 m - 1200 1575 l gs col0 s gr gr - -% arrowhead -n 1336 1545 m 1216 1575 l 1336 1605 l 1336 1545 l cp gs 0.00 setgray ef gr col0 s -% Polyline - [15 45] 45 sd -n 1050 1800 m - 8325 1800 l gs col0 s gr [] 0 sd -% Polyline -gs clippath -3615 2130 m 3615 2070 l 3464 2070 l 3584 2100 l 3464 2130 l cp -eoclip -n 1200 2100 m - 3600 2100 l gs col0 s gr gr - -% arrowhead -n 3464 2130 m 3584 2100 l 3464 2070 l 3464 2130 l cp gs 0.00 setgray ef gr col0 s -% 
Polyline -gs clippath -6015 2205 m 6015 2145 l 5864 2145 l 5984 2175 l 5864 2205 l cp -eoclip -n 3600 2175 m - 6000 2175 l gs col0 s gr gr - -% arrowhead -n 5864 2205 m 5984 2175 l 5864 2145 l 5864 2205 l cp gs 0.00 setgray ef gr col0 s -% Polyline - [60] 0 sd -gs clippath -8190 2430 m 8190 2370 l 8039 2370 l 8159 2400 l 8039 2430 l cp -5985 2370 m 5985 2430 l 6136 2430 l 6016 2400 l 6136 2370 l cp -eoclip -n 6000 2400 m - 8175 2400 l gs col0 s gr gr - [] 0 sd -% arrowhead -n 6136 2370 m 6016 2400 l 6136 2430 l 6136 2370 l cp gs 0.00 setgray ef gr col0 s -% arrowhead -n 8039 2430 m 8159 2400 l 8039 2370 l 8039 2430 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -3585 2520 m 3585 2580 l 3736 2580 l 3616 2550 l 3736 2520 l cp -eoclip -n 6000 2550 m - 3600 2550 l gs col0 s gr gr - -% arrowhead -n 3736 2520 m 3616 2550 l 3736 2580 l 3736 2520 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -1185 2595 m 1185 2655 l 1336 2655 l 1216 2625 l 1336 2595 l cp -eoclip -n 3600 2625 m - 1200 2625 l gs col0 s gr gr - -% arrowhead -n 1336 2595 m 1216 2625 l 1336 2655 l 1336 2595 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -3615 3030 m 3615 2970 l 3464 2970 l 3584 3000 l 3464 3030 l cp -eoclip -n 1200 3000 m - 3600 3000 l gs col0 s gr gr - -% arrowhead -n 3464 3030 m 3584 3000 l 3464 2970 l 3464 3030 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -6015 3105 m 6015 3045 l 5864 3045 l 5984 3075 l 5864 3105 l cp -eoclip -n 3600 3075 m - 6000 3075 l gs col0 s gr gr - -% arrowhead -n 5864 3105 m 5984 3075 l 5864 3045 l 5864 3105 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -8190 3180 m 8190 3120 l 8039 3120 l 8159 3150 l 8039 3180 l cp -eoclip -n 6000 3150 m - 8175 3150 l gs col0 s gr gr - -% arrowhead -n 8039 3180 m 8159 3150 l 8039 3120 l 8039 3180 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -5985 3420 m 5985 3480 l 6136 3480 l 6016 3450 l 6136 3420 l cp -eoclip -n 8175 3450 m - 6000 3450 l gs col0 s gr gr 
- -% arrowhead -n 6136 3420 m 6016 3450 l 6136 3480 l 6136 3420 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -5985 3495 m 5985 3555 l 6136 3555 l 6016 3525 l 6136 3495 l cp -eoclip -n 8175 3525 m - 6000 3525 l gs col0 s gr gr - -% arrowhead -n 6136 3495 m 6016 3525 l 6136 3555 l 6136 3495 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -5985 3570 m 5985 3630 l 6136 3630 l 6016 3600 l 6136 3570 l cp -eoclip -n 8175 3600 m - 6000 3600 l gs col0 s gr gr - -% arrowhead -n 6136 3570 m 6016 3600 l 6136 3630 l 6136 3570 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -3585 3495 m 3585 3555 l 3736 3555 l 3616 3525 l 3736 3495 l cp -eoclip -n 6000 3525 m - 3600 3525 l gs col0 s gr gr - -% arrowhead -n 3736 3495 m 3616 3525 l 3736 3555 l 3736 3495 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -3585 3645 m 3585 3705 l 3736 3705 l 3616 3675 l 3736 3645 l cp -eoclip -n 6000 3675 m - 3600 3675 l gs col0 s gr gr - -% arrowhead -n 3736 3645 m 3616 3675 l 3736 3705 l 3736 3645 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -3585 3570 m 3585 3630 l 3736 3630 l 3616 3600 l 3736 3570 l cp -eoclip -n 6000 3600 m - 3600 3600 l gs col0 s gr gr - -% arrowhead -n 3736 3570 m 3616 3600 l 3736 3630 l 3736 3570 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -1185 3645 m 1185 3705 l 1336 3705 l 1216 3675 l 1336 3645 l cp -eoclip -n 3600 3675 m - 1200 3675 l gs col0 s gr gr - -% arrowhead -n 1336 3645 m 1216 3675 l 1336 3705 l 1336 3645 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -1185 3720 m 1185 3780 l 1336 3780 l 1216 3750 l 1336 3720 l cp -eoclip -n 3600 3750 m - 1200 3750 l gs col0 s gr gr - -% arrowhead -n 1336 3720 m 1216 3750 l 1336 3780 l 1336 3720 l cp gs 0.00 setgray ef gr col0 s -% Polyline -gs clippath -1185 3795 m 1185 3855 l 1336 3855 l 1216 3825 l 1336 3795 l cp -eoclip -n 3600 3825 m - 1200 3825 l gs col0 s gr gr - -% arrowhead -n 1336 3795 m 1216 3825 l 1336 3855 l 1336 3795 l cp gs 0.00 
setgray ef gr col0 s -% Polyline -15.000 slw -n 8175 300 m - 8175 3975 l gs col0 s gr -% Polyline -7.500 slw -n 6300 825 m 7950 825 l 7950 1725 l 6300 1725 l - cp gs col7 1.00 shd ef gr gs col0 s gr -/Times-Bold-iso ff 180.00 scf sf -3375 225 m -gs 1 -1 sc (OR 1) col0 sh gr -/Times-Bold-iso ff 180.00 scf sf -1050 225 m -gs 1 -1 sc (Alice) col0 sh gr -/Times-Bold-iso ff 180.00 scf sf -5775 225 m -gs 1 -1 sc (OR 2) col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -6075 3075 m -gs 1 -1 sc ("HTTP GET...") col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -4800 3975 m -gs 1 -1 sc (. . .) dup sw pop 2 div neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -7125 3975 m -gs 1 -1 sc (. . .) dup sw pop 2 div neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -2400 3975 m -gs 1 -1 sc (. . .) dup sw pop 2 div neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -7125 2325 m -gs 1 -1 sc (\(TCP handshake\)) dup sw pop 2 div neg 0 rm col0 sh gr -/Times-Bold-iso ff 180.00 scf sf -7875 225 m -gs 1 -1 sc (website) col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -7125 1425 m -gs 1 -1 sc ({X}--AES encryption) dup sw pop 2 div neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -7125 1200 m -gs 1 -1 sc (E\(x\)--RSA encryption) dup sw pop 2 div neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -7125 975 m -gs 1 -1 sc (Legend:) dup sw pop 2 div neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -2400 225 m -gs 1 -1 sc (\(link is TLS-encrypted\)) dup sw pop 2 div neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -1275 1050 m -gs 1 -1 sc (Relay c1{Extend, OR2, E\(g^x2\)}) col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -1275 2025 m -gs 1 -1 sc (Relay c1{{Begin <website>:80}}) col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -3525 1500 m -gs 1 -1 sc (Relay c1{Extended, g^y2, H\(K2\)}) dup sw pop neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -3675 2100 m -gs 1 -1 sc (Relay c2{Begin <website>:80}) col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -3525 2550 m -gs 1 -1 sc 
(Relay c1{{Connected}}) dup sw pop neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -5925 2475 m -gs 1 -1 sc (Relay c2{Connected}) dup sw pop neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -1275 2925 m -gs 1 -1 sc (Relay c1{{Data, "HTTP GET..."}}) col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -3675 3000 m -gs 1 -1 sc (Relay c2{Data, "HTTP GET..."}) col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -4800 225 m -gs 1 -1 sc (\(link is TLS-encryped\)) dup sw pop 2 div neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -7050 225 m -gs 1 -1 sc (\(unencrypted\)) dup sw pop 2 div neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -7125 1650 m -gs 1 -1 sc (cN--a circID) dup sw pop 2 div neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -3525 3600 m -gs 1 -1 sc (Relay c1{{Data, \(response\)}}) dup sw pop neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -8100 3375 m -gs 1 -1 sc (\(response\)) dup sw pop neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -5925 3450 m -gs 1 -1 sc (Relay c2{Data, \(response\)}) dup sw pop neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -5925 1425 m -gs 1 -1 sc (Created c2, g^y2, H\(K2\)) dup sw pop neg 0 rm col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -3675 1125 m -gs 1 -1 sc (Create c2, E\(g^x2\)) col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -1275 450 m -gs 1 -1 sc (Create c1, E\(g^x1\)) col0 sh gr -/Times-Roman-iso ff 150.00 scf sf -3525 750 m -gs 1 -1 sc (Created c1, g^y1, H\(K1\)) dup sw pop neg 0 rm col0 sh gr -$F2psEnd -rs diff --git a/doc/design-paper/interaction.fig b/doc/design-paper/interaction.fig deleted file mode 100644 index a7b49e0a52..0000000000 --- a/doc/design-paper/interaction.fig +++ /dev/null @@ -1,122 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 2 - 6000 300 6000 3975 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 1200 525 3600 525 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 
120.00 - 3600 825 1200 825 -2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 2 - 1200 300 1200 3975 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 1200 1125 3600 1125 -2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 2 - 3600 300 3600 3975 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 3600 1200 6000 1200 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 6000 1500 3600 1500 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 3600 1575 1200 1575 -2 1 2 1 0 7 50 0 -1 3.000 0 0 -1 0 0 2 - 1050 1800 8325 1800 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 1200 2100 3600 2100 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 3600 2175 6000 2175 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2 - 1 1 1.00 60.00 120.00 - 1 1 1.00 60.00 120.00 - 6000 2400 8175 2400 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 6000 2550 3600 2550 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 3600 2625 1200 2625 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 1200 3000 3600 3000 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 3600 3075 6000 3075 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 6000 3150 8175 3150 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 8175 3450 6000 3450 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 8175 3525 6000 3525 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 8175 3600 6000 3600 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 6000 3525 3600 3525 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 6000 3675 3600 3675 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 6000 3600 3600 3600 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 3600 3675 1200 3675 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 1 1.00 60.00 120.00 - 3600 3750 1200 3750 -2 1 0 1 0 7 50 0 -1 3.000 0 0 -1 1 0 2 - 1 
1 1.00 60.00 120.00 - 3600 3825 1200 3825 -2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 2 - 8175 300 8175 3975 -2 2 0 1 0 7 50 0 20 3.000 0 0 -1 0 0 5 - 6300 825 7950 825 7950 1725 6300 1725 6300 825 -4 0 0 50 0 2 12 0.0000 4 135 450 3375 225 OR 1\001 -4 0 0 50 0 2 12 0.0000 4 135 420 1050 225 Alice\001 -4 0 0 50 0 2 12 0.0000 4 135 450 5775 225 OR 2\001 -4 0 0 50 0 0 10 0.0000 4 105 960 6075 3075 "HTTP GET..."\001 -4 1 0 50 0 0 10 0.0000 4 15 135 4800 3975 . . .\001 -4 1 0 50 0 0 10 0.0000 4 15 135 7125 3975 . . .\001 -4 1 0 50 0 0 10 0.0000 4 15 135 2400 3975 . . .\001 -4 1 0 50 0 0 10 0.0000 4 135 1050 7125 2325 (TCP handshake)\001 -4 0 0 50 0 2 12 0.0000 4 135 630 7875 225 website\001 -4 1 0 50 0 0 10 0.0000 4 135 1335 7125 1425 {X}--AES encryption\001 -4 1 0 50 0 0 10 0.0000 4 135 1410 7125 1200 E(x)--RSA encryption\001 -4 1 0 50 0 0 10 0.0000 4 135 480 7125 975 Legend:\001 -4 1 0 50 0 0 10 0.0000 4 135 1455 2400 225 (link is TLS-encrypted)\001 -4 0 0 50 0 0 10 0.0000 4 135 2085 1275 1050 Relay c1{Extend, OR2, E(g^x2)}\001 -4 0 0 50 0 0 10 0.0000 4 135 1965 1275 2025 Relay c1{{Begin <website>:80}}\001 -4 2 0 50 0 0 10 0.0000 4 135 2190 3525 1500 Relay c1{Extended, g^y2, H(K2)}\001 -4 0 0 50 0 0 10 0.0000 4 135 1845 3675 2100 Relay c2{Begin <website>:80}\001 -4 2 0 50 0 0 10 0.0000 4 135 1410 3525 2550 Relay c1{{Connected}}\001 -4 2 0 50 0 0 10 0.0000 4 135 1290 5925 2475 Relay c2{Connected}\001 -4 0 0 50 0 0 10 0.0000 4 135 2085 1275 2925 Relay c1{{Data, "HTTP GET..."}}\001 -4 0 0 50 0 0 10 0.0000 4 135 1965 3675 3000 Relay c2{Data, "HTTP GET..."}\001 -4 1 0 50 0 0 10 0.0000 4 135 1365 4800 225 (link is TLS-encryped)\001 -4 1 0 50 0 0 10 0.0000 4 135 870 7050 225 (unencrypted)\001 -4 1 0 50 0 0 10 0.0000 4 105 780 7125 1650 cN--a circID\001 -4 2 0 50 0 0 10 0.0000 4 135 1860 3525 3600 Relay c1{{Data, (response)}}\001 -4 2 0 50 0 0 10 0.0000 4 135 645 8100 3375 (response)\001 -4 2 0 50 0 0 10 0.0000 4 135 1650 5925 3450 Relay c2{Data, (response)}\001 -4 2 0 50 0 0 10 
0.0000 4 135 1545 5925 1425 Created c2, g^y2, H(K2)\001 -4 0 0 50 0 0 10 0.0000 4 135 1170 3675 1125 Create c2, E(g^x2)\001 -4 0 0 50 0 0 10 0.0000 4 135 1170 1275 450 Create c1, E(g^x1)\001 -4 2 0 50 0 0 10 0.0000 4 135 1545 3525 750 Created c1, g^y1, H(K1)\001 diff --git a/doc/design-paper/interaction.pdf b/doc/design-paper/interaction.pdf Binary files differdeleted file mode 100644 index 8def0add59..0000000000 --- a/doc/design-paper/interaction.pdf +++ /dev/null diff --git a/doc/design-paper/interaction.png b/doc/design-paper/interaction.png Binary files differdeleted file mode 100644 index 2bb904fcd9..0000000000 --- a/doc/design-paper/interaction.png +++ /dev/null diff --git a/doc/design-paper/latex8.bst b/doc/design-paper/latex8.bst deleted file mode 100644 index bae8e209ee..0000000000 --- a/doc/design-paper/latex8.bst +++ /dev/null @@ -1,1122 +0,0 @@ - -% --------------------------------------------------------------- -% -% by Paolo.Ienne@di.epfl.ch -% - -% --------------------------------------------------------------- -% -% no guarantee is given that the format corresponds perfectly to -% IEEE 8.5" x 11" Proceedings, but most features should be ok. -% -% --------------------------------------------------------------- -% -% `latex8' from BibTeX standard bibliography style `abbrv' -% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09. -% Copyright (C) 1985, all rights reserved. -% Copying of this file is authorized only if either -% (1) you make absolutely no changes to your copy, including name, or -% (2) if you do make changes, you name it something other than -% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. -% This restriction helps ensure that all standard styles are identical. -% The file btxbst.doc has the documentation for this style. 
- -ENTRY - { address - author - booktitle - chapter - edition - editor - howpublished - institution - journal - key - month - note - number - organization - pages - publisher - school - series - title - type - volume - year - } - {} - { label } - -INTEGERS { output.state before.all mid.sentence after.sentence after.block } - -FUNCTION {init.state.consts} -{ #0 'before.all := - #1 'mid.sentence := - #2 'after.sentence := - #3 'after.block := -} - -STRINGS { s t } - -FUNCTION {output.nonnull} -{ 's := - output.state mid.sentence = - { ", " * write$ } - { output.state after.block = - { add.period$ write$ - newline$ - "\newblock " write$ - } - { output.state before.all = - 'write$ - { add.period$ " " * write$ } - if$ - } - if$ - mid.sentence 'output.state := - } - if$ - s -} - -FUNCTION {output} -{ duplicate$ empty$ - 'pop$ - 'output.nonnull - if$ -} - -FUNCTION {output.check} -{ 't := - duplicate$ empty$ - { pop$ "empty " t * " in " * cite$ * warning$ } - 'output.nonnull - if$ -} - -FUNCTION {output.bibitem} -{ newline$ - "\bibitem{" write$ - cite$ write$ - "}" write$ - newline$ - "" - before.all 'output.state := -} - -FUNCTION {fin.entry} -{ add.period$ - write$ - newline$ -} - -FUNCTION {new.block} -{ output.state before.all = - 'skip$ - { after.block 'output.state := } - if$ -} - -FUNCTION {new.sentence} -{ output.state after.block = - 'skip$ - { output.state before.all = - 'skip$ - { after.sentence 'output.state := } - if$ - } - if$ -} - -FUNCTION {not} -{ { #0 } - { #1 } - if$ -} - -FUNCTION {and} -{ 'skip$ - { pop$ #0 } - if$ -} - -FUNCTION {or} -{ { pop$ #1 } - 'skip$ - if$ -} - -FUNCTION {new.block.checka} -{ empty$ - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.block.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.sentence.checka} -{ empty$ - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {new.sentence.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {field.or.null} -{ duplicate$ empty$ 
- { pop$ "" } - 'skip$ - if$ -} - -FUNCTION {emphasize} -{ duplicate$ empty$ - { pop$ "" } - { "{\em " swap$ * "}" * } - if$ -} - -INTEGERS { nameptr namesleft numnames } - -FUNCTION {format.names} -{ 's := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't := - nameptr #1 > - { namesleft #1 > - { ", " * t * } - { numnames #2 > - { "," * } - 'skip$ - if$ - t "others" = - { " et~al." * } - { " and " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {format.authors} -{ author empty$ - { "" } - { author format.names } - if$ -} - -FUNCTION {format.editors} -{ editor empty$ - { "" } - { editor format.names - editor num.names$ #1 > - { ", editors" * } - { ", editor" * } - if$ - } - if$ -} - -FUNCTION {format.title} -{ title empty$ - { "" } - { title "t" change.case$ } - if$ -} - -FUNCTION {n.dashify} -{ 't := - "" - { t empty$ not } - { t #1 #1 substring$ "-" = - { t #1 #2 substring$ "--" = not - { "--" * - t #2 global.max$ substring$ 't := - } - { { t #1 #1 substring$ "-" = } - { "-" * - t #2 global.max$ substring$ 't := - } - while$ - } - if$ - } - { t #1 #1 substring$ * - t #2 global.max$ substring$ 't := - } - if$ - } - while$ -} - -FUNCTION {format.date} -{ year empty$ - { month empty$ - { "" } - { "there's a month but no year in " cite$ * warning$ - month - } - if$ - } - { month empty$ - 'year - { month " " * year * } - if$ - } - if$ -} - -FUNCTION {format.btitle} -{ title emphasize -} - -FUNCTION {tie.or.space.connect} -{ duplicate$ text.length$ #3 < - { "~" } - { " " } - if$ - swap$ * * -} - -FUNCTION {either.or.check} -{ empty$ - 'pop$ - { "can't use both " swap$ * " fields in " * cite$ * warning$ } - if$ -} - -FUNCTION {format.bvolume} -{ volume empty$ - { "" } - { "volume" volume tie.or.space.connect - series empty$ - 'skip$ - { " of " * series emphasize * } - if$ - "volume and number" number 
either.or.check - } - if$ -} - -FUNCTION {format.number.series} -{ volume empty$ - { number empty$ - { series field.or.null } - { output.state mid.sentence = - { "number" } - { "Number" } - if$ - number tie.or.space.connect - series empty$ - { "there's a number but no series in " cite$ * warning$ } - { " in " * series * } - if$ - } - if$ - } - { "" } - if$ -} - -FUNCTION {format.edition} -{ edition empty$ - { "" } - { output.state mid.sentence = - { edition "l" change.case$ " edition" * } - { edition "t" change.case$ " edition" * } - if$ - } - if$ -} - -INTEGERS { multiresult } - -FUNCTION {multi.page.check} -{ 't := - #0 'multiresult := - { multiresult not - t empty$ not - and - } - { t #1 #1 substring$ - duplicate$ "-" = - swap$ duplicate$ "," = - swap$ "+" = - or or - { #1 'multiresult := } - { t #2 global.max$ substring$ 't := } - if$ - } - while$ - multiresult -} - -FUNCTION {format.pages} -{ pages empty$ - { "" } - { pages multi.page.check - { "pages" pages n.dashify tie.or.space.connect } - { "page" pages tie.or.space.connect } - if$ - } - if$ -} - -FUNCTION {format.vol.num.pages} -{ volume field.or.null - number empty$ - 'skip$ - { "(" number * ")" * * - volume empty$ - { "there's a number but no volume in " cite$ * warning$ } - 'skip$ - if$ - } - if$ - pages empty$ - 'skip$ - { duplicate$ empty$ - { pop$ format.pages } - { ":" * pages n.dashify * } - if$ - } - if$ -} - -FUNCTION {format.chapter.pages} -{ chapter empty$ - 'format.pages - { type empty$ - { "chapter" } - { type "l" change.case$ } - if$ - chapter tie.or.space.connect - pages empty$ - 'skip$ - { ", " * format.pages * } - if$ - } - if$ -} - -FUNCTION {format.in.ed.booktitle} -{ booktitle empty$ - { "" } - { editor empty$ - { "In " booktitle emphasize * } - { "In " format.editors * ", " * booktitle emphasize * } - if$ - } - if$ -} - -FUNCTION {empty.misc.check} - -{ author empty$ title empty$ howpublished empty$ - month empty$ year empty$ note empty$ - and and and and and - key empty$ not and - { 
"all relevant fields are empty in " cite$ * warning$ } - 'skip$ - if$ -} - -FUNCTION {format.thesis.type} -{ type empty$ - 'skip$ - { pop$ - type "t" change.case$ - } - if$ -} - -FUNCTION {format.tr.number} -{ type empty$ - { "Technical Report" } - 'type - if$ - number empty$ - { "t" change.case$ } - { number tie.or.space.connect } - if$ -} - -FUNCTION {format.article.crossref} -{ key empty$ - { journal empty$ - { "need key or journal for " cite$ * " to crossref " * crossref * - warning$ - "" - } - { "In {\em " journal * "\/}" * } - if$ - } - { "In " key * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.crossref.editor} -{ editor #1 "{vv~}{ll}" format.name$ - editor num.names$ duplicate$ - #2 > - { pop$ " et~al." * } - { #2 < - 'skip$ - { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = - { " et~al." * } - { " and " * editor #2 "{vv~}{ll}" format.name$ * } - if$ - } - if$ - } - if$ -} - -FUNCTION {format.book.crossref} -{ volume empty$ - { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ - "In " - } - { "Volume" volume tie.or.space.connect - " of " * - } - if$ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { series empty$ - { "need editor, key, or series for " cite$ * " to crossref " * - crossref * warning$ - "" * - } - { "{\em " * series * "\/}" * } - if$ - } - { key * } - if$ - } - { format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.incoll.inproc.crossref} -{ editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { booktitle empty$ - { "need editor, key, or booktitle for " cite$ * " to crossref " * - crossref * warning$ - "" - } - { "In {\em " booktitle * "\/}" * } - if$ - } - { "In " key * } - if$ - } - { "In " format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {article} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref 
missing$ - { journal emphasize "journal" output.check - format.vol.num.pages output - format.date "year" output.check - } - { format.article.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {book} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {booklet} -{ output.bibitem - format.authors output - new.block - format.title "title" output.check - howpublished address new.block.checkb - howpublished output - address output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {inbook} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - format.chapter.pages "chapter and pages" output.check - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { format.chapter.pages "chapter and pages" output.check - new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {incollection} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref missing$ - { 
format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.chapter.pages output - new.sentence - publisher "publisher" output.check - address output - format.edition output - format.date "year" output.check - } - { format.incoll.inproc.crossref output.nonnull - format.chapter.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {inproceedings} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.pages output - address empty$ - { organization publisher new.sentence.checkb - organization output - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - organization output - publisher output - } - if$ - } - { format.incoll.inproc.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {conference} { inproceedings } - -FUNCTION {manual} -{ output.bibitem - author empty$ - { organization empty$ - 'skip$ - { organization output.nonnull - address output - } - if$ - } - { format.authors output.nonnull } - if$ - new.block - format.btitle "title" output.check - author empty$ - { organization empty$ - { address new.block.checka - address output - } - 'skip$ - if$ - } - { organization address new.block.checkb - organization output - address output - } - if$ - format.edition output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {mastersthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - "Master's thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {misc} -{ 
output.bibitem - format.authors output - title howpublished new.block.checkb - format.title output - howpublished new.block.checka - howpublished output - format.date output - new.block - note output - fin.entry - empty.misc.check -} - -FUNCTION {phdthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.btitle "title" output.check - new.block - "PhD thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {proceedings} -{ output.bibitem - editor empty$ - { organization output } - { format.editors output.nonnull } - - if$ - new.block - format.btitle "title" output.check - format.bvolume output - format.number.series output - address empty$ - { editor empty$ - { publisher new.sentence.checka } - { organization publisher new.sentence.checkb - organization output - } - if$ - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - editor empty$ - 'skip$ - { organization output } - if$ - publisher output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {techreport} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - format.tr.number output.nonnull - institution "institution" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {unpublished} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - note "note" output.check - format.date output - fin.entry -} - -FUNCTION {default.type} { misc } - -MACRO {jan} {"Jan."} - -MACRO {feb} {"Feb."} - -MACRO {mar} {"Mar."} - -MACRO {apr} {"Apr."} - -MACRO {may} {"May"} - -MACRO {jun} {"June"} - -MACRO {jul} {"July"} - -MACRO {aug} {"Aug."} - -MACRO {sep} {"Sept."} - -MACRO {oct} {"Oct."} - -MACRO {nov} 
{"Nov."} - -MACRO {dec} {"Dec."} - -MACRO {acmcs} {"ACM Comput. Surv."} - -MACRO {acta} {"Acta Inf."} - -MACRO {cacm} {"Commun. ACM"} - -MACRO {ibmjrd} {"IBM J. Res. Dev."} - -MACRO {ibmsj} {"IBM Syst.~J."} - -MACRO {ieeese} {"IEEE Trans. Softw. Eng."} - -MACRO {ieeetc} {"IEEE Trans. Comput."} - -MACRO {ieeetcad} - {"IEEE Trans. Comput.-Aided Design Integrated Circuits"} - -MACRO {ipl} {"Inf. Process. Lett."} - -MACRO {jacm} {"J.~ACM"} - -MACRO {jcss} {"J.~Comput. Syst. Sci."} - -MACRO {scp} {"Sci. Comput. Programming"} - -MACRO {sicomp} {"SIAM J. Comput."} - -MACRO {tocs} {"ACM Trans. Comput. Syst."} - -MACRO {tods} {"ACM Trans. Database Syst."} - -MACRO {tog} {"ACM Trans. Gr."} - -MACRO {toms} {"ACM Trans. Math. Softw."} - -MACRO {toois} {"ACM Trans. Office Inf. Syst."} - -MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."} - -MACRO {tcs} {"Theoretical Comput. Sci."} - -READ - -FUNCTION {sortify} -{ purify$ - "l" change.case$ -} - -INTEGERS { len } - -FUNCTION {chop.word} -{ 's := - 'len := - s #1 len substring$ = - { s len #1 + global.max$ substring$ } - 's - if$ -} - -FUNCTION {sort.format.names} -{ 's := - #1 'nameptr := - "" - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { nameptr #1 > - { " " * } - 'skip$ - if$ - s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't := - nameptr numnames = t "others" = and - { "et al" * } - { t sortify * } - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {sort.format.title} -{ 't := - "A " #2 - "An " #3 - "The " #4 t chop.word - chop.word - chop.word - sortify - #1 global.max$ substring$ -} - -FUNCTION {author.sort} -{ author empty$ - { key empty$ - { "to sort, need author or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.editor.sort} -{ author empty$ - { editor empty$ - { key empty$ - { "to sort, need author, editor, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ 
- } - { editor sort.format.names } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.organization.sort} -{ author empty$ - - { organization empty$ - { key empty$ - { "to sort, need author, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {editor.organization.sort} -{ editor empty$ - { organization empty$ - { key empty$ - { "to sort, need editor, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { editor sort.format.names } - if$ -} - -FUNCTION {presort} -{ type$ "book" = - type$ "inbook" = - or - 'author.editor.sort - { type$ "proceedings" = - 'editor.organization.sort - { type$ "manual" = - 'author.organization.sort - 'author.sort - if$ - } - if$ - } - if$ - " " - * - year field.or.null sortify - * - " " - * - title field.or.null - sort.format.title - * - #1 entry.max$ substring$ - 'sort.key$ := -} - -ITERATE {presort} - -SORT - -STRINGS { longest.label } - -INTEGERS { number.label longest.label.width } - -FUNCTION {initialize.longest.label} -{ "" 'longest.label := - #1 'number.label := - #0 'longest.label.width := -} - -FUNCTION {longest.label.pass} -{ number.label int.to.str$ 'label := - number.label #1 + 'number.label := - label width$ longest.label.width > - { label 'longest.label := - label width$ 'longest.label.width := - } - 'skip$ - if$ -} - -EXECUTE {initialize.longest.label} - -ITERATE {longest.label.pass} - -FUNCTION {begin.bib} -{ preamble$ empty$ - 'skip$ - { preamble$ write$ newline$ } - if$ - "\begin{thebibliography}{" longest.label * - "}\setlength{\itemsep}{-1ex}\small" * write$ newline$ -} - -EXECUTE {begin.bib} - -EXECUTE {init.state.consts} - -ITERATE {call.type$} - -FUNCTION {end.bib} -{ newline$ - "\end{thebibliography}" write$ newline$ -} - -EXECUTE {end.bib} - -% end of file latex8.bst -% 
--------------------------------------------------------------- - - - diff --git a/doc/design-paper/llncs.cls b/doc/design-paper/llncs.cls deleted file mode 100644 index 697dd774ec..0000000000 --- a/doc/design-paper/llncs.cls +++ /dev/null @@ -1,1016 +0,0 @@ -% LLNCS DOCUMENT CLASS -- version 2.8 -% for LaTeX2e -% -\NeedsTeXFormat{LaTeX2e}[1995/12/01] -\ProvidesClass{llncs}[2000/05/16 v2.8 -^^JLaTeX document class for Lecture Notes in Computer Science] -% Options -\let\if@envcntreset\iffalse -\DeclareOption{envcountreset}{\let\if@envcntreset\iftrue} -\DeclareOption{citeauthoryear}{\let\citeauthoryear=Y} -\DeclareOption{oribibl}{\let\oribibl=Y} -\let\if@custvec\iftrue -\DeclareOption{orivec}{\let\if@custvec\iffalse} -\let\if@envcntsame\iffalse -\DeclareOption{envcountsame}{\let\if@envcntsame\iftrue} -\let\if@envcntsect\iffalse -\DeclareOption{envcountsect}{\let\if@envcntsect\iftrue} -\let\if@runhead\iffalse -\DeclareOption{runningheads}{\let\if@runhead\iftrue} - -\let\if@openbib\iffalse -\DeclareOption{openbib}{\let\if@openbib\iftrue} - -\DeclareOption*{\PassOptionsToClass{\CurrentOption}{article}} - -\ProcessOptions - -\LoadClass[twoside]{article} -\RequirePackage{multicol} % needed for the list of participants, index - -\setlength{\textwidth}{12.2cm} -\setlength{\textheight}{19.3cm} - -% Ragged bottom for the actual page -\def\thisbottomragged{\def\@textbottom{\vskip\z@ plus.0001fil -\global\let\@textbottom\relax}} - -\renewcommand\small{% - \@setfontsize\small\@ixpt{11}% - \abovedisplayskip 8.5\p@ \@plus3\p@ \@minus4\p@ - \abovedisplayshortskip \z@ \@plus2\p@ - \belowdisplayshortskip 4\p@ \@plus2\p@ \@minus2\p@ - \def\@listi{\leftmargin\leftmargini - \parsep 0\p@ \@plus1\p@ \@minus\p@ - \topsep 8\p@ \@plus2\p@ \@minus4\p@ - \itemsep0\p@}% - \belowdisplayskip \abovedisplayskip -} - -\frenchspacing -\widowpenalty=10000 -\clubpenalty=10000 - -\setlength\oddsidemargin {63\p@} -\setlength\evensidemargin {63\p@} -\setlength\marginparwidth {90\p@} - -\setlength\headsep 
{16\p@} - -\setlength\footnotesep{7.7\p@} -\setlength\textfloatsep{8mm\@plus 2\p@ \@minus 4\p@} -\setlength\intextsep {8mm\@plus 2\p@ \@minus 2\p@} - -\setcounter{secnumdepth}{2} - -\newcounter {chapter} -\renewcommand\thechapter {\@arabic\c@chapter} - -\newif\if@mainmatter \@mainmattertrue -\newcommand\frontmatter{\cleardoublepage - \@mainmatterfalse\pagenumbering{Roman}} -\newcommand\mainmatter{\cleardoublepage - \@mainmattertrue\pagenumbering{arabic}} -\newcommand\backmatter{\if@openright\cleardoublepage\else\clearpage\fi - \@mainmatterfalse} - -\renewcommand\part{\cleardoublepage - \thispagestyle{empty}% - \if@twocolumn - \onecolumn - \@tempswatrue - \else - \@tempswafalse - \fi - \null\vfil - \secdef\@part\@spart} - -\def\@part[#1]#2{% - \ifnum \c@secnumdepth >-2\relax - \refstepcounter{part}% - \addcontentsline{toc}{part}{\thepart\hspace{1em}#1}% - \else - \addcontentsline{toc}{part}{#1}% - \fi - \markboth{}{}% - {\centering - \interlinepenalty \@M - \normalfont - \ifnum \c@secnumdepth >-2\relax - \huge\bfseries \partname~\thepart - \par - \vskip 20\p@ - \fi - \Huge \bfseries #2\par}% - \@endpart} -\def\@spart#1{% - {\centering - \interlinepenalty \@M - \normalfont - \Huge \bfseries #1\par}% - \@endpart} -\def\@endpart{\vfil\newpage - \if@twoside - \null - \thispagestyle{empty}% - \newpage - \fi - \if@tempswa - \twocolumn - \fi} - -\newcommand\chapter{\clearpage - \thispagestyle{empty}% - \global\@topnum\z@ - \@afterindentfalse - \secdef\@chapter\@schapter} -\def\@chapter[#1]#2{\ifnum \c@secnumdepth >\m@ne - \if@mainmatter - \refstepcounter{chapter}% - \typeout{\@chapapp\space\thechapter.}% - \addcontentsline{toc}{chapter}% - {\protect\numberline{\thechapter}#1}% - \else - \addcontentsline{toc}{chapter}{#1}% - \fi - \else - \addcontentsline{toc}{chapter}{#1}% - \fi - \chaptermark{#1}% - \addtocontents{lof}{\protect\addvspace{10\p@}}% - \addtocontents{lot}{\protect\addvspace{10\p@}}% - \if@twocolumn - \@topnewpage[\@makechapterhead{#2}]% - \else - 
\@makechapterhead{#2}% - \@afterheading - \fi} -\def\@makechapterhead#1{% -% \vspace*{50\p@}% - {\centering - \ifnum \c@secnumdepth >\m@ne - \if@mainmatter - \large\bfseries \@chapapp{} \thechapter - \par\nobreak - \vskip 20\p@ - \fi - \fi - \interlinepenalty\@M - \Large \bfseries #1\par\nobreak - \vskip 40\p@ - }} -\def\@schapter#1{\if@twocolumn - \@topnewpage[\@makeschapterhead{#1}]% - \else - \@makeschapterhead{#1}% - \@afterheading - \fi} -\def\@makeschapterhead#1{% -% \vspace*{50\p@}% - {\centering - \normalfont - \interlinepenalty\@M - \Large \bfseries #1\par\nobreak - \vskip 40\p@ - }} - -\renewcommand\section{\@startsection{section}{1}{\z@}% - {-18\p@ \@plus -4\p@ \@minus -4\p@}% - {12\p@ \@plus 4\p@ \@minus 4\p@}% - {\normalfont\large\bfseries\boldmath - \rightskip=\z@ \@plus 8em\pretolerance=10000 }} -\renewcommand\subsection{\@startsection{subsection}{2}{\z@}% - {-18\p@ \@plus -4\p@ \@minus -4\p@}% - {8\p@ \@plus 4\p@ \@minus 4\p@}% - {\normalfont\normalsize\bfseries\boldmath - \rightskip=\z@ \@plus 8em\pretolerance=10000 }} -\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}% - {-18\p@ \@plus -4\p@ \@minus -4\p@}% - {-0.5em \@plus -0.22em \@minus -0.1em}% - {\normalfont\normalsize\bfseries\boldmath}} -\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}% - {-12\p@ \@plus -4\p@ \@minus -4\p@}% - {-0.5em \@plus -0.22em \@minus -0.1em}% - {\normalfont\normalsize\itshape}} -\renewcommand\subparagraph[1]{\typeout{LLNCS warning: You should not use - \string\subparagraph\space with this class}\vskip0.5cm -You should not use \verb|\subparagraph| with this class.\vskip0.5cm} - -\DeclareMathSymbol{\Gamma}{\mathalpha}{letters}{"00} -\DeclareMathSymbol{\Delta}{\mathalpha}{letters}{"01} -\DeclareMathSymbol{\Theta}{\mathalpha}{letters}{"02} -\DeclareMathSymbol{\Lambda}{\mathalpha}{letters}{"03} -\DeclareMathSymbol{\Xi}{\mathalpha}{letters}{"04} -\DeclareMathSymbol{\Pi}{\mathalpha}{letters}{"05} -\DeclareMathSymbol{\Sigma}{\mathalpha}{letters}{"06} 
-\DeclareMathSymbol{\Upsilon}{\mathalpha}{letters}{"07} -\DeclareMathSymbol{\Phi}{\mathalpha}{letters}{"08} -\DeclareMathSymbol{\Psi}{\mathalpha}{letters}{"09} -\DeclareMathSymbol{\Omega}{\mathalpha}{letters}{"0A} - -\let\footnotesize\small - -\if@custvec -\def\vec#1{\mathchoice{\mbox{\boldmath$\displaystyle#1$}} -{\mbox{\boldmath$\textstyle#1$}} -{\mbox{\boldmath$\scriptstyle#1$}} -{\mbox{\boldmath$\scriptscriptstyle#1$}}} -\fi - -\def\squareforqed{\hbox{\rlap{$\sqcap$}$\sqcup$}} -\def\qed{\ifmmode\squareforqed\else{\unskip\nobreak\hfil -\penalty50\hskip1em\null\nobreak\hfil\squareforqed -\parfillskip=0pt\finalhyphendemerits=0\endgraf}\fi} - -\def\getsto{\mathrel{\mathchoice {\vcenter{\offinterlineskip -\halign{\hfil -$\displaystyle##$\hfil\cr\gets\cr\to\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr\gets -\cr\to\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr\gets -\cr\to\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr -\gets\cr\to\cr}}}}} -\def\lid{\mathrel{\mathchoice {\vcenter{\offinterlineskip\halign{\hfil -$\displaystyle##$\hfil\cr<\cr\noalign{\vskip1.2pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr<\cr -\noalign{\vskip1.2pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr<\cr -\noalign{\vskip1pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr -<\cr -\noalign{\vskip0.9pt}=\cr}}}}} -\def\gid{\mathrel{\mathchoice {\vcenter{\offinterlineskip\halign{\hfil -$\displaystyle##$\hfil\cr>\cr\noalign{\vskip1.2pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr>\cr -\noalign{\vskip1.2pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr>\cr -\noalign{\vskip1pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr ->\cr -\noalign{\vskip0.9pt}=\cr}}}}} -\def\grole{\mathrel{\mathchoice {\vcenter{\offinterlineskip -\halign{\hfil 
-$\displaystyle##$\hfil\cr>\cr\noalign{\vskip-1pt}<\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr ->\cr\noalign{\vskip-1pt}<\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr ->\cr\noalign{\vskip-0.8pt}<\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr ->\cr\noalign{\vskip-0.3pt}<\cr}}}}} -\def\bbbr{{\rm I\!R}} %reelle Zahlen -\def\bbbm{{\rm I\!M}} -\def\bbbn{{\rm I\!N}} %natuerliche Zahlen -\def\bbbf{{\rm I\!F}} -\def\bbbh{{\rm I\!H}} -\def\bbbk{{\rm I\!K}} -\def\bbbp{{\rm I\!P}} -\def\bbbone{{\mathchoice {\rm 1\mskip-4mu l} {\rm 1\mskip-4mu l} -{\rm 1\mskip-4.5mu l} {\rm 1\mskip-5mu l}}} -\def\bbbc{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm C$}\hbox{\hbox -to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\textstyle\rm C$}\hbox{\hbox -to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptstyle\rm C$}\hbox{\hbox -to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptscriptstyle\rm C$}\hbox{\hbox -to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}}}} -\def\bbbq{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm -Q$}\hbox{\raise -0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.8\ht0\hss}\box0}} -{\setbox0=\hbox{$\textstyle\rm Q$}\hbox{\raise -0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.8\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptstyle\rm Q$}\hbox{\raise -0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.7\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptscriptstyle\rm Q$}\hbox{\raise -0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.7\ht0\hss}\box0}}}} -\def\bbbt{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm -T$}\hbox{\hbox to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\textstyle\rm T$}\hbox{\hbox -to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptstyle\rm T$}\hbox{\hbox -to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptscriptstyle\rm T$}\hbox{\hbox 
-to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}}}} -\def\bbbs{{\mathchoice -{\setbox0=\hbox{$\displaystyle \rm S$}\hbox{\raise0.5\ht0\hbox -to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\hbox -to0pt{\kern0.55\wd0\vrule height0.5\ht0\hss}\box0}} -{\setbox0=\hbox{$\textstyle \rm S$}\hbox{\raise0.5\ht0\hbox -to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\hbox -to0pt{\kern0.55\wd0\vrule height0.5\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptstyle \rm S$}\hbox{\raise0.5\ht0\hbox -to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\raise0.05\ht0\hbox -to0pt{\kern0.5\wd0\vrule height0.45\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptscriptstyle\rm S$}\hbox{\raise0.5\ht0\hbox -to0pt{\kern0.4\wd0\vrule height0.45\ht0\hss}\raise0.05\ht0\hbox -to0pt{\kern0.55\wd0\vrule height0.45\ht0\hss}\box0}}}} -\def\bbbz{{\mathchoice {\hbox{$\mathsf\textstyle Z\kern-0.4em Z$}} -{\hbox{$\mathsf\textstyle Z\kern-0.4em Z$}} -{\hbox{$\mathsf\scriptstyle Z\kern-0.3em Z$}} -{\hbox{$\mathsf\scriptscriptstyle Z\kern-0.2em Z$}}}} - -\let\ts\, - -\setlength\leftmargini {17\p@} -\setlength\leftmargin {\leftmargini} -\setlength\leftmarginii {\leftmargini} -\setlength\leftmarginiii {\leftmargini} -\setlength\leftmarginiv {\leftmargini} -\setlength \labelsep {.5em} -\setlength \labelwidth{\leftmargini} -\addtolength\labelwidth{-\labelsep} - -\def\@listI{\leftmargin\leftmargini - \parsep 0\p@ \@plus1\p@ \@minus\p@ - \topsep 8\p@ \@plus2\p@ \@minus4\p@ - \itemsep0\p@} -\let\@listi\@listI -\@listi -\def\@listii {\leftmargin\leftmarginii - \labelwidth\leftmarginii - \advance\labelwidth-\labelsep - \topsep 0\p@ \@plus2\p@ \@minus\p@} -\def\@listiii{\leftmargin\leftmarginiii - \labelwidth\leftmarginiii - \advance\labelwidth-\labelsep - \topsep 0\p@ \@plus\p@\@minus\p@ - \parsep \z@ - \partopsep \p@ \@plus\z@ \@minus\p@} - -\renewcommand\labelitemi{\normalfont\bfseries --} -\renewcommand\labelitemii{$\m@th\bullet$} - -\setlength\arraycolsep{1.4\p@} -\setlength\tabcolsep{1.4\p@} - 
-\def\tableofcontents{\chapter*{\contentsname\@mkboth{{\contentsname}}% - {{\contentsname}}} - \def\authcount##1{\setcounter{auco}{##1}\setcounter{@auth}{1}} - \def\lastand{\ifnum\value{auco}=2\relax - \unskip{} \andname\ - \else - \unskip \lastandname\ - \fi}% - \def\and{\stepcounter{@auth}\relax - \ifnum\value{@auth}=\value{auco}% - \lastand - \else - \unskip, - \fi}% - \@starttoc{toc}\if@restonecol\twocolumn\fi} - -\def\l@part#1#2{\addpenalty{\@secpenalty}% - \addvspace{2em plus\p@}% % space above part line - \begingroup - \parindent \z@ - \rightskip \z@ plus 5em - \hrule\vskip5pt - \large % same size as for a contribution heading - \bfseries\boldmath % set line in boldface - \leavevmode % TeX command to enter horizontal mode. - #1\par - \vskip5pt - \hrule - \vskip1pt - \nobreak % Never break after part entry - \endgroup} - -\def\@dotsep{2} - -\def\hyperhrefextend{\ifx\hyper@anchor\@undefined\else -{chapter.\thechapter}\fi} - -\def\addnumcontentsmark#1#2#3{% -\addtocontents{#1}{\protect\contentsline{#2}{\protect\numberline - {\thechapter}#3}{\thepage}\hyperhrefextend}} -\def\addcontentsmark#1#2#3{% -\addtocontents{#1}{\protect\contentsline{#2}{#3}{\thepage}\hyperhrefextend}} -\def\addcontentsmarkwop#1#2#3{% -\addtocontents{#1}{\protect\contentsline{#2}{#3}{0}\hyperhrefextend}} - -\def\@adcmk[#1]{\ifcase #1 \or -\def\@gtempa{\addnumcontentsmark}% - \or \def\@gtempa{\addcontentsmark}% - \or \def\@gtempa{\addcontentsmarkwop}% - \fi\@gtempa{toc}{chapter}} -\def\addtocmark{\@ifnextchar[{\@adcmk}{\@adcmk[3]}} - -\def\l@chapter#1#2{\addpenalty{-\@highpenalty} - \vskip 1.0em plus 1pt \@tempdima 1.5em \begingroup - \parindent \z@ \rightskip \@pnumwidth - \parfillskip -\@pnumwidth - \leavevmode \advance\leftskip\@tempdima \hskip -\leftskip - {\large\bfseries\boldmath#1}\ifx0#2\hfil\null - \else - \nobreak - \leaders\hbox{$\m@th \mkern \@dotsep mu.\mkern - \@dotsep mu$}\hfill - \nobreak\hbox to\@pnumwidth{\hss #2}% - \fi\par - \penalty\@highpenalty \endgroup} - 
-\def\l@title#1#2{\addpenalty{-\@highpenalty} - \addvspace{8pt plus 1pt} - \@tempdima \z@ - \begingroup - \parindent \z@ \rightskip \@tocrmarg - \parfillskip -\@tocrmarg - \leavevmode \advance\leftskip\@tempdima \hskip -\leftskip - #1\nobreak - \leaders\hbox{$\m@th \mkern \@dotsep mu.\mkern - \@dotsep mu$}\hfill - \nobreak\hbox to\@pnumwidth{\hss #2}\par - \penalty\@highpenalty \endgroup} - -\setcounter{tocdepth}{0} -\newdimen\tocchpnum -\newdimen\tocsecnum -\newdimen\tocsectotal -\newdimen\tocsubsecnum -\newdimen\tocsubsectotal -\newdimen\tocsubsubsecnum -\newdimen\tocsubsubsectotal -\newdimen\tocparanum -\newdimen\tocparatotal -\newdimen\tocsubparanum -\tocchpnum=\z@ % no chapter numbers -\tocsecnum=15\p@ % section 88. plus 2.222pt -\tocsubsecnum=23\p@ % subsection 88.8 plus 2.222pt -\tocsubsubsecnum=27\p@ % subsubsection 88.8.8 plus 1.444pt -\tocparanum=35\p@ % paragraph 88.8.8.8 plus 1.666pt -\tocsubparanum=43\p@ % subparagraph 88.8.8.8.8 plus 1.888pt -\def\calctocindent{% -\tocsectotal=\tocchpnum -\advance\tocsectotal by\tocsecnum -\tocsubsectotal=\tocsectotal -\advance\tocsubsectotal by\tocsubsecnum -\tocsubsubsectotal=\tocsubsectotal -\advance\tocsubsubsectotal by\tocsubsubsecnum -\tocparatotal=\tocsubsubsectotal -\advance\tocparatotal by\tocparanum} -\calctocindent - -\def\l@section{\@dottedtocline{1}{\tocchpnum}{\tocsecnum}} -\def\l@subsection{\@dottedtocline{2}{\tocsectotal}{\tocsubsecnum}} -\def\l@subsubsection{\@dottedtocline{3}{\tocsubsectotal}{\tocsubsubsecnum}} -\def\l@paragraph{\@dottedtocline{4}{\tocsubsubsectotal}{\tocparanum}} -\def\l@subparagraph{\@dottedtocline{5}{\tocparatotal}{\tocsubparanum}} - -\def\listoffigures{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn - \fi\section*{\listfigurename\@mkboth{{\listfigurename}}{{\listfigurename}}} - \@starttoc{lof}\if@restonecol\twocolumn\fi} -\def\l@figure{\@dottedtocline{1}{0em}{1.5em}} - -\def\listoftables{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn - 
\fi\section*{\listtablename\@mkboth{{\listtablename}}{{\listtablename}}} - \@starttoc{lot}\if@restonecol\twocolumn\fi} -\let\l@table\l@figure - -\renewcommand\listoffigures{% - \section*{\listfigurename - \@mkboth{\listfigurename}{\listfigurename}}% - \@starttoc{lof}% - } - -\renewcommand\listoftables{% - \section*{\listtablename - \@mkboth{\listtablename}{\listtablename}}% - \@starttoc{lot}% - } - -\ifx\oribibl\undefined -\ifx\citeauthoryear\undefined -\renewenvironment{thebibliography}[1] - {\section*{\refname} - \def\@biblabel##1{##1.} - \small - \list{\@biblabel{\@arabic\c@enumiv}}% - {\settowidth\labelwidth{\@biblabel{#1}}% - \leftmargin\labelwidth - \advance\leftmargin\labelsep - \if@openbib - \advance\leftmargin\bibindent - \itemindent -\bibindent - \listparindent \itemindent - \parsep \z@ - \fi - \usecounter{enumiv}% - \let\p@enumiv\@empty - \renewcommand\theenumiv{\@arabic\c@enumiv}}% - \if@openbib - \renewcommand\newblock{\par}% - \else - \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}% - \fi - \sloppy\clubpenalty4000\widowpenalty4000% - \sfcode`\.=\@m} - {\def\@noitemerr - {\@latex@warning{Empty `thebibliography' environment}}% - \endlist} -\def\@lbibitem[#1]#2{\item[{[#1]}\hfill]\if@filesw - {\let\protect\noexpand\immediate - \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} -\newcount\@tempcntc -\def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi - \@tempcnta\z@\@tempcntb\m@ne\def\@citea{}\@cite{\@for\@citeb:=#2\do - {\@ifundefined - {b@\@citeb}{\@citeo\@tempcntb\m@ne\@citea\def\@citea{,}{\bfseries - ?}\@warning - {Citation `\@citeb' on page \thepage \space undefined}}% - {\setbox\z@\hbox{\global\@tempcntc0\csname b@\@citeb\endcsname\relax}% - \ifnum\@tempcntc=\z@ \@citeo\@tempcntb\m@ne - \@citea\def\@citea{,}\hbox{\csname b@\@citeb\endcsname}% - \else - \advance\@tempcntb\@ne - \ifnum\@tempcntb=\@tempcntc - \else\advance\@tempcntb\m@ne\@citeo - 
\@tempcnta\@tempcntc\@tempcntb\@tempcntc\fi\fi}}\@citeo}{#1}} -\def\@citeo{\ifnum\@tempcnta>\@tempcntb\else - \@citea\def\@citea{,\,\hskip\z@skip}% - \ifnum\@tempcnta=\@tempcntb\the\@tempcnta\else - {\advance\@tempcnta\@ne\ifnum\@tempcnta=\@tempcntb \else - \def\@citea{--}\fi - \advance\@tempcnta\m@ne\the\@tempcnta\@citea\the\@tempcntb}\fi\fi} -\else -\renewenvironment{thebibliography}[1] - {\section*{\refname} - \small - \list{}% - {\settowidth\labelwidth{}% - \leftmargin\parindent - \itemindent=-\parindent - \labelsep=\z@ - \if@openbib - \advance\leftmargin\bibindent - \itemindent -\bibindent - \listparindent \itemindent - \parsep \z@ - \fi - \usecounter{enumiv}% - \let\p@enumiv\@empty - \renewcommand\theenumiv{}}% - \if@openbib - \renewcommand\newblock{\par}% - \else - \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}% - \fi - \sloppy\clubpenalty4000\widowpenalty4000% - \sfcode`\.=\@m} - {\def\@noitemerr - {\@latex@warning{Empty `thebibliography' environment}}% - \endlist} - \def\@cite#1{#1}% - \def\@lbibitem[#1]#2{\item[]\if@filesw - {\def\protect##1{\string ##1\space}\immediate - \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} - \fi -\else -\@cons\@openbib@code{\noexpand\small} -\fi - -\def\idxquad{\hskip 10\p@}% space that divides entry from number - -\def\@idxitem{\par\hangindent 10\p@} - -\def\subitem{\par\setbox0=\hbox{--\enspace}% second order - \noindent\hangindent\wd0\box0}% index entry - -\def\subsubitem{\par\setbox0=\hbox{--\,--\enspace}% third - \noindent\hangindent\wd0\box0}% order index entry - -\def\indexspace{\par \vskip 10\p@ plus5\p@ minus3\p@\relax} - -\renewenvironment{theindex} - {\@mkboth{\indexname}{\indexname}% - \thispagestyle{empty}\parindent\z@ - \parskip\z@ \@plus .3\p@\relax - \let\item\par - \def\,{\relax\ifmmode\mskip\thinmuskip - \else\hskip0.2em\ignorespaces\fi}% - \normalfont\small - \begin{multicols}{2}[\@makeschapterhead{\indexname}]% - } - {\end{multicols}} - -\renewcommand\footnoterule{% - \kern-3\p@ - 
\hrule\@width 2truecm - \kern2.6\p@} - \newdimen\fnindent - \fnindent1em -\long\def\@makefntext#1{% - \parindent \fnindent% - \leftskip \fnindent% - \noindent - \llap{\hb@xt@1em{\hss\@makefnmark\ }}\ignorespaces#1} - -\long\def\@makecaption#1#2{% - \vskip\abovecaptionskip - \sbox\@tempboxa{{\bfseries #1.} #2}% - \ifdim \wd\@tempboxa >\hsize - {\bfseries #1.} #2\par - \else - \global \@minipagefalse - \hb@xt@\hsize{\hfil\box\@tempboxa\hfil}% - \fi - \vskip\belowcaptionskip} - -\def\fps@figure{htbp} -\def\fnum@figure{\figurename\thinspace\thefigure} -\def \@floatboxreset {% - \reset@font - \small - \@setnobreak - \@setminipage -} -\def\fps@table{htbp} -\def\fnum@table{\tablename~\thetable} -\renewenvironment{table} - {\setlength\abovecaptionskip{0\p@}% - \setlength\belowcaptionskip{10\p@}% - \@float{table}} - {\end@float} -\renewenvironment{table*} - {\setlength\abovecaptionskip{0\p@}% - \setlength\belowcaptionskip{10\p@}% - \@dblfloat{table}} - {\end@dblfloat} - -\long\def\@caption#1[#2]#3{\par\addcontentsline{\csname - ext@#1\endcsname}{#1}{\protect\numberline{\csname - the#1\endcsname}{\ignorespaces #2}}\begingroup - \@parboxrestore - \@makecaption{\csname fnum@#1\endcsname}{\ignorespaces #3}\par - \endgroup} - -% LaTeX does not provide a command to enter the authors institute -% addresses. The \institute command is defined here. 
- -\newcounter{@inst} -\newcounter{@auth} -\newcounter{auco} -\def\andname{and} -\def\lastandname{\unskip, and} -\newdimen\instindent -\newbox\authrun -\newtoks\authorrunning -\newtoks\tocauthor -\newbox\titrun -\newtoks\titlerunning -\newtoks\toctitle - -\def\clearheadinfo{\gdef\@author{No Author Given}% - \gdef\@title{No Title Given}% - \gdef\@subtitle{}% - \gdef\@institute{No Institute Given}% - \gdef\@thanks{}% - \global\titlerunning={}\global\authorrunning={}% - \global\toctitle={}\global\tocauthor={}} - -\def\institute#1{\gdef\@institute{#1}} - -\def\institutename{\par - \begingroup - \parskip=\z@ - \parindent=\z@ - \setcounter{@inst}{1}% - \def\and{\par\stepcounter{@inst}% - \noindent$^{\the@inst}$\enspace\ignorespaces}% - \setbox0=\vbox{\def\thanks##1{}\@institute}% - \ifnum\c@@inst=1\relax - \else - \setcounter{footnote}{\c@@inst}% - \setcounter{@inst}{1}% - \noindent$^{\the@inst}$\enspace - \fi - \ignorespaces - \@institute\par - \endgroup} - -\def\@fnsymbol#1{\ensuremath{\ifcase#1\or\star\or{\star\star}\or - {\star\star\star}\or \dagger\or \ddagger\or - \mathchar "278\or \mathchar "27B\or \|\or **\or \dagger\dagger - \or \ddagger\ddagger \else\@ctrerr\fi}} - -\def\inst#1{\unskip$^{#1}$} -\def\fnmsep{\unskip$^,$} -\def\email#1{{\tt#1}} -\AtBeginDocument{\@ifundefined{url}{\def\url#1{#1}}{}} -\def\homedir{\~{ }} - -\def\subtitle#1{\gdef\@subtitle{#1}} -\clearheadinfo - -\renewcommand\maketitle{\newpage - \refstepcounter{chapter}% - \stepcounter{section}% - \setcounter{section}{0}% - \setcounter{subsection}{0}% - \setcounter{figure}{0} - \setcounter{table}{0} - \setcounter{equation}{0} - \setcounter{footnote}{0}% - \begingroup - \parindent=\z@ - \renewcommand\thefootnote{\@fnsymbol\c@footnote}% - \if@twocolumn - \ifnum \col@number=\@ne - \@maketitle - \else - \twocolumn[\@maketitle]% - \fi - \else - \newpage - \global\@topnum\z@ % Prevents figures from going at top of page. 
- \@maketitle - \fi - \thispagestyle{empty}\@thanks -% - \def\\{\unskip\ \ignorespaces}\def\inst##1{\unskip{}}% - \def\thanks##1{\unskip{}}\def\fnmsep{\unskip}% - \instindent=\hsize - \advance\instindent by-\headlineindent - \if!\the\toctitle!\addcontentsline{toc}{title}{\@title}\else - \addcontentsline{toc}{title}{\the\toctitle}\fi - \if@runhead - \if!\the\titlerunning!\else - \edef\@title{\the\titlerunning}% - \fi - \global\setbox\titrun=\hbox{\small\rm\unboldmath\ignorespaces\@title}% - \ifdim\wd\titrun>\instindent - \typeout{Title too long for running head. Please supply}% - \typeout{a shorter form with \string\titlerunning\space prior to - \string\maketitle}% - \global\setbox\titrun=\hbox{\small\rm - Title Suppressed Due to Excessive Length}% - \fi - \xdef\@title{\copy\titrun}% - \fi -% - \if!\the\tocauthor!\relax - {\def\and{\noexpand\protect\noexpand\and}% - \protected@xdef\toc@uthor{\@author}}% - \else - \def\\{\noexpand\protect\noexpand\newline}% - \protected@xdef\scratch{\the\tocauthor}% - \protected@xdef\toc@uthor{\scratch}% - \fi - \addtocontents{toc}{{\protect\raggedright\protect\leftskip15\p@ - \protect\rightskip\@tocrmarg - \protect\itshape\toc@uthor\protect\endgraf}}% - \if@runhead - \if!\the\authorrunning! - \value{@inst}=\value{@auth}% - \setcounter{@auth}{1}% - \else - \edef\@author{\the\authorrunning}% - \fi - \global\setbox\authrun=\hbox{\small\unboldmath\@author\unskip}% - \ifdim\wd\authrun>\instindent - \typeout{Names of authors too long for running head. 
Please supply}% - \typeout{a shorter form with \string\authorrunning\space prior to - \string\maketitle}% - \global\setbox\authrun=\hbox{\small\rm - Authors Suppressed Due to Excessive Length}% - \fi - \xdef\@author{\copy\authrun}% - \markboth{\@author}{\@title}% - \fi - \endgroup - \setcounter{footnote}{0}% - \clearheadinfo} -% -\def\@maketitle{\newpage - \markboth{}{}% - \def\lastand{\ifnum\value{@inst}=2\relax - \unskip{} \andname\ - \else - \unskip \lastandname\ - \fi}% - \def\and{\stepcounter{@auth}\relax - \ifnum\value{@auth}=\value{@inst}% - \lastand - \else - \unskip, - \fi}% - \begin{center}% - {\Large \bfseries\boldmath - \pretolerance=10000 - \@title \par}\vskip .8cm -\if!\@subtitle!\else {\large \bfseries\boldmath - \vskip -.65cm - \pretolerance=10000 - \@subtitle \par}\vskip .8cm\fi - \setbox0=\vbox{\setcounter{@auth}{1}\def\and{\stepcounter{@auth}}% - \def\thanks##1{}\@author}% - \global\value{@inst}=\value{@auth}% - \global\value{auco}=\value{@auth}% - \setcounter{@auth}{1}% -{\lineskip .5em -\noindent\ignorespaces -\@author\vskip.35cm} - {\small\institutename} - \end{center}% - } - -% definition of the "\spnewtheorem" command. -% -% Usage: -% -% \spnewtheorem{env_nam}{caption}[within]{cap_font}{body_font} -% or \spnewtheorem{env_nam}[numbered_like]{caption}{cap_font}{body_font} -% or \spnewtheorem*{env_nam}{caption}{cap_font}{body_font} -% -% New is "cap_font" and "body_font". It stands for -% fontdefinition of the caption and the text itself. -% -% "\spnewtheorem*" gives a theorem without number. -% -% A defined spnewthoerem environment is used as described -% by Lamport. 
-% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\def\@thmcountersep{} -\def\@thmcounterend{.} - -\def\spnewtheorem{\@ifstar{\@sthm}{\@Sthm}} - -% definition of \spnewtheorem with number - -\def\@spnthm#1#2{% - \@ifnextchar[{\@spxnthm{#1}{#2}}{\@spynthm{#1}{#2}}} -\def\@Sthm#1{\@ifnextchar[{\@spothm{#1}}{\@spnthm{#1}}} - -\def\@spxnthm#1#2[#3]#4#5{\expandafter\@ifdefinable\csname #1\endcsname - {\@definecounter{#1}\@addtoreset{#1}{#3}% - \expandafter\xdef\csname the#1\endcsname{\expandafter\noexpand - \csname the#3\endcsname \noexpand\@thmcountersep \@thmcounter{#1}}% - \expandafter\xdef\csname #1name\endcsname{#2}% - \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#4}{#5}}% - \global\@namedef{end#1}{\@endtheorem}}} - -\def\@spynthm#1#2#3#4{\expandafter\@ifdefinable\csname #1\endcsname - {\@definecounter{#1}% - \expandafter\xdef\csname the#1\endcsname{\@thmcounter{#1}}% - \expandafter\xdef\csname #1name\endcsname{#2}% - \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#3}{#4}}% - \global\@namedef{end#1}{\@endtheorem}}} - -\def\@spothm#1[#2]#3#4#5{% - \@ifundefined{c@#2}{\@latexerr{No theorem environment `#2' defined}\@eha}% - {\expandafter\@ifdefinable\csname #1\endcsname - {\global\@namedef{the#1}{\@nameuse{the#2}}% - \expandafter\xdef\csname #1name\endcsname{#3}% - \global\@namedef{#1}{\@spthm{#2}{\csname #1name\endcsname}{#4}{#5}}% - \global\@namedef{end#1}{\@endtheorem}}}} - -\def\@spthm#1#2#3#4{\topsep 7\p@ \@plus2\p@ \@minus4\p@ -\refstepcounter{#1}% -\@ifnextchar[{\@spythm{#1}{#2}{#3}{#4}}{\@spxthm{#1}{#2}{#3}{#4}}} - -\def\@spxthm#1#2#3#4{\@spbegintheorem{#2}{\csname the#1\endcsname}{#3}{#4}% - \ignorespaces} - -\def\@spythm#1#2#3#4[#5]{\@spopargbegintheorem{#2}{\csname - the#1\endcsname}{#5}{#3}{#4}\ignorespaces} - -\def\@spbegintheorem#1#2#3#4{\trivlist - \item[\hskip\labelsep{#3#1\ #2\@thmcounterend}]#4} - -\def\@spopargbegintheorem#1#2#3#4#5{\trivlist - \item[\hskip\labelsep{#4#1\ #2}]{#4(#3)\@thmcounterend\ }#5} - -% 
definition of \spnewtheorem* without number - -\def\@sthm#1#2{\@Ynthm{#1}{#2}} - -\def\@Ynthm#1#2#3#4{\expandafter\@ifdefinable\csname #1\endcsname - {\global\@namedef{#1}{\@Thm{\csname #1name\endcsname}{#3}{#4}}% - \expandafter\xdef\csname #1name\endcsname{#2}% - \global\@namedef{end#1}{\@endtheorem}}} - -\def\@Thm#1#2#3{\topsep 7\p@ \@plus2\p@ \@minus4\p@ -\@ifnextchar[{\@Ythm{#1}{#2}{#3}}{\@Xthm{#1}{#2}{#3}}} - -\def\@Xthm#1#2#3{\@Begintheorem{#1}{#2}{#3}\ignorespaces} - -\def\@Ythm#1#2#3[#4]{\@Opargbegintheorem{#1} - {#4}{#2}{#3}\ignorespaces} - -\def\@Begintheorem#1#2#3{#3\trivlist - \item[\hskip\labelsep{#2#1\@thmcounterend}]} - -\def\@Opargbegintheorem#1#2#3#4{#4\trivlist - \item[\hskip\labelsep{#3#1}]{#3(#2)\@thmcounterend\ }} - -\if@envcntsect - \def\@thmcountersep{.} - \spnewtheorem{theorem}{Theorem}[section]{\bfseries}{\itshape} -\else - \spnewtheorem{theorem}{Theorem}{\bfseries}{\itshape} - \if@envcntreset - \@addtoreset{theorem}{section} - \else - \@addtoreset{theorem}{chapter} - \fi -\fi - -%definition of divers theorem environments -\spnewtheorem*{claim}{Claim}{\itshape}{\rmfamily} -\spnewtheorem*{proof}{Proof}{\itshape}{\rmfamily} -\if@envcntsame % alle Umgebungen wie Theorem. 
- \def\spn@wtheorem#1#2#3#4{\@spothm{#1}[theorem]{#2}{#3}{#4}} -\else % alle Umgebungen mit eigenem Zaehler - \if@envcntsect % mit section numeriert - \def\spn@wtheorem#1#2#3#4{\@spxnthm{#1}{#2}[section]{#3}{#4}} - \else % nicht mit section numeriert - \if@envcntreset - \def\spn@wtheorem#1#2#3#4{\@spynthm{#1}{#2}{#3}{#4} - \@addtoreset{#1}{section}} - \else - \def\spn@wtheorem#1#2#3#4{\@spynthm{#1}{#2}{#3}{#4} - \@addtoreset{#1}{chapter}}% - \fi - \fi -\fi -\spn@wtheorem{case}{Case}{\itshape}{\rmfamily} -\spn@wtheorem{conjecture}{Conjecture}{\itshape}{\rmfamily} -\spn@wtheorem{corollary}{Corollary}{\bfseries}{\itshape} -\spn@wtheorem{definition}{Definition}{\bfseries}{\itshape} -\spn@wtheorem{example}{Example}{\itshape}{\rmfamily} -\spn@wtheorem{exercise}{Exercise}{\itshape}{\rmfamily} -\spn@wtheorem{lemma}{Lemma}{\bfseries}{\itshape} -\spn@wtheorem{note}{Note}{\itshape}{\rmfamily} -\spn@wtheorem{problem}{Problem}{\itshape}{\rmfamily} -\spn@wtheorem{property}{Property}{\itshape}{\rmfamily} -\spn@wtheorem{proposition}{Proposition}{\bfseries}{\itshape} -\spn@wtheorem{question}{Question}{\itshape}{\rmfamily} -\spn@wtheorem{solution}{Solution}{\itshape}{\rmfamily} -\spn@wtheorem{remark}{Remark}{\itshape}{\rmfamily} - -\def\@takefromreset#1#2{% - \def\@tempa{#1}% - \let\@tempd\@elt - \def\@elt##1{% - \def\@tempb{##1}% - \ifx\@tempa\@tempb\else - \@addtoreset{##1}{#2}% - \fi}% - \expandafter\expandafter\let\expandafter\@tempc\csname cl@#2\endcsname - \expandafter\def\csname cl@#2\endcsname{}% - \@tempc - \let\@elt\@tempd} - -\def\theopargself{\def\@spopargbegintheorem##1##2##3##4##5{\trivlist - \item[\hskip\labelsep{##4##1\ ##2}]{##4##3\@thmcounterend\ }##5} - \def\@Opargbegintheorem##1##2##3##4{##4\trivlist - \item[\hskip\labelsep{##3##1}]{##3##2\@thmcounterend\ }} - } - -\renewenvironment{abstract}{% - \list{}{\advance\topsep by0.35cm\relax\small - \leftmargin=1cm - \labelwidth=\z@ - \listparindent=\z@ - \itemindent\listparindent - 
\rightmargin\leftmargin}\item[\hskip\labelsep - \bfseries\abstractname]} - {\endlist} -\renewcommand{\abstractname}{Abstract.} -\renewcommand{\contentsname}{Table of Contents} -\renewcommand{\figurename}{Fig.} -\renewcommand{\tablename}{Table} - -\newdimen\headlineindent % dimension for space between -\headlineindent=1.166cm % number and text of headings. - -\def\ps@headings{\let\@mkboth\@gobbletwo - \let\@oddfoot\@empty\let\@evenfoot\@empty - \def\@evenhead{\normalfont\small\rlap{\thepage}\hspace{\headlineindent}% - \leftmark\hfil} - \def\@oddhead{\normalfont\small\hfil\rightmark\hspace{\headlineindent}% - \llap{\thepage}} - \def\chaptermark##1{}% - \def\sectionmark##1{}% - \def\subsectionmark##1{}} - -\def\ps@titlepage{\let\@mkboth\@gobbletwo - \let\@oddfoot\@empty\let\@evenfoot\@empty - \def\@evenhead{\normalfont\small\rlap{\thepage}\hspace{\headlineindent}% - \hfil} - \def\@oddhead{\normalfont\small\hfil\hspace{\headlineindent}% - \llap{\thepage}} - \def\chaptermark##1{}% - \def\sectionmark##1{}% - \def\subsectionmark##1{}} - -\if@runhead\ps@headings\else -\ps@empty\fi - -\setlength\arraycolsep{1.4\p@} -\setlength\tabcolsep{1.4\p@} - -\endinput - diff --git a/doc/design-paper/sptor.tex b/doc/design-paper/sptor.tex deleted file mode 100644 index 4b659eeda1..0000000000 --- a/doc/design-paper/sptor.tex +++ /dev/null @@ -1,353 +0,0 @@ -\documentclass{llncs} - -\usepackage{url} -\usepackage{amsmath} -\usepackage{epsfig} - -\setlength{\textwidth}{5.9in} -\setlength{\textheight}{8.4in} -\setlength{\topmargin}{.5cm} -\setlength{\oddsidemargin}{1cm} -\setlength{\evensidemargin}{1cm} - -\newenvironment{tightlist}{\begin{list}{$\bullet$}{ - \setlength{\itemsep}{0mm} - \setlength{\parsep}{0mm} - % \setlength{\labelsep}{0mm} - % \setlength{\labelwidth}{0mm} - % \setlength{\topsep}{0mm} - }}{\end{list}} - - -\newcommand{\workingnote}[1]{} % The version that hides the note. -%\newcommand{\workingnote}[1]{(**#1)} % The version that makes the note visible. 
- - -\begin{document} - -\title{Design challenges and social factors in deploying low-latency anonymity} -% Could still use a better title -PFS - -\author{Roger Dingledine\inst{1} \and -Nick Mathewson\inst{1} \and -Paul Syverson\inst{2}} -\institute{The Tor Project \email{<\{arma,nickm\}@torproject.org>} \and -Naval Research Laboratory \email{<syverson@itd.nrl.navy.mil>}} - -\maketitle -\pagestyle{plain} - -\begin{abstract} - There are many unexpected or unexpectedly difficult obstacles to - deploying anonymous communications. We describe Tor (\emph{the} - onion routing), how to use it, our design philosophy, and some of - the challenges that we have faced and continue to face in building, - deploying, and sustaining a scalable, distributed, low-latency - anonymity network. -\end{abstract} - -\section{Introduction} -This article describes Tor, a widely-used low-latency general-purpose -anonymous communication system, and discusses some unexpected -challenges arising from our experiences deploying Tor. We will tell -you how to use it, who uses it, how it works, why we designed it the -way we did, and why this makes it usable and stable. - -Tor is an overlay network for anonymizing TCP streams over the -Internet~\cite{tor-design}. Tor works on the real-world Internet, -requires no special privileges or kernel modifications, requires -little synchronization or coordination between nodes, and provides a -reasonable trade-off between anonymity, usability, and efficiency. - -Since deployment in October 2003 the public Tor network has grown to -about a thousand volunteer-operated nodes worldwide and over 110 -megabytes average traffic per second from hundreds of thousands of -concurrent users. - -\section{Tor Design and Design Philosophy: Distributed Trust and Usability} - -Tor enables users to connect to Internet sites without revealing their -logical or physical locations to those sites or to observers. 
It -enables hosts to be publicly accessible yet have similar protection -against location through its \emph{location-hidden services}. - -To connect to a remote server via Tor the client software first learns -a %signed -list of Tor nodes from several central \emph{directory servers} via a -voting protocol (to avoid dependence on or complete trust in any one -of these servers). It then incrementally creates a private pathway or -\emph{circuit} across the network. This circuit consists of -encrypted connections through authenticated Tor nodes -whose public keys were obtained from the directory servers. The client -software negotiates a separate set of encryption keys for each hop along the -circuit. The nodes in the circuit are chosen at random by the client -subject to a preference for higher performing nodes to allocate -resources effectively and with a client-chosen preferred set of first -nodes called \emph{entry guards} to complicate profiling attacks by -internal adversaries~\cite{hs-attack}. -The circuit is extended one node at a time, tunneling extensions -through already established portions of the circuit, and each node -along the way knows only the immediately previous and following nodes -in the circuit, so no individual Tor node knows the complete path that -each fixed-sized data packet (or \emph{cell}) will take. Thus, -neither an eavesdropper nor a compromised node can see both the -connection's source and destination. Later requests use a new -circuit to complicate long-term linkability between different actions -by a single user. - -Tor attempts to anonymize the transport layer, not the application -layer. Thus, applications such as SSH can provide -authenticated communication that is hidden by Tor from outside observers. -When anonymity from communication partners is desired, -application-level protocols that transmit identifying -information need additional scrubbing proxies, such as -Privoxy~\cite{privoxy} for HTTP\@. 
Furthermore, Tor does not relay -arbitrary IP packets; it only anonymizes TCP streams and DNS requests. - -Tor, the third generation of deployed onion-routing -designs~\cite{or-ih96,or-jsac98,tor-design}, was researched, developed, -and deployed by the Naval Research Laboratory and the Free Haven -Project under ONR and DARPA funding for secure government -communications. In 2005, continuing work by Free Haven was funded by -the Electronic Frontier Foundation for maintaining civil liberties of -ordinary citizens online. In 2006, The Tor Project incorporated as a -non-profit and has received continued funding from the Omidyar Network, -the U.S. International Broadcasting Bureau, and other groups to combat -blocking and censorship on the Internet. This diversity of funding fits -Tor's overall philosophy: a wide variety of interests helps maintain -both the stability and the security of the network. - -Usability is also a central goal. Downloading and installing Tor is -easy. Simply go to\\ -http://www.torproject.org/ and download. Tor comes with install -wizards and a GUI for major operating systems: GNU/Linux, OS X, and -Windows. It also runs on various flavors of BSD and UNIX\@. Basic -instructions, documentation, FAQs, etc.\ are available in many -languages. The Tor GUI Vidalia makes server configuration easy, e.g., -choosing how much bandwidth to allocate to Tor, exit policy choices, -etc. And, the GUI Torbutton allows Firefox users a one-click toggle of -whether browsing goes through Tor or not. Tor is easily configured by -a site administrator to run at either individual desktops or just at a -site firewall or combinations of these. - -The ideal Tor network would be practical, useful and anonymous. When -trade-offs arise between these properties, Tor's research strategy has -been to remain useful enough to attract many users, and practical -enough to support them. Only subject to these constraints do we try -to maximize anonymity. 
Tor thus differs from other deployed systems -for traffic analysis resistance in its security and flexibility. Mix -networks such as -% Mixmaster~\cite{mixmaster-spec} or its successor -Mixminion~\cite{minion-design} gain the highest degrees of practical -anonymity at the expense of introducing highly variable delays, making -them unsuitable for applications such as web browsing. Commercial -single-hop proxies~\cite{anonymizer} can provide good performance, but -a single-point compromise can expose all users' traffic, and a -single-point eavesdropper can perform traffic analysis on the entire -network. Also, their proprietary implementations place any -infrastructure that depends on these single-hop solutions at the mercy -of their providers' financial health as well as network security. -There are numerous other designs for distributed anonymous low-latency -communication~\cite{crowds-tissec,web-mix,freedom21-security,i2p,tarzan:ccs02,morphmix:fc04}. -Some have been deployed or even commercialized; some exist only on -paper. Though each has something unique to offer, we feel Tor has -advantages over each of them that make it a superior choice for most -users and applications. For example, unlike purely P2P designs we -neither limit ordinary users to content and services available only -within our network nor require them to take on responsibility for -connections outside the network, unless they separately choose to run -server nodes. Nonetheless because we support low-latency interactive -communications, end-to-end \emph{traffic correlation} -attacks~\cite{danezis:pet2004,defensive-dropping,SS03,hs-attack,bauer:tr2007} -allow an attacker who can observe both ends of a communication to -correlate packet timing and volume, quickly linking the initiator to -her destination. 
- - -Our defense lies in having a diverse enough set of nodes to prevent -most real-world adversaries from being in the right places to attack -users, by distributing each transaction over several nodes in the -network. This ``distributed trust'' approach means the Tor network -can be safely operated and used by a wide variety of mutually -distrustful users, providing sustainability and security. - -The Tor network has a broad range of users, making it difficult for -eavesdroppers to track them or profile interests. These include -ordinary citizens concerned about their privacy, corporations who -don't want to reveal information to their competitors, and law -enforcement and government intelligence agencies who need to do -operations on the Internet without being noticed. Naturally, -organizations will not want to depend on others for their security. -If most participating providers are reliable, Tor tolerates some -hostile infiltration of the network. - -This distribution of trust is central to the Tor philosophy and -pervades Tor at all levels: Onion routing has been open source since -the mid-nineties (mistrusting users can inspect the code themselves); -Tor is free software (anyone could take up the development of Tor from -the current team); anyone can use Tor without license or charge (which -encourages a broad user base with diverse interests); Tor is designed to be -usable (also promotes a large, diverse user base) and configurable (so -users can easily set up and run server nodes); the Tor -infrastructure is run by volunteers (it is not dependent on the -economic viability or business strategy of any company) who are -scattered around the globe (not completely under the jurisdiction of -any single country); ongoing development and deployment has been -funded by diverse sources (development does not fully depend on -funding from any one source or even funding for any one primary -purpose or sources in any one jurisdiction). 
All of these contribute -to Tor's resilience and sustainability. - - -\section{Social challenges} - -Many of the issues the Tor project needs to address extend beyond -system design and technology development. In particular, the Tor -project's \emph{image} with respect to its users and the rest of the -Internet impacts the security it can provide. With this image issue -in mind, this section discusses the Tor user base and Tor's -interaction with other services on the Internet. - -\subsection{Communicating security} - -Usability for anonymity systems contributes to their security, because -usability affects the possible anonymity set~\cite{econymics,back01}. -Conversely, an unusable system attracts few users and thus can't -provide much anonymity. - -This phenomenon has a second-order effect: knowing this, users should -choose which anonymity system to use based in part on how usable and -secure \emph{others} will find it, in order to get the protection of a -larger anonymity set. Thus we might supplement the adage ``usability -is a security parameter''~\cite{back01} with a new one: ``perceived -usability is a security parameter''~\cite{usability-network-effect}. - - - -\subsection{Reputability and perceived social value} -Another factor impacting the network's security is its reputability, -the perception of its social value based on its current user base. If -Alice is the only user who has ever downloaded the software, it might -be socially accepted, but she's not getting much anonymity. Add a -thousand activists, and she's anonymous, but everyone thinks she's an -activist too. Add a thousand diverse citizens (cancer survivors, -people concerned about identity theft, law enforcement agents, and so -on) and now she's harder to profile.
- -Furthermore, the network's reputability affects its operator base: -more people are willing to run a service if they believe it will be -used by human rights workers than if they believe it will be used -exclusively for disreputable ends. This effect becomes stronger if -node operators themselves think they will be associated with their -users' ends. - -So the more cancer survivors on Tor, the better for the human rights -activists. The more malicious hackers, the worse for the normal -users. Thus, reputability is an anonymity issue for two -reasons. First, it impacts the sustainability of the network: a -network that's always about to be shut down has difficulty attracting -and keeping adequate nodes. Second, a disreputable network is more -vulnerable to legal and political attacks, since it will attract fewer -supporters. - -Reputability becomes even more tricky in the case of privacy networks, -since the good uses of the network (such as publishing by journalists -in dangerous countries, protecting road warriors from profiling and -potential physical harm, tracking of criminals by law enforcement, -protecting corporate research interests, etc.) are typically kept private, -whereas network abuses or other problems tend to be more widely -publicized. - - -\subsection{Abuse} -\label{subsec:tor-and-blacklists} - -For someone willing to be antisocial or even break the law, Tor is -usually a poor choice to hide bad behavior. For example, Tor nodes are -publicly identified, unlike the million-node botnets that are now -common on the Internet. Nonetheless, we always expected that, -alongside legitimate users, Tor would also attract troublemakers who -exploit Tor to abuse services on the Internet with vandalism, rude -mail, and so on. \emph{Exit policies} have allowed individual nodes -to block access to specific IP/port ranges. 
This approach aims to -make operators more willing to run Tor by allowing them to prevent -their nodes from being used for abusing particular services. For -example, by default Tor nodes block SMTP (port 25), to avoid the issue -of spam. - -Exit policies are useful but insufficient: if not all nodes block a -given service, that service may try to block Tor instead. While being -blockable is important to being good netizens, we would like to -encourage services to allow anonymous access. Services should not need -to decide between blocking legitimate anonymous use and allowing -unlimited abuse. Nonetheless, blocking IP addresses is a -coarse-grained solution~\cite{netauth}: entire apartment buildings, -campuses, and even countries sometimes share a single IP address. -Also, whether intended or not, such blocking supports repression of -free speech. In many locations where Internet access of various kinds -is censored or even punished by imprisonment, Tor is a path both to -the outside world and to others inside. Blocking posts from Tor makes -the job of censoring authorities easier. This is a loss for both Tor -and services that block, such as Wikipedia: we don't want to compete -for (or divvy up) the NAT-protected entities of the world. This is -also unfortunate because there are relatively simple technical -solutions~\cite{nym}. Various schemes for escrowing anonymous posts -until they are reviewed by editors would both prevent abuse and remove -incentives for attempts to abuse. Further, pseudonymous reputation -tracking of posters through Tor would allow those who establish -adequate reputation to post without escrow~\cite{nym,nymble}. - -We stress that as far as we can tell, most Tor uses are not -abusive. Most services have not complained, and others are actively -working to find ways besides banning to cope with the abuse.
For -example, the Freenode IRC network had a problem with a coordinated -group of abusers joining channels and subtly taking over the -conversation; but when they labelled all users coming from Tor IP -addresses as ``anonymous users,'' removing the ability of the abusers -to blend in, the abusers stopped using Tor. This is an illustration of -how simple -technical mechanisms can remove the ability to abuse anonymously -without undermining the ability to communicate anonymously and can -thus remove the incentive to attempt abusing in this way. - - - -\section{The Future} -\label{sec:conclusion} - -Tor is the largest and most diverse low-latency anonymity network -available, but we are still in the early stages. Several major -questions remain. - -First, will our volunteer-based approach to sustainability continue to -work as well in the long term as it has the first several years? -Besides node operation, Tor research, deployment, maintenance, and -development are increasingly done by volunteers: package maintenance -for various OSes, document translation, GUI design and implementation, -live CDs, specification of new design changes, etc.\ -% -Second, Tor is only one of many components that preserve privacy -online. For applications where it is desirable to keep identifying -information out of application traffic, someone must build more and -better protocol-aware proxies that are usable by ordinary people. -% -Third, we need to maintain a reputation for social good, and learn how to -coexist with the variety of Internet services and their established -authentication mechanisms. We can't just keep escalating the blacklist -standoff forever. -% -Fourth, the current Tor architecture hardly scales even to handle -current user demand. We must deploy designs and incentives to further -encourage clients to relay traffic too, without thereby trading away -too much anonymity or other properties. - -These are difficult and open questions.
Yet choosing not to solve them -means leaving most users to a less secure network or no anonymizing -network at all.\\ - -\noindent{\bf Acknowledgment:} Thanks to Matt Edman for many - helpful comments on a draft of this article. -\bibliographystyle{plain} \bibliography{tor-design} - -\end{document} - diff --git a/doc/design-paper/tor-design.bib b/doc/design-paper/tor-design.bib deleted file mode 100644 index 981761e94b..0000000000 --- a/doc/design-paper/tor-design.bib +++ /dev/null @@ -1,1493 +0,0 @@ -% hs-attack -@inproceedings{hs-attack, - title = {Locating Hidden Servers}, - author = {Lasse {\O}verlier and Paul Syverson}, - booktitle = {Proceedings of the 2006 IEEE Symposium on Security and Privacy}, - year = {2006}, - month = {May}, - publisher = {IEEE CS}, -} - - -@TechReport{bauer:tr2007, - author = {Kevin Bauer and Damon McCoy and Dirk Grunwald and Tadayoshi Kohno and Douglas Sicker}, - title = {Low-Resource Routing Attacks Against Anonymous Systems}, - institution = {University of Colorado at Boulder}, - year = 2007, - number = {CU-CS-1025-07} -} - -@inproceedings{bauer:wpes2007, - title = {Low-Resource Routing Attacks Against Tor}, - author = {Kevin Bauer and Damon McCoy and Dirk Grunwald and Tadayoshi Kohno and Douglas Sicker}, - booktitle = {{Proceedings of the Workshop on Privacy in the Electronic Society (WPES 2007)}}, - year = {2007}, - month = {October}, - address = {Washington, DC, USA}, -} - -% fix me -@misc{tannenbaum96, - author = "Andrew Tanenbaum", - title = "Computer Networks", - year = "1996", - publisher = "Prentice Hall, 3rd edition", -} - -@article{ meadows96, - author = "Catherine Meadows", - title = "The {NRL} Protocol Analyzer: An Overview", - journal = "Journal of Logic Programming", - volume = "26", - number = "2", - pages = "113--131", - year = "1996", -} -@inproceedings{kesdogan:pet2002, - title = {Unobservable Surfing on the World Wide Web: Is Private Information Retrieval an - alternative to the MIX based Approach?}, - author =
{Dogan Kesdogan and Mark Borning and Michael Schmeink}, - booktitle = {Privacy Enhancing Technologies (PET 2002)}, - year = {2002}, - month = {April}, - editor = {Roger Dingledine and Paul Syverson}, - publisher = {Springer-Verlag, LNCS 2482}, -} - -@inproceedings{statistical-disclosure, - title = {Statistical Disclosure Attacks}, - author = {George Danezis}, - booktitle = {Security and Privacy in the Age of Uncertainty ({SEC2003})}, - organization = {{IFIP TC11}}, - year = {2003}, - month = {May}, - address = {Athens}, - pages = {421--426}, - publisher = {Kluwer}, -} - -@inproceedings{limits-open, - title = {Limits of Anonymity in Open Environments}, - author = {Dogan Kesdogan and Dakshi Agrawal and Stefan Penz}, - booktitle = {Information Hiding Workshop (IH 2002)}, - year = {2002}, - month = {October}, - editor = {Fabien Petitcolas}, - publisher = {Springer-Verlag, LNCS 2578}, -} - -@inproceedings{isdn-mixes, - title = {{ISDN-mixes: Untraceable communication with very small bandwidth overhead}}, - author = {Andreas Pfitzmann and Birgit Pfitzmann and Michael Waidner}, - booktitle = {GI/ITG Conference on Communication in Distributed Systems}, - year = {1991}, - month = {February}, - pages = {451-463}, -} - - -@Article{jerichow-jsac98, - author = {Anja Jerichow and Jan M\"{u}ller and Andreas - Pfitzmann and Birgit Pfitzmann and Michael Waidner}, - title = {Real-Time Mixes: A Bandwidth-Efficient Anonymity Protocol}, - journal = {IEEE Journal on Selected Areas in Communications}, - year = 1998, - volume = 16, - number = 4, - pages = {495--509}, - month = {May} -} - -@inproceedings{tarzan:ccs02, - title = {Tarzan: A Peer-to-Peer Anonymizing Network Layer}, - author = {Michael J. 
Freedman and Robert Morris}, - booktitle = {9th {ACM} {C}onference on {C}omputer and {C}ommunications - {S}ecurity ({CCS 2002})}, - year = {2002}, - month = {November}, - address = {Washington, DC}, -} - -@inproceedings{cebolla, - title = {{Cebolla: Pragmatic IP Anonymity}}, - author = {Zach Brown}, - booktitle = {Ottawa Linux Symposium}, - year = {2002}, - month = {June}, -} - -@inproceedings{eax, - author = "M. Bellare and P. Rogaway and D. Wagner", - title = {The {EAX} Mode of Operation: A Two-Pass Authenticated-Encryption Scheme Optimized for Simplicity and Efficiency}, - booktitle = {Fast Software Encryption 2004}, - month = {February}, - year = {2004}, -} - -@misc{darkside, - title = {{The Dark Side of the Web: An Open Proxy's View}}, - author = {Vivek S. Pai and Limin Wang and KyoungSoo Park and Ruoming Pang and Larry Peterson}, - note = {\newline \url{http://codeen.cs.princeton.edu/}}, -} -% note = {Submitted to HotNets-II. \url{http://codeen.cs.princeton.edu/}}, - -@Misc{anonymizer, - key = {anonymizer}, - title = {The {Anonymizer}}, - note = {\url{http://anonymizer.com/}} -} - -@Misc{privoxy, - key = {privoxy}, - title = {{Privoxy}}, - note = {\url{http://www.privoxy.org/}} -} - -@Misc{i2p, - key = {i2p}, - title = {{I2P}}, - note = {\url{http://www.i2p.net/}} -} - -@Misc{nym, - author = {Jason Holt}, - title = {nym: practical pseudonymity for anonymous networks}, - note = {Paper and source code at \url{http://www.lunkwill.org/src/nym/}} -} - -@InProceedings{nymble, - author = {Peter C. Johnson and Apu Kapadia and Patrick P. Tsang and Sean W. Smith}, - title = {Nymble: Anonymous {IP}-address Blocking}, - booktitle = {Privacy Enhancing Technologies (PET 2007)}, - year = 2007, - publisher = {Springer-Verlag, LNCS 4776} -} - -@inproceedings{anonnet, - title = {{Analysis of an Anonymity Network for Web Browsing}}, - author = {Marc Rennhard and Sandro Rafaeli and Laurent Mathy and Bernhard Plattner and - David Hutchison}, - booktitle = {{IEEE 7th Intl. 
Workshop on Enterprise Security (WET ICE - 2002)}}, - year = {2002}, - month = {June}, - address = {Pittsburgh, USA}, -} -% pages = {49--54}, - -@inproceedings{econymics, - title = {On the Economics of Anonymity}, - author = {Alessandro Acquisti and Roger Dingledine and Paul Syverson}, - booktitle = {Financial Cryptography}, - year = {2003}, - editor = {Rebecca N. Wright}, - publisher = {Springer-Verlag, LNCS 2742}, -} - -@inproceedings{defensive-dropping, - title = {Timing Analysis in Low-Latency Mix-Based Systems}, - author = {Brian N. Levine and Michael K. Reiter and Chenxi Wang and Matthew Wright}, - booktitle = {Financial Cryptography}, - year = {2004}, - editor = {Ari Juels}, - publisher = {Springer-Verlag, LNCS (forthcoming)}, -} - -@inproceedings{morphmix:fc04, - title = {Practical Anonymity for the Masses with MorphMix}, - author = {Marc Rennhard and Bernhard Plattner}, - booktitle = {Financial Cryptography}, - year = {2004}, - editor = {Ari Juels}, - publisher = {Springer-Verlag, LNCS (forthcoming)}, -} - -@inproceedings{eternity, - title = {The Eternity Service}, - author = {Ross Anderson}, - booktitle = {Pragocrypt '96}, - year = {1996}, -} - %note = {\url{http://www.cl.cam.ac.uk/users/rja14/eternity/eternity.html}}, - - -@inproceedings{minion-design, - title = {Mixminion: Design of a Type {III} Anonymous Remailer Protocol}, - author = {George Danezis and Roger Dingledine and Nick Mathewson}, - booktitle = {2003 IEEE Symposium on Security and Privacy}, - year = {2003}, - month = {May}, - publisher = {IEEE CS}, - pages = {2--15}, -} - %note = {\url{http://mixminion.net/minion-design.pdf}}, - -@inproceedings{ rao-pseudonymity, - author = "Josyula R. 
Rao and Pankaj Rohatgi", - title = "Can Pseudonymity Really Guarantee Privacy?", - booktitle = "Proceedings of the Ninth USENIX Security Symposium", - year = {2000}, - month = Aug, - publisher = {USENIX}, - pages = "85--96", -} - %note = {\url{http://www.usenix.org/publications/library/proceedings/sec2000/ -%full_papers/rao/rao.pdf}}, - -@InProceedings{pfitzmann90how, - author = "Birgit Pfitzmann and Andreas Pfitzmann", - title = "How to Break the Direct {RSA}-Implementation of {MIXes}", - booktitle = {Eurocrypt 89}, - publisher = {Springer-Verlag, LNCS 434}, - year = {1990}, - note = {\url{http://citeseer.nj.nec.com/pfitzmann90how.html}}, -} - -@Misc{tor-spec, - author = {Roger Dingledine and Nick Mathewson}, - title = {Tor Protocol Specifications}, - note = {\url{https://www.torproject.org/svn/trunk/doc/tor-spec.txt}}, -} - -@Misc{incentives-txt, - author = {Roger Dingledine and Nick Mathewson}, - title = {Tor Incentives Design Brainstorms}, - note = {\url{https://www.torproject.org/svn/trunk/doc/incentives.txt}}, -} - -@InProceedings{BM:mixencrypt, - author = {M{\"o}ller, Bodo}, - title = {Provably Secure Public-Key Encryption for Length-Preserving Chaumian Mixes}, - booktitle = {{CT-RSA} 2003}, - publisher = {Springer-Verlag, LNCS 2612}, - year = 2003, -} - -@InProceedings{back01, - author = {Adam Back and Ulf M\"oller and Anton Stiglic}, - title = {Traffic Analysis Attacks and Trade-Offs in Anonymity Providing Systems}, - booktitle = {Information Hiding (IH 2001)}, - pages = {245--257}, - year = 2001, - editor = {Ira S. Moskowitz}, - publisher = {Springer-Verlag, LNCS 2137}, -} - %note = {\newline \url{http://www.cypherspace.org/adam/pubs/traffic.pdf}}, - -@InProceedings{rackoff93cryptographic, - author = {Charles Rackoff and Daniel R. 
Simon}, - title = {Cryptographic Defense Against Traffic Analysis}, - booktitle = {{ACM} Symposium on Theory of Computing}, - pages = {672--681}, - year = {1993}, -} - %note = {\url{http://research.microsoft.com/crypto/dansimon/me.htm}}, - -@InProceedings{freehaven-berk, - author = {Roger Dingledine and Michael J. Freedman and David Molnar}, - title = {The Free Haven Project: Distributed Anonymous Storage Service}, - booktitle = {Designing Privacy Enhancing Technologies: Workshop - on Design Issue in Anonymity and Unobservability}, - year = 2000, - month = {July}, - editor = {H. Federrath}, - publisher = {Springer-Verlag, LNCS 2009}, -} - - @InProceedings{move-ndss05, - author = {Angelos Stavrou and Angelos D. Keromytis and Jason Nieh and Vishal Misra and Dan Rubenstein}, - title = {MOVE: An End-to-End Solution To Network Denial of Service}, - booktitle = {{ISOC Network and Distributed System Security Symposium (NDSS05)}}, - year = 2005, - month = {February}, - publisher = {Internet Society} -} - -%note = {\url{http://freehaven.net/papers.html}}, - - - - -@InProceedings{raymond00, - author = {J. F. Raymond}, - title = {{Traffic Analysis: Protocols, Attacks, Design Issues, - and Open Problems}}, - booktitle = {Designing Privacy Enhancing Technologies: Workshop - on Design Issue in Anonymity and Unobservability}, - year = 2000, - month = {July}, - pages = {10-29}, - editor = {H. Federrath}, - publisher = {Springer-Verlag, LNCS 2009}, -} - -@InProceedings{sybil, - author = "John Douceur", - title = {{The Sybil Attack}}, - booktitle = "Proceedings of the 1st International Peer To Peer Systems Workshop (IPTPS)", - month = Mar, - year = 2002, -} - - -@InCollection{price-privacy, - author = {Paul Syverson and Adam Shostack}, - editor = {L. Jean Camp and Stephen Lewis}, - title = {What Price Privacy? 
(and why identity theft is about neither identity nor theft)}, - booktitle = {Economics of Information Security}, - chapter = 10, - publisher = {Kluwer}, - year = 2004, - pages = {129--142} -} - - -@InProceedings{trickle02, - author = {Andrei Serjantov and Roger Dingledine and Paul Syverson}, - title = {From a Trickle to a Flood: Active Attacks on Several - Mix Types}, - booktitle = {Information Hiding (IH 2002)}, - year = {2002}, - editor = {Fabien Petitcolas}, - publisher = {Springer-Verlag, LNCS 2578}, -} - -@InProceedings{langos02, - author = {Oliver Berthold and Heinrich Langos}, - title = {Dummy Traffic Against Long Term Intersection Attacks}, - booktitle = {Privacy Enhancing Technologies (PET 2002)}, - year = {2002}, - editor = {Roger Dingledine and Paul Syverson}, - publisher = {Springer-Verlag, LNCS 2482} -} - - -@InProceedings{hintz-pet02, - author = {Andrew Hintz}, - title = {Fingerprinting Websites Using Traffic Analysis}, - booktitle = {Privacy Enhancing Technologies (PET 2002)}, - pages = {171--178}, - year = 2002, - editor = {Roger Dingledine and Paul Syverson}, - publisher = {Springer-Verlag, LNCS 2482} -} - -@InProceedings{or-discex00, - author = {Paul Syverson and Michael Reed and David Goldschlag}, - title = {{O}nion {R}outing Access Configurations}, - booktitle = {DARPA Information Survivability Conference and - Exposition (DISCEX 2000)}, - year = {2000}, - publisher = {IEEE CS Press}, - pages = {34--40}, - volume = {1}, -} - %note = {\newline \url{http://www.onion-router.net/Publications.html}}, - -@Inproceedings{or-pet00, - title = {{Towards an Analysis of Onion Routing Security}}, - author = {Paul Syverson and Gene Tsudik and Michael Reed and - Carl Landwehr}, - booktitle = {Designing Privacy Enhancing Technologies: Workshop - on Design Issue in Anonymity and Unobservability}, - year = 2000, - month = {July}, - pages = {96--114}, - editor = {H. 
Federrath}, - publisher = {Springer-Verlag, LNCS 2009}, -} - %note = {\url{http://www.onion-router.net/Publications/WDIAU-2000.ps.gz}}, - -@Inproceedings{freenet-pets00, - title = {Freenet: A Distributed Anonymous Information Storage - and Retrieval System}, - author = {Ian Clarke and Oskar Sandberg and Brandon Wiley and - Theodore W. Hong}, - booktitle = {Designing Privacy Enhancing Technologies: Workshop - on Design Issue in Anonymity and Unobservability}, - year = 2000, - month = {July}, - pages = {46--66}, - editor = {H. Federrath}, - publisher = {Springer-Verlag, LNCS 2009}, -} - %note = {\url{http://citeseer.nj.nec.com/clarke00freenet.html}}, - -@InProceedings{or-ih96, - author = {David M. Goldschlag and Michael G. Reed and Paul - F. Syverson}, - title = {Hiding Routing Information}, - booktitle = {Information Hiding, First International Workshop}, - pages = {137--150}, - year = 1996, - editor = {R. Anderson}, - month = {May}, - publisher = {Springer-Verlag, LNCS 1174}, -} - -@InProceedings{federrath-ih96, - author = {Hannes Federrath and Anja Jerichow and Andreas Pfitzmann}, - title = {{MIXes} in Mobile Communication Systems: Location - Management with Privacy}, - booktitle = {Information Hiding, First International Workshop}, - pages = {121--135}, - year = 1996, - editor = {R. Anderson}, - month = {May}, - publisher = {Springer-Verlag, LNCS 1174}, -} - - -@InProceedings{reed-protocols97, - author = {Michael G. Reed and Paul F. Syverson and David - M. Goldschlag}, - title = {Protocols Using Anonymous Connections: Mobile Applications}, - booktitle = {Security Protocols: 5th International Workshop}, - pages = {13--23}, - year = 1997, - editor = {Bruce Christianson and Bruno Crispo and Mark Lomas - and Michael Roe}, - month = {April}, - publisher = {Springer-Verlag, LNCS 1361} -} - - - -@Article{or-jsac98, - author = {Michael G. Reed and Paul F. Syverson and David - M. 
Goldschlag}, - title = {Anonymous Connections and Onion Routing}, - journal = {IEEE Journal on Selected Areas in Communications}, - year = 1998, - volume = 16, - number = 4, - pages = {482--494}, - month = {May}, -} - %note = {\url{http://www.onion-router.net/Publications/JSAC-1998.ps.gz}} - -@Misc{TLS, - author = {T. Dierks and C. Allen}, - title = {The {TLS} {P}rotocol --- {V}ersion 1.0}, - howpublished = {IETF RFC 2246}, - month = {January}, - year = {1999}, -} -%note = {\url{http://www.rfc-editor.org/rfc/rfc2246.txt}}, - -@Misc{SMTP, - author = {J. Postel}, - title = {Simple {M}ail {T}ransfer {P}rotocol}, - howpublished = {IETF RFC 2821 (also STD0010)}, - month = {April}, - year = {2001}, - note = {\url{http://www.rfc-editor.org/rfc/rfc2821.txt}}, -} - -@Misc{IMAP, - author = {M. Crispin}, - title = {Internet {M}essage {A}ccess {P}rotocol --- {V}ersion 4rev1}, - howpublished = {IETF RFC 2060}, - month = {December}, - year = {1996}, - note = {\url{http://www.rfc-editor.org/rfc/rfc2060.txt}}, -} - -@misc{pipenet, - title = {PipeNet 1.1}, - author = {Wei Dai}, - year = 1996, - month = {August}, - howpublished = {Usenet post}, - note = {\url{http://www.eskimo.com/~weidai/pipenet.txt} First mentioned - in a post to the cypherpunks list, Feb.\ 1995.}, -} - - -@Misc{POP3, - author = {J. Myers and M. Rose}, - title = {Post {O}ffice {P}rotocol --- {V}ersion 3}, - howpublished = {IETF RFC 1939 (also STD0053)}, - month = {May}, - year = {1996}, - note = {\url{http://www.rfc-editor.org/rfc/rfc1939.txt}}, -} - - -@InProceedings{shuffle, - author = {C. Andrew Neff}, - title = {A Verifiable Secret Shuffle and its Application to E-Voting}, - booktitle = {8th ACM Conference on Computer and Communications - Security (CCS-8)}, - pages = {116--125}, - year = 2001, - editor = {P. 
Samarati}, - month = {November}, - publisher = {ACM Press}, -} - %note = {\url{http://www.votehere.net/ada_compliant/ourtechnology/ - % technicaldocs/shuffle.pdf}}, - -@InProceedings{dolev91, - author = {Danny Dolev and Cynthia Dwork and Moni Naor}, - title = {Non-Malleable Cryptography}, - booktitle = {23rd ACM Symposium on the Theory of Computing (STOC)}, - pages = {542--552}, - year = 1991, - note = {Updated version at - \url{http://citeseer.nj.nec.com/dolev00nonmalleable.html}}, -} - -@TechReport{rsw96, - author = {Ronald L. Rivest and Adi Shamir and David A. Wagner}, - title = {Time-lock puzzles and timed-release Crypto}, - year = 1996, - type = {MIT LCS technical memo}, - number = {MIT/LCS/TR-684}, - month = {February}, - note = {\newline \url{http://citeseer.nj.nec.com/rivest96timelock.html}}, -} - -@InProceedings{web-mix, - author = {Oliver Berthold and Hannes Federrath and Stefan K\"opsell}, - title = {Web {MIX}es: A system for anonymous and unobservable - {I}nternet access}, - booktitle = {Designing Privacy Enhancing Technologies: Workshop - on Design Issue in Anonymity and Unobservability}, - editor = {H. Federrath}, - publisher = {Springer-Verlag, LNCS 2009}, - year = {2000}, -} -% pages = {115--129}, - -@InProceedings{disad-free-routes, - author = {Oliver Berthold and Andreas Pfitzmann and Ronny Standtke}, - title = {The disadvantages of free {MIX} routes and how to overcome - them}, - booktitle = {Designing Privacy Enhancing Technologies: Workshop - on Design Issue in Anonymity and Unobservability}, - pages = {30--45}, - year = 2000, - editor = {H. 
Federrath}, - publisher = {Springer-Verlag, LNCS 2009}, -} - %note = {\url{http://www.tik.ee.ethz.ch/~weiler/lehre/netsec/Unterlagen/anon/ - % disadvantages_berthold.pdf}}, - -@InProceedings{boneh00, - author = {Dan Boneh and Moni Naor}, - title = {Timed Commitments}, - booktitle = {Advances in Cryptology -- {CRYPTO} 2000}, - pages = {236--254}, - year = 2000, - publisher = {Springer-Verlag, LNCS 1880}, - note = {\newline \url{http://crypto.stanford.edu/~dabo/abstracts/timedcommit.html}}, -} - -@InProceedings{goldschlag98, - author = {David M. Goldschlag and Stuart G. Stubblebine}, - title = {Publicly Verifiable Lotteries: Applications of - Delaying Functions}, - booktitle = {Financial Cryptography}, - pages = {214--226}, - year = 1998, - publisher = {Springer-Verlag, LNCS 1465}, - note = {\newline \url{http://citeseer.nj.nec.com/goldschlag98publicly.html}}, -} - -@InProceedings{syverson98, - author = {Paul Syverson}, - title = {Weakly Secret Bit Commitment: Applications to - Lotteries and Fair Exchange}, - booktitle = {Computer Security Foundations Workshop (CSFW11)}, - pages = {2--13}, - year = 1998, - address = {Rockport Massachusetts}, - month = {June}, - publisher = {IEEE CS Press}, - note = {\newline \url{http://chacs.nrl.navy.mil/publications/CHACS/1998/}}, -} - -@Misc{shoup-iso, - author = {Victor Shoup}, - title = {A Proposal for an {ISO} {S}tandard for Public Key Encryption (version 2.1)}, - note = {Revised December 20, 2001. \url{http://www.shoup.net/papers/}}, -} - -@Misc{shoup-oaep, - author = {Victor Shoup}, - title = {{OAEP} Reconsidered}, - howpublished = {{IACR} e-print 2000/060}, - note = {\newline \url{http://eprint.iacr.org/2000/060/}}, -} - -@Misc{oaep-still-alive, - author = {E. Fujisaki and D. Pointcheval and T. Okamoto and J. 
Stern}, - title = {{RSA}-{OAEP} is Still Alive!}, - howpublished = {{IACR} e-print 2000/061}, - note = {\newline \url{http://eprint.iacr.org/2000/061/}}, -} - -@misc{echolot, - author = {Peter Palfrader}, - title = {Echolot: a pinger for anonymous remailers}, - note = {\url{http://www.palfrader.org/echolot/}}, -} - -@Misc{mixmaster-attacks, - author = {Lance Cottrell}, - title = {Mixmaster and Remailer Attacks}, - note = {\url{http://www.obscura.com/~loki/remailer/remailer-essay.html}}, -} - -@Misc{mixmaster-spec, - author = {Ulf M{\"o}ller and Lance Cottrell and Peter - Palfrader and Len Sassaman}, - title = {Mixmaster {P}rotocol --- {V}ersion 2}, - year = {2003}, - month = {July}, - howpublished = {Draft}, - note = {\url{http://www.abditum.com/mixmaster-spec.txt}}, -} - -@InProceedings{puzzles-tls, - author = "Drew Dean and Adam Stubblefield", - title = {{Using Client Puzzles to Protect TLS}}, - booktitle = "Proceedings of the 10th USENIX Security Symposium", - year = {2001}, - month = Aug, - publisher = {USENIX}, -} - -@InProceedings{breadpudding, - author = {Markus Jakobsson and Ari Juels}, - title = {Proofs of Work and Bread Pudding Protocols}, - booktitle = {Proceedings of the IFIP TC6 and TC11 Joint Working - Conference on Communications and Multimedia Security - (CMS '99)}, - year = 1999, - month = {September}, - publisher = {Kluwer} -} - -@Misc{hashcash, - author = {Adam Back}, - title = {Hash cash}, - note = {\newline \url{http://www.cypherspace.org/~adam/hashcash/}}, -} - -@InProceedings{oreilly-acc, - author = {Roger Dingledine and Michael J. 
Freedman and David Molnar}, - title = {Accountability}, - booktitle = {Peer-to-peer: Harnessing the Benefits of a Disruptive - Technology}, - year = {2001}, - publisher = {O'Reilly and Associates}, -} - - -@InProceedings{han, - author = {Yongfei Han}, - title = {Investigation of non-repudiation protocols}, - booktitle = {ACISP '96}, - year = 1996, - publisher = {Springer-Verlag}, -} - - -@Misc{socks5, - key = {socks5}, - title = {{SOCKS} {P}rotocol {V}ersion 5}, - howpublished= {IETF RFC 1928}, - month = {March}, - year = 1996, - note = {\url{http://www.ietf.org/rfc/rfc1928.txt}} -} - -@InProceedings{abe, - author = {Masayuki Abe}, - title = {Universally Verifiable {MIX} With Verification Work Independent of - The Number of {MIX} Servers}, - booktitle = {{EUROCRYPT} 1998}, - year = {1998}, - publisher = {Springer-Verlag, LNCS 1403}, -} - -@InProceedings{desmedt, - author = {Yvo Desmedt and Kaoru Kurosawa}, - title = {How To Break a Practical {MIX} and Design a New One}, - booktitle = {{EUROCRYPT} 2000}, - year = {2000}, - publisher = {Springer-Verlag, LNCS 1803}, - note = {\url{http://citeseer.nj.nec.com/447709.html}}, -} - -@InProceedings{mitkuro, - author = {M. Mitomo and K. Kurosawa}, - title = {{Attack for Flash MIX}}, - booktitle = {{ASIACRYPT} 2000}, - year = {2000}, - publisher = {Springer-Verlag, LNCS 1976}, - note = {\newline \url{http://citeseer.nj.nec.com/450148.html}}, -} - -@InProceedings{hybrid-mix, - author = {M. Ohkubo and M. 
Abe}, - title = {A {L}ength-{I}nvariant {H}ybrid {MIX}}, - booktitle = {Advances in Cryptology - {ASIACRYPT} 2000}, - year = {2000}, - publisher = {Springer-Verlag, LNCS 1976}, -} - -@InProceedings{PShuffle, - author = {Jun Furukawa and Kazue Sako}, - title = {An Efficient Scheme for Proving a Shuffle}, - editor = {Joe Kilian}, - booktitle = {CRYPTO 2001}, - year = {2001}, - publisher = {Springer-Verlag, LNCS 2139}, -} - - -@InProceedings{jakobsson-optimally, - author = "Markus Jakobsson and Ari Juels", - title = "An Optimally Robust Hybrid Mix Network (Extended Abstract)", - booktitle = {Principles of Distributed Computing - {PODC} '01}, - year = "2001", - publisher = {ACM Press}, - note = {\url{http://citeseer.nj.nec.com/492015.html}}, -} - -@InProceedings{kesdogan, - author = {D. Kesdogan and M. Egner and T. B\"uschkes}, - title = {Stop-and-Go {MIX}es Providing Probabilistic Anonymity in an Open - System}, - booktitle = {Information Hiding (IH 1998)}, - year = {1998}, - publisher = {Springer-Verlag, LNCS 1525}, -} - %note = {\url{http://www.cl.cam.ac.uk/~fapp2/ihw98/ihw98-sgmix.pdf}}, - -@InProceedings{socks4, - author = {David Koblas and Michelle R. Koblas}, - title = {{SOCKS}}, - booktitle = {UNIX Security III Symposium (1992 USENIX Security - Symposium)}, - pages = {77--83}, - year = 1992, - publisher = {USENIX}, -} - -@InProceedings{flash-mix, - author = {Markus Jakobsson}, - title = {Flash {M}ixing}, - booktitle = {Principles of Distributed Computing - {PODC} '99}, - year = {1999}, - publisher = {ACM Press}, - note = {\newline \url{http://citeseer.nj.nec.com/jakobsson99flash.html}}, -} - -@InProceedings{SK, - author = {Joe Kilian and Kazue Sako}, - title = {Receipt-Free {MIX}-Type Voting Scheme - A Practical Solution to - the Implementation of a Voting Booth}, - booktitle = {EUROCRYPT '95}, - year = {1995}, - publisher = {Springer-Verlag}, -} - -@InProceedings{OAEP, - author = {M. Bellare and P. 
Rogaway}, - year = {1994}, - booktitle = {EUROCRYPT '94}, - title = {Optimal {A}symmetric {E}ncryption {P}adding : How To Encrypt With - {RSA}}, - publisher = {Springer-Verlag}, - note = {\newline \url{http://www-cse.ucsd.edu/users/mihir/papers/oaep.html}}, -} -@inproceedings{babel, - title = {Mixing {E}-mail With {B}abel}, - author = {Ceki G\"ulc\"u and Gene Tsudik}, - booktitle = {{Network and Distributed Security Symposium (NDSS 96)}}, - year = 1996, - month = {February}, - pages = {2--16}, - publisher = {IEEE}, -} - %note = {\url{http://citeseer.nj.nec.com/2254.html}}, - -@Misc{rprocess, - author = {RProcess}, - title = {Selective Denial of Service Attacks}, - note = {\newline \url{http://www.eff.org/pub/Privacy/Anonymity/1999\_09\_DoS\_remail\_vuln.html}}, -} - -@Article{remailer-history, - author = {Sameer Parekh}, - title = {Prospects for Remailers}, - journal = {First Monday}, - volume = {1}, - number = {2}, - month = {August}, - year = {1996}, - note = {\url{http://www.firstmonday.dk/issues/issue2/remailers/}}, -} - -@Article{chaum-mix, - author = {David Chaum}, - title = {Untraceable electronic mail, return addresses, and digital pseudonyms}, - journal = {Communications of the ACM}, - year = {1981}, - volume = {24}, - number = {2}, - month = {February}, -} - %note = {\url{http://www.eskimo.com/~weidai/mix-net.txt}}, - -@InProceedings{nym-alias-net, - author = {David Mazi\`{e}res and M. 
Frans Kaashoek}, - title = {{The Design, Implementation and Operation of an Email - Pseudonym Server}}, - booktitle = {$5^{th}$ ACM Conference on Computer and - Communications Security (CCS'98)}, - year = 1998, - publisher = {ACM Press}, -} - %note = {\newline \url{http://www.scs.cs.nyu.edu/~dm/}}, - -@InProceedings{tangler, - author = {Marc Waldman and David Mazi\`{e}res}, - title = {Tangler: A Censorship-Resistant Publishing System - Based on Document Entanglements}, - booktitle = {$8^{th}$ ACM Conference on Computer and - Communications Security (CCS-8)}, - pages = {86--135}, - year = 2001, - publisher = {ACM Press}, -} - %note = {\url{http://www.scs.cs.nyu.edu/~dm/}} - -@misc{neochaum, - author = {Tim May}, - title = {Payment mixes for anonymity}, - howpublished = {E-mail archived at - \url{http://\newline www.inet-one.com/cypherpunks/dir.2000.02.28-2000.03.05/msg00334.html}}, -} - -@misc{helsingius, - author = {J. Helsingius}, - title = {{\tt anon.penet.fi} press release}, - note = {\newline \url{http://www.penet.fi/press-english.html}}, -} - -@InProceedings{garay97secure, - author = {J. Garay and R. Gennaro and C. Jutla and T. Rabin}, - title = {Secure distributed storage and retrieval}, - booktitle = {11th International Workshop, WDAG '97}, - pages = {275--289}, - year = {1997}, - publisher = {Springer-Verlag, LNCS 1320}, - note = {\newline \url{http://citeseer.nj.nec.com/garay97secure.html}}, -} - -@InProceedings{PIK, - author = {C. Park and K. Itoh and K. 
Kurosawa}, - title = {Efficient anonymous channel and all/nothing election scheme}, - booktitle = {Advances in Cryptology -- {EUROCRYPT} '93}, - pages = {248--259}, - publisher = {Springer-Verlag, LNCS 765}, -} - -@Misc{pgpfaq, - key = {PGP}, - title = {{PGP} {FAQ}}, - note = {\newline \url{http://www.faqs.org/faqs/pgp-faq/}}, -} - -@Article{riordan-schneier, - author = {James Riordan and Bruce Schneier}, - title = {A Certified E-mail Protocol with No Trusted Third Party}, - journal = {13th Annual Computer Security Applications Conference}, - month = {December}, - year = {1998}, - note = {\newline \url{http://www.counterpane.com/certified-email.html}}, -} - - -@Article{crowds-tissec, - author = {Michael K. Reiter and Aviel D. Rubin}, - title = {Crowds: Anonymity for Web Transactions}, - journal = {ACM TISSEC}, - year = 1998, - volume = 1, - number = 1, - pages = {66--92}, - month = {June}, -} - %note = {\url{http://citeseer.nj.nec.com/284739.html}} - -@Article{crowds-dimacs, - author = {Michael K. Reiter and Aviel D. Rubin}, - title = {Crowds: Anonymity for Web Transactions}, - journal = {{DIMACS} Technical Report (Revised)}, - volume = {97}, - number = {15}, - month = {August}, - year = {1997}, -} - -@Misc{advogato, - author = {Raph Levien}, - title = {Advogato's Trust Metric}, - note = {\newline \url{http://www.advogato.org/trust-metric.html}}, -} - -@InProceedings{publius, - author = {Marc Waldman and Aviel Rubin and Lorrie Cranor}, - title = {Publius: {A} robust, tamper-evident, censorship-resistant and - source-anonymous web publishing system}, - booktitle = {Proc. 
9th USENIX Security Symposium}, - pages = {59--72}, - year = {2000}, - month = {August}, -} - %note = {\newline \url{http://citeseer.nj.nec.com/waldman00publius.html}}, - -@Misc{freedom-nyms, - author = {Russell Samuels}, - title = {Untraceable Nym Creation on the {F}reedom {N}etwork}, - year = {1999}, - month = {November}, - day = {21}, - note = {\newline \url{http://www.freedom.net/products/whitepapers/white11.html}}, -} - -@techreport{freedom2-arch, - title = {Freedom Systems 2.0 Architecture}, - author = {Philippe Boucher and Adam Shostack and Ian Goldberg}, - institution = {Zero Knowledge Systems, {Inc.}}, - year = {2000}, - month = {December}, - type = {White Paper}, - day = {18}, -} - -@techreport{freedom21-security, - title = {Freedom Systems 2.1 Security Issues and Analysis}, - author = {Adam Back and Ian Goldberg and Adam Shostack}, - institution = {Zero Knowledge Systems, {Inc.}}, - year = {2001}, - month = {May}, - type = {White Paper}, -} - -@inproceedings{cfs:sosp01, - title = {Wide-area cooperative storage with {CFS}}, - author = {Frank Dabek and M. Frans Kaashoek and David Karger and Robert Morris and Ion Stoica}, - booktitle = {18th {ACM} {S}ymposium on {O}perating {S}ystems {P}rinciples ({SOSP} '01)}, - year = {2001}, - month = {October}, - address = {Chateau Lake Louise, Banff, Canada}, -} - -@inproceedings{SS03, - title = {Passive Attack Analysis for Connection-Based Anonymity Systems}, - author = {Andrei Serjantov and Peter Sewell}, - booktitle = {Computer Security -- ESORICS 2003}, - publisher = {Springer-Verlag, LNCS 2808}, - year = {2003}, - month = {October}, -} - %note = {\url{http://www.cl.cam.ac.uk/users/aas23/papers_aas/conn_sys.ps}}, - -@Misc{pk-relations, - author = {M. Bellare and A. Desai and D. Pointcheval and P. Rogaway}, - title = {Relations Among Notions of Security for Public-Key Encryption - Schemes}, - howpublished = { - Extended abstract in {\em Advances in Cryptology - CRYPTO '98}, LNCS Vol. 1462. - Springer-Verlag, 1998. 
- Full version available from \newline \url{http://www-cse.ucsd.edu/users/mihir/}}, -} - - -@InProceedings{mix-acc, - author = {Roger Dingledine and Michael J. Freedman and David - Hopwood and David Molnar}, - title = {{A Reputation System to Increase MIX-net - Reliability}}, - booktitle = {Information Hiding (IH 2001)}, - pages = {126--141}, - year = 2001, - editor = {Ira S. Moskowitz}, - publisher = {Springer-Verlag, LNCS 2137}, -} - %note = {\url{http://www.freehaven.net/papers.html}}, - -@InProceedings{casc-rep, - author = {Roger Dingledine and Paul Syverson}, - title = {{Reliable MIX Cascade Networks through Reputation}}, - booktitle = {Financial Cryptography}, - year = 2002, - editor = {Matt Blaze}, - publisher = {Springer-Verlag, LNCS 2357}, -} - %note = {\newline \url{http://www.freehaven.net/papers.html}}, - -@InProceedings{zhou96certified, - author = {Zhou and Gollmann}, - title = {Certified Electronic Mail}, - booktitle = {{ESORICS: European Symposium on Research in Computer - Security}}, - publisher = {Springer-Verlag, LNCS 1146}, - year = {1996}, - note = {\newline \url{http://citeseer.nj.nec.com/zhou96certified.html}}, -} - -@Misc{realtime-mix, - author = {Anja Jerichow and Jan M\"uller and Andreas Pfitzmann and - Birgit Pfitzmann and Michael Waidner}, - title = {{Real-Time MIXes: A Bandwidth-Efficient Anonymity Protocol}}, - howpublished = {IEEE Journal on Selected Areas in Communications, 1998.}, - note = {\url{http://www.zurich.ibm.com/security/publications/1998.html}}, -} - -@InProceedings{danezis:pet2003, - author = {George Danezis}, - title = {Mix-networks with Restricted Routes}, - booktitle = {Privacy Enhancing Technologies (PET 2003)}, - year = 2003, - editor = {Roger Dingledine}, - publisher = {Springer-Verlag LNCS 2760} -} - -@InProceedings{gap-pets03, - author = {Krista Bennett and Christian Grothoff}, - title = {{GAP} -- practical anonymous networking}, - booktitle = {Privacy Enhancing Technologies (PET 2003)}, - year = 2003, - editor = 
{Roger Dingledine}, - publisher = {Springer-Verlag LNCS 2760} -} - -@Article{hordes-jcs, - author = {Brian Neil Levine and Clay Shields}, - title = {Hordes: A Multicast-Based Protocol for Anonymity}, - journal = {Journal of Computer Security}, - year = 2002, - volume = 10, - number = 3, - pages = {213--240} -} - -@TechReport{herbivore, - author = {Sharad Goel and Mark Robson and Milo Polte and Emin G\"{u}n Sirer}, - title = {Herbivore: A Scalable and Efficient Protocol for Anonymous Communication}, - institution = {Cornell University Computing and Information Science}, - year = 2003, - type = {Technical Report}, - number = {TR2003-1890}, - month = {February} -} - -@InProceedings{p5, - author = {Rob Sherwood and Bobby Bhattacharjee and Aravind Srinivasan}, - title = {$P^5$: A Protocol for Scalable Anonymous Communication}, - booktitle = {IEEE Symposium on Security and Privacy}, - pages = {58--70}, - year = 2002, - publisher = {IEEE CS} -} - -@phdthesis{ian-thesis, - title = {A Pseudonymous Communications Infrastructure for the Internet}, - author = {Ian Goldberg}, - school = {UC Berkeley}, - year = {2000}, - month = {Dec}, -} - -@Article{taz, - author = {Ian Goldberg and David Wagner}, - title = {TAZ Servers and the Rewebber Network: Enabling - Anonymous Publishing on the World Wide Web}, - journal = {First Monday}, - year = 1998, - volume = 3, - number = 4, - month = {August}, - note = {\url{http://www.firstmonday.dk/issues/issue3_4/goldberg/}} -} - -@Misc{tcp-over-tcp-is-bad, - key = {tcp-over-tcp-is-bad}, - title = {Why {TCP} Over {TCP} Is A Bad Idea}, - author = {Olaf Titz}, - note = {\url{http://sites.inka.de/sites/bigred/devel/tcp-tcp.html}} -} - -@inproceedings{wright02, - title = {An Analysis of the Degradation of Anonymous Protocols}, - author = {Matthew Wright and Micah Adler and Brian Neil Levine and Clay Shields}, - booktitle = {{Network and Distributed Security Symposium (NDSS 02)}}, - year = {2002}, - month = {February}, - publisher = {IEEE}, -} - 
-@inproceedings{wright03, - title = {Defending Anonymous Communication Against Passive Logging Attacks}, - author = {Matthew Wright and Micah Adler and Brian Neil Levine and Clay Shields}, - booktitle = {IEEE Symposium on Security and Privacy}, - pages= {28--41}, - year = {2003}, - month = {May}, - publisher = {IEEE CS}, -} - - -@InProceedings{attack-tor-oak05, - author = {Steven J. Murdoch and George Danezis}, - title = {Low-cost Traffic Analysis of {T}or}, - booktitle = {IEEE Symposium on Security and Privacy}, - year = 2005, - month = {May}, - publisher = {IEEE CS} -} - -@Misc{jap-backdoor, - author={{The AN.ON Project}}, - howpublished={Press release}, - year={2003}, - month={September}, - title={German Police proceeds against anonymity service}, - note={\url{http://www.datenschutzzentrum.de/material/themen/presse/anon-bka_e.htm}} -} - -@article{shsm03, - title = {Using Caching for Browsing Anonymity}, - author = {Anna Shubina and Sean Smith}, - journal = {ACM SIGEcom Exchanges}, - volume = {4}, - number = {2}, - year = {2003}, - month = {Sept}, - note = {\url{http://www.acm.org/sigs/sigecom/exchanges/volume_4_(03)/4.2-Shubina.pdf}}, -} - -@inproceedings{tor-design, - title = {Tor: The Second-Generation Onion Router}, - author = {Roger Dingledine and Nick Mathewson and Paul Syverson}, - booktitle = {Proceedings of the 13th USENIX Security Symposium}, - year = {2004}, - month = {August}, - note = {\url{https://www.torproject.org/tor-design.pdf}} -} - -@inproceedings{flow-correlation04, - title = {On Flow Correlation Attacks and Countermeasures in Mix Networks}, - author = {Ye Zhu and Xinwen Fu and Bryan Graham and Riccardo Bettati and Wei Zhao}, - booktitle = {Proceedings of Privacy Enhancing Technologies workshop (PET 2004)}, - year = {2004}, - month = {May}, - series = {LNCS}, - note = {\url{http://students.cs.tamu.edu/xinwenfu/paper/PET04.pdf}}, -} - -@InProceedings{danezis:pet2004, - author = "George Danezis", - title = "The Traffic Analysis of 
Continuous-Time Mixes", - booktitle= {Privacy Enhancing Technologies (PET 2004)}, - editor = {David Martin and Andrei Serjantov}, - month = {May}, - year = {2004}, - series = {LNCS}, - note = {\url{http://www.cl.cam.ac.uk/users/gd216/cmm2.pdf}}, -} - -@inproceedings{feamster:wpes2004, - title = {Location Diversity in Anonymity Networks}, - author = {Nick Feamster and Roger Dingledine}, - booktitle = {{Proceedings of the Workshop on Privacy in the Electronic Society (WPES 2004)}}, - year = {2004}, - month = {October}, - address = {Washington, DC, USA}, - note = {\url{http://freehaven.net/doc/routing-zones/routing-zones.ps}}, -} - -@inproceedings{koepsell:wpes2004, - title = {How to Achieve Blocking Resistance for Existing Systems Enabling Anonymous Web Surfing}, - author = {Stefan K\"opsell and Ulf Hilling}, - booktitle = {{Proceedings of the Workshop on Privacy in the Electronic Society (WPES 2004)}}, - year = {2004}, - month = {October}, - address = {Washington, DC, USA}, - note = {\url{http://freehaven.net/anonbib/papers/p103-koepsell.pdf}}, -} - -@inproceedings{sync-batching, - title = {Synchronous Batching: From Cascades to Free Routes}, - author = {Roger Dingledine and Vitaly Shmatikov and Paul Syverson}, - booktitle = {Proceedings of Privacy Enhancing Technologies workshop (PET 2004)}, - editor = {David Martin and Andrei Serjantov}, - year = {2004}, - month = {May}, - series = {LNCS}, - note = {\url{http://freehaven.net/doc/sync-batching/sync-batching.pdf}}, -} - -@InProceedings{e2e-traffic, - author = "Nick Mathewson and Roger Dingledine", - title = "Practical Traffic Analysis: Extending and Resisting Statistical Disclosure", - booktitle= {Privacy Enhancing Technologies (PET 2004)}, - editor = {David Martin and Andrei Serjantov}, - month = {May}, - year = {2004}, - series = {LNCS}, - note = {\url{http://freehaven.net/doc/e2e-traffic/e2e-traffic.pdf}}, -} - -@Misc{dtls, - author = {E. Rescorla and N. 
Modadugu}, - title = {{Datagram Transport Layer Security}}, - howpublished = {IETF Draft}, - month = {December}, - year = {2003}, - note = {\url{http://www.ietf.org/internet-drafts/draft-rescorla-dtls-02.txt}}, -} - -@InProceedings{usability-network-effect, - author={Roger Dingledine and Nick Mathewson}, - title={Anonymity Loves Company: Usability and the Network Effect}, - booktitle = {Designing Security Systems That People Can Use}, - year = {2005}, - publisher = {O'Reilly Media}, -} - -@inproceedings{usability:weis2006, - title = {Anonymity Loves Company: Usability and the Network Effect}, - author = {Roger Dingledine and Nick Mathewson}, - booktitle = {Proceedings of the Fifth Workshop on the Economics of Information Security - (WEIS 2006)}, - year = {2006}, - month = {June}, - address = {Cambridge, UK}, - bookurl = {http://weis2006.econinfosec.org/}, - note = {\url{http://freehaven.net/doc/wupss04/usability.pdf}}, -} - -@Misc{six-four, - key = {six-four}, - title = {{The Six/Four System}}, - note = {\url{http://sourceforge.net/projects/sixfour/}} -} - -@inproceedings{clayton:pet2006, - title = {Ignoring the Great Firewall of China}, - author = {Richard Clayton and Steven J. Murdoch and Robert N. M. 
Watson}, - booktitle = {Proceedings of the Sixth Workshop on Privacy Enhancing Technologies (PET 2006)}, - year = {2006}, - month = {June}, - address = {Cambridge, UK}, - publisher = {Springer}, - bookurl = {http://petworkshop.org/2006/}, - note = {\url{http://www.cl.cam.ac.uk/~rnc1/ignoring.pdf}}, -} - -@Misc{zuckerman-threatmodels, - key = {zuckerman-threatmodels}, - title = {We've got to adjust some of our threat models}, - author = {Ethan Zuckerman}, - note = {\url{http://www.ethanzuckerman.com/blog/?p=1019}} -} - -@Misc{cgiproxy, - key = {cgiproxy}, - title = {{CGIProxy: HTTP/FTP Proxy in a CGI Script}}, - author = {James Marshall}, - note = {\url{http://www.jmarshall.com/tools/cgiproxy/}} -} - -@Misc{circumventor, - key = {circumventor}, - title = {{How to install the Circumventor program}}, - author = {Bennett Haselton}, - note = {\url{http://www.peacefire.org/circumventor/simple-circumventor-instructions.html}} -} - -@Misc{psiphon, - key = {psiphon}, - title = {Psiphon}, - author = {Ronald Deibert et al}, - note = {\url{http://psiphon.civisec.org/}} -} - -@InProceedings{tcpstego, author = {Steven J. 
Murdoch and Stephen Lewis}, - title = {Embedding Covert Channels into {TCP/IP}}, - booktitle = {Information Hiding: 7th International Workshop}, - pages = {247--261}, - year = {2005}, - editor = {Mauro Barni and Jordi Herrera-Joancomart\'{\i} and -Stefan Katzenbeisser and Fernando P\'{e}rez-Gonz\'{a}lez}, - volume = {3727}, - series = {LNCS}, - address = {Barcelona, Catalonia (Spain)}, - month = {June}, - publisher = {Springer-Verlag}, - url = {http://www.cl.cam.ac.uk/~sjm217/papers/ih05coverttcp.pdf} -} - -@phdthesis{blossom-thesis, - title = {Perspective Access Networks}, - author = {Geoffrey Goodell}, - school = {Harvard University}, - year = {2006}, - month = {July}, - note = {\url{http://afs.eecs.harvard.edu/~goodell/thesis.pdf}}, -} - -@inproceedings{tap:pet2006, - title = {On the Security of the Tor Authentication Protocol}, - author = {Ian Goldberg}, - booktitle = {Proceedings of the Sixth Workshop on Privacy Enhancing Technologies (PET 2006)}, - year = {2006}, - month = {June}, - address = {Cambridge, UK}, - publisher = {Springer}, - bookurl = {http://petworkshop.org/2006/}, - note = {\url{http://www.cypherpunks.ca/~iang/pubs/torsec.pdf}}, -} - -@inproceedings{rep-anon, - title = {{Reputation in P2P Anonymity Systems}}, - author = {Roger Dingledine and Nick Mathewson and Paul Syverson}, - booktitle = {Proceedings of Workshop on Economics of Peer-to-Peer Systems}, - year = {2003}, - month = {June}, - note = {\url{http://freehaven.net/doc/econp2p03/econp2p03.pdf}}, -} - -@misc{tor-challenges, - author = {Roger Dingledine and Nick Mathewson and Paul Syverson}, - title = {Challenges in deploying low-latency anonymity}, - year = {2005}, - note = {Manuscript} -} - -@InProceedings{chaum-blind, - author = {David Chaum}, - title = {Blind Signatures for Untraceable Payments}, - booktitle = {Advances in Cryptology: Proceedings of Crypto 82}, - pages = {199--203}, - year = 1983, - editor = {D. Chaum and R.L. Rivest and A.T. 
Sherman}, - publisher = {Plenum Press} -} - -@Article{netauth, - author = {Geoffrey Goodell and Paul Syverson}, - title = {The Right Place at the Right Time: Examining the use of network location in authentication and abuse prevention}, - journal = {Communications of the ACM}, - year = 2007, - volume = 50, - number = 5, - pages = {113--117}, - month = {May} -} - -@misc{ip-to-country, - key = {ip-to-country}, - title = {IP-to-country database}, - note = {\url{http://ip-to-country.webhosting.info/}}, -} - -@misc{mackinnon-personal, - author = {Rebecca MacKinnon}, - title = {Private communication}, - year = {2006}, -} - -@inproceedings{pet05-bissias, - title = {Privacy Vulnerabilities in Encrypted HTTP Streams}, - author = {George Dean Bissias and Marc Liberatore and Brian Neil Levine}, - booktitle = {Proceedings of Privacy Enhancing Technologies workshop (PET 2005)}, - year = {2005}, - month = {May}, - note = {\url{http://prisms.cs.umass.edu/brian/pubs/bissias.liberatore.pet.2005.pdf}}, -} - -@InProceedings{infranet, - author = {Nick Feamster and Magdalena Balazinska and Greg Harfst and Hari Balakrishnan and David Karger}, - title = {Infranet: Circumventing Web Censorship and Surveillance}, - booktitle = {Proceedings of the 11th USENIX Security Symposium}, - year = {2002}, - month = {August}, - note = {\url{http://nms.lcs.mit.edu/~feamster/papers/usenixsec2002.pdf}}, -} - -@techreport{ ptacek98insertion, - author = "Thomas H. Ptacek and Timothy N. 
Newsham", - title = "Insertion, Evasion, and Denial of Service: Eluding Network Intrusion Detection", - institution = "Secure Networks, Inc.", - address = "Suite 330, 1201 5th Street S.W, Calgary, Alberta, Canada, T2R-0Y6", - year = "1998", - url = "citeseer.ist.psu.edu/ptacek98insertion.html", -} - -@inproceedings{active-wardens, - author = "Gina Fisk and Mike Fisk and Christos Papadopoulos and Joshua Neil", - title = "Eliminating Steganography in Internet Traffic with Active Wardens", - booktitle = {Information Hiding Workshop (IH 2002)}, - year = {2002}, - month = {October}, - editor = {Fabien Petitcolas}, - publisher = {Springer-Verlag, LNCS 2578}, -} - -@inproceedings{clog-the-queue, - title = {Don't Clog the Queue: Circuit Clogging and Mitigation in {P2P} anonymity schemes}, - author = {Jon McLachlan and Nicholas Hopper}, - booktitle = {Proceedings of Financial Cryptography (FC '08)}, - year = {2008}, - month = {January}, -} - -@inproceedings{snader08, - title = {A Tune-up for {Tor}: Improving Security and Performance in the {Tor} Network}, - author = {Robin Snader and Nikita Borisov}, - booktitle = {Proceedings of the Network and Distributed Security Symposium - {NDSS} '08}, - year = {2008}, - month = {February}, - publisher = {Internet Society}, -} - -@inproceedings{murdoch-pet2008, - title = {Metrics for Security and Performance in Low-Latency Anonymity Networks}, - author = {Steven J. Murdoch and Robert N. M. 
Watson}, - booktitle = {Proceedings of the Eighth International Symposium on Privacy Enhancing Technologies (PETS 2008)}, - year = {2008}, - month = {July}, - address = {Leuven, Belgium}, - pages = {115--132}, - editor = {Nikita Borisov and Ian Goldberg}, - publisher = {Springer}, - bookurl = {http://petsymposium.org/2008/}, -} - -@inproceedings{danezis-pet2008, - title = {Bridging and Fingerprinting: Epistemic Attacks on Route Selection}, - author = {George Danezis and Paul Syverson}, - booktitle = {Proceedings of the Eighth International Symposium on Privacy Enhancing Technologies (PETS 2008)}, - year = {2008}, - month = {July}, - address = {Leuven, Belgium}, - pages = {133--150}, - editor = {Nikita Borisov and Ian Goldberg}, - publisher = {Springer}, - bookurl = {http://petsymposium.org/2008/}, -} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "tor-design" -%%% End: diff --git a/doc/design-paper/tor-design.html b/doc/design-paper/tor-design.html deleted file mode 100644 index 5fac644e62..0000000000 --- a/doc/design-paper/tor-design.html +++ /dev/null @@ -1,2488 +0,0 @@ -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" - "DTD/xhtml1-transitional.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml"> -<head> -<meta name="GENERATOR" content="TtH 3.59" /> - <style type="text/css"> div.p { margin-top: 7pt;}</style> - <style type="text/css"><!-- - td div.comp { margin-top: -0.6ex; margin-bottom: -1ex;} - td div.comb { margin-top: -0.6ex; margin-bottom: -.6ex;} - td div.hrcomp { line-height: 0.9; margin-top: -0.8ex; margin-bottom: -1ex;} - td div.norm {line-height:normal;} - span.roman {font-family: serif; font-style: normal; font-weight: normal;} - span.overacc2 {position: relative; left: .8em; top: -1.2ex;} - span.overacc1 {position: relative; left: .6em; top: -1.2ex;} --></style> - - -<title> Tor: The Second-Generation Onion Router </title> -</head> -<body> - -<h1 align="center">Tor: The Second-Generation Onion Router </h1> -<div 
class="p"><!----></div> - -<h3 align="center"> -Roger Dingledine, The Free Haven Project, <tt>arma@freehaven.net</tt><br> -Nick Mathewson, The Free Haven Project, <tt>nickm@freehaven.net</tt><br> -Paul Syverson, Naval Research Lab, <tt>syverson@itd.nrl.navy.mil</tt> </h3> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<h2> Abstract</h2> -We present Tor, a circuit-based low-latency anonymous communication -service. This second-generation Onion Routing system addresses limitations -in the original design by adding perfect forward secrecy, congestion -control, directory servers, integrity checking, configurable exit policies, -and a practical design for location-hidden services via rendezvous -points. Tor works on the real-world -Internet, requires no special privileges or kernel modifications, requires -little synchronization or coordination between nodes, and provides a -reasonable tradeoff between anonymity, usability, and efficiency. -We briefly describe our experiences with an international network of -more than 30 nodes. We close with a list of open problems in anonymous communication. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h2><a name="tth_sEc1"> -<a name="sec:intro"> -1</a> Overview</h2> -</a> - -<div class="p"><!----></div> -Onion Routing is a distributed overlay network designed to anonymize -TCP-based applications like web browsing, secure shell, -and instant messaging. Clients choose a path through the network and -build a <em>circuit</em>, in which each node (or "onion router" or "OR") -in the path knows its predecessor and successor, but no other nodes in -the circuit. Traffic flows down the circuit in fixed-size -<em>cells</em>, which are unwrapped by a symmetric key at each node -(like the layers of an onion) and relayed downstream. 
The -Onion Routing project published several design and analysis -papers [<a href="#or-ih96" name="CITEor-ih96">27</a>,<a href="#or-jsac98" name="CITEor-jsac98">41</a>,<a href="#or-discex00" name="CITEor-discex00">48</a>,<a href="#or-pet00" name="CITEor-pet00">49</a>]. While a wide area Onion -Routing network was deployed briefly, the only long-running -public implementation was a fragile -proof-of-concept that ran on a single machine. Even this simple deployment -processed connections from over sixty thousand distinct IP addresses from -all over the world at a rate of about fifty thousand per day. -But many critical design and deployment issues were never -resolved, and the design has not been updated in years. Here -we describe Tor, a protocol for asynchronous, loosely federated onion -routers that provides the following improvements over the old Onion -Routing design: - -<div class="p"><!----></div> -<b>Perfect forward secrecy:</b> In the original Onion Routing design, -a single hostile node could record traffic and -later compromise successive nodes in the circuit and force them -to decrypt it. Rather than using a single multiply encrypted data -structure (an <em>onion</em>) to lay each circuit, -Tor now uses an incremental or <em>telescoping</em> path-building design, -where the initiator negotiates session keys with each successive hop in -the circuit. Once these keys are deleted, subsequently compromised nodes -cannot decrypt old traffic. As a side benefit, onion replay detection -is no longer necessary, and the process of building circuits is more -reliable, since the initiator knows when a hop fails and can then try -extending to a new node. - -<div class="p"><!----></div> -<b>Separation of "protocol cleaning" from anonymity:</b> -Onion Routing originally required a separate "application -proxy" for each supported application protocol — most of which were -never written, so many applications were never supported. 
Tor uses the -standard and near-ubiquitous SOCKS [<a href="#socks4" name="CITEsocks4">32</a>] proxy interface, allowing -us to support most TCP-based programs without modification. Tor now -relies on the filtering features of privacy-enhancing -application-level proxies such as Privoxy [<a href="#privoxy" name="CITEprivoxy">39</a>], without trying -to duplicate those features itself. - -<div class="p"><!----></div> -<b>No mixing, padding, or traffic shaping (yet):</b> Onion -Routing originally called for batching and reordering cells as they arrived, -assumed padding between ORs, and in -later designs added padding between onion proxies (users) and -ORs [<a href="#or-ih96" name="CITEor-ih96">27</a>,<a href="#or-jsac98" name="CITEor-jsac98">41</a>]. Tradeoffs between padding protection -and cost were discussed, and <em>traffic shaping</em> algorithms were -theorized [<a href="#or-pet00" name="CITEor-pet00">49</a>] to provide good security without expensive -padding, but no concrete padding scheme was suggested. -Recent research [<a href="#econymics" name="CITEeconymics">1</a>] -and deployment experience [<a href="#freedom21-security" name="CITEfreedom21-security">4</a>] suggest that this -level of resource use is not practical or economical; and even full -link padding is still vulnerable [<a href="#defensive-dropping" name="CITEdefensive-dropping">33</a>]. Thus, -until we have a proven and convenient design for traffic shaping or -low-latency mixing that improves anonymity against a realistic -adversary, we leave these strategies out. - -<div class="p"><!----></div> -<b>Many TCP streams can share one circuit:</b> Onion Routing originally -built a separate circuit for each -application-level request, but this required -multiple public key operations for every request, and also presented -a threat to anonymity from building so many circuits; see -Section <a href="#sec:maintaining-anonymity">9</a>. 
Tor multiplexes multiple TCP -streams along each circuit to improve efficiency and anonymity. - -<div class="p"><!----></div> -<b>Leaky-pipe circuit topology:</b> Through in-band signaling -within the circuit, Tor initiators can direct traffic to nodes partway -down the circuit. This novel approach -allows traffic to exit the circuit from the middle — possibly -frustrating traffic shape and volume attacks based on observing the end -of the circuit. (It also allows for long-range padding if -future research shows this to be worthwhile.) - -<div class="p"><!----></div> -<b>Congestion control:</b> Earlier anonymity designs do not -address traffic bottlenecks. Unfortunately, typical approaches to -load balancing and flow control in overlay networks involve inter-node -control communication and global views of traffic. Tor's decentralized -congestion control uses end-to-end acks to maintain anonymity -while allowing nodes at the edges of the network to detect congestion -or flooding and send less data until the congestion subsides. - -<div class="p"><!----></div> -<b>Directory servers:</b> The earlier Onion Routing design -planned to flood state information through the network — an approach -that can be unreliable and complex. Tor takes a simplified view toward distributing this -information. Certain more trusted nodes act as <em>directory -servers</em>: they provide signed directories describing known -routers and their current state. Users periodically download them -via HTTP. - -<div class="p"><!----></div> -<b>Variable exit policies:</b> Tor provides a consistent mechanism -for each node to advertise a policy describing the hosts -and ports to which it will connect. These exit policies are critical -in a volunteer-based distributed infrastructure, because each operator -is comfortable with allowing different types of traffic to exit -from his node. 
- -<div class="p"><!----></div> -<b>End-to-end integrity checking:</b> The original Onion Routing -design did no integrity checking on data. Any node on the -circuit could change the contents of data cells as they passed by — for -example, to alter a connection request so it would connect -to a different webserver, or to 'tag' encrypted traffic and look for -corresponding corrupted traffic at the network edges [<a href="#minion-design" name="CITEminion-design">15</a>]. -Tor hampers these attacks by verifying data integrity before it leaves -the network. - -<div class="p"><!----></div> - -<div class="p"><!----></div> -<b>Rendezvous points and hidden services:</b> -Tor provides an integrated mechanism for responder anonymity via -location-protected servers. Previous Onion Routing designs included -long-lived "reply onions" that could be used to build circuits -to a hidden server, but these reply onions did not provide forward -security, and became useless if any node in the path went down -or rotated its keys. In Tor, clients negotiate <i>rendezvous points</i> -to connect with hidden servers; reply onions are no longer required. - -<div class="p"><!----></div> -Unlike Freedom [<a href="#freedom2-arch" name="CITEfreedom2-arch">8</a>], Tor does not require OS kernel -patches or network stack support. This prevents us from anonymizing -non-TCP protocols, but has greatly helped our portability and -deployability. - -<div class="p"><!----></div> - -<div class="p"><!----></div> -We have implemented all of the above features, including rendezvous -points. Our source code is -available under a free license, and Tor -is not covered by the patent that affected distribution and use of -earlier versions of Onion Routing. -We have deployed a wide-area alpha network -to test the design, to get more experience with usability -and users, and to provide a research platform for experimentation. -As of this writing, the network stands at 32 nodes spread over two continents. 
- -<div class="p"><!----></div> -We review previous work in Section <a href="#sec:related-work">2</a>, describe -our goals and assumptions in Section <a href="#sec:assumptions">3</a>, -and then address the above list of improvements in -Sections <a href="#sec:design">4</a>, <a href="#sec:rendezvous">5</a>, and <a href="#sec:other-design">6</a>. -We summarize -in Section <a href="#sec:attacks">7</a> how our design stands up to -known attacks, and talk about our early deployment experiences in -Section <a href="#sec:in-the-wild">8</a>. We conclude with a list of open problems in -Section <a href="#sec:maintaining-anonymity">9</a> and future work for the Onion -Routing project in Section <a href="#sec:conclusion">10</a>. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h2><a name="tth_sEc2"> -<a name="sec:related-work"> -2</a> Related work</h2> -</a> - -<div class="p"><!----></div> -Modern anonymity systems date to Chaum's <b>Mix-Net</b> -design [<a href="#chaum-mix" name="CITEchaum-mix">10</a>]. Chaum -proposed hiding the correspondence between sender and recipient by -wrapping messages in layers of public-key cryptography, and relaying them -through a path composed of "mixes." Each mix in turn -decrypts, delays, and re-orders messages before relaying them -onward. - -<div class="p"><!----></div> -Subsequent relay-based anonymity designs have diverged in two -main directions. Systems like <b>Babel</b> [<a href="#babel" name="CITEbabel">28</a>], -<b>Mixmaster</b> [<a href="#mixmaster-spec" name="CITEmixmaster-spec">36</a>], -and <b>Mixminion</b> [<a href="#minion-design" name="CITEminion-design">15</a>] have tried -to maximize anonymity at the cost of introducing comparatively large and -variable latencies. Because of this decision, these <em>high-latency</em> -networks resist strong global adversaries, -but introduce too much lag for interactive tasks like web browsing, -Internet chat, or SSH connections. 
- -<div class="p"><!----></div> -Tor belongs to the second category: <em>low-latency</em> designs that -try to anonymize interactive network traffic. These systems handle -a variety of bidirectional protocols. They also provide more convenient -mail delivery than the high-latency anonymous email -networks, because the remote mail server provides explicit and timely -delivery confirmation. But because these designs typically -involve many packets that must be delivered quickly, it is -difficult for them to prevent an attacker who can eavesdrop on both ends of the -communication from correlating the timing and volume -of traffic entering the anonymity network with traffic leaving it [<a href="#SS03" name="CITESS03">45</a>]. -These -protocols are similarly vulnerable to an active adversary who introduces -timing patterns into traffic entering the network and looks -for correlated patterns among exiting traffic. -Although some work has been done to frustrate these attacks, most designs -protect primarily against traffic analysis rather than traffic -confirmation (see Section <a href="#subsec:threat-model">3.1</a>). - -<div class="p"><!----></div> -The simplest low-latency designs are single-hop proxies such as the -<b>Anonymizer</b> [<a href="#anonymizer" name="CITEanonymizer">3</a>]: a single trusted server strips the -data's origin before relaying it. These designs are easy to -analyze, but users must trust the anonymizing proxy. -Concentrating the traffic to this single point increases the anonymity set -(the people a given user is hiding among), but it is vulnerable if the -adversary can observe all traffic entering and leaving the proxy. - -<div class="p"><!----></div> -More complex are distributed-trust, circuit-based anonymizing systems. -In these designs, a user establishes one or more medium-term bidirectional -end-to-end circuits, and tunnels data in fixed-size cells. 
-Establishing circuits is computationally expensive and typically -requires public-key -cryptography, whereas relaying cells is comparatively inexpensive and -typically requires only symmetric encryption. -Because a circuit crosses several servers, and each server only knows -the adjacent servers in the circuit, no single server can link a -user to her communication partners. - -<div class="p"><!----></div> -The <b>Java Anon Proxy</b> (also known as JAP or Web MIXes) uses fixed shared -routes known as <em>cascades</em>. As with a single-hop proxy, this -approach aggregates users into larger anonymity sets, but again an -attacker only needs to observe both ends of the cascade to bridge all -the system's traffic. The Java Anon Proxy's design -calls for padding between end users and the head of the -cascade [<a href="#web-mix" name="CITEweb-mix">7</a>]. However, it is not demonstrated whether the current -implementation's padding policy improves anonymity. - -<div class="p"><!----></div> -<b>PipeNet</b> [<a href="#back01" name="CITEback01">5</a>,<a href="#pipenet" name="CITEpipenet">12</a>], another low-latency design proposed -around the same time as Onion Routing, gave -stronger anonymity but allowed a single user to shut -down the network by not sending. Systems like <b>ISDN -mixes</b> [<a href="#isdn-mixes" name="CITEisdn-mixes">38</a>] were designed for other environments with -different assumptions. - -<div class="p"><!----></div> -In P2P designs like <b>Tarzan</b> [<a href="#tarzan:ccs02" name="CITEtarzan:ccs02">24</a>] and -<b>MorphMix</b> [<a href="#morphmix:fc04" name="CITEmorphmix:fc04">43</a>], all participants both generate -traffic and relay traffic for others. These systems aim to conceal -whether a given peer originated a request -or just relayed it from another peer. 
While Tarzan and MorphMix use -layered encryption as above, <b>Crowds</b> [<a href="#crowds-tissec" name="CITEcrowds-tissec">42</a>] simply assumes -an adversary who cannot observe the initiator: it uses no public-key -encryption, so any node on a circuit can read users' traffic. - -<div class="p"><!----></div> -<b>Hordes</b> [<a href="#hordes-jcs" name="CITEhordes-jcs">34</a>] is based on Crowds but also uses multicast -responses to hide the initiator. <b>Herbivore</b> [<a href="#herbivore" name="CITEherbivore">25</a>] and -<b>P</b><sup><b>5</b></sup> [<a href="#p5" name="CITEp5">46</a>] go even further, requiring broadcast. -These systems are designed primarily for communication among peers, -although Herbivore users can make external connections by -requesting a peer to serve as a proxy. - -<div class="p"><!----></div> -Systems like <b>Freedom</b> and the original Onion Routing build circuits -all at once, using a layered "onion" of public-key encrypted messages, -each layer of which provides session keys and the address of the -next server in the circuit. Tor as described herein, Tarzan, MorphMix, -<b>Cebolla</b> [<a href="#cebolla" name="CITEcebolla">9</a>], and Rennhard's <b>Anonymity Network</b> [<a href="#anonnet" name="CITEanonnet">44</a>] -build circuits -in stages, extending them one hop at a time. -Section <a href="#subsubsec:constructing-a-circuit">4.2</a> describes how this -approach enables perfect forward secrecy. - -<div class="p"><!----></div> -Circuit-based designs must choose which protocol layer -to anonymize. They may intercept IP packets directly, and -relay them whole (stripping the source address) along the -circuit [<a href="#freedom2-arch" name="CITEfreedom2-arch">8</a>,<a href="#tarzan:ccs02" name="CITEtarzan:ccs02">24</a>]. 
Like -Tor, they may accept TCP streams and relay the data in those streams, -ignoring the breakdown of that data into TCP -segments [<a href="#morphmix:fc04" name="CITEmorphmix:fc04">43</a>,<a href="#anonnet" name="CITEanonnet">44</a>]. Finally, like Crowds, they may accept -application-level protocols such as HTTP and relay the application -requests themselves. -Making this protocol-layer decision requires a compromise between flexibility -and anonymity. For example, a system that understands HTTP -can strip -identifying information from requests, can take advantage of caching -to limit the number of requests that leave the network, and can batch -or encode requests to minimize the number of connections. -On the other hand, an IP-level anonymizer can handle nearly any protocol, -even ones unforeseen by its designers (though these systems require -kernel-level modifications to some operating systems, and so are more -complex and less portable). TCP-level anonymity networks like Tor present -a middle approach: they are application neutral (so long as the -application supports, or can be tunneled across, TCP), but by treating -application connections as data streams rather than raw TCP packets, -they avoid the inefficiencies of tunneling TCP over -TCP. - -<div class="p"><!----></div> -Distributed-trust anonymizing systems need to prevent attackers from -adding too many servers and thus compromising user paths. -Tor relies on a small set of well-known directory servers, run by -independent parties, to decide which nodes can -join. Tarzan and MorphMix allow unknown users to run servers, and use -a limited resource (like IP addresses) to prevent an attacker from -controlling too much of the network. Crowds suggests requiring -written, notarized requests from potential crowd members. 
- -<div class="p"><!----></div> -Anonymous communication is essential for censorship-resistant -systems like Eternity [<a href="#eternity" name="CITEeternity">2</a>], Free Haven [<a href="#freehaven-berk" name="CITEfreehaven-berk">19</a>], -Publius [<a href="#publius" name="CITEpublius">53</a>], and Tangler [<a href="#tangler" name="CITEtangler">52</a>]. Tor's rendezvous -points enable connections between mutually anonymous entities; they -are a building block for location-hidden servers, which are needed by -Eternity and Free Haven. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h2><a name="tth_sEc3"> -<a name="sec:assumptions"> -3</a> Design goals and assumptions</h2> -</a> - -<div class="p"><!----></div> -<font size="+1"><b>Goals</b></font><br /> -Like other low-latency anonymity designs, Tor seeks to frustrate -attackers from linking communication partners, or from linking -multiple communications to or from a single user. Within this -main goal, however, several considerations have directed -Tor's evolution. - -<div class="p"><!----></div> -<b>Deployability:</b> The design must be deployed and used in the -real world. Thus it -must not be expensive to run (for example, by requiring more bandwidth -than volunteers are willing to provide); must not place a heavy -liability burden on operators (for example, by allowing attackers to -implicate onion routers in illegal activities); and must not be -difficult or expensive to implement (for example, by requiring kernel -patches, or separate proxies for every protocol). We also cannot -require non-anonymous parties (such as websites) -to run our software. (Our rendezvous point design does not meet -this goal for non-anonymous users talking to hidden servers, -however; see Section <a href="#sec:rendezvous">5</a>.) 
- -<div class="p"><!----></div> -<b>Usability:</b> A hard-to-use system has fewer users — and because -anonymity systems hide users among users, a system with fewer users -provides less anonymity. Usability is thus not only a convenience: -it is a security requirement [<a href="#econymics" name="CITEeconymics">1</a>,<a href="#back01" name="CITEback01">5</a>]. Tor should -therefore not -require modifying familiar applications; should not introduce prohibitive -delays; -and should require as few configuration decisions -as possible. Finally, Tor should be easily implementable on all common -platforms; we cannot require users to change their operating system -to be anonymous. (Tor currently runs on Win32, Linux, -Solaris, BSD-style Unix, MacOS X, and probably others.) - -<div class="p"><!----></div> -<b>Flexibility:</b> The protocol must be flexible and well-specified, -so Tor can serve as a test-bed for future research. -Many of the open problems in low-latency anonymity -networks, such as generating dummy traffic or preventing Sybil -attacks [<a href="#sybil" name="CITEsybil">22</a>], may be solvable independently from the issues -solved by -Tor. Hopefully future systems will not need to reinvent Tor's design. - -<div class="p"><!----></div> -<b>Simple design:</b> The protocol's design and security -parameters must be well-understood. Additional features impose implementation -and complexity costs; adding unproven techniques to the design threatens -deployability, readability, and ease of security analysis. Tor aims to -deploy a simple and stable system that integrates the best accepted -approaches to protecting anonymity.<br /> - -<div class="p"><!----></div> -<font size="+1"><b>Non-goals</b></font><a name="subsec:non-goals"> -</a><br /> -In favoring simple, deployable designs, we have explicitly deferred -several possible goals, either because they are solved elsewhere, or because -they are not yet solved. 
- -<div class="p"><!----></div> -<b>Not peer-to-peer:</b> Tarzan and MorphMix aim to scale to completely -decentralized peer-to-peer environments with thousands of short-lived -servers, many of which may be controlled by an adversary. This approach -is appealing, but still has many open -problems [<a href="#tarzan:ccs02" name="CITEtarzan:ccs02">24</a>,<a href="#morphmix:fc04" name="CITEmorphmix:fc04">43</a>]. - -<div class="p"><!----></div> -<b>Not secure against end-to-end attacks:</b> Tor does not claim -to completely solve end-to-end timing or intersection -attacks. Some approaches, such as having users run their own onion routers, -may help; -see Section <a href="#sec:maintaining-anonymity">9</a> for more discussion. - -<div class="p"><!----></div> -<b>No protocol normalization:</b> Tor does not provide <em>protocol -normalization</em> like Privoxy or the Anonymizer. If senders want anonymity from -responders while using complex and variable -protocols like HTTP, Tor must be layered with a filtering proxy such -as Privoxy to hide differences between clients, and expunge protocol -features that leak identity. -Note that by this separation Tor can also provide services that -are anonymous to the network yet authenticated to the responder, like -SSH. Similarly, Tor does not integrate -tunneling for non-stream-based protocols like UDP; this must be -provided by an external service if appropriate. - -<div class="p"><!----></div> -<b>Not steganographic:</b> Tor does not try to conceal who is connected -to the network. - -<div class="p"><!----></div> - <h3><a name="tth_sEc3.1"> -<a name="subsec:threat-model"> -3.1</a> Threat Model</h3> -</a> - -<div class="p"><!----></div> -A global passive adversary is the most commonly assumed threat when -analyzing theoretical anonymity designs. But like all practical -low-latency systems, Tor does not protect against such a strong -adversary. 
Instead, we assume an adversary who can observe some fraction -of network traffic; who can generate, modify, delete, or delay -traffic; who can operate onion routers of his own; and who can -compromise some fraction of the onion routers. - -<div class="p"><!----></div> -In low-latency anonymity systems that use layered encryption, the -adversary's typical goal is to observe both the initiator and the -responder. By observing both ends, passive attackers can confirm a -suspicion that Alice is -talking to Bob if the timing and volume patterns of the traffic on the -connection are distinct enough; active attackers can induce timing -signatures on the traffic to force distinct patterns. Rather -than focusing on these <em>traffic confirmation</em> attacks, -we aim to prevent <em>traffic -analysis</em> attacks, where the adversary uses traffic patterns to learn -which points in the network he should attack. - -<div class="p"><!----></div> -Our adversary might try to link an initiator Alice with her -communication partners, or try to build a profile of Alice's -behavior. He might mount passive attacks by observing the network edges -and correlating traffic entering and leaving the network — by -relationships in packet timing, volume, or externally visible -user-selected -options. The adversary can also mount active attacks by compromising -routers or keys; by replaying traffic; by selectively denying service -to trustworthy routers to move users to -compromised routers, or denying service to users to see if traffic -elsewhere in the -network stops; or by introducing patterns into traffic that can later be -detected. The adversary might subvert the directory servers to give users -differing views of network state. 
Additionally, he can try to decrease -the network's reliability by attacking nodes or by performing antisocial -activities from reliable nodes and trying to get them taken down — making -the network unreliable flushes users to other less anonymous -systems, where they may be easier to attack. We summarize -in Section <a href="#sec:attacks">7</a> how well the Tor design defends against -each of these attacks. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h2><a name="tth_sEc4"> -<a name="sec:design"> -4</a> The Tor Design</h2> -</a> - -<div class="p"><!----></div> -The Tor network is an overlay network; each onion router (OR) -runs as a normal -user-level process without any special privileges. -Each onion router maintains a TLS [<a href="#TLS" name="CITETLS">17</a>] -connection to every other onion router. -Each user -runs local software called an onion proxy (OP) to fetch directories, -establish circuits across the network, -and handle connections from user applications. These onion proxies accept -TCP streams and multiplex them across the circuits. The onion -router on the other side -of the circuit connects to the requested destinations -and relays data. - -<div class="p"><!----></div> -Each onion router maintains a long-term identity key and a short-term -onion key. The identity -key is used to sign TLS certificates, to sign the OR's <em>router -descriptor</em> (a summary of its keys, address, bandwidth, exit policy, -and so on), and (by directory servers) to sign directories. The onion key is used to decrypt requests -from users to set up a circuit and negotiate ephemeral keys. -The TLS protocol also establishes a short-term link key when communicating -between ORs. Short-term keys are rotated periodically and -independently, to limit the impact of key compromise. - -<div class="p"><!----></div> -Section <a href="#subsec:cells">4.1</a> presents the fixed-size -<em>cells</em> that are the unit of communication in Tor. 
We describe -in Section <a href="#subsec:circuits">4.2</a> how circuits are -built, extended, truncated, and destroyed. Section <a href="#subsec:tcp">4.3</a> -describes how TCP streams are routed through the network. We address -integrity checking in Section <a href="#subsec:integrity-checking">4.4</a>, -and resource limiting in Section <a href="#subsec:rate-limit">4.5</a>. -Finally, -Section <a href="#subsec:congestion">4.6</a> talks about congestion control and -fairness issues. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.1"> -<a name="subsec:cells"> -4.1</a> Cells</h3> -</a> - -<div class="p"><!----></div> -Onion routers communicate with one another, and with users' OPs, via -TLS connections with ephemeral keys. Using TLS conceals the data on -the connection with perfect forward secrecy, and prevents an attacker -from modifying data on the wire or impersonating an OR. - -<div class="p"><!----></div> -Traffic passes along these connections in fixed-size cells. Each cell -is 512 bytes, and consists of a header and a payload. The header includes a circuit -identifier (circID) that specifies which circuit the cell refers to -(many circuits can be multiplexed over the single TLS connection), and -a command to describe what to do with the cell's payload. (Circuit -identifiers are connection-specific: each circuit has a different -circID on each OP/OR or OR/OR connection it traverses.) -Based on their command, cells are either <em>control</em> cells, which are -always interpreted by the node that receives them, or <em>relay</em> cells, -which carry end-to-end stream data. The control cell commands are: -<em>padding</em> (currently used for keepalive, but also usable for link -padding); <em>create</em> or <em>created</em> (used to set up a new circuit); -and <em>destroy</em> (to tear down a circuit). 
- -<div class="p"><!----></div> -Relay cells have an additional header (the relay header) at the front -of the payload, containing a streamID (stream identifier: many streams can -be multiplexed over a circuit); an end-to-end checksum for integrity -checking; the length of the relay payload; and a relay command. -The entire contents of the relay header and the relay cell payload -are encrypted or decrypted together as the relay cell moves along the -circuit, using the 128-bit AES cipher in counter mode to generate a -cipher stream. The relay commands are: <em>relay -data</em> (for data flowing down the stream), <em>relay begin</em> (to open a -stream), <em>relay end</em> (to close a stream cleanly), <em>relay -teardown</em> (to close a broken stream), <em>relay connected</em> -(to notify the OP that a relay begin has succeeded), <em>relay -extend</em> and <em>relay extended</em> (to extend the circuit by a hop, -and to acknowledge), <em>relay truncate</em> and <em>relay truncated</em> -(to tear down only part of the circuit, and to acknowledge), <em>relay -sendme</em> (used for congestion control), and <em>relay drop</em> (used to -implement long-range dummies). -We give a visual overview of cell structure plus the details of relay -cell structure, and then describe each of these cell types and commands -in more detail below. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> -<a name="tth_fIg1"> -</a> <center><img src="cell-struct.png" alt="cell-struct.png" /> -</center> -<div class="p"><!----></div> - <h3><a name="tth_sEc4.2"> -<a name="subsec:circuits"> -4.2</a> Circuits and streams</h3> -</a> - -<div class="p"><!----></div> -Onion Routing originally built one circuit for each -TCP stream. Because building a circuit can take several tenths of a -second (due to public-key cryptography and network latency), -this design imposed high costs on applications like web browsing that -open many TCP streams. 
- -<div class="p"><!----></div> -In Tor, each circuit can be shared by many TCP streams. To avoid -delays, users construct circuits preemptively. To limit linkability -among their streams, users' OPs build a new circuit -periodically if the previous ones have been used, -and expire old used circuits that no longer have any open streams. -OPs consider rotating to a new circuit once a minute: thus -even heavy users spend negligible time -building circuits, but a limited number of requests can be linked -to each other through a given exit node. Also, because circuits are built -in the background, OPs can recover from failed circuit creation -without harming user experience.<br /> - -<div class="p"><!----></div> - -<div class="p"><!----></div> -<a name="tth_fIg1"> -</a> <center><img src="interaction.png" alt="interaction.png" /> - -<center>Figure 1: Alice builds a two-hop circuit and begins fetching a web page.</center> -<a name="fig:interaction"> -</a> -</center> -<div class="p"><!----></div> -<a name="subsubsec:constructing-a-circuit"></a> -<font size="+1"><b>Constructing a circuit</b></font> -<br /> -A user's OP constructs circuits incrementally, negotiating a -symmetric key with each OR on the circuit, one hop at a time. To begin -creating a new circuit, the OP (call her Alice) sends a -<em>create</em> cell to the first node in her chosen path (call him Bob). -(She chooses a new -circID C<sub>AB</sub> not currently used on the connection from her to Bob.) -The <em>create</em> cell's -payload contains the first half of the Diffie-Hellman handshake -(g<sup>x</sup>), encrypted to the onion key of Bob. Bob -responds with a <em>created</em> cell containing g<sup>y</sup> -along with a hash of the negotiated key K=g<sup>xy</sup>. 
- -<div class="p"><!----></div> -Once the circuit has been established, Alice and Bob can send one -another relay cells encrypted with the negotiated -key.<a href="#tthFtNtAAB" name="tthFrefAAB"><sup>1</sup></a> More detail is given in -the next section. - -<div class="p"><!----></div> -To extend the circuit further, Alice sends a <em>relay extend</em> cell -to Bob, specifying the address of the next OR (call her Carol), and -an encrypted g<sup>x<sub>2</sub></sup> for her. Bob copies the half-handshake into a -<em>create</em> cell, and passes it to Carol to extend the circuit. -(Bob chooses a new circID C<sub>BC</sub> not currently used on the connection -between him and Carol. Alice never needs to know this circID; only Bob -associates C<sub>AB</sub> on his connection with Alice to C<sub>BC</sub> on -his connection with Carol.) -When Carol responds with a <em>created</em> cell, Bob wraps the payload -into a <em>relay extended</em> cell and passes it back to Alice. Now -the circuit is extended to Carol, and Alice and Carol share a common key -K<sub>2</sub> = g<sup>x<sub>2</sub> y<sub>2</sub></sup>. - -<div class="p"><!----></div> -To extend the circuit to a third node or beyond, Alice -proceeds as above, always telling the last node in the circuit to -extend one hop further. - -<div class="p"><!----></div> -This circuit-level handshake protocol achieves unilateral entity -authentication (Alice knows she's handshaking with the OR, but -the OR doesn't care who is opening the circuit — Alice uses no public key -and remains anonymous) and unilateral key authentication -(Alice and the OR agree on a key, and Alice knows only the OR learns -it). It also achieves forward -secrecy and key freshness. 
More formally, the protocol is as follows -(where E<sub>PK<sub>Bob</sub></sub>(·) is encryption with Bob's public key, -H is a secure hash function, and <font face="symbol">|</font -> is concatenation): - -<div class="p"><!----></div> -<a name="tth_tAb1"> -</a> -<table> -<tr><td align="right">Alice </td><td align="center">-> </td><td align="center">Bob </td><td>: E<sub>PK<sub>Bob</sub></sub>(g<sup>x</sup>) </td></tr> -<tr><td align="right">Bob </td><td align="center">-> </td><td align="center">Alice </td><td>: g<sup>y</sup>, H(K <font face="symbol">|</font -> "<span class="roman">handshake</span>") -</td></tr></table> - - -<div class="p"><!----></div> - In the second step, Bob proves that it was he who received g<sup>x</sup>, -and who chose y. We use PK encryption in the first step -(rather than, say, using the first two steps of STS, which has a -signature in the second step) because a single cell is too small to -hold both a public key and a signature. Preliminary analysis with the -NRL protocol analyzer [<a href="#meadows96" name="CITEmeadows96">35</a>] shows this protocol to be -secure (including perfect forward secrecy) under the -traditional Dolev-Yao model.<br /> - -<div class="p"><!----></div> -<font size="+1"><b>Relay cells</b></font><br /> -Once Alice has established the circuit (so she shares keys with each -OR on the circuit), she can send relay cells. -Upon receiving a relay -cell, an OR looks up the corresponding circuit, and decrypts the relay -header and payload with the session key for that circuit. -If the cell is headed away from Alice the OR then checks whether the -decrypted cell has a valid digest (as an optimization, the first -two bytes of the integrity check are zero, so in most cases we can avoid -computing the hash). -If valid, it accepts the relay cell and processes it as described -below. 
Otherwise, -the OR looks up the circID and OR for the -next step in the circuit, replaces the circID as appropriate, and -sends the decrypted relay cell to the next OR. (If the OR at the end -of the circuit receives an unrecognized relay cell, an error has -occurred, and the circuit is torn down.) - -<div class="p"><!----></div> -OPs treat incoming relay cells similarly: they iteratively unwrap the -relay header and payload with the session keys shared with each -OR on the circuit, from the closest to farthest. -If at any stage the digest is valid, the cell must have -originated at the OR whose encryption has just been removed. - -<div class="p"><!----></div> -To construct a relay cell addressed to a given OR, Alice assigns the -digest, and then iteratively -encrypts the cell payload (that is, the relay header and payload) with -the symmetric key of each hop up to that OR. Because the digest is -encrypted to a different value at each step, only at the targeted OR -will it have a meaningful value.<a href="#tthFtNtAAC" name="tthFrefAAC"><sup>2</sup></a> -This <em>leaky pipe</em> circuit topology -allows Alice's streams to exit at different ORs on a single circuit. -Alice may choose different exit points because of their exit policies, -or to keep the ORs from knowing that two streams -originate from the same person. - -<div class="p"><!----></div> -When an OR later replies to Alice with a relay cell, it -encrypts the cell's relay header and payload with the single key it -shares with Alice, and sends the cell back toward Alice along the -circuit. Subsequent ORs add further layers of encryption as they -relay the cell back to Alice. - -<div class="p"><!----></div> -To tear down a circuit, Alice sends a <em>destroy</em> control -cell. Each OR in the circuit receives the <em>destroy</em> cell, closes -all streams on that circuit, and passes a new <em>destroy</em> cell -forward. 
But just as circuits are built incrementally, they can also -be torn down incrementally: Alice can send a <em>relay -truncate</em> cell to a single OR on a circuit. That OR then sends a -<em>destroy</em> cell forward, and acknowledges with a -<em>relay truncated</em> cell. Alice can then extend the circuit to -different nodes, without signaling to the intermediate nodes (or -a limited observer) that she has changed her circuit. -Similarly, if a node on the circuit goes down, the adjacent -node can send a <em>relay truncated</em> cell back to Alice. Thus the -"break a node and see which circuits go down" -attack [<a href="#freedom21-security" name="CITEfreedom21-security">4</a>] is weakened. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.3"> -<a name="subsec:tcp"> -4.3</a> Opening and closing streams</h3> -</a> - -<div class="p"><!----></div> -When Alice's application wants a TCP connection to a given -address and port, it asks the OP (via SOCKS) to make the -connection. The OP chooses the newest open circuit (or creates one if -needed), and chooses a suitable OR on that circuit to be the -exit node (usually the last node, but maybe others due to exit policy -conflicts; see Section <a href="#subsec:exitpolicies">6.2</a>.) The OP then opens -the stream by sending a <em>relay begin</em> cell to the exit node, -using a new random streamID. Once the -exit node connects to the remote host, it responds -with a <em>relay connected</em> cell. Upon receipt, the OP sends a -SOCKS reply to notify the application of its success. The OP -now accepts data from the application's TCP stream, packaging it into -<em>relay data</em> cells and sending those cells along the circuit to -the chosen OR. - -<div class="p"><!----></div> -There's a catch to using SOCKS, however — some applications pass the -alphanumeric hostname to the Tor client, while others resolve it into -an IP address first and then pass the IP address to the Tor client. 
If -the application does DNS resolution first, Alice thereby reveals her -destination to the remote DNS server, rather than sending the hostname -through the Tor network to be resolved at the far end. Common applications -like Mozilla and SSH have this flaw. - -<div class="p"><!----></div> -With Mozilla, the flaw is easy to address: the filtering HTTP -proxy called Privoxy gives a hostname to the Tor client, so Alice's -computer never does DNS resolution. -But a portable general solution, such as is needed for -SSH, is -an open problem. Modifying or replacing the local nameserver -can be invasive, brittle, and unportable. Forcing the resolver -library to prefer TCP rather than UDP is hard, and also has -portability problems. Dynamically intercepting system calls to the -resolver library seems a promising direction. We could also provide -a tool similar to <em>dig</em> to perform a private lookup through the -Tor network. Currently, we encourage the use of privacy-aware proxies -like Privoxy wherever possible. - -<div class="p"><!----></div> -Closing a Tor stream is analogous to closing a TCP stream: it uses a -two-step handshake for normal operation, or a one-step handshake for -errors. If the stream closes abnormally, the adjacent node simply sends a -<em>relay teardown</em> cell. If the stream closes normally, the node sends -a <em>relay end</em> cell down the circuit, and the other side responds with -its own <em>relay end</em> cell. Because -all relay cells use layered encryption, only the destination OR knows -that a given relay cell is a request to close a stream. This two-step -handshake allows Tor to support TCP-based applications that use half-closed -connections. 
- -<div class="p"><!----></div> - <h3><a name="tth_sEc4.4"> -<a name="subsec:integrity-checking"> -4.4</a> Integrity checking on streams</h3> -</a> - -<div class="p"><!----></div> -Because the old Onion Routing design used a stream cipher without integrity -checking, traffic was -vulnerable to a malleability attack: though the attacker could not -decrypt cells, any changes to encrypted data -would create corresponding changes to the data leaving the network. -This weakness allowed an adversary who could guess the encrypted content -to change a padding cell to a destroy -cell; change the destination address in a <em>relay begin</em> cell to the -adversary's webserver; or change an FTP command from -<tt>dir</tt> to <tt>rm *</tt>. (Even an external -adversary could do this, because the link encryption similarly used a -stream cipher.) - -<div class="p"><!----></div> -Because Tor uses TLS on its links, external adversaries cannot modify -data. Addressing the insider malleability attack, however, is -more complex. - -<div class="p"><!----></div> -We could do integrity checking of the relay cells at each hop, either -by including hashes or by using an authenticating cipher mode like -EAX [<a href="#eax" name="CITEeax">6</a>], but there are some problems. First, these approaches -impose a message-expansion overhead at each hop, and so we would have to -either leak the path length or waste bytes by padding to a maximum -path length. Second, these solutions can only verify traffic coming -from Alice: ORs would not be able to produce suitable hashes for -the intermediate hops, since the ORs on a circuit do not know the -other ORs' session keys. Third, we have already accepted that our design -is vulnerable to end-to-end timing attacks; so tagging attacks performed -within the circuit provide no additional information to the attacker. - -<div class="p"><!----></div> -Thus, we check integrity only at the edges of each stream. 
(Remember that -in our leaky-pipe circuit topology, a stream's edge could be any hop -in the circuit.) When Alice -negotiates a key with a new hop, they each initialize a SHA-1 -digest with a derivative of that key, -thus beginning with randomness that only the two of them know. -Then they each incrementally add to the SHA-1 digest the contents of -all relay cells they create, and include with each relay cell the -first four bytes of the current digest. Each also keeps a SHA-1 -digest of data received, to verify that the received hashes are correct. - -<div class="p"><!----></div> -To be sure of removing or modifying a cell, the attacker must be able -to deduce the current digest state (which depends on all -traffic between Alice and Bob, starting with their negotiated key). -Attacks on SHA-1 where the adversary can incrementally add to a hash -to produce a new valid hash don't work, because all hashes are -end-to-end encrypted across the circuit. The computational overhead -of computing the digests is minimal compared to doing the AES -encryption performed at each hop of the circuit. We use only four -bytes per cell to minimize overhead; the chance that an adversary will -correctly guess a valid hash -is -acceptably low, given that the OP or OR tear down the circuit if they -receive a bad hash. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.5"> -<a name="subsec:rate-limit"> -4.5</a> Rate limiting and fairness</h3> -</a> - -<div class="p"><!----></div> -Volunteers are more willing to run services that can limit -their bandwidth usage. To accommodate them, Tor servers use a -token bucket approach [<a href="#tannenbaum96" name="CITEtannenbaum96">50</a>] to -enforce a long-term average rate of incoming bytes, while still -permitting short-term bursts above the allowed bandwidth. 
- -<div class="p"><!----></div> - -<div class="p"><!----></div> -Because the Tor protocol outputs about the same number of bytes as it -takes in, it is sufficient in practice to limit only incoming bytes. -With TCP streams, however, the correspondence is not one-to-one: -relaying a single incoming byte can require an entire 512-byte cell. -(We can't just wait for more bytes, because the local application may -be awaiting a reply.) Therefore, we treat this case as if the entire -cell size had been read, regardless of the cell's fullness. - -<div class="p"><!----></div> -Further, inspired by Rennhard et al.'s design in [<a href="#anonnet" name="CITEanonnet">44</a>], a -circuit's edges can heuristically distinguish interactive streams from bulk -streams by comparing the frequency with which they supply cells. We can -provide good latency for interactive streams by giving them preferential -service, while still giving good overall throughput to the bulk -streams. Such preferential treatment presents a possible end-to-end -attack, but an adversary observing both -ends of the stream can already learn this information through timing -attacks. - -<div class="p"><!----></div> - <h3><a name="tth_sEc4.6"> -<a name="subsec:congestion"> -4.6</a> Congestion control</h3> -</a> - -<div class="p"><!----></div> -Even with bandwidth rate limiting, we still need to worry about -congestion, either accidental or intentional. If enough users choose the -same OR-to-OR connection for their circuits, that connection can become -saturated. For example, an attacker could send a large file -through the Tor network to a webserver he runs, and then -refuse to read any of the bytes at the webserver end of the -circuit. Without some congestion control mechanism, these bottlenecks -can propagate back through the entire network.
We don't need to -reimplement full TCP windows (with sequence numbers, -the ability to drop cells when we're full and retransmit later, and so -on), -because TCP already guarantees in-order delivery of each -cell. -We describe our response below. - -<div class="p"><!----></div> -<b>Circuit-level throttling:</b> -To control a circuit's bandwidth usage, each OR keeps track of two -windows. The <em>packaging window</em> tracks how many relay data cells the OR is -allowed to package (from incoming TCP streams) for transmission back to the OP, -and the <em>delivery window</em> tracks how many relay data cells it is willing -to deliver to TCP streams outside the network. Each window is initialized -(say, to 1000 data cells). When a data cell is packaged or delivered, -the appropriate window is decremented. When an OR has received enough -data cells (currently 100), it sends a <em>relay sendme</em> cell towards the OP, -with streamID zero. When an OR receives a <em>relay sendme</em> cell with -streamID zero, it increments its packaging window. Either of these cells -increments the corresponding window by 100. If the packaging window -reaches 0, the OR stops reading from TCP connections for all streams -on the corresponding circuit, and sends no more relay data cells until -receiving a <em>relay sendme</em> cell. - -<div class="p"><!----></div> -The OP behaves identically, except that it must track a packaging window -and a delivery window for every OR in the circuit. If a packaging window -reaches 0, it stops reading from streams destined for that OR. - -<div class="p"><!----></div> -<b>Stream-level throttling</b>: -The stream-level congestion control mechanism is similar to the -circuit-level mechanism. ORs and OPs use <em>relay sendme</em> cells -to implement end-to-end flow control for individual streams across -circuits. 
Each stream begins with a packaging window (currently 500 cells), -and increments the window by a fixed value (50) upon receiving a <em>relay -sendme</em> cell. Rather than always returning a <em>relay sendme</em> cell as soon -as enough cells have arrived, the stream-level congestion control also -has to check whether data has been successfully flushed onto the TCP -stream; it sends the <em>relay sendme</em> cell only when the number of bytes pending -to be flushed is under some threshold (currently 10 cells' worth). - -<div class="p"><!----></div> - -<div class="p"><!----></div> -These arbitrarily chosen parameters seem to give tolerable throughput -and delay; see Section <a href="#sec:in-the-wild">8</a>. - -<div class="p"><!----></div> - <h2><a name="tth_sEc5"> -<a name="sec:rendezvous"> -5</a> Rendezvous Points and hidden services</h2> -</a> - -<div class="p"><!----></div> -Rendezvous points are a building block for <em>location-hidden -services</em> (also known as <em>responder anonymity</em>) in the Tor -network. Location-hidden services allow Bob to offer a TCP -service, such as a webserver, without revealing his IP address. -This type of anonymity protects against distributed DoS attacks: -attackers are forced to attack the onion routing network -because they do not know Bob's IP address. - -<div class="p"><!----></div> -Our design for location-hidden servers has the following goals. -<b>Access-control:</b> Bob needs a way to filter incoming requests, -so an attacker cannot flood Bob simply by making many connections to him. -<b>Robustness:</b> Bob should be able to maintain a long-term pseudonymous -identity even in the presence of router failure. Bob's service must -not be tied to a single OR, and Bob must be able to migrate his service -across ORs. 
<b>Smear-resistance:</b> -A social attacker -should not be able to "frame" a rendezvous router by -offering an illegal or disreputable location-hidden service and -making observers believe the router created that service. -<b>Application-transparency:</b> Although we require users -to run special software to access location-hidden servers, we must not -require them to modify their applications. - -<div class="p"><!----></div> -We provide location-hiding for Bob by allowing him to advertise -several onion routers (his <em>introduction points</em>) as contact -points. He may do this on any robust efficient -key-value lookup system with authenticated updates, such as a -distributed hash table (DHT) like CFS [<a href="#cfs:sosp01" name="CITEcfs:sosp01">11</a>].<a href="#tthFtNtAAD" name="tthFrefAAD"><sup>3</sup></a> Alice, the client, chooses an OR as her -<em>rendezvous point</em>. She connects to one of Bob's introduction -points, informs him of her rendezvous point, and then waits for him -to connect to the rendezvous point. This extra level of indirection -helps Bob's introduction points avoid problems associated with serving -unpopular files directly (for example, if Bob serves -material that the introduction point's community finds objectionable, -or if Bob's service tends to get attacked by network vandals). -The extra level of indirection also allows Bob to respond to some requests -and ignore others. - -<div class="p"><!----></div> - <h3><a name="tth_sEc5.1"> -5.1</a> Rendezvous points in Tor</h3> - -<div class="p"><!----></div> -The following steps are -performed on behalf of Alice and Bob by their local OPs; -application integration is described more fully below. 
- -<div class="p"><!----></div> - -<dl compact="compact"> - - <dt><b></b></dt> - <dd><li>Bob generates a long-term public key pair to identify his service.</dd> - <dt><b></b></dt> - <dd><li>Bob chooses some introduction points, and advertises them on - the lookup service, signing the advertisement with his public key. He - can add more later.</dd> - <dt><b></b></dt> - <dd><li>Bob builds a circuit to each of his introduction points, and tells - them to wait for requests.</dd> - <dt><b></b></dt> - <dd><li>Alice learns about Bob's service out of band (perhaps Bob told her, - or she found it on a website). She retrieves the details of Bob's - service from the lookup service. If Alice wants to access Bob's - service anonymously, she must connect to the lookup service via Tor.</dd> - <dt><b></b></dt> - <dd><li>Alice chooses an OR as the rendezvous point (RP) for her connection to - Bob's service. She builds a circuit to the RP, and gives it a - randomly chosen "rendezvous cookie" to recognize Bob.</dd> - <dt><b></b></dt> - <dd><li>Alice opens an anonymous stream to one of Bob's introduction - points, and gives it a message (encrypted with Bob's public key) - telling it about herself, - her RP and rendezvous cookie, and the - start of a DH - handshake. The introduction point sends the message to Bob.</dd> - <dt><b></b></dt> - <dd><li>If Bob wants to talk to Alice, he builds a circuit to Alice's - RP and sends the rendezvous cookie, the second half of the DH - handshake, and a hash of the session - key they now share. By the same argument as in - Section <a href="#subsubsec:constructing-a-circuit">4.2</a>, Alice knows she - shares the key only with Bob.</dd> - <dt><b></b></dt> - <dd><li>The RP connects Alice's circuit to Bob's. Note that RP can't - recognize Alice, Bob, or the data they transmit.</dd> - <dt><b></b></dt> - <dd><li>Alice sends a <em>relay begin</em> cell along the circuit. 
It - arrives at Bob's OP, which connects to Bob's - webserver.</dd> - <dt><b></b></dt> - <dd><li>An anonymous stream has been established, and Alice and Bob - communicate as normal. -</dd> -</dl> - -<div class="p"><!----></div> -When establishing an introduction point, Bob provides the onion router -with the public key identifying his service. Bob signs his -messages, so others cannot usurp his introduction point -in the future. He uses the same public key to establish the other -introduction points for his service, and periodically refreshes his -entry in the lookup service. - -<div class="p"><!----></div> -The message that Alice gives -the introduction point includes a hash of Bob's public key and an optional initial authorization token (the -introduction point can do prescreening, for example to block replays). Her -message to Bob may include an end-to-end authorization token so Bob -can choose whether to respond. -The authorization tokens can be used to provide selective access: -important users can get uninterrupted access. -During normal situations, Bob's service might simply be offered -directly from mirrors, while Bob gives out tokens to high-priority users. If -the mirrors are knocked down, -those users can switch to accessing Bob's service via -the Tor rendezvous system. - -<div class="p"><!----></div> -Bob's introduction points are themselves subject to DoS — he must -open many introduction points or risk such an attack. -He can provide selected users with a current list or future schedule of -unadvertised introduction points; -this is most practical -if there is a stable and large group of introduction points -available. Bob could also give secret public keys -for consulting the lookup service. All of these approaches -limit exposure even when -some selected users collude in the DoS. 
- -<div class="p"><!----></div> - <h3><a name="tth_sEc5.2"> -5.2</a> Integration with user applications</h3> - -<div class="p"><!----></div> -Bob configures his onion proxy to know the local IP address and port of his -service, a strategy for authorizing clients, and his public key. The onion -proxy anonymously publishes a signed statement of Bob's -public key, an expiration time, and -the current introduction points for his service onto the lookup service, -indexed -by the hash of his public key. Bob's webserver is unmodified, -and doesn't even know that it's hidden behind the Tor network. - -<div class="p"><!----></div> -Alice's applications also work unchanged — her client interface -remains a SOCKS proxy. We encode all of the necessary information -into the fully qualified domain name (FQDN) Alice uses when establishing her -connection. Location-hidden services use a virtual top level domain -called <tt>.onion</tt>: thus hostnames take the form <tt>x.y.onion</tt> where -<tt>x</tt> is the authorization cookie and <tt>y</tt> encodes the hash of -the public key. Alice's onion proxy -examines addresses; if they're destined for a hidden server, it decodes -the key and starts the rendezvous as described above. - -<div class="p"><!----></div> - <h3><a name="tth_sEc5.3"> -5.3</a> Previous rendezvous work</h3> - -<div class="p"><!----></div> -Rendezvous points in low-latency anonymity systems were first -described for use in ISDN telephony [<a href="#jerichow-jsac98" name="CITEjerichow-jsac98">30</a>,<a href="#isdn-mixes" name="CITEisdn-mixes">38</a>]. -Later low-latency designs used rendezvous points for hiding location -of mobile phones and low-power location -trackers [<a href="#federrath-ih96" name="CITEfederrath-ih96">23</a>,<a href="#reed-protocols97" name="CITEreed-protocols97">40</a>]. 
Rendezvous for -anonymizing low-latency -Internet connections was suggested in early Onion Routing -work [<a href="#or-ih96" name="CITEor-ih96">27</a>], but the first published design was by Ian -Goldberg [<a href="#ian-thesis" name="CITEian-thesis">26</a>]. His design differs from -ours in three ways. First, Goldberg suggests that Alice should manually -hunt down a current location of the service via Gnutella; our approach -makes lookup transparent to the user, as well as faster and more robust. -Second, in Tor the client and server negotiate session keys -with Diffie-Hellman, so plaintext is not exposed even at the rendezvous -point. Third, -our design minimizes the exposure from running the -service, to encourage volunteers to offer introduction and rendezvous -services. Tor's introduction points do not output any bytes to the -clients; the rendezvous points don't know the client or the server, -and can't read the data being transmitted. The indirection scheme is -also designed to include authentication/authorization — if Alice doesn't -include the right cookie with her request for service, Bob need not even -acknowledge his existence. - -<div class="p"><!----></div> - <h2><a name="tth_sEc6"> -<a name="sec:other-design"> -6</a> Other design decisions</h2> -</a> - -<div class="p"><!----></div> - <h3><a name="tth_sEc6.1"> -<a name="subsec:dos"> -6.1</a> Denial of service</h3> -</a> - -<div class="p"><!----></div> -Providing Tor as a public service creates many opportunities for -denial-of-service attacks against the network. While -flow control and rate limiting (discussed in -Section <a href="#subsec:congestion">4.6</a>) prevent users from consuming more -bandwidth than routers are willing to provide, opportunities remain for -users to -consume more network resources than their fair share, or to render the -network unusable for others. 
- -<div class="p"><!----></div> -First of all, there are several CPU-consuming denial-of-service -attacks wherein an attacker can force an OR to perform expensive -cryptographic operations. For example, an attacker can -fake the start of a TLS handshake, forcing the OR to carry out its -(comparatively expensive) half of the handshake at no real computational -cost to the attacker. - -<div class="p"><!----></div> -We have not yet implemented any defenses for these attacks, but several -approaches are possible. First, ORs can -require clients to solve a puzzle [<a href="#puzzles-tls" name="CITEpuzzles-tls">16</a>] while beginning new -TLS handshakes or accepting <em>create</em> cells. So long as these -tokens are easy to verify and computationally expensive to produce, this -approach limits the attack multiplier. Additionally, ORs can limit -the rate at which they accept <em>create</em> cells and TLS connections, -so that -the computational work of processing them does not drown out the -symmetric cryptography operations that keep cells -flowing. This rate limiting could, however, allow an attacker -to slow down other users when they build new circuits. - -<div class="p"><!----></div> - -<div class="p"><!----></div> -Adversaries can also attack the Tor network's hosts and network -links. Disrupting a single circuit or link breaks all streams passing -along that part of the circuit. Users similarly lose service -when a router crashes or its operator restarts it. The current -Tor design treats such attacks as intermittent network failures, and -depends on users and applications to respond or recover as appropriate. A -future design could use an end-to-end TCP-like acknowledgment protocol, -so no streams are lost unless the entry or exit point is -disrupted. This solution would require more buffering at the network -edges, however, and the performance and anonymity implications from this -extra complexity still require investigation. 
- -<div class="p"><!----></div> - <h3><a name="tth_sEc6.2"> -<a name="subsec:exitpolicies"> -6.2</a> Exit policies and abuse</h3> -</a> - -<div class="p"><!----></div> - -<div class="p"><!----></div> -Exit abuse is a serious barrier to wide-scale Tor deployment. Anonymity -presents would-be vandals and abusers with an opportunity to hide -the origins of their activities. Attackers can harm the Tor network by -implicating exit servers for their abuse. Also, applications that commonly -use IP-based authentication (such as institutional mail or webservers) -can be fooled by the fact that anonymous connections appear to originate -at the exit OR. - -<div class="p"><!----></div> -We stress that Tor does not enable any new class of abuse. Spammers -and other attackers already have access to thousands of misconfigured -systems worldwide, and the Tor network is far from the easiest way -to launch attacks. -But because the -onion routers can be mistaken for the originators of the abuse, -and the volunteers who run them may not want to deal with the hassle of -explaining anonymity networks to irate administrators, we must block or limit -abuse through the Tor network. - -<div class="p"><!----></div> -To mitigate abuse issues, each onion router's <em>exit policy</em> -describes to which external addresses and ports the router will -connect. On one end of the spectrum are <em>open exit</em> -nodes that will connect anywhere. On the other end are <em>middleman</em> -nodes that only relay traffic to other Tor nodes, and <em>private exit</em> -nodes that only connect to a local host or network. A private -exit can allow a client to connect to a given host or -network more securely — an external adversary cannot eavesdrop traffic -between the private exit and the final destination, and so is less sure of -Alice's destination and activities. 
Most onion routers in the current -network function as -<em>restricted exits</em> that permit connections to the world at large, -but prevent access to certain abuse-prone addresses and services such -as SMTP. -The OR might also be able to authenticate clients to -prevent exit abuse without harming anonymity [<a href="#or-discex00" name="CITEor-discex00">48</a>]. - -<div class="p"><!----></div> - -<div class="p"><!----></div> -Many administrators use port restrictions to support only a -limited set of services, such as HTTP, SSH, or AIM. -This is not a complete solution, of course, since abuse opportunities for these -protocols are still well known. - -<div class="p"><!----></div> -We have not yet encountered any abuse in the deployed network, but if -we do we should consider using proxies to clean traffic for certain -protocols as it leaves the network. For example, much abusive HTTP -behavior (such as exploiting buffer overflows or well-known script -vulnerabilities) can be detected in a straightforward manner. -Similarly, one could run automatic spam filtering software (such as -SpamAssassin) on email exiting the OR network. - -<div class="p"><!----></div> -ORs may also rewrite exiting traffic to append -headers or other information indicating that the traffic has passed -through an anonymity service. This approach is commonly used -by email-only anonymity systems. ORs can also -run on servers with hostnames like <tt>anonymous</tt> to further -alert abuse targets to the nature of the anonymous traffic. - -<div class="p"><!----></div> -A mixture of open and restricted exit nodes allows the most -flexibility for volunteers running servers. But while having many -middleman nodes provides a large and robust network, -having only a few exit nodes reduces the number of points -an adversary needs to monitor for traffic analysis, and places a -greater burden on the exit nodes. 
This tension can be seen in the -Java Anon Proxy -cascade model, wherein only one node in each cascade needs to handle -abuse complaints — but an adversary only needs to observe the entry -and exit of a cascade to perform traffic analysis on all that -cascade's users. The hydra model (many entries, few exits) presents a -different compromise: only a few exit nodes are needed, but an -adversary needs to work harder to watch all the clients; see -Section <a href="#sec:conclusion">10</a>. - -<div class="p"><!----></div> -Finally, we note that exit abuse must not be dismissed as a peripheral -issue: when a system's public image suffers, it can reduce the number -and diversity of that system's users, and thereby reduce the anonymity -of the system itself. Like usability, public perception is a -security parameter. Sadly, preventing abuse of open exit nodes is an -unsolved problem, and will probably remain an arms race for the -foreseeable future. The abuse problems faced by Princeton's CoDeeN -project [<a href="#darkside" name="CITEdarkside">37</a>] give us a glimpse of likely issues. - -<div class="p"><!----></div> - <h3><a name="tth_sEc6.3"> -<a name="subsec:dirservers"> -6.3</a> Directory Servers</h3> -</a> - -<div class="p"><!----></div> -First-generation Onion Routing designs [<a href="#freedom2-arch" name="CITEfreedom2-arch">8</a>,<a href="#or-jsac98" name="CITEor-jsac98">41</a>] used -in-band network status updates: each router flooded a signed statement -to its neighbors, which propagated it onward. But anonymizing networks -have different security goals than typical link-state routing protocols. -For example, delays (accidental or intentional) -that can cause different parts of the network to have different views -of link-state and topology are not only inconvenient: they give -attackers an opportunity to exploit differences in client knowledge. 
-We also worry about attacks to deceive a -client about the router membership list, topology, or current network -state. Such <em>partitioning attacks</em> on client knowledge help an -adversary to efficiently deploy resources -against a target [<a href="#minion-design" name="CITEminion-design">15</a>]. - -<div class="p"><!----></div> -Tor uses a small group of redundant, well-known onion routers to -track changes in network topology and node state, including keys and -exit policies. Each such <em>directory server</em> acts as an HTTP -server, so clients can fetch current network state -and router lists, and so other ORs can upload -state information. Onion routers periodically publish signed -statements of their state to each directory server. The directory servers -combine this information with their own views of network liveness, -and generate a signed description (a <em>directory</em>) of the entire -network state. Client software is -pre-loaded with a list of the directory servers and their keys, -to bootstrap each client's view of the network. - -<div class="p"><!----></div> -When a directory server receives a signed statement for an OR, it -checks whether the OR's identity key is recognized. Directory -servers do not advertise unrecognized ORs — if they did, -an adversary could take over the network by creating many -servers [<a href="#sybil" name="CITEsybil">22</a>]. Instead, new nodes must be approved by the -directory -server administrator before they are included. Mechanisms for automated -node approval are an area of active research, and are discussed more -in Section <a href="#sec:maintaining-anonymity">9</a>. - -<div class="p"><!----></div> -Of course, a variety of attacks remain. An adversary who controls -a directory server can track clients by providing them different -information — perhaps by listing only nodes under its control, or by -informing only certain clients about a given node. 
Even an external -adversary can exploit differences in client knowledge: clients who use -a node listed on one directory server but not the others are vulnerable. - -<div class="p"><!----></div> -Thus these directory servers must be synchronized and redundant, so -that they can agree on a common directory. Clients should only trust -this directory if it is signed by a threshold of the directory -servers. - -<div class="p"><!----></div> -The directory servers in Tor are modeled after those in -Mixminion [<a href="#minion-design" name="CITEminion-design">15</a>], but our situation is easier. First, -we make the -simplifying assumption that all participants agree on the set of -directory servers. Second, while Mixminion needs to predict node -behavior, Tor only needs a threshold consensus of the current -state of the network. Third, we assume that we can fall back to the -human administrators to discover and resolve problems when a consensus -directory cannot be reached. Since there are relatively few directory -servers (currently 3, but we expect as many as 9 as the network scales), -we can afford operations like broadcast to simplify the consensus-building -protocol. - -<div class="p"><!----></div> -To avoid attacks where a router connects to all the directory servers -but refuses to relay traffic from other routers, the directory servers -must also build circuits and use them to anonymously test router -reliability [<a href="#mix-acc" name="CITEmix-acc">18</a>]. Unfortunately, this defense is not yet -designed or -implemented. - -<div class="p"><!----></div> -Using directory servers is simpler and more flexible than flooding. -Flooding is expensive, and complicates the analysis when we -start experimenting with non-clique network topologies. 
Signed -directories can be cached by other -onion routers, -so directory servers are not a performance -bottleneck when we have many users, and do not aid traffic analysis by -forcing clients to announce their existence to any -central point. - -<div class="p"><!----></div> - <h2><a name="tth_sEc7"> -<a name="sec:attacks"> -7</a> Attacks and Defenses</h2> -</a> - -<div class="p"><!----></div> -Below we summarize a variety of attacks, and discuss how well our -design withstands them.<br /> - -<div class="p"><!----></div> -<font size="+1"><b>Passive attacks</b></font><br /> -<em>Observing user traffic patterns.</em> Observing a user's connection -will not reveal her destination or data, but it will -reveal traffic patterns (both sent and received). Profiling via user -connection patterns requires further processing, because multiple -application streams may be operating simultaneously or in series over -a single circuit. - -<div class="p"><!----></div> -<em>Observing user content.</em> While content at the user end is encrypted, -connections to responders may not be (indeed, the responding website -itself may be hostile). While filtering content is not a primary goal -of Onion Routing, Tor can directly use Privoxy and related -filtering services to anonymize application data streams. - -<div class="p"><!----></div> -<em>Option distinguishability.</em> We allow clients to choose -configuration options. For example, clients concerned about request -linkability should rotate circuits more often than those concerned -about traceability. Allowing choice may attract users with different -needs; but clients who are -in the minority may lose more anonymity by appearing distinct than they -gain by optimizing their behavior [<a href="#econymics" name="CITEeconymics">1</a>]. - -<div class="p"><!----></div> -<em>End-to-end timing correlation.</em> Tor only minimally hides -such correlations. 
An attacker watching patterns of -traffic at the initiator and the responder will be -able to confirm the correspondence with high probability. The -greatest protection currently available against such confirmation is to hide -the connection between the onion proxy and the first Tor node, -by running the OP on the Tor node or behind a firewall. This approach -requires an observer to separate traffic originating at the onion -router from traffic passing through it: a global observer can do this, -but it might be beyond a limited observer's capabilities. - -<div class="p"><!----></div> -<em>End-to-end size correlation.</em> Simple packet counting -will also be effective in confirming -endpoints of a stream. However, even without padding, we may have some -limited protection: the leaky pipe topology means different numbers -of packets may enter one end of a circuit than exit at the other. - -<div class="p"><!----></div> -<em>Website fingerprinting.</em> All the effective passive -attacks above are traffic confirmation attacks, -which puts them outside our design goals. There is also -a passive traffic analysis attack that is potentially effective. -Rather than searching exit connections for timing and volume -correlations, the adversary may build up a database of -"fingerprints" containing file sizes and access patterns for -targeted websites. He can later confirm a user's connection to a given -site simply by consulting the database. This attack has -been shown to be effective against SafeWeb [<a href="#hintz-pet02" name="CITEhintz-pet02">29</a>]. -It may be less effective against Tor, since -streams are multiplexed within the same circuit, and -fingerprinting will be limited to -the granularity of cells (currently 512 bytes). 
Additional -defenses could include -larger cell sizes, padding schemes to group websites -into large sets, and link -padding or long-range dummies.<a href="#tthFtNtAAE" name="tthFrefAAE"><sup>4</sup></a><br /> - -<div class="p"><!----></div> -<font size="+1"><b>Active attacks</b></font><br /> -<em>Compromise keys.</em> An attacker who learns the TLS session key can -see control cells and encrypted relay cells on every circuit on that -connection; learning a circuit -session key lets him unwrap one layer of the encryption. An attacker -who learns an OR's TLS private key can impersonate that OR for the TLS -key's lifetime, but he must -also learn the onion key to decrypt <em>create</em> cells (and because of -perfect forward secrecy, he cannot hijack already established circuits -without also compromising their session keys). Periodic key rotation -limits the window of opportunity for these attacks. On the other hand, -an attacker who learns a node's identity key can replace that node -indefinitely by sending new forged descriptors to the directory servers. - -<div class="p"><!----></div> -<em>Iterated compromise.</em> A roving adversary who can -compromise ORs (by system intrusion, legal coercion, or extralegal -coercion) could march down the circuit compromising the -nodes until he reaches the end. Unless the adversary can complete -this attack within the lifetime of the circuit, however, the ORs -will have discarded the necessary information before the attack can -be completed. (Thanks to the perfect forward secrecy of session -keys, the attacker cannot force nodes to decrypt recorded -traffic once the circuits have been closed.) Additionally, building -circuits that cross jurisdictions can make legal coercion -harder — this phenomenon is commonly called "jurisdictional -arbitrage." 
The Java Anon Proxy project recently experienced the -need for this approach, when -a German court forced them to add a backdoor to -their nodes [<a href="#jap-backdoor" name="CITEjap-backdoor">51</a>]. - -<div class="p"><!----></div> -<em>Run a recipient.</em> An adversary running a webserver -trivially learns the timing patterns of users connecting to it, and -can introduce arbitrary patterns in its responses. -End-to-end attacks become easier: if the adversary can induce -users to connect to his webserver (perhaps by advertising -content targeted to those users), he now holds one end of their -connection. There is also a danger that application -protocols and associated programs can be induced to reveal information -about the initiator. Tor depends on Privoxy and similar protocol cleaners -to solve this latter problem. - -<div class="p"><!----></div> -<em>Run an onion proxy.</em> It is expected that end users will -nearly always run their own local onion proxy. However, in some -settings, it may be necessary for the proxy to run -remotely — typically, in institutions that want -to monitor the activity of those connecting to the proxy. -Compromising an onion proxy compromises all future connections -through it. - -<div class="p"><!----></div> -<em>DoS non-observed nodes.</em> An observer who can only watch some -of the Tor network can increase the value of this traffic -by attacking non-observed nodes to shut them down, reduce -their reliability, or persuade users that they are not trustworthy. -The best defense here is robustness. - -<div class="p"><!----></div> -<em>Run a hostile OR.</em> In addition to being a local observer, -an isolated hostile node can create circuits through itself, or alter -traffic patterns to affect traffic at other nodes. Nonetheless, a hostile -node must be immediately adjacent to both endpoints to compromise the -anonymity of a circuit. 
If an adversary can -run multiple ORs, and can persuade the directory servers -that those ORs are trustworthy and independent, then occasionally -some user will choose one of those ORs for the start and another -as the end of a circuit. If an adversary -controls m > 1 of N nodes, he can correlate at most -(m/N)<sup>2</sup> of the traffic — although an -adversary -could still attract a disproportionately large amount of traffic -by running an OR with a permissive exit policy, or by -degrading the reliability of other routers. - -<div class="p"><!----></div> -<em>Introduce timing into messages.</em> This is simply a stronger -version of passive timing attacks already discussed earlier. - -<div class="p"><!----></div> -<em>Tagging attacks.</em> A hostile node could "tag" a -cell by altering it. If the -stream were, for example, an unencrypted request to a Web site, -the garbled content coming out at the appropriate time would confirm -the association. However, integrity checks on cells prevent -this attack. - -<div class="p"><!----></div> -<em>Replace contents of unauthenticated protocols.</em> When -relaying an unauthenticated protocol like HTTP, a hostile exit node -can impersonate the target server. Clients -should prefer protocols with end-to-end authentication. - -<div class="p"><!----></div> -<em>Replay attacks.</em> Some anonymity protocols are vulnerable -to replay attacks. Tor is not; replaying one side of a handshake -will result in a different negotiated session key, and so the rest -of the recorded session can't be used. - -<div class="p"><!----></div> -<em>Smear attacks.</em> An attacker could use the Tor network for -socially disapproved acts, to bring the -network into disrepute and get its operators to shut it down. -Exit policies reduce the possibilities for abuse, but -ultimately the network requires volunteers who can tolerate -some political heat.
- -<div class="p"><!----></div> -<em>Distribute hostile code.</em> An attacker could trick users -into running subverted Tor software that did not, in fact, anonymize -their connections — or worse, could trick ORs into running weakened -software that provided users with less anonymity. We address this -problem (but do not solve it completely) by signing all Tor releases -with an official public key, and including an entry in the directory -that lists which versions are currently believed to be secure. To -prevent an attacker from subverting the official release itself -(through threats, bribery, or insider attacks), we provide all -releases in source code form, encourage source audits, and -frequently warn our users never to trust any software (even from -us) that comes without source.<br /> - -<div class="p"><!----></div> -<font size="+1"><b>Directory attacks</b></font><br /> -<em>Destroy directory servers.</em> If a few directory -servers disappear, the others still decide on a valid -directory. So long as any directory servers remain in operation, -they will still broadcast their views of the network and generate a -consensus directory. (If more than half are destroyed, this -directory will not, however, have enough signatures for clients to -use it automatically; human intervention will be necessary for -clients to decide whether to trust the resulting directory.) - -<div class="p"><!----></div> -<em>Subvert a directory server.</em> By taking over a directory server, -an attacker can partially influence the final directory. Since ORs -are included or excluded by majority vote, the corrupt directory can -at worst cast a tie-breaking vote to decide whether to include -marginal ORs. It remains to be seen how often such marginal cases -occur in practice. 
- -<div class="p"><!----></div> -<em>Subvert a majority of directory servers.</em> An adversary who controls -more than half the directory servers can include as many compromised -ORs in the final directory as he wishes. We must ensure that directory -server operators are independent and attack-resistant. - -<div class="p"><!----></div> -<em>Encourage directory server dissent.</em> The directory -agreement protocol assumes that directory server operators agree on -the set of directory servers. An adversary who can persuade some -of the directory server operators to distrust one another could -split the quorum into mutually hostile camps, thus partitioning -users based on which directory they use. Tor does not address -this attack. - -<div class="p"><!----></div> -<em>Trick the directory servers into listing a hostile OR.</em> -Our threat model explicitly assumes directory server operators will -be able to filter out most hostile ORs. - -<div class="p"><!----></div> -<em>Convince the directories that a malfunctioning OR is -working.</em> In the current Tor implementation, directory servers -assume that an OR is running correctly if they can start a TLS -connection to it. A hostile OR could easily subvert this test by -accepting TLS connections from ORs but ignoring all cells. Directory -servers must actively test ORs by building circuits and streams as -appropriate. The tradeoffs of a similar approach are discussed -in [<a href="#mix-acc" name="CITEmix-acc">18</a>].<br /> - -<div class="p"><!----></div> -<font size="+1"><b>Attacks against rendezvous points</b></font><br /> -<em>Make many introduction requests.</em> An attacker could -try to deny Bob service by flooding his introduction points with -requests. Because the introduction points can block requests that -lack authorization tokens, however, Bob can restrict the volume of -requests he receives, or require a certain amount of computation for -every request he receives. 
- -<div class="p"><!----></div> -<em>Attack an introduction point.</em> An attacker could -disrupt a location-hidden service by disabling its introduction -points. But because a service's identity is attached to its public -key, the service can simply re-advertise -itself at a different introduction point. Advertisements can also be -done secretly so that only high-priority clients know the address of -Bob's introduction points or so that different clients know of different -introduction points. This forces the attacker to disable all possible -introduction points. - -<div class="p"><!----></div> -<em>Compromise an introduction point.</em> An attacker who controls -Bob's introduction point can flood Bob with -introduction requests, or prevent valid introduction requests from -reaching him. Bob can notice a flood, and close the circuit. To notice -blocking of valid requests, however, he should periodically test the -introduction point by sending rendezvous requests and making -sure he receives them. - -<div class="p"><!----></div> -<em>Compromise a rendezvous point.</em> A rendezvous -point is no more sensitive than any other OR on -a circuit, since all data passing through the rendezvous is encrypted -with a session key shared by Alice and Bob. - -<div class="p"><!----></div> - <h2><a name="tth_sEc8"> -<a name="sec:in-the-wild"> -8</a> Early experiences: Tor in the Wild</h2> -</a> - -<div class="p"><!----></div> -As of mid-May 2004, the Tor network consists of 32 nodes -(24 in the US, 8 in Europe), and more are joining each week as the code -matures. (For comparison, the current remailer network -has about 40 nodes.) Each node has at least a 768Kb/768Kb connection, and -many have 10Mb. 
The number of users varies (and of course, it's hard to -tell for sure), but we sometimes have several hundred users — administrators at -several companies have begun sending their entire departments' web -traffic through Tor, to block other divisions of -their company from reading their traffic. Tor users have reported using -the network for web browsing, FTP, IRC, AIM, Kazaa, SSH, and -recipient-anonymous email via rendezvous points. One user has anonymously -set up a Wiki as a hidden service, where other users anonymously publish -the addresses of their hidden services. - -<div class="p"><!----></div> -Each Tor node currently processes roughly 800,000 relay -cells (a bit under half a gigabyte) per week. On average, about 80% -of each 498-byte payload is full for cells going back to the client, -whereas about 40% is full for cells coming from the client. (The difference -arises because most of the network's traffic is web browsing.) Interactive -traffic like SSH brings down the average a lot — once we have more -experience, and assuming we can resolve the anonymity issues, we may -partition traffic into two relay cell sizes: one to handle -bulk traffic and one for interactive traffic. - -<div class="p"><!----></div> -Based in part on our restrictive default exit policy (we -reject SMTP requests) and our low profile, we have had no abuse -issues since the network was deployed in October -2003. Our slow growth rate gives us time to add features, -resolve bugs, and get a feel for what users actually want from an -anonymity system. Even though having more users would bolster our -anonymity sets, we are not eager to attract the Kazaa or warez -communities — we feel that we must build a reputation for privacy, human -rights, research, and other socially laudable activities. - -<div class="p"><!----></div> -As for performance, profiling shows that Tor spends almost -all its CPU time in AES, which is fast. Current latency is attributable -to two factors. 
First, network latency is critical: we are -intentionally bouncing traffic around the world several times. Second, -our end-to-end congestion control algorithm focuses on protecting -volunteer servers from accidental DoS rather than on optimizing -performance. To quantify these effects, we did some informal tests using a network of 4 -nodes on the same machine (a heavily loaded 1GHz Athlon). We downloaded a 60 -megabyte file from <tt>debian.org</tt> every 30 minutes for 54 hours (108 sample -points). It arrived in about 300 seconds on average, compared to 210s for a -direct download. We ran a similar test on the production Tor network, -fetching the front page of <tt>cnn.com</tt> (55 kilobytes): -while a direct -download consistently took about 0.3s, the performance through Tor varied. -Some downloads were as fast as 0.4s, with a median at 2.8s, and -90% finishing within 5.3s. It seems that as the network expands, the chance -of building a slow circuit (one that includes a slow or heavily loaded node -or link) is increasing. On the other hand, as our users remain satisfied -with this increased latency, we can address our performance incrementally as we -proceed with development. -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> -Although Tor's clique topology and full-visibility directories present -scaling problems, we still expect the network to support a few hundred -nodes and maybe 10,000 users before we're forced to become -more distributed. With luck, the experience we gain running the current -topology will help us choose among alternatives when the time comes. - -<div class="p"><!----></div> - <h2><a name="tth_sEc9"> -<a name="sec:maintaining-anonymity"> -9</a> Open Questions in Low-latency Anonymity</h2> -</a> - -<div class="p"><!----></div> -In addition to the non-goals in -Section <a href="#subsec:non-goals">3</a>, many questions must be solved -before we can be confident of Tor's security. 
- -<div class="p"><!----></div> -Many of these open issues are questions of balance. For example, -how often should users rotate to fresh circuits? Frequent rotation -is inefficient, expensive, and may lead to intersection attacks and -predecessor attacks [<a href="#wright03" name="CITEwright03">54</a>], but infrequent rotation makes the -user's traffic linkable. Besides opening fresh circuits, clients can -also exit from the middle of the circuit, -or truncate and re-extend the circuit. More analysis is -needed to determine the proper tradeoff. - -<div class="p"><!----></div> - -<div class="p"><!----></div> -How should we choose path lengths? If Alice always uses two hops, -then both ORs can be certain that by colluding they will learn about -Alice and Bob. In our current approach, Alice always chooses at least -three nodes unrelated to herself and her destination. -Should Alice choose a random path length (e.g. from a geometric -distribution) to foil an attacker who -uses timing to learn that he is the fifth hop and thus concludes that -both Alice and the responder are running ORs? - -<div class="p"><!----></div> -Throughout this paper, we have assumed that end-to-end traffic -confirmation will immediately and automatically defeat a low-latency -anonymity system. Even high-latency anonymity systems can be -vulnerable to end-to-end traffic confirmation, if the traffic volumes -are high enough, and if users' habits are sufficiently -distinct [<a href="#statistical-disclosure" name="CITEstatistical-disclosure">14</a>,<a href="#limits-open" name="CITElimits-open">31</a>]. Can anything be -done to -make low-latency systems resist these attacks as well as high-latency -systems? Tor already makes some effort to conceal the starts and ends of -streams by wrapping long-range control commands in identical-looking -relay cells. 
Link padding could frustrate passive observers who count -packets; long-range padding could work against observers who own the -first hop in a circuit. But more research remains to find an efficient -and practical approach. Volunteers prefer not to run constant-bandwidth -padding; but no convincing traffic shaping approach has been -specified. Recent work on long-range padding [<a href="#defensive-dropping" name="CITEdefensive-dropping">33</a>] -shows promise. One could also try to reduce correlation in packet timing -by batching and re-ordering packets, but it is unclear whether this could -improve anonymity without introducing so much latency as to render the -network unusable. - -<div class="p"><!----></div> -A cascade topology may better defend against traffic confirmation by -aggregating users, and making padding and -mixing more affordable. Does the hydra topology (many input nodes, -few output nodes) work better against some adversaries? Are we going -to get a hydra anyway because most nodes will be middleman nodes? - -<div class="p"><!----></div> -Common wisdom suggests that Alice should run her own OR for best -anonymity, because traffic coming from her node could plausibly have -come from elsewhere. How much mixing does this approach need? Is it -immediately beneficial because of real-world adversaries that can't -observe Alice's router, but can run routers of their own? - -<div class="p"><!----></div> -To scale to many users, and to prevent an attacker from observing the -whole network, it may be necessary -to support far more servers than Tor currently anticipates. -This introduces several issues. First, if approval by a central set -of directory servers is no longer feasible, what mechanism should be used -to prevent adversaries from signing up many colluding servers? 
Second, -if clients can no longer have a complete picture of the network, -how can they perform discovery while preventing attackers from -manipulating or exploiting gaps in their knowledge? Third, if there -are too many servers for every server to constantly communicate with -every other, which non-clique topology should the network use? -(Restricted-route topologies promise comparable anonymity with better -scalability [<a href="#danezis-pets03" name="CITEdanezis-pets03">13</a>], but whatever topology we choose, we -need some way to keep attackers from manipulating their position within -it [<a href="#casc-rep" name="CITEcasc-rep">21</a>].) Fourth, if no central authority is tracking -server reliability, how do we stop unreliable servers from making -the network unusable? Fifth, do clients receive so much anonymity -from running their own ORs that we should expect them all to do -so [<a href="#econymics" name="CITEeconymics">1</a>], or do we need another incentive structure to -motivate them? Tarzan and MorphMix present possible solutions. - -<div class="p"><!----></div> - -<div class="p"><!----></div> -When a Tor node goes down, all its circuits (and thus streams) must break. -Will users abandon the system because of this brittleness? How well -does the method in Section <a href="#subsec:dos">6.1</a> allow streams to survive -node failure? If affected users rebuild circuits immediately, how much -anonymity is lost? It seems the problem is even worse in a peer-to-peer -environment — such systems don't yet provide an incentive for peers to -stay connected when they're done retrieving content, so we would expect -a higher churn rate. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - <h2><a name="tth_sEc10"> -<a name="sec:conclusion"> -10</a> Future Directions</h2> -</a> - -<div class="p"><!----></div> -Tor brings together many innovations into a unified deployable system. 
The -next immediate steps include: - -<div class="p"><!----></div> -<em>Scalability:</em> Tor's emphasis on deployability and design simplicity -has led us to adopt a clique topology, semi-centralized -directories, and a full-network-visibility model for client -knowledge. These properties will not scale past a few hundred servers. -Section <a href="#sec:maintaining-anonymity">9</a> describes some promising -approaches, but more deployment experience will be helpful in learning -the relative importance of these bottlenecks. - -<div class="p"><!----></div> -<em>Bandwidth classes:</em> This paper assumes that all ORs have -good bandwidth and latency. We should instead adopt the MorphMix model, -where nodes advertise their bandwidth level (DSL, T1, T3), and -Alice avoids bottlenecks by choosing nodes that match or -exceed her bandwidth. In this way DSL users can usefully join the Tor -network. - -<div class="p"><!----></div> -<em>Incentives:</em> Volunteers who run nodes are rewarded with publicity -and possibly better anonymity [<a href="#econymics" name="CITEeconymics">1</a>]. More nodes means increased -scalability, and more users can mean more anonymity. We need to continue -examining the incentive structures for participating in Tor. Further, -we need to explore more approaches to limiting abuse, and understand -why most people don't bother using privacy systems. - -<div class="p"><!----></div> -<em>Cover traffic:</em> Currently Tor omits cover traffic — its costs -in performance and bandwidth are clear but its security benefits are -not well understood. We must pursue more research on link-level cover -traffic and long-range cover traffic to determine whether some simple padding -method offers provable protection against our chosen adversary. 
- -<div class="p"><!----></div> - -<div class="p"><!----></div> -<em>Caching at exit nodes:</em> Perhaps each exit node should run a -caching web proxy [<a href="#shsm03" name="CITEshsm03">47</a>], to improve anonymity for cached pages -(Alice's request never -leaves the Tor network), to improve speed, and to reduce bandwidth cost. -On the other hand, forward security is weakened because caches -constitute a record of retrieved files. We must find the right -balance between usability and security. - -<div class="p"><!----></div> -<em>Better directory distribution:</em> -Clients currently download a description of -the entire network every 15 minutes. As the state grows larger -and clients more numerous, we may need a solution in which -clients receive incremental updates to directory state. -More generally, we must find more -scalable yet practical ways to distribute up-to-date snapshots of -network status without introducing new attacks. - -<div class="p"><!----></div> -<em>Further specification review:</em> Our public -byte-level specification [<a href="#tor-spec" name="CITEtor-spec">20</a>] needs -external review. We hope that as Tor -is deployed, more people will examine its -specification. - -<div class="p"><!----></div> -<em>Multisystem interoperability:</em> We are currently working with the -designer of MorphMix to unify the specification and implementation of -the common elements of our two systems. So far, this seems -to be relatively straightforward. Interoperability will allow testing -and direct comparison of the two designs for trust and scalability. - -<div class="p"><!----></div> -<em>Wider-scale deployment:</em> The original goal of Tor was to -gain experience in deploying an anonymizing overlay network, and -learn from having actual users. We are now at a point in design -and development where we can start deploying a wider network. 
Once -we have many actual users, we will doubtlessly be better -able to evaluate some of our design decisions, including our -robustness/latency tradeoffs, our performance tradeoffs (including -cell size), our abuse-prevention mechanisms, and -our overall usability. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<h2>Acknowledgments</h2> - We thank Peter Palfrader, Geoff Goodell, Adam Shostack, Joseph Sokol-Margolis, - John Bashinski, and Zack Brown - for editing and comments; - Matej Pfajfar, Andrei Serjantov, and Marc Rennhard for design discussions; - Bram Cohen for congestion control discussions; - Adam Back for suggesting telescoping circuits; and - Cathy Meadows for formal analysis of the <em>extend</em> protocol. - This work has been supported by ONR and DARPA. - -<div class="p"><!----></div> - -<div class="p"><!----></div> - -<div class="p"><!----></div> -<h2>References</h2> - -<dl compact="compact"> -<font size="-1"></font> <dt><a href="#CITEeconymics" name="econymics">[1]</a></dt><dd> -A. Acquisti, R. Dingledine, and P. Syverson. - On the economics of anonymity. - In R. N. Wright, editor, <em>Financial Cryptography</em>. - Springer-Verlag, LNCS 2742, 2003. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEeternity" name="eternity">[2]</a></dt><dd> -R. Anderson. - The eternity service. - In <em>Pragocrypt '96</em>, 1996. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEanonymizer" name="anonymizer">[3]</a></dt><dd> -The Anonymizer. - <tt>&lt;http://anonymizer.com/&gt;. - -<div class="p"><!----></div> -</tt></dd> - <dt><a href="#CITEfreedom21-security" name="freedom21-security">[4]</a></dt><dd> -A. Back, I. Goldberg, and A. Shostack. - Freedom systems 2.1 security issues and analysis. - White paper, Zero Knowledge Systems, Inc., May 2001. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEback01" name="back01">[5]</a></dt><dd> -A. Back, U. Möller, and A. Stiglic.
- Traffic analysis attacks and trade-offs in anonymity providing - systems. - In I. S. Moskowitz, editor, <em>Information Hiding (IH 2001)</em>, pages - 245-257. Springer-Verlag, LNCS 2137, 2001. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEeax" name="eax">[6]</a></dt><dd> -M. Bellare, P. Rogaway, and D. Wagner. - The EAX mode of operation: A two-pass authenticated-encryption - scheme optimized for simplicity and efficiency. - In <em>Fast Software Encryption 2004</em>, February 2004. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEweb-mix" name="web-mix">[7]</a></dt><dd> -O. Berthold, H. Federrath, and S. Köpsell. - Web MIXes: A system for anonymous and unobservable Internet - access. - In H. Federrath, editor, <em>Designing Privacy Enhancing - Technologies: Workshop on Design Issues in Anonymity and Unobservability</em>. - Springer-Verlag, LNCS 2009, 2000. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEfreedom2-arch" name="freedom2-arch">[8]</a></dt><dd> -P. Boucher, A. Shostack, and I. Goldberg. - Freedom systems 2.0 architecture. - White paper, Zero Knowledge Systems, Inc., December 2000. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEcebolla" name="cebolla">[9]</a></dt><dd> -Z. Brown. - Cebolla: Pragmatic IP Anonymity. - In <em>Ottawa Linux Symposium</em>, June 2002. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEchaum-mix" name="chaum-mix">[10]</a></dt><dd> -D. Chaum. - Untraceable electronic mail, return addresses, and digital - pseudonyms. - <em>Communications of the ACM</em>, 24(2), February 1981. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEcfs:sosp01" name="cfs:sosp01">[11]</a></dt><dd> -F. Dabek, M. F. Kaashoek, D. Karger, R. Morris, and I. Stoica. - Wide-area cooperative storage with CFS. - In <em>18th ACM Symposium on Operating Systems Principles - (SOSP '01)</em>, Chateau Lake Louise, Banff, Canada, October 2001.
- -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEpipenet" name="pipenet">[12]</a></dt><dd> -W. Dai. - Pipenet 1.1. - Usenet post, August 1996. - <tt><http://www.eskimo.com/ weidai/pipenet.txt> First mentioned in a - post to the cypherpunks list, Feb. 1995. - -<div class="p"><!----></div> -</tt></dd> - <dt><a href="#CITEdanezis-pets03" name="danezis-pets03">[13]</a></dt><dd> -G. Danezis. - Mix-networks with restricted routes. - In R. Dingledine, editor, <em>Privacy Enhancing Technologies (PET - 2003)</em>. Springer-Verlag LNCS 2760, 2003. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEstatistical-disclosure" name="statistical-disclosure">[14]</a></dt><dd> -G. Danezis. - Statistical disclosure attacks. - In <em>Security and Privacy in the Age of Uncertainty (SEC2003)</em>, - pages 421-426, Athens, May 2003. IFIP TC11, Kluwer. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEminion-design" name="minion-design">[15]</a></dt><dd> -G. Danezis, R. Dingledine, and N. Mathewson. - Mixminion: Design of a type III anonymous remailer protocol. - In <em>2003 IEEE Symposium on Security and Privacy</em>, pages 2-15. - IEEE CS, May 2003. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEpuzzles-tls" name="puzzles-tls">[16]</a></dt><dd> -D. Dean and A. Stubblefield. - Using Client Puzzles to Protect TLS. - In <em>Proceedings of the 10th USENIX Security Symposium</em>. USENIX, - Aug. 2001. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITETLS" name="TLS">[17]</a></dt><dd> -T. Dierks and C. Allen. - The TLS Protocol - Version 1.0. - IETF RFC 2246, January 1999. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEmix-acc" name="mix-acc">[18]</a></dt><dd> -R. Dingledine, M. J. Freedman, D. Hopwood, and D. Molnar. - A Reputation System to Increase MIX-net Reliability. - In I. S. Moskowitz, editor, <em>Information Hiding (IH 2001)</em>, pages - 126-141. Springer-Verlag, LNCS 2137, 2001. 
- -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEfreehaven-berk" name="freehaven-berk">[19]</a></dt><dd> -R. Dingledine, M. J. Freedman, and D. Molnar. - The free haven project: Distributed anonymous storage service. - In H. Federrath, editor, <em>Designing Privacy Enhancing - Technologies: Workshop on Design Issue in Anonymity and Unobservability</em>. - Springer-Verlag, LNCS 2009, July 2000. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEtor-spec" name="tor-spec">[20]</a></dt><dd> -R. Dingledine and N. Mathewson. - Tor protocol specifications. - <tt><http://freehaven.net/tor/tor-spec.txt>. - -<div class="p"><!----></div> -</tt></dd> - <dt><a href="#CITEcasc-rep" name="casc-rep">[21]</a></dt><dd> -R. Dingledine and P. Syverson. - Reliable MIX Cascade Networks through Reputation. - In M. Blaze, editor, <em>Financial Cryptography</em>. Springer-Verlag, - LNCS 2357, 2002. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEsybil" name="sybil">[22]</a></dt><dd> -J. Douceur. - The Sybil Attack. - In <em>Proceedings of the 1st International Peer To Peer Systems - Workshop (IPTPS)</em>, Mar. 2002. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEfederrath-ih96" name="federrath-ih96">[23]</a></dt><dd> -H. Federrath, A. Jerichow, and A. Pfitzmann. - MIXes in mobile communication systems: Location management with - privacy. - In R. Anderson, editor, <em>Information Hiding, First International - Workshop</em>, pages 121-135. Springer-Verlag, LNCS 1174, May 1996. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEtarzan:ccs02" name="tarzan:ccs02">[24]</a></dt><dd> -M. J. Freedman and R. Morris. - Tarzan: A peer-to-peer anonymizing network layer. - In <em>9th ACM Conference on Computer and Communications - Security (CCS 2002)</em>, Washington, DC, November 2002. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEherbivore" name="herbivore">[25]</a></dt><dd> -S. Goel, M. Robson, M. Polte, and E. G. Sirer. 
- Herbivore: A scalable and efficient protocol for anonymous - communication. - Technical Report TR2003-1890, Cornell University Computing and - Information Science, February 2003. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEian-thesis" name="ian-thesis">[26]</a></dt><dd> -I. Goldberg. - <em>A Pseudonymous Communications Infrastructure for the Internet</em>. - PhD thesis, UC Berkeley, Dec 2000. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEor-ih96" name="or-ih96">[27]</a></dt><dd> -D. M. Goldschlag, M. G. Reed, and P. F. Syverson. - Hiding routing information. - In R. Anderson, editor, <em>Information Hiding, First International - Workshop</em>, pages 137-150. Springer-Verlag, LNCS 1174, May 1996. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEbabel" name="babel">[28]</a></dt><dd> -C. Gülcü and G. Tsudik. - Mixing E-mail with Babel. - In <em>Network and Distributed Security Symposium (NDSS 96)</em>, - pages 2-16. IEEE, February 1996. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEhintz-pet02" name="hintz-pet02">[29]</a></dt><dd> -A. Hintz. - Fingerprinting websites using traffic analysis. - In R. Dingledine and P. Syverson, editors, <em>Privacy Enhancing - Technologies (PET 2002)</em>, pages 171-178. Springer-Verlag, LNCS 2482, 2002. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEjerichow-jsac98" name="jerichow-jsac98">[30]</a></dt><dd> -A. Jerichow, J. Müller, A. Pfitzmann, B. Pfitzmann, and M. Waidner. - Real-time mixes: A bandwidth-efficient anonymity protocol. - <em>IEEE Journal on Selected Areas in Communications</em>, - 16(4):495-509, May 1998. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITElimits-open" name="limits-open">[31]</a></dt><dd> -D. Kesdogan, D. Agrawal, and S. Penz. - Limits of anonymity in open environments. - In F. Petitcolas, editor, <em>Information Hiding Workshop (IH - 2002)</em>. Springer-Verlag, LNCS 2578, October 2002. 
- -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEsocks4" name="socks4">[32]</a></dt><dd> -D. Koblas and M. R. Koblas. - SOCKS. - In <em>UNIX Security III Symposium (1992 USENIX Security - Symposium)</em>, pages 77-83. USENIX, 1992. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEdefensive-dropping" name="defensive-dropping">[33]</a></dt><dd> -B. N. Levine, M. K. Reiter, C. Wang, and M. Wright. - Timing analysis in low-latency mix-based systems. - In A. Juels, editor, <em>Financial Cryptography</em>. Springer-Verlag, - LNCS (forthcoming), 2004. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEhordes-jcs" name="hordes-jcs">[34]</a></dt><dd> -B. N. Levine and C. Shields. - Hordes: A multicast-based protocol for anonymity. - <em>Journal of Computer Security</em>, 10(3):213-240, 2002. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEmeadows96" name="meadows96">[35]</a></dt><dd> -C. Meadows. - The NRL protocol analyzer: An overview. - <em>Journal of Logic Programming</em>, 26(2):113-131, 1996. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEmixmaster-spec" name="mixmaster-spec">[36]</a></dt><dd> -U. Möller, L. Cottrell, P. Palfrader, and L. Sassaman. - Mixmaster Protocol - Version 2. - Draft, July 2003. - <tt><http://www.abditum.com/mixmaster-spec.txt>. - -<div class="p"><!----></div> -</tt></dd> - <dt><a href="#CITEdarkside" name="darkside">[37]</a></dt><dd> -V. S. Pai, L. Wang, K. Park, R. Pang, and L. Peterson. - The Dark Side of the Web: An Open Proxy's View. - <tt><http://codeen.cs.princeton.edu/>. - -<div class="p"><!----></div> -</tt></dd> - <dt><a href="#CITEisdn-mixes" name="isdn-mixes">[38]</a></dt><dd> -A. Pfitzmann, B. Pfitzmann, and M. Waidner. - ISDN-mixes: Untraceable communication with very small bandwidth - overhead. - In <em>GI/ITG Conference on Communication in Distributed Systems</em>, - pages 451-463, February 1991. 
- -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEprivoxy" name="privoxy">[39]</a></dt><dd> -Privoxy. - <tt><http://www.privoxy.org/>. - -<div class="p"><!----></div> -</tt></dd> - <dt><a href="#CITEreed-protocols97" name="reed-protocols97">[40]</a></dt><dd> -M. G. Reed, P. F. Syverson, and D. M. Goldschlag. - Protocols using anonymous connections: Mobile applications. - In B. Christianson, B. Crispo, M. Lomas, and M. Roe, editors, <em> - Security Protocols: 5th International Workshop</em>, pages 13-23. - Springer-Verlag, LNCS 1361, April 1997. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEor-jsac98" name="or-jsac98">[41]</a></dt><dd> -M. G. Reed, P. F. Syverson, and D. M. Goldschlag. - Anonymous connections and onion routing. - <em>IEEE Journal on Selected Areas in Communications</em>, - 16(4):482-494, May 1998. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEcrowds-tissec" name="crowds-tissec">[42]</a></dt><dd> -M. K. Reiter and A. D. Rubin. - Crowds: Anonymity for web transactions. - <em>ACM TISSEC</em>, 1(1):66-92, June 1998. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEmorphmix:fc04" name="morphmix:fc04">[43]</a></dt><dd> -M. Rennhard and B. Plattner. - Practical anonymity for the masses with morphmix. - In A. Juels, editor, <em>Financial Cryptography</em>. Springer-Verlag, - LNCS (forthcoming), 2004. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEanonnet" name="anonnet">[44]</a></dt><dd> -M. Rennhard, S. Rafaeli, L. Mathy, B. Plattner, and D. Hutchison. - Analysis of an Anonymity Network for Web Browsing. - In <em>IEEE 7th Intl. Workshop on Enterprise Security (WET ICE - 2002)</em>, Pittsburgh, USA, June 2002. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITESS03" name="SS03">[45]</a></dt><dd> -A. Serjantov and P. Sewell. - Passive attack analysis for connection-based anonymity systems. - In <em>Computer Security - ESORICS 2003</em>. Springer-Verlag, LNCS - 2808, October 2003. 
- -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEp5" name="p5">[46]</a></dt><dd> -R. Sherwood, B. Bhattacharjee, and A. Srinivasan. - p<sup>5</sup>: A protocol for scalable anonymous communication. - In <em>IEEE Symposium on Security and Privacy</em>, pages 58-70. IEEE - CS, 2002. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEshsm03" name="shsm03">[47]</a></dt><dd> -A. Shubina and S. Smith. - Using caching for browsing anonymity. - <em>ACM SIGEcom Exchanges</em>, 4(2), Sept 2003. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEor-discex00" name="or-discex00">[48]</a></dt><dd> -P. Syverson, M. Reed, and D. Goldschlag. - Onion Routing access configurations. - In <em>DARPA Information Survivability Conference and Exposition - (DISCEX 2000)</em>, volume 1, pages 34-40. IEEE CS Press, 2000. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEor-pet00" name="or-pet00">[49]</a></dt><dd> -P. Syverson, G. Tsudik, M. Reed, and C. Landwehr. - Towards an Analysis of Onion Routing Security. - In H. Federrath, editor, <em>Designing Privacy Enhancing - Technologies: Workshop on Design Issue in Anonymity and Unobservability</em>, - pages 96-114. Springer-Verlag, LNCS 2009, July 2000. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEtannenbaum96" name="tannenbaum96">[50]</a></dt><dd> -A. Tannenbaum. - Computer networks, 1996. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEjap-backdoor" name="jap-backdoor">[51]</a></dt><dd> -The AN.ON Project. - German police proceeds against anonymity service. - Press release, September 2003. - - <tt><http://www.datenschutzzentrum.de/material/themen/presse/anon-bka_e.htm>. - -<div class="p"><!----></div> -</tt></dd> - <dt><a href="#CITEtangler" name="tangler">[52]</a></dt><dd> -M. Waldman and D. Mazières. - Tangler: A censorship-resistant publishing system based on document - entanglements. 
- In <em>8<sup>th</sup> ACM Conference on Computer and Communications - Security (CCS-8)</em>, pages 86-135. ACM Press, 2001. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEpublius" name="publius">[53]</a></dt><dd> -M. Waldman, A. Rubin, and L. Cranor. - Publius: A robust, tamper-evident, censorship-resistant and - source-anonymous web publishing system. - In <em>Proc. 9th USENIX Security Symposium</em>, pages 59-72, August - 2000. - -<div class="p"><!----></div> -</dd> - <dt><a href="#CITEwright03" name="wright03">[54]</a></dt><dd> -M. Wright, M. Adler, B. N. Levine, and C. Shields. - Defending anonymous communication against passive logging attacks. - In <em>IEEE Symposium on Security and Privacy</em>, pages 28-41. IEEE - CS, May 2003.</dd> -</dl> - - -<div class="p"><!----></div> -<hr /><h3>Footnotes:</h3> - -<div class="p"><!----></div> -<a name="tthFtNtAAB"></a><a href="#tthFrefAAB"><sup>1</sup></a>Actually, the negotiated key is used to derive two - symmetric keys: one for each direction. -<div class="p"><!----></div> -<a name="tthFtNtAAC"></a><a href="#tthFrefAAC"><sup>2</sup></a> - With 48 bits of digest per cell, the probability of an accidental -collision is far lower than the chance of hardware failure. -<div class="p"><!----></div> -<a name="tthFtNtAAD"></a><a href="#tthFrefAAD"><sup>3</sup></a> -Rather than rely on an external infrastructure, the Onion Routing network -can run the lookup service itself. Our current implementation provides a -simple lookup system on the -directory servers. -<div class="p"><!----></div> -<a name="tthFtNtAAE"></a><a href="#tthFrefAAE"><sup>4</sup></a>Note that this fingerprinting -attack should not be confused with the much more complicated latency -attacks of [<a href="#back01" name="CITEback01">5</a>], which require a fingerprint of the latencies -of all circuits through the network, combined with those from the -network edges to the target user and the responder website. 
-<br /><br /><hr /><small>File translated from -T<sub><font size="-1">E</font></sub>X -by <a href="http://hutchinson.belmont.ma.us/tth/"> -T<sub><font size="-1">T</font></sub>H</a>, -version 3.59.<br />On 18 May 2004, 10:45.</small> -</body></html> - diff --git a/doc/design-paper/tor-design.pdf b/doc/design-paper/tor-design.pdf Binary files differdeleted file mode 100644 index 76a2265153..0000000000 --- a/doc/design-paper/tor-design.pdf +++ /dev/null diff --git a/doc/design-paper/tor-design.tex b/doc/design-paper/tor-design.tex deleted file mode 100644 index dff1b4068b..0000000000 --- a/doc/design-paper/tor-design.tex +++ /dev/null @@ -1,1988 +0,0 @@ -\documentclass[twocolumn]{article} -\usepackage{usenix} - -%\documentclass[times,10pt,twocolumn]{article} -%\usepackage{latex8} -%\usepackage{times} -\usepackage{url} -\usepackage{graphics} -\usepackage{amsmath} -\usepackage{epsfig} - -\pagestyle{empty} - -\renewcommand\url{\begingroup \def\UrlLeft{<}\def\UrlRight{>}\urlstyle{tt}\Url} -\newcommand\emailaddr{\begingroup \def\UrlLeft{<}\def\UrlRight{>}\urlstyle{tt}\Url} - -\newcommand{\workingnote}[1]{} % The version that hides the note. -%\newcommand{\workingnote}[1]{(**#1)} % The version that makes the note visible. - -% If an URL ends up with '%'s in it, that's because the line *in the .bib/.tex -% file* is too long, so break it there (it doesn't matter if the next line is -% indented with spaces). -DH - -%\newif\ifpdf -%\ifx\pdfoutput\undefined -% \pdffalse -%\else -% \pdfoutput=1 -% \pdftrue -%\fi - -\newenvironment{tightlist}{\begin{list}{$\bullet$}{ - \setlength{\itemsep}{0mm} - \setlength{\parsep}{0mm} - % \setlength{\labelsep}{0mm} - % \setlength{\labelwidth}{0mm} - % \setlength{\topsep}{0mm} - }}{\end{list}} - -% Cut down on whitespace above and below figures displayed at head/foot of -% page. 
-\setlength{\textfloatsep}{3mm} -% Cut down on whitespace above and below figures displayed in middle of page -\setlength{\intextsep}{3mm} - -\begin{document} - -%% Use dvipdfm instead. --DH -%\ifpdf -% \pdfcompresslevel=9 -% \pdfpagewidth=\the\paperwidth -% \pdfpageheight=\the\paperheight -%\fi - -\title{Tor: The Second-Generation Onion Router} %\\DRAFT VERSION} -% Putting the 'Private' back in 'Virtual Private Network' - -\author{Roger Dingledine \\ The Free Haven Project \\ arma@freehaven.net \and -Nick Mathewson \\ The Free Haven Project \\ nickm@freehaven.net \and -Paul Syverson \\ Naval Research Lab \\ syverson@itd.nrl.navy.mil} - -\maketitle -\thispagestyle{empty} - -\begin{abstract} -We present Tor, a circuit-based low-latency anonymous communication -service. This second-generation Onion Routing system addresses limitations -in the original design by adding perfect forward secrecy, congestion -control, directory servers, integrity checking, configurable exit policies, -and a practical design for location-hidden services via rendezvous -points. Tor works on the real-world -Internet, requires no special privileges or kernel modifications, requires -little synchronization or coordination between nodes, and provides a -reasonable tradeoff between anonymity, usability, and efficiency. -We briefly describe our experiences with an international network of -more than 30 nodes. % that has been running for several months. -We close with a list of open problems in anonymous communication. -\end{abstract} - -%\begin{center} -%\textbf{Keywords:} anonymity, peer-to-peer, remailer, nymserver, reply block -%\end{center} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\section{Overview} -\label{sec:intro} - -Onion Routing is a distributed overlay network designed to anonymize -TCP-based applications like web browsing, secure shell, -and instant messaging. 
Clients choose a path through the network and -build a \emph{circuit}, in which each node (or ``onion router'' or ``OR'') -in the path knows its predecessor and successor, but no other nodes in -the circuit. Traffic flows down the circuit in fixed-size -\emph{cells}, which are unwrapped by a symmetric key at each node -(like the layers of an onion) and relayed downstream. The -Onion Routing project published several design and analysis -papers \cite{or-ih96,or-jsac98,or-discex00,or-pet00}. While a wide area Onion -Routing network was deployed briefly, the only long-running -public implementation was a fragile -proof-of-concept that ran on a single machine. Even this simple deployment -processed connections from over sixty thousand distinct IP addresses from -all over the world at a rate of about fifty thousand per day. -But many critical design and deployment issues were never -resolved, and the design has not been updated in years. Here -we describe Tor, a protocol for asynchronous, loosely federated onion -routers that provides the following improvements over the old Onion -Routing design: - -\textbf{Perfect forward secrecy:} In the original Onion Routing design, -a single hostile node could record traffic and -later compromise successive nodes in the circuit and force them -to decrypt it. Rather than using a single multiply encrypted data -structure (an \emph{onion}) to lay each circuit, -Tor now uses an incremental or \emph{telescoping} path-building design, -where the initiator negotiates session keys with each successive hop in -the circuit. Once these keys are deleted, subsequently compromised nodes -cannot decrypt old traffic. As a side benefit, onion replay detection -is no longer necessary, and the process of building circuits is more -reliable, since the initiator knows when a hop fails and can then try -extending to a new node. 
- -\textbf{Separation of ``protocol cleaning'' from anonymity:} -Onion Routing originally required a separate ``application -proxy'' for each supported application protocol---most of which were -never written, so many applications were never supported. Tor uses the -standard and near-ubiquitous SOCKS~\cite{socks4} proxy interface, allowing -us to support most TCP-based programs without modification. Tor now -relies on the filtering features of privacy-enhancing -application-level proxies such as Privoxy~\cite{privoxy}, without trying -to duplicate those features itself. - -\textbf{No mixing, padding, or traffic shaping (yet):} Onion -Routing originally called for batching and reordering cells as they arrived, -assumed padding between ORs, and in -later designs added padding between onion proxies (users) and -ORs~\cite{or-ih96,or-jsac98}. Tradeoffs between padding protection -and cost were discussed, and \emph{traffic shaping} algorithms were -theorized~\cite{or-pet00} to provide good security without expensive -padding, but no concrete padding scheme was suggested. -Recent research~\cite{econymics} -and deployment experience~\cite{freedom21-security} suggest that this -level of resource use is not practical or economical; and even full -link padding is still vulnerable~\cite{defensive-dropping}. Thus, -until we have a proven and convenient design for traffic shaping or -low-latency mixing that improves anonymity against a realistic -adversary, we leave these strategies out. - -\textbf{Many TCP streams can share one circuit:} Onion Routing originally -built a separate circuit for each -application-level request, but this required -multiple public key operations for every request, and also presented -a threat to anonymity from building so many circuits; see -Section~\ref{sec:maintaining-anonymity}. Tor multiplexes multiple TCP -streams along each circuit to improve efficiency and anonymity. 
- -\textbf{Leaky-pipe circuit topology:} Through in-band signaling -within the circuit, Tor initiators can direct traffic to nodes partway -down the circuit. This novel approach -allows traffic to exit the circuit from the middle---possibly -frustrating traffic shape and volume attacks based on observing the end -of the circuit. (It also allows for long-range padding if -future research shows this to be worthwhile.) - -\textbf{Congestion control:} Earlier anonymity designs do not -address traffic bottlenecks. Unfortunately, typical approaches to -load balancing and flow control in overlay networks involve inter-node -control communication and global views of traffic. Tor's decentralized -congestion control uses end-to-end acks to maintain anonymity -while allowing nodes at the edges of the network to detect congestion -or flooding and send less data until the congestion subsides. - -\textbf{Directory servers:} The earlier Onion Routing design -planned to flood state information through the network---an approach -that can be unreliable and complex. % open to partitioning attacks. -Tor takes a simplified view toward distributing this -information. Certain more trusted nodes act as \emph{directory -servers}: they provide signed directories describing known -routers and their current state. Users periodically download them -via HTTP. - -\textbf{Variable exit policies:} Tor provides a consistent mechanism -for each node to advertise a policy describing the hosts -and ports to which it will connect. These exit policies are critical -in a volunteer-based distributed infrastructure, because each operator -is comfortable with allowing different types of traffic to exit -from his node. - -\textbf{End-to-end integrity checking:} The original Onion Routing -design did no integrity checking on data. 
Any node on the -circuit could change the contents of data cells as they passed by---for -example, to alter a connection request so it would connect -to a different webserver, or to `tag' encrypted traffic and look for -corresponding corrupted traffic at the network edges~\cite{minion-design}. -Tor hampers these attacks by verifying data integrity before it leaves -the network. - -%\textbf{Improved robustness to failed nodes:} A failed node -%in the old design meant that circuit building failed, but thanks to -%Tor's step-by-step circuit building, users notice failed nodes -%while building circuits and route around them. Additionally, liveness -%information from directories allows users to avoid unreliable nodes in -%the first place. -%% Can't really claim this, now that we've found so many variants of -%% attack on partial-circuit-building. -RD - -\textbf{Rendezvous points and hidden services:} -Tor provides an integrated mechanism for responder anonymity via -location-protected servers. Previous Onion Routing designs included -long-lived ``reply onions'' that could be used to build circuits -to a hidden server, but these reply onions did not provide forward -security, and became useless if any node in the path went down -or rotated its keys. In Tor, clients negotiate {\it rendezvous points} -to connect with hidden servers; reply onions are no longer required. - -Unlike Freedom~\cite{freedom2-arch}, Tor does not require OS kernel -patches or network stack support. This prevents us from anonymizing -non-TCP protocols, but has greatly helped our portability and -deployability. - -%Unlike Freedom~\cite{freedom2-arch}, Tor only anonymizes -%TCP-based protocols---not requiring patches (or built-in support) in an -%operating system's network stack has been valuable to Tor's -%portability and deployability. - -We have implemented all of the above features, including rendezvous -points. 
Our source code is -available under a free license, and Tor -%, as far as we know, is unencumbered by patents. -is not covered by the patent that affected distribution and use of -earlier versions of Onion Routing. -We have deployed a wide-area alpha network -to test the design, to get more experience with usability -and users, and to provide a research platform for experimentation. -As of this writing, the network stands at 32 nodes %in thirteen -%distinct administrative domains -spread over two continents. - -We review previous work in Section~\ref{sec:related-work}, describe -our goals and assumptions in Section~\ref{sec:assumptions}, -and then address the above list of improvements in -Sections~\ref{sec:design},~\ref{sec:rendezvous}, and~\ref{sec:other-design}. -We summarize -in Section~\ref{sec:attacks} how our design stands up to -known attacks, and talk about our early deployment experiences in -Section~\ref{sec:in-the-wild}. We conclude with a list of open problems in -Section~\ref{sec:maintaining-anonymity} and future work for the Onion -Routing project in Section~\ref{sec:conclusion}. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\section{Related work} -\label{sec:related-work} - -Modern anonymity systems date to Chaum's {\bf Mix-Net} -design~\cite{chaum-mix}. Chaum -proposed hiding the correspondence between sender and recipient by -wrapping messages in layers of public-key cryptography, and relaying them -through a path composed of ``mixes.'' Each mix in turn -decrypts, delays, and re-orders messages before relaying them -onward. -%toward their destinations. - -Subsequent relay-based anonymity designs have diverged in two -main directions. Systems like {\bf Babel}~\cite{babel}, -{\bf Mixmaster}~\cite{mixmaster-spec}, -and {\bf Mixminion}~\cite{minion-design} have tried -to maximize anonymity at the cost of introducing comparatively large and -variable latencies. 
Because of this decision, these \emph{high-latency} -networks resist strong global adversaries, -but introduce too much lag for interactive tasks like web browsing, -Internet chat, or SSH connections. - -Tor belongs to the second category: \emph{low-latency} designs that -try to anonymize interactive network traffic. These systems handle -a variety of bidirectional protocols. They also provide more convenient -mail delivery than the high-latency anonymous email -networks, because the remote mail server provides explicit and timely -delivery confirmation. But because these designs typically -involve many packets that must be delivered quickly, it is -difficult for them to prevent an attacker who can eavesdrop both ends of the -communication from correlating the timing and volume -of traffic entering the anonymity network with traffic leaving it~\cite{SS03}. -These -protocols are similarly vulnerable to an active adversary who introduces -timing patterns into traffic entering the network and looks -for correlated patterns among exiting traffic. -Although some work has been done to frustrate these attacks, most designs -protect primarily against traffic analysis rather than traffic -confirmation (see Section~\ref{subsec:threat-model}). - -The simplest low-latency designs are single-hop proxies such as the -{\bf Anonymizer}~\cite{anonymizer}: a single trusted server strips the -data's origin before relaying it. These designs are easy to -analyze, but users must trust the anonymizing proxy. -Concentrating the traffic to this single point increases the anonymity set -(the people a given user is hiding among), but it is vulnerable if the -adversary can observe all traffic entering and leaving the proxy. - -More complex are distributed-trust, circuit-based anonymizing systems. -In these designs, a user establishes one or more medium-term bidirectional -end-to-end circuits, and tunnels data in fixed-size cells. 
-Establishing circuits is computationally expensive and typically -requires public-key -cryptography, whereas relaying cells is comparatively inexpensive and -typically requires only symmetric encryption. -Because a circuit crosses several servers, and each server only knows -the adjacent servers in the circuit, no single server can link a -user to her communication partners. - -The {\bf Java Anon Proxy} (also known as JAP or Web MIXes) uses fixed shared -routes known as \emph{cascades}. As with a single-hop proxy, this -approach aggregates users into larger anonymity sets, but again an -attacker only needs to observe both ends of the cascade to bridge all -the system's traffic. The Java Anon Proxy's design -calls for padding between end users and the head of the -cascade~\cite{web-mix}. However, it is not demonstrated whether the current -implementation's padding policy improves anonymity. - -{\bf PipeNet}~\cite{back01, pipenet}, another low-latency design proposed -around the same time as Onion Routing, gave -stronger anonymity but allowed a single user to shut -down the network by not sending. Systems like {\bf ISDN -mixes}~\cite{isdn-mixes} were designed for other environments with -different assumptions. -%XXX please can we fix this sentence to something less demeaning - -In P2P designs like {\bf Tarzan}~\cite{tarzan:ccs02} and -{\bf MorphMix}~\cite{morphmix:fc04}, all participants both generate -traffic and relay traffic for others. These systems aim to conceal -whether a given peer originated a request -or just relayed it from another peer. While Tarzan and MorphMix use -layered encryption as above, {\bf Crowds}~\cite{crowds-tissec} simply assumes -an adversary who cannot observe the initiator: it uses no public-key -encryption, so any node on a circuit can read users' traffic. - -{\bf Hordes}~\cite{hordes-jcs} is based on Crowds but also uses multicast -responses to hide the initiator. 
{\bf Herbivore}~\cite{herbivore} and -$\mbox{\bf P}^{\mathbf 5}$~\cite{p5} go even further, requiring broadcast. -These systems are designed primarily for communication among peers, -although Herbivore users can make external connections by -requesting a peer to serve as a proxy. - -Systems like {\bf Freedom} and the original Onion Routing build circuits -all at once, using a layered ``onion'' of public-key encrypted messages, -each layer of which provides session keys and the address of the -next server in the circuit. Tor as described herein, Tarzan, MorphMix, -{\bf Cebolla}~\cite{cebolla}, and Rennhard's {\bf Anonymity Network}~\cite{anonnet} -build circuits -in stages, extending them one hop at a time. -Section~\ref{subsubsec:constructing-a-circuit} describes how this -approach enables perfect forward secrecy. - -Circuit-based designs must choose which protocol layer -to anonymize. They may intercept IP packets directly, and -relay them whole (stripping the source address) along the -circuit~\cite{freedom2-arch,tarzan:ccs02}. Like -Tor, they may accept TCP streams and relay the data in those streams, -ignoring the breakdown of that data into TCP -segments~\cite{morphmix:fc04,anonnet}. Finally, like Crowds, they may accept -application-level protocols such as HTTP and relay the application -requests themselves. -Making this protocol-layer decision requires a compromise between flexibility -and anonymity. For example, a system that understands HTTP -can strip -identifying information from requests, can take advantage of caching -to limit the number of requests that leave the network, and can batch -or encode requests to minimize the number of connections. -On the other hand, an IP-level anonymizer can handle nearly any protocol, -even ones unforeseen by its designers (though these systems require -kernel-level modifications to some operating systems, and so are more -complex and less portable). 
TCP-level anonymity networks like Tor present -a middle approach: they are application neutral (so long as the -application supports, or can be tunneled across, TCP), but by treating -application connections as data streams rather than raw TCP packets, -they avoid the inefficiencies of tunneling TCP over -TCP. - -Distributed-trust anonymizing systems need to prevent attackers from -adding too many servers and thus compromising user paths. -Tor relies on a small set of well-known directory servers, run by -independent parties, to decide which nodes can -join. Tarzan and MorphMix allow unknown users to run servers, and use -a limited resource (like IP addresses) to prevent an attacker from -controlling too much of the network. Crowds suggests requiring -written, notarized requests from potential crowd members. - -Anonymous communication is essential for censorship-resistant -systems like Eternity~\cite{eternity}, Free~Haven~\cite{freehaven-berk}, -Publius~\cite{publius}, and Tangler~\cite{tangler}. Tor's rendezvous -points enable connections between mutually anonymous entities; they -are a building block for location-hidden servers, which are needed by -Eternity and Free~Haven. - -% didn't include rewebbers. No clear place to put them, so I'll leave -% them out for now. -RD - -\section{Design goals and assumptions} -\label{sec:assumptions} - -\noindent{\large\bf Goals}\\ -Like other low-latency anonymity designs, Tor seeks to frustrate -attackers from linking communication partners, or from linking -multiple communications to or from a single user. Within this -main goal, however, several considerations have directed -Tor's evolution. - -\textbf{Deployability:} The design must be deployed and used in the -real world. 
Thus it -must not be expensive to run (for example, by requiring more bandwidth -than volunteers are willing to provide); must not place a heavy -liability burden on operators (for example, by allowing attackers to -implicate onion routers in illegal activities); and must not be -difficult or expensive to implement (for example, by requiring kernel -patches, or separate proxies for every protocol). We also cannot -require non-anonymous parties (such as websites) -to run our software. (Our rendezvous point design does not meet -this goal for non-anonymous users talking to hidden servers, -however; see Section~\ref{sec:rendezvous}.) - -\textbf{Usability:} A hard-to-use system has fewer users---and because -anonymity systems hide users among users, a system with fewer users -provides less anonymity. Usability is thus not only a convenience: -it is a security requirement~\cite{econymics,back01}. Tor should -therefore not -require modifying familiar applications; should not introduce prohibitive -delays; -and should require as few configuration decisions -as possible. Finally, Tor should be easily implementable on all common -platforms; we cannot require users to change their operating system -to be anonymous. (Tor currently runs on Win32, Linux, -Solaris, BSD-style Unix, MacOS X, and probably others.) - -\textbf{Flexibility:} The protocol must be flexible and well-specified, -so Tor can serve as a test-bed for future research. -Many of the open problems in low-latency anonymity -networks, such as generating dummy traffic or preventing Sybil -attacks~\cite{sybil}, may be solvable independently from the issues -solved by -Tor. Hopefully future systems will not need to reinvent Tor's design. -%(But note that while a flexible design benefits researchers, -%there is a danger that differing choices of extensions will make users -%distinguishable. Experiments should be run on a separate network.) 
- -\textbf{Simple design:} The protocol's design and security -parameters must be well-understood. Additional features impose implementation -and complexity costs; adding unproven techniques to the design threatens -deployability, readability, and ease of security analysis. Tor aims to -deploy a simple and stable system that integrates the best accepted -approaches to protecting anonymity.\\ - -\noindent{\large\bf Non-goals}\label{subsec:non-goals}\\ -In favoring simple, deployable designs, we have explicitly deferred -several possible goals, either because they are solved elsewhere, or because -they are not yet solved. - -\textbf{Not peer-to-peer:} Tarzan and MorphMix aim to scale to completely -decentralized peer-to-peer environments with thousands of short-lived -servers, many of which may be controlled by an adversary. This approach -is appealing, but still has many open -problems~\cite{tarzan:ccs02,morphmix:fc04}. - -\textbf{Not secure against end-to-end attacks:} Tor does not claim -to completely solve end-to-end timing or intersection -attacks. Some approaches, such as having users run their own onion routers, -may help; -see Section~\ref{sec:maintaining-anonymity} for more discussion. - -\textbf{No protocol normalization:} Tor does not provide \emph{protocol -normalization} like Privoxy or the Anonymizer. If senders want anonymity from -responders while using complex and variable -protocols like HTTP, Tor must be layered with a filtering proxy such -as Privoxy to hide differences between clients, and expunge protocol -features that leak identity. -Note that by this separation Tor can also provide services that -are anonymous to the network yet authenticated to the responder, like -SSH. Similarly, Tor does not integrate -tunneling for non-stream-based protocols like UDP; this must be -provided by an external service if appropriate. - -\textbf{Not steganographic:} Tor does not try to conceal who is connected -to the network. 
- -\subsection{Threat Model} -\label{subsec:threat-model} - -A global passive adversary is the most commonly assumed threat when -analyzing theoretical anonymity designs. But like all practical -low-latency systems, Tor does not protect against such a strong -adversary. Instead, we assume an adversary who can observe some fraction -of network traffic; who can generate, modify, delete, or delay -traffic; who can operate onion routers of his own; and who can -compromise some fraction of the onion routers. - -In low-latency anonymity systems that use layered encryption, the -adversary's typical goal is to observe both the initiator and the -responder. By observing both ends, passive attackers can confirm a -suspicion that Alice is -talking to Bob if the timing and volume patterns of the traffic on the -connection are distinct enough; active attackers can induce timing -signatures on the traffic to force distinct patterns. Rather -than focusing on these \emph{traffic confirmation} attacks, -we aim to prevent \emph{traffic -analysis} attacks, where the adversary uses traffic patterns to learn -which points in the network he should attack. - -Our adversary might try to link an initiator Alice with her -communication partners, or try to build a profile of Alice's -behavior. He might mount passive attacks by observing the network edges -and correlating traffic entering and leaving the network---by -relationships in packet timing, volume, or externally visible -user-selected -options. The adversary can also mount active attacks by compromising -routers or keys; by replaying traffic; by selectively denying service -to trustworthy routers to move users to -compromised routers, or denying service to users to see if traffic -elsewhere in the -network stops; or by introducing patterns into traffic that can later be -detected. The adversary might subvert the directory servers to give users -differing views of network state. 
Additionally, he can try to decrease -the network's reliability by attacking nodes or by performing antisocial -activities from reliable nodes and trying to get them taken down---making -the network unreliable flushes users to other less anonymous -systems, where they may be easier to attack. We summarize -in Section~\ref{sec:attacks} how well the Tor design defends against -each of these attacks. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\section{The Tor Design} -\label{sec:design} - -The Tor network is an overlay network; each onion router (OR) -runs as a normal -user-level process without any special privileges. -Each onion router maintains a TLS~\cite{TLS} -connection to every other onion router. -%(We discuss alternatives to this clique-topology assumption in -%Section~\ref{sec:maintaining-anonymity}.) -% A subset of the ORs also act as -%directory servers, tracking which routers are in the network; -%see Section~\ref{subsec:dirservers} for directory server details. -Each user -runs local software called an onion proxy (OP) to fetch directories, -establish circuits across the network, -and handle connections from user applications. These onion proxies accept -TCP streams and multiplex them across the circuits. The onion -router on the other side -of the circuit connects to the requested destinations -and relays data. - -Each onion router maintains a long-term identity key and a short-term -onion key. The identity -key is used to sign TLS certificates, to sign the OR's \emph{router -descriptor} (a summary of its keys, address, bandwidth, exit policy, -and so on), and (by directory servers) to sign directories. %Changing -%the identity key of a router is considered equivalent to creating a -%new router. -The onion key is used to decrypt requests -from users to set up a circuit and negotiate ephemeral keys. -The TLS protocol also establishes a short-term link key when communicating -between ORs. 
Short-term keys are rotated periodically and -independently, to limit the impact of key compromise. - -Section~\ref{subsec:cells} presents the fixed-size -\emph{cells} that are the unit of communication in Tor. We describe -in Section~\ref{subsec:circuits} how circuits are -built, extended, truncated, and destroyed. Section~\ref{subsec:tcp} -describes how TCP streams are routed through the network. We address -integrity checking in Section~\ref{subsec:integrity-checking}, -and resource limiting in Section~\ref{subsec:rate-limit}. -Finally, -Section~\ref{subsec:congestion} talks about congestion control and -fairness issues. - -\subsection{Cells} -\label{subsec:cells} - -Onion routers communicate with one another, and with users' OPs, via -TLS connections with ephemeral keys. Using TLS conceals the data on -the connection with perfect forward secrecy, and prevents an attacker -from modifying data on the wire or impersonating an OR. - -Traffic passes along these connections in fixed-size cells. Each cell -is 512 bytes, %(but see Section~\ref{sec:conclusion} for a discussion of -%allowing large cells and small cells on the same network), -and consists of a header and a payload. The header includes a circuit -identifier (circID) that specifies which circuit the cell refers to -(many circuits can be multiplexed over the single TLS connection), and -a command to describe what to do with the cell's payload. (Circuit -identifiers are connection-specific: each circuit has a different -circID on each OP/OR or OR/OR connection it traverses.) -Based on their command, cells are either \emph{control} cells, which are -always interpreted by the node that receives them, or \emph{relay} cells, -which carry end-to-end stream data. The control cell commands are: -\emph{padding} (currently used for keepalive, but also usable for link -padding); \emph{create} or \emph{created} (used to set up a new circuit); -and \emph{destroy} (to tear down a circuit). 
- -Relay cells have an additional header (the relay header) at the front -of the payload, containing a streamID (stream identifier: many streams can -be multiplexed over a circuit); an end-to-end checksum for integrity -checking; the length of the relay payload; and a relay command. -The entire contents of the relay header and the relay cell payload -are encrypted or decrypted together as the relay cell moves along the -circuit, using the 128-bit AES cipher in counter mode to generate a -cipher stream. The relay commands are: \emph{relay -data} (for data flowing down the stream), \emph{relay begin} (to open a -stream), \emph{relay end} (to close a stream cleanly), \emph{relay -teardown} (to close a broken stream), \emph{relay connected} -(to notify the OP that a relay begin has succeeded), \emph{relay -extend} and \emph{relay extended} (to extend the circuit by a hop, -and to acknowledge), \emph{relay truncate} and \emph{relay truncated} -(to tear down only part of the circuit, and to acknowledge), \emph{relay -sendme} (used for congestion control), and \emph{relay drop} (used to -implement long-range dummies). -We give a visual overview of cell structure plus the details of relay -cell structure, and then describe each of these cell types and commands -in more detail below. - -%\begin{figure}[h] -%\unitlength=1cm -%\centering -%\begin{picture}(8.0,1.5) -%\put(4,.5){\makebox(0,0)[c]{\epsfig{file=cell-struct,width=7cm}}} -%\end{picture} -%\end{figure} - -\begin{figure}[h] -\centering -\mbox{\epsfig{figure=cell-struct,width=7cm}} -\end{figure} - -\subsection{Circuits and streams} -\label{subsec:circuits} - -Onion Routing originally built one circuit for each -TCP stream. Because building a circuit can take several tenths of a -second (due to public-key cryptography and network latency), -this design imposed high costs on applications like web browsing that -open many TCP streams. - -In Tor, each circuit can be shared by many TCP streams. 
To avoid -delays, users construct circuits preemptively. To limit linkability -among their streams, users' OPs build a new circuit -periodically if the previous ones have been used, -and expire old used circuits that no longer have any open streams. -OPs consider rotating to a new circuit once a minute: thus -even heavy users spend negligible time -building circuits, but a limited number of requests can be linked -to each other through a given exit node. Also, because circuits are built -in the background, OPs can recover from failed circuit creation -without harming user experience.\\ - -\begin{figure}[h] -\centering -\mbox{\epsfig{figure=interaction,width=8.75cm}} -\caption{Alice builds a two-hop circuit and begins fetching a web page.} -\label{fig:interaction} -\end{figure} - -\noindent{\large\bf Constructing a circuit}\label{subsubsec:constructing-a-circuit}\\ -%\subsubsection{Constructing a circuit} -A user's OP constructs circuits incrementally, negotiating a -symmetric key with each OR on the circuit, one hop at a time. To begin -creating a new circuit, the OP (call her Alice) sends a -\emph{create} cell to the first node in her chosen path (call him Bob). -(She chooses a new -circID $C_{AB}$ not currently used on the connection from her to Bob.) -The \emph{create} cell's -payload contains the first half of the Diffie-Hellman handshake -($g^x$), encrypted to the onion key of Bob. Bob -responds with a \emph{created} cell containing $g^y$ -along with a hash of the negotiated key $K=g^{xy}$. - -Once the circuit has been established, Alice and Bob can send one -another relay cells encrypted with the negotiated -key.\footnote{Actually, the negotiated key is used to derive two - symmetric keys: one for each direction.} More detail is given in -the next section. - -To extend the circuit further, Alice sends a \emph{relay extend} cell -to Bob, specifying the address of the next OR (call her Carol), and -an encrypted $g^{x_2}$ for her. 
Bob copies the half-handshake into a -\emph{create} cell, and passes it to Carol to extend the circuit. -(Bob chooses a new circID $C_{BC}$ not currently used on the connection -between him and Carol. Alice never needs to know this circID; only Bob -associates $C_{AB}$ on his connection with Alice to $C_{BC}$ on -his connection with Carol.) -When Carol responds with a \emph{created} cell, Bob wraps the payload -into a \emph{relay extended} cell and passes it back to Alice. Now -the circuit is extended to Carol, and Alice and Carol share a common key -$K_2 = g^{x_2 y_2}$. - -To extend the circuit to a third node or beyond, Alice -proceeds as above, always telling the last node in the circuit to -extend one hop further. - -This circuit-level handshake protocol achieves unilateral entity -authentication (Alice knows she's handshaking with the OR, but -the OR doesn't care who is opening the circuit---Alice uses no public key -and remains anonymous) and unilateral key authentication -(Alice and the OR agree on a key, and Alice knows only the OR learns -it). It also achieves forward -secrecy and key freshness. More formally, the protocol is as follows -(where $E_{PK_{Bob}}(\cdot)$ is encryption with Bob's public key, -$H$ is a secure hash function, and $|$ is concatenation): -\begin{equation*} -\begin{aligned} -\mathrm{Alice} \rightarrow \mathrm{Bob}&: E_{PK_{Bob}}(g^x) \\ -\mathrm{Bob} \rightarrow \mathrm{Alice}&: g^y, H(K | \mathrm{``handshake"}) \\ -\end{aligned} -\end{equation*} - -\noindent In the second step, Bob proves that it was he who received $g^x$, -and who chose $y$. We use PK encryption in the first step -(rather than, say, using the first two steps of STS, which has a -signature in the second step) because a single cell is too small to -hold both a public key and a signature. 
Preliminary analysis with the -NRL protocol analyzer~\cite{meadows96} shows this protocol to be -secure (including perfect forward secrecy) under the -traditional Dolev-Yao model.\\ - -\noindent{\large\bf Relay cells}\\ -%\subsubsection{Relay cells} -% -Once Alice has established the circuit (so she shares keys with each -OR on the circuit), she can send relay cells. -%Recall that every relay cell has a streamID that indicates to which -%stream the cell belongs. %This streamID allows a relay cell to be -%addressed to any OR on the circuit. -Upon receiving a relay -cell, an OR looks up the corresponding circuit, and decrypts the relay -header and payload with the session key for that circuit. -If the cell is headed away from Alice the OR then checks whether the -decrypted cell has a valid digest (as an optimization, the first -two bytes of the integrity check are zero, so in most cases we can avoid -computing the hash). -%is recognized---either because it -%corresponds to an open stream at this OR for the given circuit, or because -%it is the control streamID (zero). -If valid, it accepts the relay cell and processes it as described -below. Otherwise, -the OR looks up the circID and OR for the -next step in the circuit, replaces the circID as appropriate, and -sends the decrypted relay cell to the next OR. (If the OR at the end -of the circuit receives an unrecognized relay cell, an error has -occurred, and the circuit is torn down.) - -OPs treat incoming relay cells similarly: they iteratively unwrap the -relay header and payload with the session keys shared with each -OR on the circuit, from the closest to farthest. -If at any stage the digest is valid, the cell must have -originated at the OR whose encryption has just been removed. - -To construct a relay cell addressed to a given OR, Alice assigns the -digest, and then iteratively -encrypts the cell payload (that is, the relay header and payload) with -the symmetric key of each hop up to that OR. 
Because the digest is -encrypted to a different value at each step, only at the targeted OR -will it have a meaningful value.\footnote{ - % Should we just say that 2^56 is itself negligible? - % Assuming 4-hop circuits with 10 streams per hop, there are 33 - % possible bad streamIDs before the last circuit. This still - % gives an error only once every 2 million terabytes (approx). -With 48 bits of digest per cell, the probability of an accidental -collision is far lower than the chance of hardware failure.} -This \emph{leaky pipe} circuit topology -allows Alice's streams to exit at different ORs on a single circuit. -Alice may choose different exit points because of their exit policies, -or to keep the ORs from knowing that two streams -originate from the same person. - -When an OR later replies to Alice with a relay cell, it -encrypts the cell's relay header and payload with the single key it -shares with Alice, and sends the cell back toward Alice along the -circuit. Subsequent ORs add further layers of encryption as they -relay the cell back to Alice. - -To tear down a circuit, Alice sends a \emph{destroy} control -cell. Each OR in the circuit receives the \emph{destroy} cell, closes -all streams on that circuit, and passes a new \emph{destroy} cell -forward. But just as circuits are built incrementally, they can also -be torn down incrementally: Alice can send a \emph{relay -truncate} cell to a single OR on a circuit. That OR then sends a -\emph{destroy} cell forward, and acknowledges with a -\emph{relay truncated} cell. Alice can then extend the circuit to -different nodes, without signaling to the intermediate nodes (or -a limited observer) that she has changed her circuit. -Similarly, if a node on the circuit goes down, the adjacent -node can send a \emph{relay truncated} cell back to Alice. Thus the -``break a node and see which circuits go down'' -attack~\cite{freedom21-security} is weakened. 
- -\subsection{Opening and closing streams} -\label{subsec:tcp} - -When Alice's application wants a TCP connection to a given -address and port, it asks the OP (via SOCKS) to make the -connection. The OP chooses the newest open circuit (or creates one if -needed), and chooses a suitable OR on that circuit to be the -exit node (usually the last node, but maybe others due to exit policy -conflicts; see Section~\ref{subsec:exitpolicies}). The OP then opens -the stream by sending a \emph{relay begin} cell to the exit node, -using a new random streamID. Once the -exit node connects to the remote host, it responds -with a \emph{relay connected} cell. Upon receipt, the OP sends a -SOCKS reply to notify the application of its success. The OP -now accepts data from the application's TCP stream, packaging it into -\emph{relay data} cells and sending those cells along the circuit to -the chosen OR. - -There's a catch to using SOCKS, however---some applications pass the -alphanumeric hostname to the Tor client, while others resolve it into -an IP address first and then pass the IP address to the Tor client. If -the application does DNS resolution first, Alice thereby reveals her -destination to the remote DNS server, rather than sending the hostname -through the Tor network to be resolved at the far end. Common applications -like Mozilla and SSH have this flaw. - -With Mozilla, the flaw is easy to address: the filtering HTTP -proxy called Privoxy gives a hostname to the Tor client, so Alice's -computer never does DNS resolution. -But a portable general solution, such as is needed for -SSH, is -an open problem. Modifying or replacing the local nameserver -can be invasive, brittle, and unportable. Forcing the resolver -library to prefer TCP rather than UDP is hard, and also has -portability problems. Dynamically intercepting system calls to the -resolver library seems a promising direction. 
We could also provide -a tool similar to \emph{dig} to perform a private lookup through the -Tor network. Currently, we encourage the use of privacy-aware proxies -like Privoxy wherever possible. - -Closing a Tor stream is analogous to closing a TCP stream: it uses a -two-step handshake for normal operation, or a one-step handshake for -errors. If the stream closes abnormally, the adjacent node simply sends a -\emph{relay teardown} cell. If the stream closes normally, the node sends -a \emph{relay end} cell down the circuit, and the other side responds with -its own \emph{relay end} cell. Because -all relay cells use layered encryption, only the destination OR knows -that a given relay cell is a request to close a stream. This two-step -handshake allows Tor to support TCP-based applications that use half-closed -connections. -% such as broken HTTP clients that close their side of the -%stream after writing but are still willing to read. - -\subsection{Integrity checking on streams} -\label{subsec:integrity-checking} - -Because the old Onion Routing design used a stream cipher without integrity -checking, traffic was -vulnerable to a malleability attack: though the attacker could not -decrypt cells, any changes to encrypted data -would create corresponding changes to the data leaving the network. -This weakness allowed an adversary who could guess the encrypted content -to change a padding cell to a destroy -cell; change the destination address in a \emph{relay begin} cell to the -adversary's webserver; or change an FTP command from -{\tt dir} to {\tt rm~*}. (Even an external -adversary could do this, because the link encryption similarly used a -stream cipher.) - -Because Tor uses TLS on its links, external adversaries cannot modify -data. Addressing the insider malleability attack, however, is -more complex. 
- -We could do integrity checking of the relay cells at each hop, either -by including hashes or by using an authenticating cipher mode like -EAX~\cite{eax}, but there are some problems. First, these approaches -impose a message-expansion overhead at each hop, and so we would have to -either leak the path length or waste bytes by padding to a maximum -path length. Second, these solutions can only verify traffic coming -from Alice: ORs would not be able to produce suitable hashes for -the intermediate hops, since the ORs on a circuit do not know the -other ORs' session keys. Third, we have already accepted that our design -is vulnerable to end-to-end timing attacks; so tagging attacks performed -within the circuit provide no additional information to the attacker. - -Thus, we check integrity only at the edges of each stream. (Remember that -in our leaky-pipe circuit topology, a stream's edge could be any hop -in the circuit.) When Alice -negotiates a key with a new hop, they each initialize a SHA-1 -digest with a derivative of that key, -thus beginning with randomness that only the two of them know. -Then they each incrementally add to the SHA-1 digest the contents of -all relay cells they create, and include with each relay cell the -first four bytes of the current digest. Each also keeps a SHA-1 -digest of data received, to verify that the received hashes are correct. - -To be sure of removing or modifying a cell, the attacker must be able -to deduce the current digest state (which depends on all -traffic between Alice and Bob, starting with their negotiated key). -Attacks on SHA-1 where the adversary can incrementally add to a hash -to produce a new valid hash don't work, because all hashes are -end-to-end encrypted across the circuit. The computational overhead -of computing the digests is minimal compared to doing the AES -encryption performed at each hop of the circuit. 
We use only four -bytes per cell to minimize overhead; the chance that an adversary will -correctly guess a valid hash -%, plus the payload of the current cell, -is -acceptably low, given that the OP or OR tear down the circuit if they -receive a bad hash. - -\subsection{Rate limiting and fairness} -\label{subsec:rate-limit} - -Volunteers are more willing to run services that can limit -their bandwidth usage. To accommodate them, Tor servers use a -token bucket approach~\cite{tannenbaum96} to -enforce a long-term average rate of incoming bytes, while still -permitting short-term bursts above the allowed bandwidth. -% Current bucket sizes are set to ten seconds' worth of traffic. - -%Further, we want to avoid starving any Tor streams. Entire circuits -%could starve if we read greedily from connections and one connection -%uses all the remaining bandwidth. We solve this by dividing the number -%of tokens in the bucket by the number of connections that want to read, -%and reading at most that number of bytes from each connection. We iterate -%this procedure until the number of tokens in the bucket is under some -%threshold (currently 10KB), at which point we greedily read from connections. - -Because the Tor protocol outputs about the same number of bytes as it -takes in, it is sufficient in practice to limit only incoming bytes. -With TCP streams, however, the correspondence is not one-to-one: -relaying a single incoming byte can require an entire 512-byte cell. -(We can't just wait for more bytes, because the local application may -be awaiting a reply.) Therefore, we treat this case as if the entire -cell size had been read, regardless of the cell's fullness. - -Further, inspired by Rennhard et al.'s design in~\cite{anonnet}, a -circuit's edges can heuristically distinguish interactive streams from bulk -streams by comparing the frequency with which they supply cells. 
We can -provide good latency for interactive streams by giving them preferential -service, while still giving good overall throughput to the bulk -streams. Such preferential treatment presents a possible end-to-end -attack, but an adversary observing both -ends of the stream can already learn this information through timing -attacks. - -\subsection{Congestion control} -\label{subsec:congestion} - -Even with bandwidth rate limiting, we still need to worry about -congestion, either accidental or intentional. If enough users choose the -same OR-to-OR connection for their circuits, that connection can become -saturated. For example, an attacker could send a large file -through the Tor network to a webserver he runs, and then -refuse to read any of the bytes at the webserver end of the -circuit. Without some congestion control mechanism, these bottlenecks -can propagate back through the entire network. We don't need to -reimplement full TCP windows (with sequence numbers, -the ability to drop cells when we're full and retransmit later, and so -on), -because TCP already guarantees in-order delivery of each -cell. -%But we need to investigate further the effects of the current -%parameters on throughput and latency, while also keeping privacy in mind; -%see Section~\ref{sec:maintaining-anonymity} for more discussion. -We describe our response below. - -\textbf{Circuit-level throttling:} -To control a circuit's bandwidth usage, each OR keeps track of two -windows. The \emph{packaging window} tracks how many relay data cells the OR is -allowed to package (from incoming TCP streams) for transmission back to the OP, -and the \emph{delivery window} tracks how many relay data cells it is willing -to deliver to TCP streams outside the network. Each window is initialized -(say, to 1000 data cells). When a data cell is packaged or delivered, -the appropriate window is decremented. 
When an OR has received enough -data cells (currently 100), it sends a \emph{relay sendme} cell towards the OP, -with streamID zero. When an OR receives a \emph{relay sendme} cell with -streamID zero, it increments its packaging window. Either of these cells -increments the corresponding window by 100. If the packaging window -reaches 0, the OR stops reading from TCP connections for all streams -on the corresponding circuit, and sends no more relay data cells until -receiving a \emph{relay sendme} cell. - -The OP behaves identically, except that it must track a packaging window -and a delivery window for every OR in the circuit. If a packaging window -reaches 0, it stops reading from streams destined for that OR. - -\textbf{Stream-level throttling}: -The stream-level congestion control mechanism is similar to the -circuit-level mechanism. ORs and OPs use \emph{relay sendme} cells -to implement end-to-end flow control for individual streams across -circuits. Each stream begins with a packaging window (currently 500 cells), -and increments the window by a fixed value (50) upon receiving a \emph{relay -sendme} cell. Rather than always returning a \emph{relay sendme} cell as soon -as enough cells have arrived, the stream-level congestion control also -has to check whether data has been successfully flushed onto the TCP -stream; it sends the \emph{relay sendme} cell only when the number of bytes pending -to be flushed is under some threshold (currently 10 cells' worth). - -%% Maybe omit this next paragraph. -NM -%Currently, non-data relay cells do not affect the windows. Thus we -%avoid potential deadlock issues, for example, arising because a stream -%can't send a \emph{relay sendme} cell when its packaging window is empty. - -These arbitrarily chosen parameters seem to give tolerable throughput -and delay; see Section~\ref{sec:in-the-wild}. 
- -\section{Rendezvous Points and hidden services} -\label{sec:rendezvous} - -Rendezvous points are a building block for \emph{location-hidden -services} (also known as \emph{responder anonymity}) in the Tor -network. Location-hidden services allow Bob to offer a TCP -service, such as a webserver, without revealing his IP address. -This type of anonymity protects against distributed DoS attacks: -attackers are forced to attack the onion routing network -because they do not know Bob's IP address. - -Our design for location-hidden servers has the following goals. -\textbf{Access-control:} Bob needs a way to filter incoming requests, -so an attacker cannot flood Bob simply by making many connections to him. -\textbf{Robustness:} Bob should be able to maintain a long-term pseudonymous -identity even in the presence of router failure. Bob's service must -not be tied to a single OR, and Bob must be able to migrate his service -across ORs. \textbf{Smear-resistance:} -A social attacker -should not be able to ``frame'' a rendezvous router by -offering an illegal or disreputable location-hidden service and -making observers believe the router created that service. -\textbf{Application-transparency:} Although we require users -to run special software to access location-hidden servers, we must not -require them to modify their applications. - -We provide location-hiding for Bob by allowing him to advertise -several onion routers (his \emph{introduction points}) as contact -points. He may do this on any robust efficient -key-value lookup system with authenticated updates, such as a -distributed hash table (DHT) like CFS~\cite{cfs:sosp01}.\footnote{ -Rather than rely on an external infrastructure, the Onion Routing network -can run the lookup service itself. Our current implementation provides a -simple lookup system on the -directory servers.} Alice, the client, chooses an OR as her -\emph{rendezvous point}. 
She connects to one of Bob's introduction -points, informs him of her rendezvous point, and then waits for him -to connect to the rendezvous point. This extra level of indirection -helps Bob's introduction points avoid problems associated with serving -unpopular files directly (for example, if Bob serves -material that the introduction point's community finds objectionable, -or if Bob's service tends to get attacked by network vandals). -The extra level of indirection also allows Bob to respond to some requests -and ignore others. - -\subsection{Rendezvous points in Tor} - -The following steps are -%We give an overview of the steps of a rendezvous. These are -performed on behalf of Alice and Bob by their local OPs; -application integration is described more fully below. - -\begin{tightlist} -\item Bob generates a long-term public key pair to identify his service. -\item Bob chooses some introduction points, and advertises them on - the lookup service, signing the advertisement with his private key. He - can add more later. -\item Bob builds a circuit to each of his introduction points, and tells - them to wait for requests. -\item Alice learns about Bob's service out of band (perhaps Bob told her, - or she found it on a website). She retrieves the details of Bob's - service from the lookup service. If Alice wants to access Bob's - service anonymously, she must connect to the lookup service via Tor. -\item Alice chooses an OR as the rendezvous point (RP) for her connection to - Bob's service. She builds a circuit to the RP, and gives it a - randomly chosen ``rendezvous cookie'' to recognize Bob. -\item Alice opens an anonymous stream to one of Bob's introduction - points, and gives it a message (encrypted with Bob's public key) - telling it about herself, - her RP and rendezvous cookie, and the - start of a DH - handshake. The introduction point sends the message to Bob.
-\item If Bob wants to talk to Alice, he builds a circuit to Alice's - RP and sends the rendezvous cookie, the second half of the DH - handshake, and a hash of the session - key they now share. By the same argument as in - Section~\ref{subsubsec:constructing-a-circuit}, Alice knows she - shares the key only with Bob. -\item The RP connects Alice's circuit to Bob's. Note that the RP can't - recognize Alice, Bob, or the data they transmit. -\item Alice sends a \emph{relay begin} cell along the circuit. It - arrives at Bob's OP, which connects to Bob's - webserver. -\item An anonymous stream has been established, and Alice and Bob - communicate as normal. -\end{tightlist} - -When establishing an introduction point, Bob provides the onion router -with the public key identifying his service. Bob signs his -messages, so others cannot usurp his introduction point -in the future. He uses the same public key to establish the other -introduction points for his service, and periodically refreshes his -entry in the lookup service. - -The message that Alice gives -the introduction point includes a hash of Bob's public key % to identify -%the service, along with -and an optional initial authorization token (the -introduction point can do prescreening, for example to block replays). Her -message to Bob may include an end-to-end authorization token so Bob -can choose whether to respond. -The authorization tokens can be used to provide selective access: -important users can get uninterrupted access. -%important users get tokens to ensure uninterrupted access. %to the -%service. -During normal situations, Bob's service might simply be offered -directly from mirrors, while Bob gives out tokens to high-priority users. If -the mirrors are knocked down, -%by distributed DoS attacks or even -%physical attack, -those users can switch to accessing Bob's service via -the Tor rendezvous system.
- -Bob's introduction points are themselves subject to DoS---he must -open many introduction points or risk such an attack. -He can provide selected users with a current list or future schedule of -unadvertised introduction points; -this is most practical -if there is a stable and large group of introduction points -available. Bob could also give secret public keys -for consulting the lookup service. All of these approaches -limit exposure even when -some selected users collude in the DoS\@. - -\subsection{Integration with user applications} - -Bob configures his onion proxy to know the local IP address and port of his -service, a strategy for authorizing clients, and his public key. The onion -proxy anonymously publishes a signed statement of Bob's -public key, an expiration time, and -the current introduction points for his service onto the lookup service, -indexed -by the hash of his public key. Bob's webserver is unmodified, -and doesn't even know that it's hidden behind the Tor network. - -Alice's applications also work unchanged---her client interface -remains a SOCKS proxy. We encode all of the necessary information -into the fully qualified domain name (FQDN) Alice uses when establishing her -connection. Location-hidden services use a virtual top level domain -called {\tt .onion}: thus hostnames take the form {\tt x.y.onion} where -{\tt x} is the authorization cookie and {\tt y} encodes the hash of -the public key. Alice's onion proxy -examines addresses; if they're destined for a hidden server, it decodes -the key and starts the rendezvous as described above. - -\subsection{Previous rendezvous work} -%XXXX Should this get integrated into the earlier related work section? -NM - -Rendezvous points in low-latency anonymity systems were first -described for use in ISDN telephony~\cite{jerichow-jsac98,isdn-mixes}. 
-Later low-latency designs used rendezvous points for hiding location -of mobile phones and low-power location -trackers~\cite{federrath-ih96,reed-protocols97}. Rendezvous for -anonymizing low-latency -Internet connections was suggested in early Onion Routing -work~\cite{or-ih96}, but the first published design was by Ian -Goldberg~\cite{ian-thesis}. His design differs from -ours in three ways. First, Goldberg suggests that Alice should manually -hunt down a current location of the service via Gnutella; our approach -makes lookup transparent to the user, as well as faster and more robust. -Second, in Tor the client and server negotiate session keys -with Diffie-Hellman, so plaintext is not exposed even at the rendezvous -point. Third, -our design minimizes the exposure from running the -service, to encourage volunteers to offer introduction and rendezvous -services. Tor's introduction points do not output any bytes to the -clients; the rendezvous points don't know the client or the server, -and can't read the data being transmitted. The indirection scheme is -also designed to include authentication/authorization---if Alice doesn't -include the right cookie with her request for service, Bob need not even -acknowledge his existence. - -\section{Other design decisions} -\label{sec:other-design} - -\subsection{Denial of service} -\label{subsec:dos} - -Providing Tor as a public service creates many opportunities for -denial-of-service attacks against the network. While -flow control and rate limiting (discussed in -Section~\ref{subsec:congestion}) prevent users from consuming more -bandwidth than routers are willing to provide, opportunities remain for -users to -consume more network resources than their fair share, or to render the -network unusable for others. - -First of all, there are several CPU-consuming denial-of-service -attacks wherein an attacker can force an OR to perform expensive -cryptographic operations. 
For example, an attacker can -%\emph{create} cell full of junk bytes can force an OR to perform an RSA -%decrypt. -%Similarly, an attacker can -fake the start of a TLS handshake, forcing the OR to carry out its -(comparatively expensive) half of the handshake at no real computational -cost to the attacker. - -We have not yet implemented any defenses for these attacks, but several -approaches are possible. First, ORs can -require clients to solve a puzzle~\cite{puzzles-tls} while beginning new -TLS handshakes or accepting \emph{create} cells. So long as these -tokens are easy to verify and computationally expensive to produce, this -approach limits the attack multiplier. Additionally, ORs can limit -the rate at which they accept \emph{create} cells and TLS connections, -so that -the computational work of processing them does not drown out the -symmetric cryptography operations that keep cells -flowing. This rate limiting could, however, allow an attacker -to slow down other users when they build new circuits. - -% What about link-to-link rate limiting? - -Adversaries can also attack the Tor network's hosts and network -links. Disrupting a single circuit or link breaks all streams passing -along that part of the circuit. Users similarly lose service -when a router crashes or its operator restarts it. The current -Tor design treats such attacks as intermittent network failures, and -depends on users and applications to respond or recover as appropriate. A -future design could use an end-to-end TCP-like acknowledgment protocol, -so no streams are lost unless the entry or exit point is -disrupted. This solution would require more buffering at the network -edges, however, and the performance and anonymity implications from this -extra complexity still require investigation. - -\subsection{Exit policies and abuse} -\label{subsec:exitpolicies} - -% originally, we planned to put the "users only know the hostname, -% not the IP, but exit policies are by IP" problem here too. 
Not -% worth putting in the submission, but worth thinking about putting -% in sometime somehow. -RD - -Exit abuse is a serious barrier to wide-scale Tor deployment. Anonymity -presents would-be vandals and abusers with an opportunity to hide -the origins of their activities. Attackers can harm the Tor network by -implicating exit servers for their abuse. Also, applications that commonly -use IP-based authentication (such as institutional mail or webservers) -can be fooled by the fact that anonymous connections appear to originate -at the exit OR. - -We stress that Tor does not enable any new class of abuse. Spammers -and other attackers already have access to thousands of misconfigured -systems worldwide, and the Tor network is far from the easiest way -to launch attacks. -%Indeed, because of its limited -%anonymity, Tor is probably not a good way to commit crimes. -But because the -onion routers can be mistaken for the originators of the abuse, -and the volunteers who run them may not want to deal with the hassle of -explaining anonymity networks to irate administrators, we must block or limit -abuse through the Tor network. - -To mitigate abuse issues, each onion router's \emph{exit policy} -describes to which external addresses and ports the router will -connect. On one end of the spectrum are \emph{open exit} -nodes that will connect anywhere. On the other end are \emph{middleman} -nodes that only relay traffic to other Tor nodes, and \emph{private exit} -nodes that only connect to a local host or network. A private -exit can allow a client to connect to a given host or -network more securely---an external adversary cannot eavesdrop on traffic -between the private exit and the final destination, and so is less sure of -Alice's destination and activities. Most onion routers in the current -network function as -\emph{restricted exits} that permit connections to the world at large, -but prevent access to certain abuse-prone addresses and services such -as SMTP.
-The OR might also be able to authenticate clients to -prevent exit abuse without harming anonymity~\cite{or-discex00}. - -%The abuse issues on closed (e.g. military) networks are different -%from the abuse on open networks like the Internet. While these IP-based -%access controls are still commonplace on the Internet, on closed networks, -%nearly all participants will be honest, and end-to-end authentication -%can be assumed for important traffic. - -Many administrators use port restrictions to support only a -limited set of services, such as HTTP, SSH, or AIM. -This is not a complete solution, of course, since abuse opportunities for these -protocols are still well known. - -We have not yet encountered any abuse in the deployed network, but if -we do we should consider using proxies to clean traffic for certain -protocols as it leaves the network. For example, much abusive HTTP -behavior (such as exploiting buffer overflows or well-known script -vulnerabilities) can be detected in a straightforward manner. -Similarly, one could run automatic spam filtering software (such as -SpamAssassin) on email exiting the OR network. - -ORs may also rewrite exiting traffic to append -headers or other information indicating that the traffic has passed -through an anonymity service. This approach is commonly used -by email-only anonymity systems. ORs can also -run on servers with hostnames like {\tt anonymous} to further -alert abuse targets to the nature of the anonymous traffic. - -A mixture of open and restricted exit nodes allows the most -flexibility for volunteers running servers. But while having many -middleman nodes provides a large and robust network, -having only a few exit nodes reduces the number of points -an adversary needs to monitor for traffic analysis, and places a -greater burden on the exit nodes. 
This tension can be seen in the -Java Anon Proxy -cascade model, wherein only one node in each cascade needs to handle -abuse complaints---but an adversary only needs to observe the entry -and exit of a cascade to perform traffic analysis on all that -cascade's users. The hydra model (many entries, few exits) presents a -different compromise: only a few exit nodes are needed, but an -adversary needs to work harder to watch all the clients; see -Section~\ref{sec:conclusion}. - -Finally, we note that exit abuse must not be dismissed as a peripheral -issue: when a system's public image suffers, it can reduce the number -and diversity of that system's users, and thereby reduce the anonymity -of the system itself. Like usability, public perception is a -security parameter. Sadly, preventing abuse of open exit nodes is an -unsolved problem, and will probably remain an arms race for the -foreseeable future. The abuse problems faced by Princeton's CoDeeN -project~\cite{darkside} give us a glimpse of likely issues. - -\subsection{Directory Servers} -\label{subsec:dirservers} - -First-generation Onion Routing designs~\cite{freedom2-arch,or-jsac98} used -in-band network status updates: each router flooded a signed statement -to its neighbors, which propagated it onward. But anonymizing networks -have different security goals than typical link-state routing protocols. -For example, delays (accidental or intentional) -that can cause different parts of the network to have different views -of link-state and topology are not only inconvenient: they give -attackers an opportunity to exploit differences in client knowledge. -We also worry about attacks to deceive a -client about the router membership list, topology, or current network -state. Such \emph{partitioning attacks} on client knowledge help an -adversary to efficiently deploy resources -against a target~\cite{minion-design}. 
- -Tor uses a small group of redundant, well-known onion routers to -track changes in network topology and node state, including keys and -exit policies. Each such \emph{directory server} acts as an HTTP -server, so clients can fetch current network state -and router lists, and so other ORs can upload -state information. Onion routers periodically publish signed -statements of their state to each directory server. The directory servers -combine this information with their own views of network liveness, -and generate a signed description (a \emph{directory}) of the entire -network state. Client software is -pre-loaded with a list of the directory servers and their keys, -to bootstrap each client's view of the network. -% XXX this means that clients will be forced to upgrade as the -% XXX dirservers change or get compromised. argue that this is ok. - -When a directory server receives a signed statement for an OR, it -checks whether the OR's identity key is recognized. Directory -servers do not advertise unrecognized ORs---if they did, -an adversary could take over the network by creating many -servers~\cite{sybil}. Instead, new nodes must be approved by the -directory -server administrator before they are included. Mechanisms for automated -node approval are an area of active research, and are discussed more -in Section~\ref{sec:maintaining-anonymity}. - -Of course, a variety of attacks remain. An adversary who controls -a directory server can track clients by providing them different -information---perhaps by listing only nodes under its control, or by -informing only certain clients about a given node. Even an external -adversary can exploit differences in client knowledge: clients who use -a node listed on one directory server but not the others are vulnerable. - -Thus these directory servers must be synchronized and redundant, so -that they can agree on a common directory. 
Clients should only trust -this directory if it is signed by a threshold of the directory -servers. - -The directory servers in Tor are modeled after those in -Mixminion~\cite{minion-design}, but our situation is easier. First, -we make the -simplifying assumption that all participants agree on the set of -directory servers. Second, while Mixminion needs to predict node -behavior, Tor only needs a threshold consensus of the current -state of the network. Third, we assume that we can fall back to the -human administrators to discover and resolve problems when a consensus -directory cannot be reached. Since there are relatively few directory -servers (currently 3, but we expect as many as 9 as the network scales), -we can afford operations like broadcast to simplify the consensus-building -protocol. - -To avoid attacks where a router connects to all the directory servers -but refuses to relay traffic from other routers, the directory servers -must also build circuits and use them to anonymously test router -reliability~\cite{mix-acc}. Unfortunately, this defense is not yet -designed or -implemented. - -Using directory servers is simpler and more flexible than flooding. -Flooding is expensive, and complicates the analysis when we -start experimenting with non-clique network topologies. Signed -directories can be cached by other -onion routers, -so directory servers are not a performance -bottleneck when we have many users, and do not aid traffic analysis by -forcing clients to announce their existence to any -central point. - -\section{Attacks and Defenses} -\label{sec:attacks} - -Below we summarize a variety of attacks, and discuss how well our -design withstands them.\\ - -\noindent{\large\bf Passive attacks}\\ -\emph{Observing user traffic patterns.} Observing a user's connection -will not reveal her destination or data, but it will -reveal traffic patterns (both sent and received). 
Profiling via user -connection patterns requires further processing, because multiple -application streams may be operating simultaneously or in series over -a single circuit. - -\emph{Observing user content.} While content at the user end is encrypted, -connections to responders may not be (indeed, the responding website -itself may be hostile). While filtering content is not a primary goal -of Onion Routing, Tor can directly use Privoxy and related -filtering services to anonymize application data streams. - -\emph{Option distinguishability.} We allow clients to choose -configuration options. For example, clients concerned about request -linkability should rotate circuits more often than those concerned -about traceability. Allowing choice may attract users with different -%There is economic incentive to attract users by -%allowing this choice; -needs; but clients who are -in the minority may lose more anonymity by appearing distinct than they -gain by optimizing their behavior~\cite{econymics}. - -\emph{End-to-end timing correlation.} Tor only minimally hides -such correlations. An attacker watching patterns of -traffic at the initiator and the responder will be -able to confirm the correspondence with high probability. The -greatest protection currently available against such confirmation is to hide -the connection between the onion proxy and the first Tor node, -by running the OP on the Tor node or behind a firewall. This approach -requires an observer to separate traffic originating at the onion -router from traffic passing through it: a global observer can do this, -but it might be beyond a limited observer's capabilities. - -\emph{End-to-end size correlation.} Simple packet counting -will also be effective in confirming -endpoints of a stream. However, even without padding, we may have some -limited protection: the leaky pipe topology means different numbers -of packets may enter one end of a circuit than exit at the other. 
- -\emph{Website fingerprinting.} All the effective passive -attacks above are traffic confirmation attacks, -which puts them outside our design goals. There is also -a passive traffic analysis attack that is potentially effective. -Rather than searching exit connections for timing and volume -correlations, the adversary may build up a database of -``fingerprints'' containing file sizes and access patterns for -targeted websites. He can later confirm a user's connection to a given -site simply by consulting the database. This attack has -been shown to be effective against SafeWeb~\cite{hintz-pet02}. -It may be less effective against Tor, since -streams are multiplexed within the same circuit, and -fingerprinting will be limited to -the granularity of cells (currently 512 bytes). Additional -defenses could include -larger cell sizes, padding schemes to group websites -into large sets, and link -padding or long-range dummies.\footnote{Note that this fingerprinting -attack should not be confused with the much more complicated latency -attacks of~\cite{back01}, which require a fingerprint of the latencies -of all circuits through the network, combined with those from the -network edges to the target user and the responder website.}\\ - -\noindent{\large\bf Active attacks}\\ -\emph{Compromise keys.} An attacker who learns the TLS session key can -see control cells and encrypted relay cells on every circuit on that -connection; learning a circuit -session key lets him unwrap one layer of the encryption. An attacker -who learns an OR's TLS private key can impersonate that OR for the TLS -key's lifetime, but he must -also learn the onion key to decrypt \emph{create} cells (and because of -perfect forward secrecy, he cannot hijack already established circuits -without also compromising their session keys). Periodic key rotation -limits the window of opportunity for these attacks. 
On the other hand, -an attacker who learns a node's identity key can replace that node -indefinitely by sending new forged descriptors to the directory servers. - -\emph{Iterated compromise.} A roving adversary who can -compromise ORs (by system intrusion, legal coercion, or extralegal -coercion) could march down the circuit compromising the -nodes until he reaches the end. Unless the adversary can complete -this attack within the lifetime of the circuit, however, the ORs -will have discarded the necessary information before the attack can -be completed. (Thanks to the perfect forward secrecy of session -keys, the attacker cannot force nodes to decrypt recorded -traffic once the circuits have been closed.) Additionally, building -circuits that cross jurisdictions can make legal coercion -harder---this phenomenon is commonly called ``jurisdictional -arbitrage.'' The Java Anon Proxy project recently experienced the -need for this approach, when -a German court forced them to add a backdoor to -their nodes~\cite{jap-backdoor}. - -\emph{Run a recipient.} An adversary running a webserver -trivially learns the timing patterns of users connecting to it, and -can introduce arbitrary patterns in its responses. -End-to-end attacks become easier: if the adversary can induce -users to connect to his webserver (perhaps by advertising -content targeted to those users), he now holds one end of their -connection. There is also a danger that application -protocols and associated programs can be induced to reveal information -about the initiator. Tor depends on Privoxy and similar protocol cleaners -to solve this latter problem. - -\emph{Run an onion proxy.} It is expected that end users will -nearly always run their own local onion proxy. However, in some -settings, it may be necessary for the proxy to run -remotely---typically, in institutions that want -to monitor the activity of those connecting to the proxy. 
-Compromising an onion proxy compromises all future connections -through it. - -\emph{DoS non-observed nodes.} An observer who can only watch some -of the Tor network can increase the value of this traffic -by attacking non-observed nodes to shut them down, reduce -their reliability, or persuade users that they are not trustworthy. -The best defense here is robustness. - -\emph{Run a hostile OR.} In addition to being a local observer, -an isolated hostile node can create circuits through itself, or alter -traffic patterns to affect traffic at other nodes. Nonetheless, a hostile -node must be immediately adjacent to both endpoints to compromise the -anonymity of a circuit. If an adversary can -run multiple ORs, and can persuade the directory servers -that those ORs are trustworthy and independent, then occasionally -some user will choose one of those ORs for the start and another -as the end of a circuit. If an adversary -controls $m>1$ of $N$ nodes, he can correlate at most -$\left(\frac{m}{N}\right)^2$ of the traffic---although an -adversary -could still attract a disproportionately large amount of traffic -by running an OR with a permissive exit policy, or by -degrading the reliability of other routers. - -\emph{Introduce timing into messages.} This is simply a stronger -version of passive timing attacks already discussed earlier. - -\emph{Tagging attacks.} A hostile node could ``tag'' a -cell by altering it. If the -stream were, for example, an unencrypted request to a Web site, -the garbled content coming out at the appropriate time would confirm -the association. However, integrity checks on cells prevent -this attack. - -\emph{Replace contents of unauthenticated protocols.} When -relaying an unauthenticated protocol like HTTP, a hostile exit node -can impersonate the target server. Clients -should prefer protocols with end-to-end authentication. - -\emph{Replay attacks.} Some anonymity protocols are vulnerable -to replay attacks. 
Tor is not; replaying one side of a handshake -will result in a different negotiated session key, and so the rest -of the recorded session can't be used. - -\emph{Smear attacks.} An attacker could use the Tor network for -socially disapproved acts, to bring the -network into disrepute and get its operators to shut it down. -Exit policies reduce the possibilities for abuse, but -ultimately the network requires volunteers who can tolerate -some political heat. - -\emph{Distribute hostile code.} An attacker could trick users -into running subverted Tor software that did not, in fact, anonymize -their connections---or worse, could trick ORs into running weakened -software that provided users with less anonymity. We address this -problem (but do not solve it completely) by signing all Tor releases -with an official public key, and including an entry in the directory -that lists which versions are currently believed to be secure. To -prevent an attacker from subverting the official release itself -(through threats, bribery, or insider attacks), we provide all -releases in source code form, encourage source audits, and -frequently warn our users never to trust any software (even from -us) that comes without source.\\ - -\noindent{\large\bf Directory attacks}\\ -\emph{Destroy directory servers.} If a few directory -servers disappear, the others still decide on a valid -directory. So long as any directory servers remain in operation, -they will still broadcast their views of the network and generate a -consensus directory. (If more than half are destroyed, this -directory will not, however, have enough signatures for clients to -use it automatically; human intervention will be necessary for -clients to decide whether to trust the resulting directory.) - -\emph{Subvert a directory server.} By taking over a directory server, -an attacker can partially influence the final directory. 
Since ORs -are included or excluded by majority vote, the corrupt directory can -at worst cast a tie-breaking vote to decide whether to include -marginal ORs. It remains to be seen how often such marginal cases -occur in practice. - -\emph{Subvert a majority of directory servers.} An adversary who controls -more than half the directory servers can include as many compromised -ORs in the final directory as he wishes. We must ensure that directory -server operators are independent and attack-resistant. - -\emph{Encourage directory server dissent.} The directory -agreement protocol assumes that directory server operators agree on -the set of directory servers. An adversary who can persuade some -of the directory server operators to distrust one another could -split the quorum into mutually hostile camps, thus partitioning -users based on which directory they use. Tor does not address -this attack. - -\emph{Trick the directory servers into listing a hostile OR.} -Our threat model explicitly assumes directory server operators will -be able to filter out most hostile ORs. -% If this is not true, an -% attacker can flood the directory with compromised servers. - -\emph{Convince the directories that a malfunctioning OR is -working.} In the current Tor implementation, directory servers -assume that an OR is running correctly if they can start a TLS -connection to it. A hostile OR could easily subvert this test by -accepting TLS connections from ORs but ignoring all cells. Directory -servers must actively test ORs by building circuits and streams as -appropriate. The tradeoffs of a similar approach are discussed -in~\cite{mix-acc}.\\ - -\noindent{\large\bf Attacks against rendezvous points}\\ -\emph{Make many introduction requests.} An attacker could -try to deny Bob service by flooding his introduction points with -requests. 
Because the introduction points can block requests that -lack authorization tokens, however, Bob can restrict the volume of -requests he receives, or require a certain amount of computation for -every request he receives. - -\emph{Attack an introduction point.} An attacker could -disrupt a location-hidden service by disabling its introduction -points. But because a service's identity is attached to its public -key, the service can simply re-advertise -itself at a different introduction point. Advertisements can also be -done secretly so that only high-priority clients know the address of -Bob's introduction points or so that different clients know of different -introduction points. This forces the attacker to disable all possible -introduction points. - -\emph{Compromise an introduction point.} An attacker who controls -Bob's introduction point can flood Bob with -introduction requests, or prevent valid introduction requests from -reaching him. Bob can notice a flood, and close the circuit. To notice -blocking of valid requests, however, he should periodically test the -introduction point by sending rendezvous requests and making -sure he receives them. - -\emph{Compromise a rendezvous point.} A rendezvous -point is no more sensitive than any other OR on -a circuit, since all data passing through the rendezvous is encrypted -with a session key shared by Alice and Bob. - -\section{Early experiences: Tor in the Wild} -\label{sec:in-the-wild} - -As of mid-May 2004, the Tor network consists of 32 nodes -(24 in the US, 8 in Europe), and more are joining each week as the code -matures. (For comparison, the current remailer network -has about 40 nodes.) % We haven't asked PlanetLab to provide -%Tor nodes, since their AUP wouldn't allow exit nodes (see -%also~\cite{darkside}) and because we aim to build a long-term community of -%node operators and developers.} -Each node has at least a 768Kb/768Kb connection, and -many have 10Mb. 
The number of users varies (and of course, it's hard to -tell for sure), but we sometimes have several hundred users---administrators at -several companies have begun sending their entire departments' web -traffic through Tor, to block other divisions of -their company from reading their traffic. Tor users have reported using -the network for web browsing, FTP, IRC, AIM, Kazaa, SSH, and -recipient-anonymous email via rendezvous points. One user has anonymously -set up a Wiki as a hidden service, where other users anonymously publish -the addresses of their hidden services. - -Each Tor node currently processes roughly 800,000 relay -cells (a bit under half a gigabyte) per week. On average, about 80\% -of each 498-byte payload is full for cells going back to the client, -whereas about 40\% is full for cells coming from the client. (The difference -arises because most of the network's traffic is web browsing.) Interactive -traffic like SSH brings down the average a lot---once we have more -experience, and assuming we can resolve the anonymity issues, we may -partition traffic into two relay cell sizes: one to handle -bulk traffic and one for interactive traffic. - -Based in part on our restrictive default exit policy (we -reject SMTP requests) and our low profile, we have had no abuse -issues since the network was deployed in October -2003. Our slow growth rate gives us time to add features, -resolve bugs, and get a feel for what users actually want from an -anonymity system. Even though having more users would bolster our -anonymity sets, we are not eager to attract the Kazaa or warez -communities---we feel that we must build a reputation for privacy, human -rights, research, and other socially laudable activities. - -As for performance, profiling shows that Tor spends almost -all its CPU time in AES, which is fast. Current latency is attributable -to two factors. 
First, network latency is critical: we are -intentionally bouncing traffic around the world several times. Second, -our end-to-end congestion control algorithm focuses on protecting -volunteer servers from accidental DoS rather than on optimizing -performance. % Right now the first $500 \times 500\mbox{B}=250\mbox{KB}$ -%of the stream arrives -%quickly, and after that throughput depends on the rate that \emph{relay -%sendme} acknowledgments arrive. -To quantify these effects, we did some informal tests using a network of 4 -nodes on the same machine (a heavily loaded 1GHz Athlon). We downloaded a 60 -megabyte file from {\tt debian.org} every 30 minutes for 54 hours (108 sample -points). It arrived in about 300 seconds on average, compared to 210s for a -direct download. We ran a similar test on the production Tor network, -fetching the front page of {\tt cnn.com} (55 kilobytes): -% every 20 seconds for 8952 data points -while a direct -download consistently took about 0.3s, the performance through Tor varied. -Some downloads were as fast as 0.4s, with a median at 2.8s, and -90\% finishing within 5.3s. It seems that as the network expands, the chance -of building a slow circuit (one that includes a slow or heavily loaded node -or link) is increasing. On the other hand, as our users remain satisfied -with this increased latency, we can address our performance incrementally as we -proceed with development. %\footnote{For example, we have just begun pushing -%a pipelining patch to the production network that seems to decrease -%latency for medium-to-large files; we will present revised benchmarks -%as they become available.} - -%With the current network's topology and load, users can typically get 1-2 -%megabits sustained transfer rate, which is good enough for now. -%Indeed, the Tor -%design aims foremost to provide a security research platform; performance -%only needs to be sufficient to retain users~\cite{econymics,back01}. 
-%We can tweak the congestion control -%parameters to provide faster throughput at the cost of -%larger buffers at each node; adding the heuristics mentioned in -%Section~\ref{subsec:rate-limit} to favor low-volume -%streams may also help. More research remains to find the -%right balance. -% We should say _HOW MUCH_ latency there is in these cases. -NM - -%performs badly on lossy networks. may need airhook or something else as -%transport alternative? - -Although Tor's clique topology and full-visibility directories present -scaling problems, we still expect the network to support a few hundred -nodes and maybe 10,000 users before we're forced to become -more distributed. With luck, the experience we gain running the current -topology will help us choose among alternatives when the time comes. - -\section{Open Questions in Low-latency Anonymity} -\label{sec:maintaining-anonymity} - -In addition to the non-goals in -Section~\ref{subsec:non-goals}, many questions must be solved -before we can be confident of Tor's security. - -Many of these open issues are questions of balance. For example, -how often should users rotate to fresh circuits? Frequent rotation -is inefficient, expensive, and may lead to intersection attacks and -predecessor attacks~\cite{wright03}, but infrequent rotation makes the -user's traffic linkable. Besides opening fresh circuits, clients can -also exit from the middle of the circuit, -or truncate and re-extend the circuit. More analysis is -needed to determine the proper tradeoff. - -%% Duplicated by 'Better directory distribution' in section 9. -% -%A similar question surrounds timing of directory operations: how often -%should directories be updated? Clients that update infrequently receive -%an inaccurate picture of the network, but frequent updates can overload -%the directory servers. More generally, we must find more -%decentralized yet practical ways to distribute up-to-date snapshots of -%network status without introducing new attacks. 
- -How should we choose path lengths? If Alice always uses two hops, -then both ORs can be certain that by colluding they will learn about -Alice and Bob. In our current approach, Alice always chooses at least -three nodes unrelated to herself and her destination. -%% This point is subtle, but not IMO necessary. Anybody who thinks -%% about it will see that it's implied by the above sentence; anybody -%% who doesn't think about it is safe in his ignorance. -% -%Thus normally she chooses -%three nodes, but if she is running an OR and her destination is on an OR, -%she uses five. -Should Alice choose a random path length (e.g.~from a geometric -distribution) to foil an attacker who -uses timing to learn that he is the fifth hop and thus concludes that -both Alice and the responder are running ORs? - -Throughout this paper, we have assumed that end-to-end traffic -confirmation will immediately and automatically defeat a low-latency -anonymity system. Even high-latency anonymity systems can be -vulnerable to end-to-end traffic confirmation, if the traffic volumes -are high enough, and if users' habits are sufficiently -distinct~\cite{statistical-disclosure,limits-open}. Can anything be -done to -make low-latency systems resist these attacks as well as high-latency -systems? Tor already makes some effort to conceal the starts and ends of -streams by wrapping long-range control commands in identical-looking -relay cells. Link padding could frustrate passive observers who count -packets; long-range padding could work against observers who own the -first hop in a circuit. But more research remains to find an efficient -and practical approach. Volunteers prefer not to run constant-bandwidth -padding; but no convincing traffic shaping approach has been -specified. Recent work on long-range padding~\cite{defensive-dropping} -shows promise. 
One could also try to reduce correlation in packet timing -by batching and re-ordering packets, but it is unclear whether this could -improve anonymity without introducing so much latency as to render the -network unusable. - -A cascade topology may better defend against traffic confirmation by -aggregating users, and making padding and -mixing more affordable. Does the hydra topology (many input nodes, -few output nodes) work better against some adversaries? Are we going -to get a hydra anyway because most nodes will be middleman nodes? - -Common wisdom suggests that Alice should run her own OR for best -anonymity, because traffic coming from her node could plausibly have -come from elsewhere. How much mixing does this approach need? Is it -immediately beneficial because of real-world adversaries that can't -observe Alice's router, but can run routers of their own? - -To scale to many users, and to prevent an attacker from observing the -whole network, it may be necessary -to support far more servers than Tor currently anticipates. -This introduces several issues. First, if approval by a central set -of directory servers is no longer feasible, what mechanism should be used -to prevent adversaries from signing up many colluding servers? Second, -if clients can no longer have a complete picture of the network, -how can they perform discovery while preventing attackers from -manipulating or exploiting gaps in their knowledge? Third, if there -are too many servers for every server to constantly communicate with -every other, which non-clique topology should the network use? -(Restricted-route topologies promise comparable anonymity with better -scalability~\cite{danezis:pet2003}, but whatever topology we choose, we -need some way to keep attackers from manipulating their position within -it~\cite{casc-rep}.) Fourth, if no central authority is tracking -server reliability, how do we stop unreliable servers from making -the network unusable? 
Fifth, do clients receive so much anonymity -from running their own ORs that we should expect them all to do -so~\cite{econymics}, or do we need another incentive structure to -motivate them? Tarzan and MorphMix present possible solutions. - -% advogato, captcha - -When a Tor node goes down, all its circuits (and thus streams) must break. -Will users abandon the system because of this brittleness? How well -does the method in Section~\ref{subsec:dos} allow streams to survive -node failure? If affected users rebuild circuits immediately, how much -anonymity is lost? It seems the problem is even worse in a peer-to-peer -environment---such systems don't yet provide an incentive for peers to -stay connected when they're done retrieving content, so we would expect -a higher churn rate. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\section{Future Directions} -\label{sec:conclusion} - -Tor brings together many innovations into a unified deployable system. The -next immediate steps include: - -\emph{Scalability:} Tor's emphasis on deployability and design simplicity -has led us to adopt a clique topology, semi-centralized -directories, and a full-network-visibility model for client -knowledge. These properties will not scale past a few hundred servers. -Section~\ref{sec:maintaining-anonymity} describes some promising -approaches, but more deployment experience will be helpful in learning -the relative importance of these bottlenecks. - -\emph{Bandwidth classes:} This paper assumes that all ORs have -good bandwidth and latency. We should instead adopt the MorphMix model, -where nodes advertise their bandwidth level (DSL, T1, T3), and -Alice avoids bottlenecks by choosing nodes that match or -exceed her bandwidth. In this way DSL users can usefully join the Tor -network. - -\emph{Incentives:} Volunteers who run nodes are rewarded with publicity -and possibly better anonymity~\cite{econymics}. 
More nodes means increased -scalability, and more users can mean more anonymity. We need to continue -examining the incentive structures for participating in Tor. Further, -we need to explore more approaches to limiting abuse, and understand -why most people don't bother using privacy systems. - -\emph{Cover traffic:} Currently Tor omits cover traffic---its costs -in performance and bandwidth are clear but its security benefits are -not well understood. We must pursue more research on link-level cover -traffic and long-range cover traffic to determine whether some simple padding -method offers provable protection against our chosen adversary. - -%%\emph{Offer two relay cell sizes:} Traffic on the Internet tends to be -%%large for bulk transfers and small for interactive traffic. One cell -%%size cannot be optimal for both types of traffic. -% This should go in the spec and todo, but not the paper yet. -RD - -\emph{Caching at exit nodes:} Perhaps each exit node should run a -caching web proxy~\cite{shsm03}, to improve anonymity for cached pages -(Alice's request never -leaves the Tor network), to improve speed, and to reduce bandwidth cost. -On the other hand, forward security is weakened because caches -constitute a record of retrieved files. We must find the right -balance between usability and security. - -\emph{Better directory distribution:} -Clients currently download a description of -the entire network every 15 minutes. As the state grows larger -and clients more numerous, we may need a solution in which -clients receive incremental updates to directory state. -More generally, we must find more -scalable yet practical ways to distribute up-to-date snapshots of -network status without introducing new attacks. - -\emph{Further specification review:} Our public -byte-level specification~\cite{tor-spec} needs -external review. We hope that as Tor -is deployed, more people will examine its -specification. 
- -\emph{Multisystem interoperability:} We are currently working with the -designer of MorphMix to unify the specification and implementation of -the common elements of our two systems. So far, this seems -to be relatively straightforward. Interoperability will allow testing -and direct comparison of the two designs for trust and scalability. - -\emph{Wider-scale deployment:} The original goal of Tor was to -gain experience in deploying an anonymizing overlay network, and -learn from having actual users. We are now at a point in design -and development where we can start deploying a wider network. Once -we have many actual users, we will doubtlessly be better -able to evaluate some of our design decisions, including our -robustness/latency tradeoffs, our performance tradeoffs (including -cell size), our abuse-prevention mechanisms, and -our overall usability. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%% commented out for anonymous submission -\section*{Acknowledgments} - We thank Peter Palfrader, Geoff Goodell, Adam Shostack, Joseph Sokol-Margolis, - John Bashinski, and Zack Brown - for editing and comments; - Matej Pfajfar, Andrei Serjantov, Marc Rennhard for design discussions; - Bram Cohen for congestion control discussions; - Adam Back for suggesting telescoping circuits; and - Cathy Meadows for formal analysis of the \emph{extend} protocol. - This work has been supported by ONR and DARPA. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\bibliographystyle{latex8} -\bibliography{tor-design} - -\end{document} - -% Style guide: -% U.S. spelling -% avoid contractions (it's, can't, etc.) -% prefer ``for example'' or ``such as'' to e.g. -% prefer ``that is'' to i.e. -% 'mix', 'mixes' (as noun) -% 'mix-net' -% 'mix', 'mixing' (as verb) -% 'middleman' [Not with a hyphen; the hyphen has been optional -% since Middle English.] 
-% 'nymserver' -% 'Cypherpunk', 'Cypherpunks', 'Cypherpunk remailer' -% 'Onion Routing design', 'onion router' [note capitalization] -% 'SOCKS' -% Try not to use \cite as a noun. -% 'Authorizating' sounds great, but it isn't a word. -% 'First, second, third', not 'Firstly, secondly, thirdly'. -% 'circuit', not 'channel' -% Typography: no space on either side of an em dash---ever. -% Hyphens are for multi-part words; en dashes imply movement or -% opposition (The Alice--Bob connection); and em dashes are -% for punctuation---like that. -% A relay cell; a control cell; a \emph{create} cell; a -% \emph{relay truncated} cell. Never ``a \emph{relay truncated}.'' -% -% 'Substitute ``Damn'' every time you're inclined to write ``very;'' your -% editor will delete it and the writing will be just as it should be.' -% -- Mark Twain diff --git a/doc/design-paper/usenix.sty b/doc/design-paper/usenix.sty deleted file mode 100644 index 575c854e77..0000000000 --- a/doc/design-paper/usenix.sty +++ /dev/null @@ -1,96 +0,0 @@ -% usenix-2e.sty - to be used with latex2e (the new one) for USENIX. -% To use this style file, do this: -% -% \documentclass[twocolumn]{article} -% \usepackage{usenix-2e} -% and put {\rm ....} around the author names. -% -% The following definitions are modifications of standard article.sty -% definitions, arranged to do a better job of matching the USENIX -% guidelines. -% It will automatically select two-column mode and the Times-Roman -% font. - -% -% USENIX papers are two-column. -% Times-Roman font is nice if you can get it (requires NFSS, -% which is in latex2e). - -\if@twocolumn\else\input twocolumn.sty\fi -\usepackage{times} - -% -% USENIX wants margins of: 7/8" side, 1" bottom, and 3/4" top. -% 0.25" gutter between columns.
-% Gives active areas of 6.75" x 9.25" -% -\setlength{\textheight}{9.0in} -\setlength{\columnsep}{0.25in} -%%\setlength{\textwidth}{6.75in} -\setlength{\textwidth}{7.00in} -%\setlength{\footheight}{0.0in} -\setlength{\topmargin}{-0.25in} -\setlength{\headheight}{0.0in} -\setlength{\headsep}{0.0in} -\setlength{\evensidemargin}{-0.125in} -\setlength{\oddsidemargin}{-0.125in} - -% -% Usenix wants no page numbers for submitted papers, so that they can -% number them themselves. -% -\pagestyle{empty} - -% -% Usenix titles are in 14-point bold type, with no date, and with no -% change in the empty page headers. The whole author section is 12 point -% italic--- you must use {\rm } around the actual author names to get -% them in roman. -% -\def\maketitle{\par - \begingroup - \renewcommand\thefootnote{\fnsymbol{footnote}}% - \def\@makefnmark{\hbox to\z@{$\m@th^{\@thefnmark}$\hss}}% - \long\def\@makefntext##1{\parindent 1em\noindent - \hbox to1.8em{\hss$\m@th^{\@thefnmark}$}##1}% - \if@twocolumn - \twocolumn[\@maketitle]% - \else \newpage - \global\@topnum\z@ - \@maketitle \fi\@thanks - \endgroup - \setcounter{footnote}{0}% - \let\maketitle\relax - \let\@maketitle\relax - \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax} - -\def\@maketitle{\newpage - \vbox to 2.5in{ - \vspace*{\fill} - \vskip 2em - \begin{center}% - {\Large\bf \@title \par}% - \vskip 0.375in minus 0.300in - {\large\it - \lineskip .5em - \begin{tabular}[t]{c}\@author - \end{tabular}\par}% - \end{center}% - \par - \vspace*{\fill} -% \vskip 1.5em - } -} - -% -% The abstract is preceded by a 12-pt bold centered heading -\def\abstract{\begin{center}% -{\large\bf \abstractname\vspace{-.5em}\vspace{\z@}}% -\end{center}} -\def\endabstract{} - -% -% Main section titles are 12-pt bold. Others can be same or smaller.
-% -\def\section{\@startsection {section}{1}{\z@}{-3.5ex plus-1ex minus - -.2ex}{2.3ex plus.2ex}{\reset@font\large\bf}} diff --git a/doc/design-paper/usenixsubmit.cls b/doc/design-paper/usenixsubmit.cls deleted file mode 100644 index 743ffcfe4a..0000000000 --- a/doc/design-paper/usenixsubmit.cls +++ /dev/null @@ -1,7 +0,0 @@ -% Created by Anil Somayaji - -\ProvidesClass{usenixsubmit} -\LoadClass[11pt,letterpaper]{article} -\usepackage{times} -\usepackage[margin=1in]{geometry} - diff --git a/doc/roadmaps/2008-12-19-roadmap-full.pdf b/doc/roadmaps/2008-12-19-roadmap-full.pdf Binary files differdeleted file mode 100644 index d87171c2d9..0000000000 --- a/doc/roadmaps/2008-12-19-roadmap-full.pdf +++ /dev/null diff --git a/doc/roadmaps/2009-03-11-performance.pdf b/doc/roadmaps/2009-03-11-performance.pdf Binary files differdeleted file mode 100644 index 3af74ddca5..0000000000 --- a/doc/roadmaps/2009-03-11-performance.pdf +++ /dev/null diff --git a/doc/roadmaps/roadmap-2007.pdf b/doc/roadmaps/roadmap-2007.pdf Binary files differdeleted file mode 100644 index 2422c05888..0000000000 --- a/doc/roadmaps/roadmap-2007.pdf +++ /dev/null diff --git a/doc/roadmaps/roadmap-2007.tex b/doc/roadmaps/roadmap-2007.tex deleted file mode 100644 index cebe4a5905..0000000000 --- a/doc/roadmaps/roadmap-2007.tex +++ /dev/null @@ -1,690 +0,0 @@ -\documentclass{article} - -\usepackage{url} - -\newenvironment{tightlist}{\begin{list}{$\bullet$}{ - \setlength{\itemsep}{0mm} - \setlength{\parsep}{0mm} - % \setlength{\labelsep}{0mm} - % \setlength{\labelwidth}{0mm} - % \setlength{\topsep}{0mm} - }}{\end{list}} -\newcommand{\tmp}[1]{{\bf #1} [......] \\} -\newcommand{\plan}[1]{ {\bf (#1)}} - -\begin{document} - -\title{Tor Development Roadmap: Wishlist for Nov 2006--Dec 2007} -\author{Roger Dingledine \and Nick Mathewson \and Shava Nerad} - -\maketitle -\pagestyle{plain} - -% TO DO: -% add cites -% add time estimates - - -\section{Introduction} -%Hi, Roger! Hi, Shava. 
This paragraph should get deleted soon. Right now, -%this document goes into about as much detail as I'd like to go into for a -%technical audience, since that's the audience I know best. It doesn't have -%time estimates everywhere. It isn't well prioritized, and it doesn't -%distinguish well between things that need lots of research and things that -%don't. The breakdowns don't all make sense. There are lots of things where -%I don't make it clear how they fit into larger goals, and lots of larger -%goals that don't break down into little things. It isn't all stuff we can do -%for sure, and it isn't even all stuff we can do for sure in 2007. The -%tmp\{\} macro indicates stuff I haven't said enough about. That said, here -%goes... - -Tor (the software) and Tor (the overall software/network/support/document -suite) are now experiencing all the crises of success. Over the next year, -we're probably going to grow more in terms of users, developers, and funding -than before. This gives us the opportunity to perform long-neglected -maintenance tasks. - -\section{Code and design infrastructure} - -\subsection{Protocol revision} -To maintain backward compatibility, we've postponed major protocol -changes and redesigns for a long time. Because of this, there are a number -of sensible revisions we've been putting off until we could deploy several of -them at once. To do each of these, we first need to discuss design -alternatives with other cryptographers and outside collaborators to -make sure that our choices are secure. - -First of all, our protocol needs better {\bf versioning support} so that we -can make backward-incompatible changes to our core protocol. There are -difficult anonymity issues here, since many naive designs would make it easy -to tell clients apart (and then track them) based on their supported versions. - -With protocol versioning support would come the ability to {\bf future-proof - our ciphersuites}.
For example, not only our OR protocol, but also our -directory protocol, is pretty firmly tied to the SHA-1 hash function, which -though not yet known to be insecure for our purposes, has begun to show -its age. We should -remove assumptions throughout our design based on the assumption that public -keys, secret keys, or digests will remain any particular size indefinitely. - -Our OR {\bf authentication protocol}, though provably -secure\cite{tap:pet2006}, relies more on particular aspects of RSA and our -implementation thereof than we had initially believed. To future-proof -against changes, we should replace it with a less delicate approach. - -\plan{For all the above: 2 person-months to specify, spread over several - months with time for interaction with external participants. One - person-month to implement. Start specifying in early 2007.} - -We might design a {\bf stream migration} feature so that streams tunneled -over Tor could be more resilient to dropped connections and changed IPs. -\plan{Not in 2007.} - -A new protocol could support {\bf multiple cell sizes}. Right now, all data -passes through the Tor network divided into 512-byte cells. This is -efficient for high-bandwidth protocols, but inefficient for protocols -like SSH or AIM that send information in small chunks. Of course, we need to -investigate the extent to which multiple sizes could make it easier for an -adversary to fingerprint a traffic pattern. \plan{Not in 2007.} - -As a part of our design, we should investigate possible {\bf cipher modes} -other than counter mode. For example, a mode with built-in integrity -checking, error propagation, and random access could simplify our protocol -significantly. Sadly, many of these are patented and unavailable for us. -\plan{Not in 2007.} - -\subsection{Scalability} - -\subsubsection{Improved directory efficiency} -Right now, clients download a statement of the {\bf network status} made by -each directory authority. 
We could reduce network bandwidth significantly by -having the authorities jointly sign a statement reflecting their vote on the -current network status. This would save clients up to 160K per hour, and -make their view of the network more uniform. Of course, we'd need to make -sure the voting process was secure and resilient to failures in the -network.\plan{Must do; specify in 2006. 2 weeks to specify, 3-4 weeks to - implement.} - -We should {\bf shorten router descriptors}, since the current format includes -a great deal of information that's only of interest to the directory -authorities, and not of interest to clients. We can do this by having each -router upload a short-form and a long-form signed descriptor, and having -clients download only the short form. Even a naive version of this would -save about 40\% of the bandwidth currently spent by clients downloading -descriptors.\plan{Must do; specify in 2006. 3-4 weeks.} - -We should {\bf have routers upload their descriptors even less often}, so -that clients do not need to download replacements every 18 hours whether any -information has changed or not. (As of Tor 0.1.2.3-alpha, clients tolerate -routers that don't upload often, but routers still upload at least every 18 -hours to support older clients.) \plan{Must do, but not until 0.1.1.x is -deprecated in mid 2007. 1 week.} - -\subsubsection{Non-clique topology} -Our current network design achieves a certain amount of its anonymity by -making clients act like each other through the simple expedient of making -sure that all clients know all servers, and that any server can talk to any -other server. But as the number of servers increases to serve an -ever-greater number of clients, these assumptions become impractical. - -At worst, if these scalability issues become troubling before a solution is -found, we can design and build a solution to {\bf split the network into -multiple slices} until a better solution comes along. 
This is not ideal, -since rather than looking like all other users from a point of view of path -selection, users would ``only'' look like 200,000--300,000 other -users.\plan{Not unless needed.} - -We are in the process of designing {\bf improved schemes for network - scalability}. Some approaches focus on limiting what an adversary can know -about what a user knows; others focus on reducing the extent to which an -adversary can exploit this knowledge. These are currently in their infancy, -and will probably not be needed in 2007, but they must be designed in 2007 if -they are to be deployed in 2008.\plan{Design in 2007; unknown difficulty. - Write a paper.} - -\subsubsection{Relay incentives} -To support more users on the network, we need to get more servers. So far, -we've relied on volunteerism to attract server operators, and so far it's -served us well. But in the long run, we need to {\bf design incentives for - users to run servers} and relay traffic for others. Most obviously, we -could try to build the network so that servers offered improved service for -other servers, but we would need to do so without weakening anonymity and -making it obvious which connections originate from users running servers. We -have some preliminary designs~\cite{incentives-txt,tor-challenges}, -but need to perform -some more research to make sure they would be safe and effective.\plan{Write - a draft paper; 2 person-months.} - -\subsection{Portability} -Our {\bf Windows implementation}, though much improved, continues to lag -behind Unix and Mac OS X, especially when running as a server. 
We hope to -merge promising patches from Mike Chiussi to address this point, and bring -Windows performance on par with other platforms.\plan{Do in 2007; 1.5 months - to integrate not counting Mike's work.} - -We should have {\bf better support for portable devices}, including modes of -operation that require less RAM, and that write to disk less frequently (to -avoid wearing out flash RAM).\plan{Optional; 2 weeks.} - -We should {\bf stop using socketpair on Windows}; instead, we can use -in-memory structures to communicate between cpuworkers and the main thread, -and between connections.\plan{Optional; 1 week.} - -\subsection{Performance: resource usage} -We've been working on {\bf using less RAM}, especially on servers. This has -paid off a lot for directory caches in the 0.1.2, which in some cases are -using 90\% less memory than they used to require. But we can do better, -especially in the area around our buffer management algorithms, by using an -approach more like the BSD and Linux kernels use instead of our current ring -buffer approach. (For OR connections, we can just use queues of cell-sized -chunks produced with a specialized allocator.) This could potentially save -around 25 to 50\% of the memory currently allocated for network buffers, and -make Tor a more attractive proposition for restricted-memory environments -like old computers, mobile devices, and the like.\plan{Do in 2007; 2-3 weeks - plus one week measurement.} - -We should improve our {\bf bandwidth limiting}. The current system has been -crucial in making users willing to run servers: nobody is willing to run a -server if it might use an unbounded amount of bandwidth, especially if they -are charged for their usage. 
We can make our system better by letting users -configure bandwidth limits independently for their own traffic and traffic -relayed for others; and by adding write limits for users running directory -servers.\plan{Do in 2006; 2-3 weeks.} - -On many hosts, sockets are still in short supply, and will be until we can -migrate our protocol to UDP. We can {\bf use fewer sockets} by making our -self-to-self connections happen internally to the code rather than involving -the operating system's socket implementation.\plan{Optional; 1 week.} - -\subsection{Performance: network usage} -We know too little about how well our current path -selection algorithms actually spread traffic around the network in practice. -We should {\bf research the efficacy of our traffic allocation} and either -assure ourselves that it is close enough to optimal as to need no improvement -(unlikely) or {\bf identify ways to improve network usage}, and get more -users' traffic delivered faster. Performing this research will require -careful thought about anonymity implications. - -We should also {\bf examine the efficacy of our congestion control - algorithm}, and see whether we can improve client performance in the -presence of a congested network through dynamic `sendme' window sizes or -other means. This will have anonymity implications too if we aren't careful. - -\plan{For both of the above: research, design and write - a measurement tool in 2007: 1 month. See if we can interest a graduate - student.} - -We should work on making Tor's cell-based protocol perform better on -networks with low bandwidth -and high packet loss.\plan{Do in 2007 if we're funded to do it; 4-6 weeks.} - -\subsection{Performance scenario: one Tor client, many users} -We should {\bf improve Tor's performance when a single Tor handles many - clients}. Many organizations want to manage a single Tor client on their -firewall for many users, rather than having each user install a separate -Tor client. 
We haven't optimized for this scenario, and it is likely that -there are some code paths in the current implementation that become -inefficient when a single Tor is servicing hundreds or thousands of client -connections. (Additionally, it is likely that such clients have interesting -anonymity requirements that we should investigate.) We should profile Tor -under appropriate loads, identify bottlenecks, and fix them.\plan{Do in 2007 - if we're funded to do it; 4-8 weeks.} - -\subsection{Tor servers on asymmetric bandwidth} - -Tor should work better on servers that have asymmetric connections like cable -or DSL. Because Tor has separate TCP connections between each -hop, if the incoming bytes are arriving just fine and the outgoing bytes are -all getting dropped on the floor, the TCP push-back mechanisms don't really -transmit this information back to the incoming streams.\plan{Do in 2007 since - related to bandwidth limiting. 3-4 weeks.} - -\subsection{Running Tor as both client and server} - -There are many performance tradeoffs and balances that might need more attention. -We first need to track and fix whatever bottlenecks emerge; but we also -need to invent good algorithms for prioritizing the client's traffic -without starving the server's traffic too much.\plan{No idea; try -profiling and improving things in 2007.} - -\subsection{Protocol redesign for UDP} -Tor has relayed only TCP traffic since its first versions, and has used -TLS-over-TCP to do so. This approach has proved reliable and flexible, but -in the long term we will need to allow UDP traffic on the network, and switch -some or all of the network to using a UDP transport. {\bf Supporting UDP - traffic} will make Tor more suitable for protocols that require UDP, such -as many VOIP protocols. {\bf Using a UDP transport} could greatly reduce -resource limitations on servers, and make the network far less interruptible -by lossy connections.
Either of these protocol changes would require a great -deal of design work, however. We hope to be able to enlist the aid of a few -talented graduate students to assist with the initial design and -specification, but the actual implementation will require significant testing -of different reliable transport approaches.\plan{Maybe do a design in 2007 if -we find an interested academic. Ian or Ben L might be good partners here.} - -\section{Blocking resistance} - -\subsection{Design for blocking resistance} -We have written a design document explaining our general approach to blocking -resistance. We should workshop it with other experts in the field to get -their ideas about how we can improve Tor's efficacy as an anti-censorship -tool. - -\subsection{Implementation: client-side and bridges-side} - -Our anticensorship design calls for some nodes to act as ``bridges'' -that are outside a national firewall, and others inside the firewall to -act as pure clients. This part of the design is quite clear-cut; we're -probably ready to begin implementing it. To {\bf implement bridges}, we -need to have servers publish themselves as limited-availability relays -to a special bridge authority if they judge they'd make good servers. -We will also need to help provide documentation for port forwarding, -and an easy configuration tool for running as a bridge. - -To {\bf implement clients}, we need to provide a flexible interface to -learn about bridges and to act on knowledge of bridges. We also need -to teach them how to know to use bridges as their first hop, and how to -fetch directory information from both classes of directory authority. - -Clients also need to {\bf use the encrypted directory variant} added in Tor -0.1.2.3-alpha. This will let them retrieve directory information over Tor -once they've got their initial bridges. We may want to get the rest of the -Tor user base to begin using this encrypted directory variant too, to -provide cover. 
- -Bridges will want to be able to {\bf listen on multiple addresses and ports} -if they can, to give the adversary more ports to block. - -\subsection{Research: anonymity implications from becoming a bridge} - -\subsection{Implementation: bridge authority} - -The design here is also reasonably clear-cut: we need to run some -directory authorities with a slightly modified protocol that doesn't leak -the entire list of bridges. Thus users can learn up-to-date information -for bridges they already know about, but they can't learn about arbitrary -new bridges. - -\subsection{Normalizing the Tor protocol on the wire} -Additionally, we should {\bf resist content-based filters}. Though an -adversary can't see what users are saying, some aspects of our protocol are -easy to fingerprint {\em as} Tor. We should correct this where possible. - -Look like Firefox; or look like nothing? -Future research: investigate timing similarities with other protocols. - -\subsection{Access control for bridges} -Design/impl: password-protecting bridges, in light of above. -And/or more general access control. - -\subsection{Research: scanning-resistance} - -\subsection{Research/Design/Impl: how users discover bridges} -Our design anticipates an arms race between discovery methods and censors. -We need to begin the infrastructure on our side quickly, preferably in a -flexible language like Python, so we can adapt quickly to censorship. - -phase one: personal bridges -phase two: families of personal bridges -phase three: more structured social network -phase four: bag of tricks -Research: phase five... - -Integration with Psiphon, etc? - -\subsection{Document best practices for users} -Document best practices for various activities common among -blocked users (e.g. WordPress use). - -\subsection{Research: how to know if a bridge has been blocked?} - -\subsection{GeoIP maintenance, and "private" user statistics} -How to know if the whole idea is working? 
- -\subsection{Research: hiding whether the user is reading or publishing?} - -\subsection{Research: how many bridges do you need to know to maintain -reachability?} - -\subsection{Resisting censorship of the Tor website, docs, and mirrors} - -We should take some effort to consider {\bf initial distribution of Tor and - related information} in countries where the Tor website and mirrors are -censored. (Right now, most countries that block access to Tor block only the -main website and leave mirrors and the network itself untouched.) Falling -back on word-of-mouth is always a good last resort, but we should also take -steps to make sure it's relatively easy for users to get ahold of a copy. - -\section{Security} - -\subsection{Security research projects} - -We should investigate approaches with some promise to help Tor resist -end-to-end traffic correlation attacks. It's an open research question -whether (and to what extent) {\bf mixed-latency} networks, {\bf low-volume - long-distance padding}, or other approaches can resist these attacks, which -are currently some of the most effective against careful Tor users. We -should research these questions and perform simulations to identify -opportunities for strengthening our design without dropping performance to -unacceptable levels. %Cite something -\plan{Start doing this in 2007; write a paper. 8-16 weeks.} - -We've got some preliminary results suggesting that {\bf a topology-aware - routing algorithm}~\cite{feamster:wpes2004} could reduce Tor users' -vulnerability against local or ISP-level adversaries, by ensuring that they -are never in a position to watch both ends of a connection. We need to -examine the effects of this approach in more detail and consider side-effects -on anonymity against other kinds of adversaries. If the approach still looks -promising, we should investigate ways for clients to implement it (or an -approximation of it) without having to download routing tables for the whole -Internet. 
\plan{Not in 2007 unless a graduate student wants to do it.} - -%\tmp{defenses against end-to-end correlation} We don't expect any to work -%right now, but it would be useful to learn that one did. Alternatively, -%proving that one didn't would free up researchers in the field to go work on -%other things. -% -% See above; I think I got this. - -We should research the efficacy of {\bf website fingerprinting} attacks, -wherein an adversary tries to match the distinctive traffic and timing -pattern of the resources constituting a given website to the traffic pattern -of a user's client. These attacks work great in simulations, but in -practice we hear they don't work nearly as well. We should get some actual -numbers to investigate the issue, and figure out what's going on. If we -resist these attacks, or can improve our design to resist them, we should. -% add cites -\plan{Possibly part of end-to-end correlation paper. Otherwise, not in 2007 - unless a graduate student is interested.} - -\subsection{Implementation security} -Right now, each Tor node stores its keys unencrypted. We should {\bf encrypt - more Tor keys} so that Tor authorities can require a startup password. We -should look into adding intermediary medium-term ``signing keys'' between -identity keys and onion keys, so that a password could be required to replace -a signing key, but not to start Tor. This would improve Tor's long-term -security, especially in its directory authority infrastructure.\plan{Design this - as a part of the revised ``v2.1'' directory protocol; implement it in - 2007. 3-4 weeks.} - -We should also {\bf mark RAM that holds key material as non-swappable} so -that there is no risk of recovering key material from a hard disk -compromise. 
This would require submitting patches upstream to OpenSSL, where -support for marking memory as sensitive is currently in a very preliminary -state.\plan{Nice to do, but not in immediate Tor scope.} - -There are numerous tools for identifying trouble spots in code (such as -Coverity or even VS2005's code analysis tool) and we should convince somebody -to run some of them against the Tor codebase. Ideally, we could figure out a -way to get our code checked periodically rather than just once.\plan{Almost - no time once we talk somebody into it.} - -We should try {\bf protocol fuzzing} to identify errors in our -implementation.\plan{Not in 2007 unless we find a grad student or - undergraduate who wants to try.} - -Our guard nodes help prevent an attacker from being able to become a chosen -client's entry point by having each client choose a few favorite entry points -as ``guards'' and stick to them. We should implement a {\bf directory - guards} feature to keep adversaries from enumerating Tor users by acting as -a directory cache.\plan{Do in 2007; 2 weeks.} - -\subsection{Detect corrupt exits and other servers} -With the success of our network, we've attracted servers in many locations, -operated by many kinds of people. Unfortunately, some of these locations -have compromised or defective networks, and some of these people are -untrustworthy or incompetent. Our current design relies on authority -administrators to identify bad nodes and mark them as nonfunctioning. We -should {\bf automate the process of identifying malfunctioning nodes} as -follows: - -We should create a generic {\bf feedback mechanism for add-on tools} like -Mike Perry's ``Snakes on a Tor'' to report failing nodes to authorities. -\plan{Do in 2006; 1-2 weeks.} - -We should write tools to {\bf detect more kinds of innocent node failure}, -such as nodes whose network providers intercept SSL, nodes whose network -providers censor popular websites, and so on. 
We should also try to detect -{\bf routers that snoop traffic}; we could do this by launching connections -to throwaway accounts, and seeing which accounts get used.\plan{Do in 2007; - ask Mike Perry if he's interested. 4-6 weeks.} - -We should add {\bf an efficient way for authorities to mark a set of servers - as probably collaborating} though not necessarily otherwise dishonest. -This happens when an administrator starts multiple routers, but doesn't mark -them as belonging to the same family.\plan{Do during v2.1 directory protocol - redesign; 1-2 weeks to implement.} - -To avoid attacks where an adversary claims good performance in order to -attract traffic, we should {\bf have authorities measure node performance} -(including stability and bandwidth) themselves, and not simply believe what -they're told. Measuring stability can be done by tracking MTBF. Measuring -bandwidth can be tricky, since it's hard to distinguish between a server with -low capacity, and a high-capacity server with most of its capacity in -use.\plan{Do ``Stable'' in 2007; 2-3 weeks. ``Fast'' will be harder; do it - if we can interest a grad student.} - -{\bf Operating a directory authority should be easier.} We rely on authority -operators to keep the network running well, but right now their job involves -too much busywork and administrative overhead. A better interface for them -to use could free their time to work on exception cases rather than on -adding named nodes to the network.\plan{Do in 2007; 4-5 weeks.} - -\subsection{Protocol security} - -In addition to other protocol changes discussed above, -% And should we move some of them down here? -NM -we should add {\bf hooks for denial-of-service resistance}; we have some -preliminary designs, but we shouldn't postpone them until we really need them. 
-If somebody tries a DDoS attack against the Tor network, we won't want to -wait for all the servers and clients to upgrade to a new -version.\plan{Research project; do this in 2007 if funded.} - -\section{Development infrastructure} - -\subsection{Build farm} -We've begun to deploy a cross-platform distributed build farm of hosts -that build and test the Tor source every time it changes in our development -repository. - -We need to {\bf get more participants}, so that we can test a larger variety -of platforms. (Previously, we've only found out when our code had broken on -obscure platforms when somebody got around to building it.) - -We need also to {\bf add our dependencies} to the build farm, so that we can -ensure that libraries we need (especially libevent) do not stop working on -any important platform between one release and the next. - -\plan{This is ongoing as more buildbots arrive.} - -\subsection{Improved testing harness} -Currently, our {\bf unit tests} cover only about 20\% of the code base. This -is uncomfortably low; we should write more and switch to a more flexible -testing framework.\plan{Ongoing basis, time permitting.} - -We should also write flexible {\bf automated single-host deployment tests} so -we can more easily verify that the current codebase works with the -network.\plan{Worthwhile in 2007; would save lots of time. 2-4 weeks.} - -We should build automated {\bf stress testing} frameworks so we can see which -realistic loads cause Tor to perform badly, and regularly profile Tor against -these loads. This would give us {\it in vitro} performance values to -supplement our deployment experience.\plan{Worthwhile in 2007; 2-6 weeks.} - -We should improve our memory profiling code.\plan{...} - - -\subsection{Centralized build system} -We currently rely on a separate packager to maintain the packaging system and -to build Tor on each platform for which we distribute binaries. 
Separate -package maintainers is sensible, but separate package builders has meant -long turnaround times between source releases and package releases. We -should create the necessary infrastructure for us to produce binaries for all -major packages within an hour or so of source release.\plan{We should - brainstorm this at least in 2007.} - -\subsection{Improved metrics} -We need a way to {\bf measure the network's health, capacity, and degree of - utilization}. Our current means for doing this are ad hoc and not -completely accurate. - -We need better ways to {\bf tell which countries our users are coming from, - and how many there are}. A good perspective of the network helps us -allocate resources and identify trouble spots, but our current approaches -will work less and less well as we make it harder for adversaries to -enumerate users. We'll probably want to shift to a smarter, statistical -approach rather than our current ``count and extrapolate'' method. - -\plan{All of this in 2007 if funded; 4-8 weeks} - -% \tmp{We'd like to know how much of the network is getting used.} -% I think this is covered above -NM - -\subsection{Controller library} -We've done lots of design and development on our controller interface, which -allows UI applications and other tools to interact with Tor. We could -encourage the development of more such tools by releasing a {\bf - general-purpose controller library}, ideally with API support for several -popular programming languages.\plan{2006 or 2007; 1-2 weeks.} - -\section{User experience} - -\subsection{Get blocked less, get blocked less broadly} -Right now, some services block connections from the Tor network because -they don't have a better -way to keep vandals from abusing them than blocking IP addresses associated -with vandalism.
Our approach so far has been to educate them about better -solutions that currently exist, but we should also {\bf create better -solutions for limiting vandalism by anonymous users} like credential and -blind-signature based implementations, and encourage their use. Other -promising starting points include writing a patch and explanation for -Wikipedia, and helping Freenode to document, maintain, and expand its -current Tor-friendly position.\plan{Do a writeup here in 2007; 1-2 weeks.} - -Those who do block Tor users also block overbroadly, sometimes blacklisting -operators of Tor servers that do not permit exit to their services. We could -obviate innocent reasons for doing so by designing a {\bf narrowly-targeted Tor - RBL service} so that those who wanted to overblock Tor could no longer -plead incompetence.\plan{Possibly in 2007 if we decide it's a good idea; 3 - weeks.} - -\subsection{All-in-one bundle} -We need a well-tested, well-documented bundle of Tor and supporting -applications configured to use it correctly. We have an initial -implementation well under way, but it will need additional work in -identifying requisite Firefox extensions, identifying security threats, -improving user experience, and so on. This will need significantly more work -before it's ready for a general public release. - -\subsection{LiveCD Tor} -We need a nice bootable livecd containing a minimal OS and a few applications -configured to use it correctly. The Anonym.OS project demonstrated that this -is quite feasible, but their project is not currently maintained. - -\subsection{A Tor client in a VM} -\tmp{a.k.a JanusVM} which is quite related to the firewall-level deployment -section below. JanusVM is a Linux kernel running in VMWare. It gets an IP -address from the network, and serves as a DHCP server for its host Windows -machine. It intercepts all outgoing traffic and redirects it into Privoxy, -Tor, etc.
This Linux-in-Windows approach may help us with scalability in -the short term, and it may also be a good long-term solution rather than -accepting all security risks in Windows. - -%\subsection{Interface improvements} -%\tmp{Allow controllers to manipulate server status.} -% (Why is this in the User Experience section?) -RD -% I think it's better left to a generic ``make controller iface better'' item. - -\subsection{Firewall-level deployment} -Another useful deployment mode for some users is using {\bf Tor in a firewall - configuration}, and directing all their traffic through Tor. This can be a -little tricky to set up currently, but it's an effective way to make sure no -traffic leaves the host un-anonymized. To achieve this, we need to {\bf - improve and port our new TransPort} feature which allows Tor to be used -without SOCKS support; to {\bf add an anonymizing DNS proxy} feature to Tor; -and to {\bf construct a recommended set of firewall configurations} to redirect -traffic to Tor. - -This is an area where {\bf deployment via a livecd}, or an installation -targeted at specialized home routing hardware, could be useful. - -\subsection{Assess software and configurations for anonymity risks} -Right now, users and packagers are more or less on their own when selecting -Firefox extensions. We should {\bf assemble a recommended list of browser - extensions} through experiment, and include this in the application bundles -we distribute. - -We should also describe {\bf best practices for using Tor with each class of - application}. For example, Ethan Zuckerman has written a detailed -tutorial on how to use Tor, Firefox, GMail, and Wordpress to blog with -improved safety. There are many other cases on the Internet where anonymity -would be helpful, and there are a lot of ways to screw up using Tor. - -The Foxtor and Torbutton extensions serve similar purposes; we should pick a -favorite, and merge in the useful features of the other. 
- -%\tmp{clean up our own bundled software: -%E.g. Merge the good features of Foxtor into Torbutton} -% -% What else did you have in mind? -NM - -\subsection{Localization} -Right now, most of our user-facing code is internationalized. We need to -internationalize the last few hold-outs (like the Tor expert installer), and get -more translations for the parts that are already internationalized. - -Also, we should look into a {\bf unified translator's solution}. Currently, -since different tools have been internationalized using the -framework-appropriate method, different tools require translators to localize -them via different interfaces. Inasmuch as possible, we should make -translators only need to use a single tool to translate the whole Tor suite. - -\section{Support} - -It would be nice to set up some {\bf user support infrastructure} and -{\bf contributor support infrastructure}, especially focusing on server -operators and on coordinating volunteers. - -This includes intuitive and easy ticket systems for bug reports and -feature suggestions (not just mailing lists with a half dozen people -and no clear roles for who answers what), but it also includes a more -personalized and efficient framework for interaction so we keep the -attention and interest of the contributors, and so we make them feel -helpful and wanted. - -\section{Documentation} - -\subsection{Unified documentation scheme} - -We need to {\bf inventory our documentation.} Our documentation so far has -been mostly produced on an {\it ad hoc} basis, in response to particular -needs and requests. We should figure out what documentation we have, which of -it (if any) should get priority, and whether we can't put it all into a -single format. - -We could {\bf unify the docs} into a single book-like thing. This will also -help us identify what sections of the ``book'' are missing. 
- -\subsection{Missing technical documentation} - -We should {\bf revise our design paper} to reflect the new decisions and -research we've made since it was published in 2004. This will help other -researchers evaluate and suggest improvements to Tor's current design. - -Other projects sometimes implement the client side of our protocol. We -encourage this, but we should write {\bf a document about how to avoid -excessive resource use}, so we don't need to worry that they will do so -without regard to the effect of their choices on server resources. - -\subsection{Missing user documentation} - -Our documentation falls into two broad categories: some is `discursive' and -explains in detail why users should take certain actions, and other -documentation is `comprehensive' and describes all of Tor's features. Right -now, we have no document that is at once deep, readable, and thorough. We -should correct this by identifying missing spots in our design. - -\bibliographystyle{plain} \bibliography{tor-design} - -\end{document} - diff --git a/doc/roadmaps/roadmap-future.pdf b/doc/roadmaps/roadmap-future.pdf Binary files differdeleted file mode 100644 index 8300ce19c9..0000000000 --- a/doc/roadmaps/roadmap-future.pdf +++ /dev/null diff --git a/doc/roadmaps/roadmap-future.tex b/doc/roadmaps/roadmap-future.tex deleted file mode 100644 index 4ab240f977..0000000000 --- a/doc/roadmaps/roadmap-future.tex +++ /dev/null @@ -1,895 +0,0 @@ -\documentclass{article} - -\usepackage{url} -\usepackage{fullpage} - -\newenvironment{tightlist}{\begin{list}{$\bullet$}{ - \setlength{\itemsep}{0mm} - \setlength{\parsep}{0mm} - % \setlength{\labelsep}{0mm} - % \setlength{\labelwidth}{0mm} - % \setlength{\topsep}{0mm} - }}{\end{list}} -\newcommand{\tmp}[1]{{\bf #1} [......]
\\} -\newcommand{\plan}[1]{ {\bf (#1)}} - -\begin{document} - -\title{Tor Development Roadmap: Wishlist for 2008 and beyond} -\author{Roger Dingledine \and Nick Mathewson} -\date{} - -\maketitle -\pagestyle{plain} - -\section{Introduction} - -Tor (the software) and Tor (the overall software/network/support/document -suite) are now experiencing all the crises of success. Over the next -years, we're probably going to grow even more in terms of users, developers, -and funding than before. This document attempts to lay out all the -well-understood next steps that Tor needs to take. We should periodically -reorganize it to reflect current and intended priorities. - -\section{Everybody can be a relay} - -We've made a lot of progress towards letting an ordinary Tor client also -serve as a Tor relay. But these issues remain. - -\subsection{UPNP} - -We should teach Vidalia how to speak UPNP to automatically open and -forward ports on common (e.g. Linksys) routers. There are some promising -Qt-based UPNP libs out there, and in any case there are others (e.g. in -Perl) that we can base it on. - -\subsection{``ORPort auto'' to look for a reachable port} - -Vidalia defaults to port 443 on Windows and port 8080 elsewhere. But if -that port is already in use, or the ISP filters incoming connections -on that port (some cablemodem providers filter 443 inbound), the user -needs to learn how to notice this, and then pick a new one and type it -into Vidalia. - -We should add a new option ``auto'' that cycles through a set of preferred -ports, testing bindability and reachability for each of them, and only -complains to the user once it's given up on the common choices. - -\subsection{Incentives design} - -Roger has been working with researchers at Rice University to simulate -and analyze a new design where the directory authorities assign gold -stars to well-behaving relays, and then all the relays give priority -to traffic from gold-starred relays. 
The great feature of the design is -that not only does it provide the (explicit) incentive to run a relay, -but it also aims to grow the overall capacity of the network, so even -non-relays will benefit. - -It needs more analysis, and perhaps more design work, before we try -deploying it. - -\subsection{Windows libevent} - -Tor relays still don't work well or reliably on Windows XP or Windows -Vista, because we don't use the Windows-native ``overlapped IO'' -approach. Christian King made a good start at teaching libevent about -overlapped IO during Google Summer of Code 2007, and next steps are -to a) finish that, b) teach Tor to do openssl calls on buffers rather -than directly to the network, and c) teach Tor to use the new libevent -buffers approach. - -\subsection{Network scaling} - -If we attract many more relays, we will need to handle the growing pains -in terms of getting all the directory information to all the users. - -The first piece of this issue is a practical question: since the -directory size scales linearly with more relays, at some point it -will no longer be practical for every client to learn about every -relay. We can try to reduce the amount of information each client needs -to fetch (e.g. based on fetching less information preemptively as in -Section~\ref{subsec:fewer-descriptor-fetches} below), but eventually -clients will need to learn about only a subset of the network, and we -will need to design good ways to divide up the network information. - -The second piece is an anonymity question that arises from this -partitioning: if Tor's security comes from having all the clients -behaving in similar ways, yet we are now giving different clients -different directory information, how can we minimize the new anonymity -attacks we introduce? 
- -\subsection{Using fewer sockets} - -Since in the current network every Tor relay can reach every other Tor -relay, and we have many times more users than relays, pretty much every -possible link in the network is in use. That is, the current network -is a clique in practice. - -And since each of these connections requires a TCP socket, it's going -to be hard for the network to grow much larger: many systems come with -a default of 1024 file descriptors allowed per process, and raising -that ulimit is hard for end users. Worse, many low-end gateway/firewall -routers can't handle this many connections in their routing table. - -One approach is a restricted-route topology~\cite{danezis:pet2003}: -predefine which relays can reach which other relays, and communicate -these restrictions to the relays and the clients. We need to compute -which links are acceptable in a way that's decentralized yet scalable, -and in a way that achieves a small-worlds property; and we -need an efficient (compact) way to characterize the topology information -so all the users could keep up to date. - -Another approach would be to switch to UDP-based transport between -relays, so we don't need to keep the TCP sockets open at all. Needs more -investigation too. - -\subsection{Auto bandwidth detection and rate limiting, especially for - asymmetric connections.} - - -\subsection{Better algorithms for giving priority to local traffic} - -Proposal 111 made a lot of progress at separating local traffic from -relayed traffic, so Tor users can rate limit the relayed traffic at a -stricter level. But since we want to pass both traffic classes over the -same TCP connection, we can't keep them entirely separate. The current -compromise is that we treat all bytes to/from a given connection as -local traffic if any of the bytes within the past N seconds were local -bytes.
But a) we could use some more intelligent heuristics, and b) -this leaks information to an active attacker about when local traffic -was sent/received. - -\subsection{Tolerate absurdly wrong clocks, even for relays} - -Many of our users are on Windows, running with a clock several days or -even several years off from reality. Some of them are even intentionally -in this state so they can run software that will only run in the past. - -Before Tor 0.1.1.x, Tor clients would still function if their clock was -wildly off --- they simply got a copy of the directory and believed it. -Starting in Tor 0.1.1.x (and even moreso in Tor 0.2.0.x), the clients -only use networkstatus documents that they believe to be recent, so -clients with extremely wrong clocks no longer work. (This bug has been -an unending source of vague and confusing bug reports.) - -The first step is for clients to recognize when all the directory material -they're fetching has roughly the same offset from their current time, -and then automatically correct for it. - -Once that's working well, clients who opt to become bridge relays should -be able to use the same approach to serve accurate directory information -to their bridge users. - -\subsection{Risks from being a relay} - -Three different research -papers~\cite{back01,clog-the-queue,attack-tor-oak05} describe ways to -identify the nodes in a circuit by running traffic through candidate nodes -and looking for dips in the traffic while the circuit is active. These -clogging attacks are not that scary in the Tor context so long as relays -are never clients too. But if we're trying to encourage more clients to -turn on relay functionality too (whether as bridge relays or as normal -relays), then we need to understand this threat better and learn how to -mitigate it. - -One promising research direction is to investigate the RelayBandwidthRate -feature that lets Tor rate limit relayed traffic differently from local -traffic. 
Since the attacker's ``clogging'' traffic is not in the same -bandwidth class as the traffic initiated by the user, it may be harder -to detect interference. Or it may not be. - -\subsection{First a bridge, then a public relay?} - -Once enough of the items in this section are done, I want all clients -to start out automatically detecting their reachability and opting -to be bridge relays. - -Then if they realize they have enough consistency and bandwidth, they -should automatically upgrade to being non-exit relays. - -What metrics should we use for deciding when we're fast enough -and stable enough to switch? Given that the list of bridge relays needs -to be kept secret, it doesn't make much sense to switch back. - -\section{Tor on low resources / slow links} -\subsection{Reducing directory fetches further} -\label{subsec:fewer-descriptor-fetches} -\subsection{AvoidDiskWrites} -\subsection{Using less ram} -\subsection{Better DoS resistance for tor servers / authorities} -\section{Blocking resistance} -\subsection{Better bridge-address-distribution strategies} -\subsection{Get more volunteers running bridges} -\subsection{Handle multiple bridge authorities} -\subsection{Anonymity for bridge users: second layer of entry guards, etc?} -\subsection{More TLS normalization} -\subsection{Harder to block Tor software distribution} -\subsection{Integration with Psiphon} -\section{Packaging} -\subsection{Switch Privoxy out for Polipo} - - Make Vidalia able to launch more programs itself -\subsection{Continue Torbutton improvements} - especially better docs -\subsection{Vidalia and stability (especially wrt ongoing Windows problems)} - learn how to get useful crash reports (tracebacks) from Windows users -\subsection{Polipo support on Windows} -\subsection{Auto update for Tor, Vidalia, others} -\subsection{Tor browser bundle for USB and standalone use} -\subsection{LiveCD solution} -\subsection{VM-based solution} -\subsection{Tor-on-enclave-firewall configuration} 
-\subsection{General tutorials on what common applications are Tor-friendly} -\subsection{Controller libraries (torctl) plus documentation} -\subsection{Localization and translation (Vidalia, Torbutton, web pages)} -\section{Interacting better with Internet sites} -\subsection{Make tordnsel (tor exitlist) better and more well-known} -\subsection{Nymble} -\subsection{Work with Wikipedia, Slashdot, Google(, IRC networks)} -\subsection{IPv6 support for exit destinations} -\section{Network health} -\subsection{torflow / soat to detect bad relays} -\subsection{make authorities more automated} -\subsection{torstatus pages and better trend tracking} -\subsection{better metrics for assessing network health / growth} - - geoip usage-by-country reporting and aggregation - (Once that's working, switch to Directory guards) -\section{Performance research} -\subsection{Load balance better} -\subsection{Improve our congestion control algorithms} -\subsection{Two-hops vs Three-hops} -\subsection{Transport IP packets end-to-end} -\section{Outreach and user education} -\subsection{"Who uses Tor" use cases} -\subsection{Law enforcement contacts} - - "Was this IP address a Tor relay recently?" database -\subsection{Commercial/enterprise outreach. Help them use Tor well and - not fear it.} -\subsection{NGO outreach and training.} - - "How to be a safe blogger" -\subsection{More activist coordinators, more people to answer user questions} -\subsection{More people to hold hands of server operators} -\subsection{Teaching the media about Tor} -\subsection{The-dangers-of-plaintext awareness} -\subsection{check.torproject.org and other "privacy checkers"} -\subsection{Stronger legal FAQ for US} -\subsection{Legal FAQs for other countries} -\section{Anonymity research} -\subsection{estimate relay bandwidth more securely} -\subsection{website fingerprinting attacks} -\subsection{safer e2e defenses} -\subsection{Using Tor when you really need anonymity. 
Can you compose it - with other steps, like more trusted guards or separate proxies?} -\subsection{Topology-aware routing; routing-zones, steven's pet2007 paper.} -\subsection{Exactly what do guard nodes provide?} - -Entry guards seem to defend against all sorts of attacks. Can we work -through all the benefits they provide? Papers like Nikita's CCS 2007 -paper make me think their value is not well-understood by the research -community. - -\section{Organizational growth and stability} -\subsection{A contingency plan if Roger gets hit by a bus} - - Get a new executive director -\subsection{More diversity of funding} - - Don't rely on any one funder as much - - Don't rely on any sector or funder category as much -\subsection{More Tor-funded people who are skilled at peripheral apps like - Vidalia, Torbutton, Polipo, etc} -\subsection{More coordinated media handling and strategy} -\subsection{Clearer and more predictable trademark behavior} -\subsection{More outside funding for internships, etc e.g. 
GSoC.} -\section{Hidden services} -\subsection{Scaling: how to handle many hidden services} -\subsection{Performance: how to rendezvous with them quickly} -\subsection{Authentication/authorization: how to tolerate DoS / load} -\section{Tor as a general overlay network} -\subsection{Choose paths / exit by country} -\subsection{Easier to run your own private servers and have Tor use them - anywhere in the path} -\subsection{Easier to run an independent Tor network} -\section{Code security/correctness} -\subsection{veracode} -\subsection{code audit} -\subsection{more fuzzing tools} -\subsection{build farm, better testing harness} -\subsection{Long-overdue code refactoring and cleanup} -\section{Protocol security} -\subsection{safer circuit handshake} -\subsection{protocol versioning for future compatibility} -\subsection{cell sizes} -\subsection{adapt to new key sizes, etc} - -\bibliographystyle{plain} \bibliography{tor-design} - -\end{document} - - - - -\section{Code and design infrastructure} - -\subsection{Protocol revision} -To maintain backward compatibility, we've postponed major protocol -changes and redesigns for a long time. Because of this, there are a number -of sensible revisions we've been putting off until we could deploy several of -them at once. To do each of these, we first need to discuss design -alternatives with other cryptographers and outside collaborators to -make sure that our choices are secure. - -First of all, our protocol needs better {\bf versioning support} so that we -can make backward-incompatible changes to our core protocol. There are -difficult anonymity issues here, since many naive designs would make it easy -to tell clients apart (and then track them) based on their supported versions. - -With protocol versioning support would come the ability to {\bf future-proof - our ciphersuites}. 
For example, not only our OR protocol, but also our -directory protocol, is pretty firmly tied to the SHA-1 hash function, which -though not yet known to be insecure for our purposes, has begun to show -its age. We should -remove assumptions throughout our design based on the assumption that public -keys, secret keys, or digests will remain any particular size indefinitely. - -Our OR {\bf authentication protocol}, though provably -secure\cite{tap:pet2006}, relies more on particular aspects of RSA and our -implementation thereof than we had initially believed. To future-proof -against changes, we should replace it with a less delicate approach. - -\plan{For all the above: 2 person-months to specify, spread over several - months with time for interaction with external participants. One - person-month to implement. Start specifying in early 2007.} - -We might design a {\bf stream migration} feature so that streams tunneled -over Tor could be more resilient to dropped connections and changed IPs. -\plan{Not in 2007.} - -A new protocol could support {\bf multiple cell sizes}. Right now, all data -passes through the Tor network divided into 512-byte cells. This is -efficient for high-bandwidth protocols, but inefficient for protocols -like SSH or AIM that send information in small chunks. Of course, we need to -investigate the extent to which multiple sizes could make it easier for an -adversary to fingerprint a traffic pattern. \plan{Not in 2007.} - -As a part of our design, we should investigate possible {\bf cipher modes} -other than counter mode. For example, a mode with built-in integrity -checking, error propagation, and random access could simplify our protocol -significantly. Sadly, many of these are patented and unavailable for us. 
-\plan{Not in 2007.} - -\subsection{Scalability} - -\subsubsection{Improved directory efficiency} - -We should {\bf have routers upload their descriptors even less often}, so -that clients do not need to download replacements every 18 hours whether any -information has changed or not. (As of Tor 0.1.2.3-alpha, clients tolerate -routers that don't upload often, but routers still upload at least every 18 -hours to support older clients.) \plan{Must do, but not until 0.1.1.x is -deprecated in mid 2007. 1 week.} - -\subsubsection{Non-clique topology} -Our current network design achieves a certain amount of its anonymity by -making clients act like each other through the simple expedient of making -sure that all clients know all servers, and that any server can talk to any -other server. But as the number of servers increases to serve an -ever-greater number of clients, these assumptions become impractical. - -At worst, if these scalability issues become troubling before a solution is -found, we can design and build a solution to {\bf split the network into -multiple slices} until a better solution comes along. This is not ideal, -since rather than looking like all other users from a point of view of path -selection, users would ``only'' look like 200,000--300,000 other -users.\plan{Not unless needed.} - -We are in the process of designing {\bf improved schemes for network - scalability}. Some approaches focus on limiting what an adversary can know -about what a user knows; others focus on reducing the extent to which an -adversary can exploit this knowledge. These are currently in their infancy, -and will probably not be needed in 2007, but they must be designed in 2007 if -they are to be deployed in 2008.\plan{Design in 2007; unknown difficulty. - Write a paper.} - -\subsubsection{Relay incentives} -To support more users on the network, we need to get more servers. So far, -we've relied on volunteerism to attract server operators, and so far it's -served us well. 
But in the long run, we need to {\bf design incentives for - users to run servers} and relay traffic for others. Most obviously, we -could try to build the network so that servers offered improved service for -other servers, but we would need to do so without weakening anonymity and -making it obvious which connections originate from users running servers. We -have some preliminary designs~\cite{incentives-txt,tor-challenges}, -but need to perform -some more research to make sure they would be safe and effective.\plan{Write - a draft paper; 2 person-months.} -(XXX we did that) - -\subsection{Portability} -Our {\bf Windows implementation}, though much improved, continues to lag -behind Unix and Mac OS X, especially when running as a server. We hope to -merge promising patches from Christian King to address this point, and bring -Windows performance on par with other platforms.\plan{Do in 2007; 1.5 months - to integrate not counting Mike's work.} - -We should have {\bf better support for portable devices}, including modes of -operation that require less RAM, and that write to disk less frequently (to -avoid wearing out flash RAM).\plan{Optional; 2 weeks.} - -\subsection{Performance: resource usage} -We've been working on {\bf using less RAM}, especially on servers. This has -paid off a lot for directory caches in the 0.1.2, which in some cases are -using 90\% less memory than they used to require. But we can do better, -especially in the area around our buffer management algorithms, by using an -approach more like the BSD and Linux kernels use instead of our current ring -buffer approach. (For OR connections, we can just use queues of cell-sized -chunks produced with a specialized allocator.) 
This could potentially save -around 25 to 50\% of the memory currently allocated for network buffers, and -make Tor a more attractive proposition for restricted-memory environments -like old computers, mobile devices, and the like.\plan{Do in 2007; 2-3 weeks - plus one week measurement.} (XXX We did this, but we need to do something -more/else.) - -\subsection{Performance: network usage} -We know too little about how well our current path -selection algorithms actually spread traffic around the network in practice. -We should {\bf research the efficacy of our traffic allocation} and either -assure ourselves that it is close enough to optimal as to need no improvement -(unlikely) or {\bf identify ways to improve network usage}, and get more -users' traffic delivered faster. Performing this research will require -careful thought about anonymity implications. - -We should also {\bf examine the efficacy of our congestion control - algorithm}, and see whether we can improve client performance in the -presence of a congested network through dynamic `sendme' window sizes or -other means. This will have anonymity implications too if we aren't careful. - -\plan{For both of the above: research, design and write - a measurement tool in 2007: 1 month. See if we can interest a graduate - student.} - -We should work on making Tor's cell-based protocol perform better on -networks with low bandwidth -and high packet loss.\plan{Do in 2007 if we're funded to do it; 4-6 weeks.} - -\subsection{Performance scenario: one Tor client, many users} -We should {\bf improve Tor's performance when a single Tor handles many - clients}. Many organizations want to manage a single Tor client on their -firewall for many users, rather than having each user install a separate -Tor client. 
We haven't optimized for this scenario, and it is likely that -there are some code paths in the current implementation that become -inefficient when a single Tor is servicing hundreds or thousands of client -connections. (Additionally, it is likely that such clients have interesting -anonymity requirements that we should investigate.) We should profile Tor -under appropriate loads, identify bottlenecks, and fix them.\plan{Do in 2007 - if we're funded to do it; 4-8 weeks.} - -\subsection{Tor servers on asymmetric bandwidth} - -Tor should work better on servers that have asymmetric connections like cable -or DSL. Because Tor has separate TCP connections between each -hop, if the incoming bytes are arriving just fine and the outgoing bytes are -all getting dropped on the floor, the TCP push-back mechanisms don't really -transmit this information back to the incoming streams.\plan{Do in 2007 since - related to bandwidth limiting. 3-4 weeks.} - -\subsection{Running Tor as both client and server} - -Many performance tradeoffs and balances that might need more attention. -We first need to track and fix whatever bottlenecks emerge; but we also -need to invent good algorithms for prioritizing the client's traffic -without starving the server's traffic too much.\plan{No idea; try -profiling and improving things in 2007.} - -\subsection{Protocol redesign for UDP} -Tor has relayed only TCP traffic since its first versions, and has used -TLS-over-TCP to do so. This approach has proved reliable and flexible, but -in the long term we will need to allow UDP traffic on the network, and switch -some or all of the network to using a UDP transport. {\bf Supporting UDP - traffic} will make Tor more suitable for protocols that require UDP, such -as many VOIP protocols. {\bf Using a UDP transport} could greatly reduce -resource limitations on servers, and make the network far less interruptible -by lossy connections. 
Either of these protocol changes would require a great -deal of design work, however. We hope to be able to enlist the aid of a few -talented graduate students to assist with the initial design and -specification, but the actual implementation will require significant testing -of different reliable transport approaches.\plan{Maybe do a design in 2007 if -we find an interested academic. Ian or Ben L might be good partners here.} - -\section{Blocking resistance} - -\subsection{Design for blocking resistance} -We have written a design document explaining our general approach to blocking -resistance. We should workshop it with other experts in the field to get -their ideas about how we can improve Tor's efficacy as an anti-censorship -tool. - -\subsection{Implementation: client-side and bridges-side} - -Bridges will want to be able to {\bf listen on multiple addresses and ports} -if they can, to give the adversary more ports to block. - -\subsection{Research: anonymity implications from becoming a bridge} - -see arma's bridge proposal; e.g. should bridge users use a second layer of -entry guards? - -\subsection{Implementation: bridge authority} - -we run some -directory authorities with a slightly modified protocol that doesn't leak -the entire list of bridges. Thus users can learn up-to-date information -for bridges they already know about, but they can't learn about arbitrary -new bridges. - -we need a design for distributing the bridge authority over more than one -server - -\subsection{Normalizing the Tor protocol on the wire} -Additionally, we should {\bf resist content-based filters}. Though an -adversary can't see what users are saying, some aspects of our protocol are -easy to fingerprint {\em as} Tor. We should correct this where possible. - -Look like Firefox; or look like nothing? -Future research: investigate timing similarities with other protocols. 
- -\subsection{Research: scanning-resistance} - -\subsection{Research/Design/Impl: how users discover bridges} -Our design anticipates an arms race between discovery methods and censors. -We need to begin the infrastructure on our side quickly, preferably in a -flexible language like Python, so we can adapt quickly to censorship. - -phase one: personal bridges -phase two: families of personal bridges -phase three: more structured social network -phase four: bag of tricks -Research: phase five... - -Integration with Psiphon, etc? - -\subsection{Document best practices for users} -Document best practices for various activities common among -blocked users (e.g. WordPress use). - -\subsection{Research: how to know if a bridge has been blocked?} - -\subsection{GeoIP maintenance, and "private" user statistics} -How to know if the whole idea is working? - -\subsection{Research: hiding whether the user is reading or publishing?} - -\subsection{Research: how many bridges do you need to know to maintain -reachability?} - -\subsection{Resisting censorship of the Tor website, docs, and mirrors} - -We should take some effort to consider {\bf initial distribution of Tor and - related information} in countries where the Tor website and mirrors are -censored. (Right now, most countries that block access to Tor block only the -main website and leave mirrors and the network itself untouched.) Falling -back on word-of-mouth is always a good last resort, but we should also take -steps to make sure it's relatively easy for users to get ahold of a copy. - -\section{Security} - -\subsection{Security research projects} - -We should investigate approaches with some promise to help Tor resist -end-to-end traffic correlation attacks. It's an open research question -whether (and to what extent) {\bf mixed-latency} networks, {\bf low-volume - long-distance padding}, or other approaches can resist these attacks, which -are currently some of the most effective against careful Tor users. 
We -should research these questions and perform simulations to identify -opportunities for strengthening our design without dropping performance to -unacceptable levels. %Cite something -\plan{Start doing this in 2007; write a paper. 8-16 weeks.} - -We've got some preliminary results suggesting that {\bf a topology-aware - routing algorithm}~\cite{feamster:wpes2004} could reduce Tor users' -vulnerability against local or ISP-level adversaries, by ensuring that they -are never in a position to watch both ends of a connection. We need to -examine the effects of this approach in more detail and consider side-effects -on anonymity against other kinds of adversaries. If the approach still looks -promising, we should investigate ways for clients to implement it (or an -approximation of it) without having to download routing tables for the whole -Internet. \plan{Not in 2007 unless a graduate student wants to do it.} - -%\tmp{defenses against end-to-end correlation} We don't expect any to work -%right now, but it would be useful to learn that one did. Alternatively, -%proving that one didn't would free up researchers in the field to go work on -%other things. -% -% See above; I think I got this. - -We should research the efficacy of {\bf website fingerprinting} attacks, -wherein an adversary tries to match the distinctive traffic and timing -pattern of the resources constituting a given website to the traffic pattern -of a user's client. These attacks work great in simulations, but in -practice we hear they don't work nearly as well. We should get some actual -numbers to investigate the issue, and figure out what's going on. If we -resist these attacks, or can improve our design to resist them, we should. -% add cites -\plan{Possibly part of end-to-end correlation paper. 
Otherwise, not in 2007 - unless a graduate student is interested.} - -\subsection{Implementation security} - -We should also {\bf mark RAM that holds key material as non-swappable} so -that there is no risk of recovering key material from a hard disk -compromise. This would require submitting patches upstream to OpenSSL, where -support for marking memory as sensitive is currently in a very preliminary -state.\plan{Nice to do, but not in immediate Tor scope.} - -There are numerous tools for identifying trouble spots in code (such as -Coverity or even VS2005's code analysis tool) and we should convince somebody -to run some of them against the Tor codebase. Ideally, we could figure out a -way to get our code checked periodically rather than just once.\plan{Almost - no time once we talk somebody into it.} - -We should try {\bf protocol fuzzing} to identify errors in our -implementation.\plan{Not in 2007 unless we find a grad student or - undergraduate who wants to try.} - -Our guard nodes help prevent an attacker from being able to become a chosen -client's entry point by having each client choose a few favorite entry points -as ``guards'' and stick to them. We should implement a {\bf directory - guards} feature to keep adversaries from enumerating Tor users by acting as -a directory cache.\plan{Do in 2007; 2 weeks.} - -\subsection{Detect corrupt exits and other servers} -With the success of our network, we've attracted servers in many locations, -operated by many kinds of people. Unfortunately, some of these locations -have compromised or defective networks, and some of these people are -untrustworthy or incompetent. Our current design relies on authority -administrators to identify bad nodes and mark them as nonfunctioning. We -should {\bf automate the process of identifying malfunctioning nodes} as -follows: - -We should create a generic {\bf feedback mechanism for add-on tools} like -Mike Perry's ``Snakes on a Tor'' to report failing nodes to authorities. 
-\plan{Do in 2006; 1-2 weeks.} - -We should write tools to {\bf detect more kinds of innocent node failure}, -such as nodes whose network providers intercept SSL, nodes whose network -providers censor popular websites, and so on. We should also try to detect -{\bf routers that snoop traffic}; we could do this by launching connections -to throwaway accounts, and seeing which accounts get used.\plan{Do in 2007; - ask Mike Perry if he's interested. 4-6 weeks.} - -We should add {\bf an efficient way for authorities to mark a set of servers - as probably collaborating} though not necessarily otherwise dishonest. -This happens when an administrator starts multiple routers, but doesn't mark -them as belonging to the same family.\plan{Do during v2.1 directory protocol - redesign; 1-2 weeks to implement.} - -To avoid attacks where an adversary claims good performance in order to -attract traffic, we should {\bf have authorities measure node performance} -(including stability and bandwidth) themselves, and not simply believe what -they're told. We also measure stability by tracking MTBF. Measuring -bandwidth will be tricky, since it's hard to distinguish between a server with -low capacity, and a high-capacity server with most of its capacity in -use. See also Nikita's NDSS 2008 paper.\plan{Do it if we can interest -a grad student.} - -{\bf Operating a directory authority should be easier.} We rely on authority -operators to keep the network running well, but right now their job involves -too much busywork and administrative overhead. A better interface for them -to use could free their time to work on exception cases rather than on -adding named nodes to the network.\plan{Do in 2007; 4-5 weeks.} - -\subsection{Protocol security} - -In addition to other protocol changes discussed above, -% And should we move some of them down here? 
-NM -we should add {\bf hooks for denial-of-service resistance}; we have some -preliminary designs, but we shouldn't postpone them until we really need them. -If somebody tries a DDoS attack against the Tor network, we won't want to -wait for all the servers and clients to upgrade to a new -version.\plan{Research project; do this in 2007 if funded.} - -\section{Development infrastructure} - -\subsection{Build farm} -We've begun to deploy a cross-platform distributed build farm of hosts -that build and test the Tor source every time it changes in our development -repository. - -We need to {\bf get more participants}, so that we can test a larger variety -of platforms. (Previously, we've only found out when our code had broken on -obscure platforms when somebody got around to building it.) - -We need also to {\bf add our dependencies} to the build farm, so that we can -ensure that libraries we need (especially libevent) do not stop working on -any important platform between one release and the next. - -\plan{This is ongoing as more buildbots arrive.} - -\subsection{Improved testing harness} -Currently, our {\bf unit tests} cover only about 20\% of the code base. This -is uncomfortably low; we should write more and switch to a more flexible -testing framework.\plan{Ongoing basis, time permitting.} - -We should also write flexible {\bf automated single-host deployment tests} so -we can more easily verify that the current codebase works with the -network.\plan{Worthwhile in 2007; would save lots of time. 2-4 weeks.} - -We should build automated {\bf stress testing} frameworks so we can see which -realistic loads cause Tor to perform badly, and regularly profile Tor against -these loads. 
This would give us {\it in vitro} performance values to -supplement our deployment experience.\plan{Worthwhile in 2007; 2-6 weeks.} - -We should improve our memory profiling code.\plan{...} - - -\subsection{Centralized build system} -We currently rely on a separate packager to maintain the packaging system and -to build Tor on each platform for which we distribute binaries. Separate -package maintainers is sensible, but separate package builders has meant -long turnaround times between source releases and package releases. We -should create the necessary infrastructure for us to produce binaries for all -major packages within an hour or so of source release.\plan{We should - brainstorm this at least in 2007.} - -\subsection{Improved metrics} -We need a way to {\bf measure the network's health, capacity, and degree of - utilization}. Our current means for doing this are ad hoc and not -completely accurate. - -We need better ways to {\bf tell which countries our users are coming from, - and how many there are}. A good perspective of the network helps us -allocate resources and identify trouble spots, but our current approaches -will work less and less well as we make it harder for adversaries to -enumerate users. We'll probably want to shift to a smarter, statistical -approach rather than our current ``count and extrapolate'' method. - -\plan{All of this in 2007 if funded; 4-8 weeks} - -% \tmp{We'd like to know how much of the network is getting used.} -% I think this is covered above -NM - -\subsection{Controller library} -We've done lots of design and development on our controller interface, which -allows UI applications and other tools to interact with Tor. 
We could -encourage the development of more such tools by releasing a {\bf - general-purpose controller library}, ideally with API support for several -popular programming languages.\plan{2006 or 2007; 1-2 weeks.} - -\section{User experience} - -\subsection{Get blocked less, get blocked less broadly} -Right now, some services block connections from the Tor network because -they don't have a better -way to keep vandals from abusing them than blocking IP addresses associated -with vandalism. Our approach so far has been to educate them about better -solutions that currently exist, but we should also {\bf create better -solutions for limiting vandalism by anonymous users} like credential and -blind-signature based implementations, and encourage their use. Other -promising starting points include writing a patch and explanation for -Wikipedia, and helping Freenode to document, maintain, and expand its -current Tor-friendly position.\plan{Do a writeup here in 2007; 1-2 weeks.} - -Those who do block Tor users also block overbroadly, sometimes blacklisting -operators of Tor servers that do not permit exit to their services. We could -obviate innocent reasons for doing so by designing a {\bf narrowly-targeted Tor - RBL service} so that those who wanted to overblock Tor could no longer -plead incompetence.\plan{Possibly in 2007 if we decide it's a good idea; 3 - weeks.} - -\subsection{All-in-one bundle} -We need a well-tested, well-documented bundle of Tor and supporting -applications configured to use it correctly. We have an initial -implementation well under way, but it will need additional work in -identifying requisite Firefox extensions, identifying security threats, -improving user experience, and so on. This will need significantly more work -before it's ready for a general public release. - -\subsection{LiveCD Tor} -We need a nice bootable livecd containing a minimal OS and a few applications -configured to use it correctly. 
The Anonym.OS project demonstrated that this -is quite feasible, but their project is not currently maintained. - -\subsection{A Tor client in a VM} -\tmp{a.k.a JanusVM} which is quite related to the firewall-level deployment -section below. JanusVM is a Linux kernel running in VMWare. It gets an IP -address from the network, and serves as a DHCP server for its host Windows -machine. It intercepts all outgoing traffic and redirects it into Privoxy, -Tor, etc. This Linux-in-Windows approach may help us with scalability in -the short term, and it may also be a good long-term solution rather than -accepting all security risks in Windows. - -%\subsection{Interface improvements} -%\tmp{Allow controllers to manipulate server status.} -% (Why is this in the User Experience section?) -RD -% I think it's better left to a generic ``make controller iface better'' item. - -\subsection{Firewall-level deployment} -Another useful deployment mode for some users is using {\bf Tor in a firewall - configuration}, and directing all their traffic through Tor. This can be a -little tricky to set up currently, but it's an effective way to make sure no -traffic leaves the host un-anonymized. To achieve this, we need to {\bf - improve and port our new TransPort} feature which allows Tor to be used -without SOCKS support; to {\bf add an anonymizing DNS proxy} feature to Tor; -and to {\bf construct a recommended set of firewall configurations} to redirect -traffic to Tor. - -This is an area where {\bf deployment via a livecd}, or an installation -targeted at specialized home routing hardware, could be useful. - -\subsection{Assess software and configurations for anonymity risks} -Right now, users and packagers are more or less on their own when selecting -Firefox extensions. We should {\bf assemble a recommended list of browser - extensions} through experiment, and include this in the application bundles -we distribute. 
- -We should also describe {\bf best practices for using Tor with each class of - application}. For example, Ethan Zuckerman has written a detailed -tutorial on how to use Tor, Firefox, GMail, and Wordpress to blog with -improved safety. There are many other cases on the Internet where anonymity -would be helpful, and there are a lot of ways to screw up using Tor. - -The Foxtor and Torbutton extensions serve similar purposes; we should pick a -favorite, and merge in the useful features of the other. - -%\tmp{clean up our own bundled software: -%E.g. Merge the good features of Foxtor into Torbutton} -% -% What else did you have in mind? -NM - -\subsection{Localization} -Right now, most of our user-facing code is internationalized. We need to -internationalize the last few hold-outs (like the Tor expert installer), and get -more translations for the parts that are already internationalized. - -Also, we should look into a {\bf unified translator's solution}. Currently, -since different tools have been internationalized using the -framework-appropriate method, different tools require translators to localize -them via different interfaces. Inasmuch as possible, we should make -translators only need to use a single tool to translate the whole Tor suite. - -\section{Support} - -It would be nice to set up some {\bf user support infrastructure} and -{\bf contributor support infrastructure}, especially focusing on server -operators and on coordinating volunteers. - -This includes intuitive and easy ticket systems for bug reports and -feature suggestions (not just mailing lists with a half dozen people -and no clear roles for who answers what), but it also includes a more -personalized and efficient framework for interaction so we keep the -attention and interest of the contributors, and so we make them feel -helpful and wanted. 
- -\section{Documentation} - -\subsection{Unified documentation scheme} - -We need to {\bf inventory our documentation.} Our documentation so far has -been mostly produced on an {\it ad hoc} basis, in response to particular -needs and requests. We should figure out what documentation we have, which of -it (if any) should get priority, and whether we can't put it all into a -single format. - -We could {\bf unify the docs} into a single book-like thing. This will also -help us identify what sections of the ``book'' are missing. - -\subsection{Missing technical documentation} - -We should {\bf revise our design paper} to reflect the new decisions and -research we've made since it was published in 2004. This will help other -researchers evaluate and suggest improvements to Tor's current design. - -Other projects sometimes implement the client side of our protocol. We -encourage this, but we should write {\bf a document about how to avoid -excessive resource use}, so we don't need to worry that they will do so -without regard to the effect of their choices on server resources. - -\subsection{Missing user documentation} - -Our documentation falls into two broad categories: some is `discursive' and -explains in detail why users should take certain actions, and other -documentation is `comprehensive' and describes all of Tor's features. Right -now, we have no document that is at once deep, readable, and thorough. We -should correct this by identifying missing spots in our design. 
- -\bibliographystyle{plain} \bibliography{tor-design} - -\end{document} - diff --git a/doc/rump-fc04.mgp b/doc/rump-fc04.mgp deleted file mode 100644 index efbf6c840c..0000000000 --- a/doc/rump-fc04.mgp +++ /dev/null @@ -1,175 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%deffont "standard" xfont "comic sans ms-medium-r" -%%deffont "thick" xfont "arial black-medium-r" -%%deffont "typewriter" xfont "courier new-bold-r" -%%deffont "type2writer" xfont "arial narrow-bold-r" -%%deffont "standard" tfont "standard.ttf", tmfont "kochi-mincho.ttf" -%%deffont "thick" tfont "thick.ttf", tmfont "goth.ttf" -%%deffont "typewriter" tfont "typewriter.ttf", tmfont "goth.ttf" -%deffont "standard" xfont "helvetica-medium-r", tfont "arial.ttf", tmfont "times.ttf" -%deffont "thick" xfont "helvetica-bold-r", tfont "arialbd.ttf", tmfont "hoso6.ttf" -%deffont "italic" xfont "helvetica-italic-r", tfont "ariali.ttf", tmfont "hoso6.ttf" -%deffont "typewriter" xfont "courier-medium-r", tfont "typewriter.ttf", tmfont "hoso6.ttf" -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% -%% Default settings per each line numbers. -%% -%default 1 leftfill, size 8, fore "black", back "white", font "thick", hgap 1 -%default 2 size 8, vgap 10, prefix " ", ccolor "black" -%default 3 size 6, bar "gray70", vgap 0 -%default 4 size 6, fore "black", vgap 0, prefix " ", font "standard" -%% -%%default 1 area 90 90, leftfill, size 9, fore "yellow", back "blue", font "thick" -%%default 2 size 9, vgap 10, prefix " " -%%default 3 size 7, bar "gray70", vgap 10 -%%default 4 size 7, vgap 30, prefix " ", font "standard" -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% -%% Default settings that are applied to TAB-indented lines. 
-%% -%tab 1 size 5, vgap 40, prefix " ", icon arc "red" 50 -%tab 2 size 4, vgap 35, prefix " ", icon delta3 "blue" 40 -%tab 3 size 3, vgap 35, prefix " ", icon dia "DarkViolet" 40 -%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page -%nodefault -%center, size 9, font "thick", back "white", fore "black" - - - -Tor: -%size 8 -Next-generation Onion Routing - - -%size 7 -Roger Dingledine -Nick Mathewson -Paul Syverson - -%%The Free Haven Project -%%%font "typewriter", fore "blue" -%%http://freehaven.net/ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Low-latency anonymity system - -%leftfill -Deployed: 19 nodes, hundreds of users (?) - -Many improvements on earlier design - -Free software -- available source code - -Design is not covered by earlier onion routing -patent - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Perfect forward secrecy - - -Telescoping circuit - - negotiates keys at each hop - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%page -%% -%%Separation from "protocol cleaning" -%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -No mixing, padding, traffic shaping (yet) - - -Please show us they're worth the usability tradeoff - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%page -%% -%%Many TCP streams can share one circuit -%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Congestion control - - -Simple rate limiting - -Plus have to keep internal nodes from overflowing - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Directory servers - - -Approve new servers - -Tell clients who's up right now - - plus their keys, location, etc - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Variable exit policies - - -Each server allows different outgoing connections - 
-E.g. no servers allow outgoing mail currently - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -End-to-end integrity checking - - -In previous onion routing, an insider could change -the text being transmitted: - -"dir" => "rm *" - -Even an external adversary could do this! - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Rendezvous points - - -allow hidden services - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -Differences / limitations - - -We're TCP-only, not all IP (but we're user-space and very portable) - -Not peer-to-peer - -No protocol normalization - -%%Not unobservable - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%page - -We have working code - - -Plus a design document, -and a byte-level specification - -%size 9 -http://freehaven.net/tor/ - |