summary | refs | log | tree | commit | diff
path: root/contrib/directory-archive
diff options
context:
space:
mode:
author: Peter Palfrader <peter@palfrader.org> 2008-09-02 09:42:55 +0000
committer: Peter Palfrader <peter@palfrader.org> 2008-09-02 09:42:55 +0000
commit: 8dd5a3d8301e15d96f39e5c1b53347ed2b340249 (patch)
tree: d19d31ae6f758d50e61815705b91f86328453976 /contrib/directory-archive
parent: 3c5c7994b0b40652f05b5ecb145845612354b89d (diff)
download: tor-8dd5a3d8301e15d96f39e5c1b53347ed2b340249.tar.gz
tor-8dd5a3d8301e15d96f39e5c1b53347ed2b340249.zip
Add my scripts to dump directories to contrib
svn:r16736
Diffstat (limited to 'contrib/directory-archive')
-rw-r--r-- contrib/directory-archive/crontab.sample | 3
-rwxr-xr-x contrib/directory-archive/fetch-all | 77
-rw-r--r-- contrib/directory-archive/fetch-all-functions | 72
-rwxr-xr-x contrib/directory-archive/fetch-all-v3 | 109
-rwxr-xr-x contrib/directory-archive/sort-into-month-folder | 74
-rwxr-xr-x contrib/directory-archive/tar-them-up | 125
6 files changed, 460 insertions, 0 deletions
diff --git a/contrib/directory-archive/crontab.sample b/contrib/directory-archive/crontab.sample
new file mode 100644
index 0000000000..e2821aa938
--- /dev/null
+++ b/contrib/directory-archive/crontab.sample
@@ -0,0 +1,3 @@
+# Every hour at minute 10: fetch the v3 consensus, votes and descriptors.
+10 * * * * cd projects/tor-v2dir && ./fetch-all-v3
+# Every hour at minute 40: fetch the v2 statuses and descriptors.
+40 * * * * cd projects/tor-v2dir && ./fetch-all
+# At 03:15 on the 6th of every month: sort old files into per-month folders
+# and, if that succeeded, tar up last month.  stdout of both is discarded.
+15 3 6 * * cd projects/tor-v2dir && ./sort-into-month-folder > /dev/null && ./tar-them-up last > /dev/null
diff --git a/contrib/directory-archive/fetch-all b/contrib/directory-archive/fetch-all
new file mode 100755
index 0000000000..745c0609b4
--- /dev/null
+++ b/contrib/directory-archive/fetch-all
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Download all current v2 directory status documents, then download
+# the descriptors and extra info documents.
+
+# Copyright (c) 2005, 2006, 2007, 2008 Peter Palfrader
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+TZ=UTC
+export TZ
+
+DIRSERVERS=""
+DIRSERVERS="$DIRSERVERS 86.59.21.38:80" # tor26
+DIRSERVERS="$DIRSERVERS 128.31.0.34:9031" # moria1
+DIRSERVERS="$DIRSERVERS 128.31.0.34:9032" # moria2
+#DIRSERVERS="$DIRSERVERS 140.247.60.64:80" # lefkada
+DIRSERVERS="$DIRSERVERS 194.109.206.212:80" # dizum
+DATEDIR=$(date "+%Y/%m/%d")
+TIME=$(date "+%Y%m%d-%H%M%S")
+
+. fetch-all-functions
+
+statuses=""
+for dirserver in $DIRSERVERS; do
+ authorities=$(wget -q -O - http://$dirserver/tor/status/all | egrep '^fingerprint ' | awk '{print $2}')
+ if [ "$authorities" == "" ]; then
+ echo "Did not get a list of authorities from $dirserver, going to next" 2>&1
+ continue
+ fi
+
+ dir="status/$DATEDIR"
+ [ -d "$dir" ] || mkdir -p "$dir"
+
+ authprefix="$dir/$TIME-"
+ for fp in $authorities; do
+ wget -q -O "$authprefix$fp" http://$dirserver/tor/status/fp/"$fp"
+ bzip2 "$authprefix$fp"
+ statuses="$statuses $authprefix$fp.bz2"
+ done
+ if [ "$statuses" == "" ]; then
+ echo "Did not get any statuses from $dirserver, going to next" 2>&1
+ continue
+ else
+ break
+ fi
+done
+
+if [ "$statuses" = "" ]; then
+ echo "No statuses available" 2>&1
+ exit 1
+fi
+
+digests=$( for i in ` bzcat $statuses | awk '$1 == "r" {printf "%s===\n", $4}' | sort -u `; do
+ echo $i | \
+ base64-decode | \
+ perl -e 'undef $/; $a=<>; print unpack("H\*", $a),"\n";';
+ done )
+for digest in $digests; do
+ fetch_digest "$digest" "server-descriptor"
+done
diff --git a/contrib/directory-archive/fetch-all-functions b/contrib/directory-archive/fetch-all-functions
new file mode 100644
index 0000000000..6d5a0e469d
--- /dev/null
+++ b/contrib/directory-archive/fetch-all-functions
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+# function used by fetch-all* to download server descriptors and
+# extra info documents
+
+# Copyright (c) 2005, 2006, 2007, 2008 Peter Palfrader
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+fetch_digest() {
+ local digest
+ local objecttype
+ local urlpart
+ local pathpart
+ local target
+ local targetdir
+ local dirserver
+ local ei
+
+ digest="$1"
+ objecttype="$2"
+ if [ "$objecttype" = "server-descriptor" ] ; then
+ urlpart="server"
+ pathpart="server-descriptor"
+ elif [ "$objecttype" = "extra-info" ] ; then
+ urlpart="extra"
+ pathpart="extra-info"
+ else
+ echo "Called fetch_digest with illegal objecttype '$objecttype'" >&2
+ exit 1
+ fi
+ target=$( echo $digest | sed -e 's#^\(.\)\(.\)#'"$pathpart"'/\1/\2/\1\2#' )
+ targetdir=$( dirname $target )
+ [ -d "$targetdir" ] || mkdir -p "$targetdir"
+ if ! [ -e "$target" ]; then
+ for dirserver in $DIRSERVERS; do
+ wget -q -O "$target" http://$dirserver/tor/$urlpart/d/"$digest" || rm -f "$target"
+ if [ -s "$target" ]; then
+ if egrep '^opt extra-info-digest ' "$target" > /dev/null; then
+ ei=$( egrep '^opt extra-info-digest ' "$target" | awk '{print $3}' | tr 'A-F' 'a-f' )
+ fetch_digest "$ei" "extra-info"
+ elif egrep '^extra-info-digest ' "$target" > /dev/null; then
+ ei=$( egrep '^extra-info-digest ' "$target" | awk '{print $2}' | tr 'A-F' 'a-f' )
+ fetch_digest "$ei" "extra-info"
+ fi
+ break
+ else
+ rm -f "$target"
+ fi
+ done
+ fi
+ #if ! [ -e "$target" ]; then
+ # echo "$objecttype $digest" >> failed
+ #fi
+}
diff --git a/contrib/directory-archive/fetch-all-v3 b/contrib/directory-archive/fetch-all-v3
new file mode 100755
index 0000000000..fe07ad7ef9
--- /dev/null
+++ b/contrib/directory-archive/fetch-all-v3
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+# Download all current v3 directory status votes and the consensus document,
+# then download the descriptors and extra info documents.
+
+# Copyright (c) 2005, 2006, 2007, 2008 Peter Palfrader
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+TZ=UTC
+export TZ
+
+DIRSERVERS=""
+DIRSERVERS="$DIRSERVERS 86.59.21.38:80" # tor26
+DIRSERVERS="$DIRSERVERS 128.31.0.34:9031" # moria1
+DIRSERVERS="$DIRSERVERS 216.224.124.114:9030" # ides
+DIRSERVERS="$DIRSERVERS 88.198.7.215:80" # gabelmoo
+#DIRSERVERS="$DIRSERVERS 140.247.60.64:80" # lefkada
+DIRSERVERS="$DIRSERVERS 194.109.206.212:80" # dizum
+DIRSERVERS="$DIRSERVERS 128.31.0.34:9032" # moria2
+TIME=$(date "+%Y%m%d-%H%M%S")
+
+. fetch-all-functions
+
+consensus=""
+tmpdir="consensus/tmp"
+[ -d "$tmpdir" ] || mkdir -p "$tmpdir"
+for dirserver in $DIRSERVERS; do
+ wget -q -O "$tmpdir/$TIME-consensus" http://$dirserver/tor/status-vote/current/consensus
+ if [ "$?" != 0 ]; then
+ rm -f "$tmpdir/$TIME-consensus"
+ continue
+ fi
+
+ freshconsensus="$tmpdir/$TIME-consensus"
+
+ timestamp=$(awk '$1=="valid-after" {printf "%s-%s", $2, $3}' < "$freshconsensus")
+ datedir=$(awk '$1=="valid-after" {printf "%s", $2}' < "$freshconsensus" | tr '-' '/')
+ dir="consensus/$datedir"
+ [ -d "$dir" ] || mkdir -p "$dir"
+
+
+ consensus="$dir/$timestamp-consensus.bz2"
+ if ! [ -e "$consensus" ]; then
+ # the consensus is new, or at least we don't have it yet
+ bzip2 "$freshconsensus"
+ mv "$freshconsensus.bz2" "$consensus"
+ break
+ fi
+
+ rm -f "$freshconsensus"
+ echo "Consensus from $timestamp (gotten from $dirserver) already exists!" >&2
+ # maybe there is a newer one on a different authority, so try again.
+done
+
+if [ "$consensus" = "" ]; then
+ echo "No consensus available" 2>&1
+ exit 1
+fi
+
+
+votes=$(bzcat $consensus | awk '$1 == "vote-digest" {print $2}')
+for vote in $votes; do
+ for dirserver in $DIRSERVERS; do
+ wget -q -O "$dir/$TIME-vote-$vote" http://$dirserver/tor/status-vote/current/d/$vote
+ if [ "$?" != 0 ]; then
+ rm -f "$dir/$TIME-vote-$vote"
+ continue
+ fi
+ break
+ done
+ if [ -e "$dir/$TIME-vote-$vote" ]; then
+ voteridentity=$(awk '$1=="fingerprint" {print $2}' < "$dir/$TIME-vote-$vote")
+ if [ -e "$dir/$timestamp-vote-$voteridentity-$vote.bz2" ]; then
+ echo "Vote $vote from $voteridentity already exists!" >&2
+ rm -f "$dir/$TIME-vote-$vote"
+ continue;
+ fi
+ mv "$dir/$TIME-vote-$vote" "$dir/$timestamp-vote-$voteridentity-$vote"
+ bzip2 "$dir/$timestamp-vote-$voteridentity-$vote"
+ else
+ echo "Failed to get vote $vote!" >&2
+ fi
+done
+
+digests=$( for i in ` bzcat $consensus | awk '$1 == "r" {printf "%s===\n", $4}' | sort -u `; do
+ echo $i | \
+ base64-decode | \
+ perl -e 'undef $/; $a=<>; print unpack("H\*", $a),"\n";';
+ done )
+for digest in $digests; do
+ fetch_digest "$digest" "server-descriptor"
+done
diff --git a/contrib/directory-archive/sort-into-month-folder b/contrib/directory-archive/sort-into-month-folder
new file mode 100755
index 0000000000..95033c58df
--- /dev/null
+++ b/contrib/directory-archive/sort-into-month-folder
@@ -0,0 +1,74 @@
+#!/usr/bin/perl -w
+
+# Sort dumped consensuses, statuses, descriptors etc into per-month folders.
+
+# Copyright (c) 2006, 2007, 2008 Peter Palfrader
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+use strict;
+use File::Find;
+use File::Basename;
+use File::stat;
+use Time::Local;
+
+
+my $cutofftime;
+
+
+# File::Find callback.  find() is run with no_chdir, so $_ is the path of the
+# current entry including the start directory.  Every regular file last
+# modified before $cutofftime is moved to the same relative path below a
+# top-level directory named "<start dir>s-YYYY-MM", where year and month come
+# from the file's modification time in UTC.  Dies if a directory cannot be
+# created or the file cannot be renamed.
+sub wanted() {
+    my $src = $_;
+    return unless -f $src;
+    my $mtime = stat($src)->mtime;
+    return unless $mtime < $cutofftime;
+
+    my ($month, $yr) = (gmtime $mtime)[4, 5];
+
+    my $file = basename $src;
+    my @dirs = split /\//, dirname $src;
+    $dirs[0] .= sprintf 's-%4d-%02d', 1900+$yr, $month+1;
+    my $destdir = join '/', @dirs;
+
+    # Create the destination directory one component at a time.
+    unless (-d $destdir) {
+        my $partial = '.';
+        for my $part (@dirs) {
+            $partial .= '/'.$part;
+            next if -d $partial;
+            mkdir $partial or die ("Cannot mkdir $partial: $!\n");
+        }
+    }
+
+    print "$src -> $destdir/$file\n";
+    rename $src, $destdir.'/'.$file or die ("Cannot rename $src to $destdir/$file: $!\n");
+};
+
+# The cutoff is 00:00 UTC on the first day of the month that contained the
+# moment five days ago; only files older than that are moved.
+my ($cutoff_mon, $cutoff_year) = (gmtime(time - 5*24*3600))[4, 5];
+$cutofftime = timegm(0, 0, 0, 1, $cutoff_mon, $cutoff_year);
+
+# Walk both trees, one after the other, with the same callback.
+for my $tree ('server-descriptor', 'extra-info') {
+    find( { wanted => \&wanted, no_chdir => 1 }, $tree );
+}
diff --git a/contrib/directory-archive/tar-them-up b/contrib/directory-archive/tar-them-up
new file mode 100755
index 0000000000..2e0f6ec03f
--- /dev/null
+++ b/contrib/directory-archive/tar-them-up
@@ -0,0 +1,125 @@
+#!/bin/sh
+
+# Tar up dumped consensuses, statuses, descriptors etc from per-month folders
+# into per-month tarballs.
+
+# Copyright (c) 2006, 2007, 2008 Peter Palfrader
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set -e
+set -x
+set -u
+
+usage() {
+ echo "Usage: $0 <year> <month>" >&2
+ echo " $0 last (does last month)" >&2
+ exit 1
+}
+
+if [ -z "${1:-}" ]; then
+ usage
+fi
+
+if [ "$1" = "last" ]; then
+ year=`date --date="last month" +'%Y'`
+ month=`date --date="last month" +'%m'`
+elif [ -z "${2:-}" ]; then
+ usage
+else
+ year="$1"
+ month="$2"
+fi
+
+if [ "$year" -lt 2000 ] || [ "$year" -gt 2020 ] ||
+ [ "$month" -lt 1 ] || [ "$month" -gt 12 ] ||
+ [ "`echo -n $month | wc -c`" != 2 ]; then
+ usage
+fi
+
+
+this_year=`date --utc +'%Y'`
+this_month=`date --utc +'%m'`
+
+if [ "`date -d $this_year-$this_month-01 +%s`" -le "`date -d $year-$month-01 +%s`" ]; then
+ echo "Date in the future or current month?" >&2
+ exit 1
+fi
+
+
+
+
+
+# Abort before touching anything if a tarball for this month already exists.
+for kind in extra-infos server-descriptors consensuses statuses; do
+  file="$kind-$year-$month.tar.bz2"
+  [ -e "$file" ] || continue
+  echo "$file already exists" >&2
+  exit 1
+done
+
+# All four input directories for the month have to be present.
+for dir in \
+  "extra-infos-$year-$month" \
+  "server-descriptors-$year-$month" \
+  "consensus/$year/$month" \
+  "status/$year/$month" \
+  ; do
+  [ -d "$dir" ] && continue
+  echo "$dir not found" >&2
+  exit 1
+done
+
+# The directories the consensus and status months get moved to must not
+# exist yet.
+for kind in consensuses statuses; do
+  dir="$kind-$year-$month"
+  [ -e "$dir" ] || continue
+  echo "$dir already exists" >&2
+  exit 1
+done
+
+# Consensuses and statuses live under <kind>/<year>/<month> as individually
+# bzip2-compressed files.  Move the month to <kind>es-<year>-<month>,
+# uncompress the files, pack the directory into a bzip2-compressed tarball
+# and remove the directory.
+for kind in consensus status; do
+  mv "$kind/$year/$month" "${kind}es-$year-$month"
+  # -exec ... {} + runs nothing when no file matches, unlike xargs, which
+  # would start bunzip2 without arguments.
+  find "${kind}es-$year-$month" -type f -name '*.bz2' -exec bunzip2 -v {} +
+  tar cjvf "${kind}es-$year-$month.tar.bz2" "${kind}es-$year-$month"
+  rm -rf "${kind}es-$year-$month"
+done
+
+# Descriptors and extra-info documents are already sorted into per-month
+# directories; just pack and remove them.
+for kind in extra-infos server-descriptors; do
+  tar cjvf "$kind-$year-$month.tar.bz2" "$kind-$year-$month"
+  rm -rf "$kind-$year-$month"
+done
+
+
+# Move the tarballs into Archive/.  The directory is created first: without
+# it "mv <tarball> Archive" would rename every tarball to a plain file called
+# Archive, each one replacing the previous one.
+mkdir -p Archive
+for t in \
+  "consensuses-$year-$month.tar.bz2" \
+  "statuses-$year-$month.tar.bz2" \
+  "extra-infos-$year-$month.tar.bz2" \
+  "server-descriptors-$year-$month.tar.bz2" \
+  ; do
+  if [ -e "Archive/$t" ]; then
+    # Never overwrite an archived tarball; keep the new one where it is.
+    echo "Archive/$t already exists, leaving $t in place" >&2
+  else
+    mv "$t" Archive/
+  fi
+done