diff options
Diffstat (limited to 'src/config')
-rw-r--r-- | src/config/README.geoip | 90 | ||||
-rwxr-xr-x | src/config/deanonymind.py | 194 | ||||
-rw-r--r-- | src/config/geoip-manual | 116 | ||||
-rw-r--r-- | src/config/mmdb-convert.py | 466 | ||||
-rw-r--r-- | src/config/torrc.sample.in | 11 |
5 files changed, 473 insertions, 404 deletions
diff --git a/src/config/README.geoip b/src/config/README.geoip deleted file mode 100644 index 8520501405..0000000000 --- a/src/config/README.geoip +++ /dev/null @@ -1,90 +0,0 @@ -README.geoip -- information on the IP-to-country-code file shipped with tor -=========================================================================== - -The IP-to-country-code file in src/config/geoip is based on MaxMind's -GeoLite Country database with the following modifications: - - - Those "A1" ("Anonymous Proxy") entries lying inbetween two entries with - the same country code are automatically changed to that country code. - These changes can be overriden by specifying a different country code - in src/config/geoip-manual. - - - Other "A1" entries are replaced with country codes specified in - src/config/geoip-manual, or are left as is if there is no corresponding - entry in that file. Even non-"A1" entries can be modified by adding a - replacement entry to src/config/geoip-manual. Handle with care. - - -1. Updating the geoip file from a MaxMind database file -------------------------------------------------------- - -Download the most recent MaxMind GeoLite Country database: -http://geolite.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip - -Run `python deanonymind.py` in the local directory. Review the output to -learn about applied automatic/manual changes and watch out for any -warnings. - -Possibly edit geoip-manual to make more/fewer/different manual changes and -re-run `python deanonymind.py`. - -When done, prepend the new geoip file with a comment like this: - - # Last updated based on $DATE Maxmind GeoLite Country - # See README.geoip for details on the conversion. - - -2. 
Verifying automatic and manual changes using diff ----------------------------------------------------- - -To unzip the original MaxMind file and look at the automatic changes, run: - - unzip GeoIPCountryCSV.zip - diff -U1 GeoIPCountryWhois.csv AutomaticGeoIPCountryWhois.csv - -To look at subsequent manual changes, run: - - diff -U1 AutomaticGeoIPCountryWhois.csv ManualGeoIPCountryWhois.csv - -To manually generate the geoip file and compare it to the automatically -created one, run: - - cut -d, -f3-5 < ManualGeoIPCountryWhois.csv | sed 's/"//g' > mygeoip - diff -U1 geoip mygeoip - - -3. Verifying automatic and manual changes using blockfinder ------------------------------------------------------------ - -Blockfinder is a powerful tool to handle multiple IP-to-country data -sources. Blockfinder has a function to specify a country code and compare -conflicting country code assignments in different data sources. - -We can use blockfinder to compare A1 entries in the original MaxMind file -with the same or overlapping blocks in the file generated above and in the -RIR delegation files: - - git clone https://github.com/ioerror/blockfinder - cd blockfinder/ - python blockfinder -i - python blockfinder -r ../GeoIPCountryWhois.csv - python blockfinder -r ../ManualGeoIPCountryWhois.csv - python blockfinder -p A1 > A1-comparison.txt - -The output marks conflicts between assignments using either '*' in case of -two different opinions or '#' for three or more different opinions about -the country code for a given block. - -The '*' conflicts are most likely harmless, because there will always be -at least two opinions with the original MaxMind file saying A1 and the -other two sources saying something more meaningful. - -However, watch out for '#' conflicts. In these cases, the original -MaxMind file ("A1"), the updated MaxMind file (hopefully the correct -country code), and the RIR delegation files (some other country code) all -disagree. 
- -There are perfectly valid cases where the updated MaxMind file and the RIR -delegation files don't agree. But each of those cases must be verified -manually. - diff --git a/src/config/deanonymind.py b/src/config/deanonymind.py deleted file mode 100755 index c86dadca99..0000000000 --- a/src/config/deanonymind.py +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/env python -import optparse -import os -import sys -import zipfile - -""" -Take a MaxMind GeoLite Country database as input and replace A1 entries -with the country code and name of the preceding entry iff the preceding -(subsequent) entry ends (starts) directly before (after) the A1 entry and -both preceding and subsequent entries contain the same country code. - -Then apply manual changes, either replacing A1 entries that could not be -replaced automatically or overriding previously made automatic changes. -""" - -def main(): - options = parse_options() - assignments = read_file(options.in_maxmind) - assignments = apply_automatic_changes(assignments) - write_file(options.out_automatic, assignments) - manual_assignments = read_file(options.in_manual, must_exist=False) - assignments = apply_manual_changes(assignments, manual_assignments) - write_file(options.out_manual, assignments) - write_file(options.out_geoip, assignments, long_format=False) - -def parse_options(): - parser = optparse.OptionParser() - parser.add_option('-i', action='store', dest='in_maxmind', - default='GeoIPCountryCSV.zip', metavar='FILE', - help='use the specified MaxMind GeoLite Country .zip or .csv ' - 'file as input [default: %default]') - parser.add_option('-g', action='store', dest='in_manual', - default='geoip-manual', metavar='FILE', - help='use the specified .csv file for manual changes or to ' - 'override automatic changes [default: %default]') - parser.add_option('-a', action='store', dest='out_automatic', - default="AutomaticGeoIPCountryWhois.csv", metavar='FILE', - help='write full input file plus automatic changes to the ' - 
'specified .csv file [default: %default]') - parser.add_option('-m', action='store', dest='out_manual', - default='ManualGeoIPCountryWhois.csv', metavar='FILE', - help='write full input file plus automatic and manual ' - 'changes to the specified .csv file [default: %default]') - parser.add_option('-o', action='store', dest='out_geoip', - default='geoip', metavar='FILE', - help='write full input file plus automatic and manual ' - 'changes to the specified .csv file that can be shipped ' - 'with tor [default: %default]') - (options, args) = parser.parse_args() - return options - -def read_file(path, must_exist=True): - if not os.path.exists(path): - if must_exist: - print 'File %s does not exist. Exiting.' % (path, ) - sys.exit(1) - else: - return - if path.endswith('.zip'): - zip_file = zipfile.ZipFile(path) - csv_content = zip_file.read('GeoIPCountryWhois.csv') - zip_file.close() - else: - csv_file = open(path) - csv_content = csv_file.read() - csv_file.close() - assignments = [] - for line in csv_content.split('\n'): - stripped_line = line.strip() - if len(stripped_line) > 0 and not stripped_line.startswith('#'): - assignments.append(stripped_line) - return assignments - -def apply_automatic_changes(assignments): - print '\nApplying automatic changes...' - result_lines = [] - prev_line = None - a1_lines = [] - for line in assignments: - if '"A1"' in line: - a1_lines.append(line) - else: - if len(a1_lines) > 0: - new_a1_lines = process_a1_lines(prev_line, a1_lines, line) - for new_a1_line in new_a1_lines: - result_lines.append(new_a1_line) - a1_lines = [] - result_lines.append(line) - prev_line = line - if len(a1_lines) > 0: - new_a1_lines = process_a1_lines(prev_line, a1_lines, None) - for new_a1_line in new_a1_lines: - result_lines.append(new_a1_line) - return result_lines - -def process_a1_lines(prev_line, a1_lines, next_line): - if not prev_line or not next_line: - return a1_lines # Can't merge first or last line in file. 
- if len(a1_lines) > 1: - return a1_lines # Can't merge more than 1 line at once. - a1_line = a1_lines[0].strip() - prev_entry = parse_line(prev_line) - a1_entry = parse_line(a1_line) - next_entry = parse_line(next_line) - touches_prev_entry = int(prev_entry['end_num']) + 1 == \ - int(a1_entry['start_num']) - touches_next_entry = int(a1_entry['end_num']) + 1 == \ - int(next_entry['start_num']) - same_country_code = prev_entry['country_code'] == \ - next_entry['country_code'] - if touches_prev_entry and touches_next_entry and same_country_code: - new_line = format_line_with_other_country(a1_entry, prev_entry) - print '-%s\n+%s' % (a1_line, new_line, ) - return [new_line] - else: - return a1_lines - -def parse_line(line): - if not line: - return None - keys = ['start_str', 'end_str', 'start_num', 'end_num', - 'country_code', 'country_name'] - stripped_line = line.replace('"', '').strip() - parts = stripped_line.split(',') - entry = dict((k, v) for k, v in zip(keys, parts)) - return entry - -def format_line_with_other_country(original_entry, other_entry): - return '"%s","%s","%s","%s","%s","%s"' % (original_entry['start_str'], - original_entry['end_str'], original_entry['start_num'], - original_entry['end_num'], other_entry['country_code'], - other_entry['country_name'], ) - -def apply_manual_changes(assignments, manual_assignments): - if not manual_assignments: - return assignments - print '\nApplying manual changes...' - manual_dict = {} - for line in manual_assignments: - start_num = parse_line(line)['start_num'] - if start_num in manual_dict: - print ('Warning: duplicate start number in manual ' - 'assignments:\n %s\n %s\nDiscarding first entry.' 
% - (manual_dict[start_num], line, )) - manual_dict[start_num] = line - result = [] - for line in assignments: - entry = parse_line(line) - start_num = entry['start_num'] - if start_num in manual_dict: - manual_line = manual_dict[start_num] - manual_entry = parse_line(manual_line) - if entry['start_str'] == manual_entry['start_str'] and \ - entry['end_str'] == manual_entry['end_str'] and \ - entry['end_num'] == manual_entry['end_num']: - if len(manual_entry['country_code']) != 2: - print '-%s' % (line, ) # only remove, don't replace - else: - new_line = format_line_with_other_country(entry, - manual_entry) - print '-%s\n+%s' % (line, new_line, ) - result.append(new_line) - del manual_dict[start_num] - else: - print ('Warning: only partial match between ' - 'original/automatically replaced assignment and ' - 'manual assignment:\n %s\n %s\nNot applying ' - 'manual change.' % (line, manual_line, )) - result.append(line) - else: - result.append(line) - if len(manual_dict) > 0: - print ('Warning: could not apply all manual assignments: %s' % - ('\n '.join(manual_dict.values())), ) - return result - -def write_file(path, assignments, long_format=True): - if long_format: - output_lines = assignments - else: - output_lines = [] - for long_line in assignments: - entry = parse_line(long_line) - short_line = "%s,%s,%s" % (entry['start_num'], - entry['end_num'], entry['country_code'], ) - output_lines.append(short_line) - out_file = open(path, 'w') - out_file.write('\n'.join(output_lines)) - out_file.close() - -if __name__ == '__main__': - main() - diff --git a/src/config/geoip-manual b/src/config/geoip-manual deleted file mode 100644 index 99c897ff42..0000000000 --- a/src/config/geoip-manual +++ /dev/null @@ -1,116 +0,0 @@ -# This file contains manual overrides of A1 entries (and possibly others) -# in MaxMind's GeoLite Country database. Use deanonymind.py in the same -# directory to process this file when producing a new geoip file. 
See -# README.geoip in the same directory for details. - -# Remove MaxMind entry 0.116.0.0-0.119.255.255 which MaxMind says is AT, -# but which is part of reserved range 0.0.0.0/8. -KL 2012-06-13 -# Disabled, because MaxMind apparently removed this range from their -# database. -KL 2013-02-08 -#"0.116.0.0","0.119.255.255","7602176","7864319","","" - -# NL, because previous MaxMind entry 31.171.128.0-31.171.133.255 is NL, -# and RIR delegation files say 31.171.128.0-31.171.135.255 is NL. -# -KL 2012-11-27 -"31.171.134.0","31.171.135.255","531334656","531335167","NL","Netherlands" - -# EU, because next MaxMind entry 37.139.64.1-37.139.64.9 is EU, because -# RIR delegation files say 37.139.64.0-37.139.71.255 is EU, and because it -# just makes more sense for the next entry to start at .0 and not .1. -# -KL 2012-11-27 -"37.139.64.0","37.139.64.0","629882880","629882880","EU","Europe" - -# CH, because previous MaxMind entry 46.19.141.0-46.19.142.255 is CH, and -# RIR delegation files say 46.19.136.0-46.19.143.255 is CH. -# -KL 2012-11-27 -"46.19.143.0","46.19.143.255","773033728","773033983","CH","Switzerland" - -# GB, because next MaxMind entry 46.166.129.0-46.166.134.255 is GB, and -# RIR delegation files say 46.166.128.0-46.166.191.255 is GB. -# -KL 2012-11-27 -"46.166.128.0","46.166.128.255","782663680","782663935","GB","United Kingdom" - -# US, though could as well be CA. Previous MaxMind entry -# 64.237.32.52-64.237.34.127 is US, next MaxMind entry -# 64.237.34.144-64.237.34.151 is CA, and RIR delegation files say the -# entire block 64.237.32.0-64.237.63.255 is US. -KL 2012-11-27 -"64.237.34.128","64.237.34.143","1089282688","1089282703","US","United States" - -# US, though could as well be UY. Previous MaxMind entry -# 67.15.170.0-67.15.182.255 is US, next MaxMind entry -# 67.15.183.128-67.15.183.159 is UY, and RIR delegation files say the -# entire block 67.15.0.0-67.15.255.255 is US. 
-KL 2012-11-27 -"67.15.183.0","67.15.183.127","1125103360","1125103487","US","United States" - -# US, because next MaxMind entry 67.43.145.0-67.43.155.255 is US, and RIR -# delegation files say 67.43.144.0-67.43.159.255 is US. -# -KL 2012-11-27 -"67.43.144.0","67.43.144.255","1126928384","1126928639","US","United States" - -# US, because previous MaxMind entry 70.159.21.51-70.232.244.255 is US, -# because next MaxMind entry 70.232.245.58-70.232.245.59 is A2 ("Satellite -# Provider") which is a country information about as useless as A1, and -# because RIR delegation files say 70.224.0.0-70.239.255.255 is US. -# -KL 2012-11-27 -"70.232.245.0","70.232.245.57","1189672192","1189672249","US","United States" - -# US, because next MaxMind entry 70.232.246.0-70.240.141.255 is US, -# because previous MaxMind entry 70.232.245.58-70.232.245.59 is A2 -# ("Satellite Provider") which is a country information about as useless -# as A1, and because RIR delegation files say 70.224.0.0-70.239.255.255 is -# US. -KL 2012-11-27 -"70.232.245.60","70.232.245.255","1189672252","1189672447","US","United States" - -# GB, despite neither previous (GE) nor next (LV) MaxMind entry being GB, -# but because RIR delegation files agree with both previous and next -# MaxMind entry and say GB for 91.228.0.0-91.228.3.255. -KL 2012-11-27 -"91.228.0.0","91.228.3.255","1541668864","1541669887","GB","United Kingdom" - -# GB, because next MaxMind entry 91.232.125.0-91.232.125.255 is GB, and -# RIR delegation files say 91.232.124.0-91.232.125.255 is GB. -# -KL 2012-11-27 -"91.232.124.0","91.232.124.255","1541962752","1541963007","GB","United Kingdom" - -# GB, despite neither previous (RU) nor next (PL) MaxMind entry being GB, -# but because RIR delegation files agree with both previous and next -# MaxMind entry and say GB for 91.238.214.0-91.238.215.255. 
-# -KL 2012-11-27 -"91.238.214.0","91.238.215.255","1542379008","1542379519","GB","United Kingdom" - -# US, because next MaxMind entry 173.0.16.0-173.0.65.255 is US, and RIR -# delegation files say 173.0.0.0-173.0.15.255 is US. -KL 2012-11-27 -"173.0.0.0","173.0.15.255","2902458368","2902462463","US","United States" - -# US, because next MaxMind entry 176.67.84.0-176.67.84.79 is US, and RIR -# delegation files say 176.67.80.0-176.67.87.255 is US. -KL 2012-11-27 -"176.67.80.0","176.67.83.255","2957201408","2957202431","US","United States" - -# US, because previous MaxMind entry 176.67.84.192-176.67.85.255 is US, -# and RIR delegation files say 176.67.80.0-176.67.87.255 is US. -# -KL 2012-11-27 -"176.67.86.0","176.67.87.255","2957202944","2957203455","US","United States" - -# EU, despite neither previous (RU) nor next (UA) MaxMind entry being EU, -# but because RIR delegation files agree with both previous and next -# MaxMind entry and say EU for 193.200.150.0-193.200.150.255. -# -KL 2012-11-27 -"193.200.150.0","193.200.150.255","3251148288","3251148543","EU","Europe" - -# US, because previous MaxMind entry 199.96.68.0-199.96.87.127 is US, and -# RIR delegation files say 199.96.80.0-199.96.87.255 is US. -# -KL 2012-11-27 -"199.96.87.128","199.96.87.255","3344979840","3344979967","US","United States" - -# US, because previous MaxMind entry 209.58.176.144-209.59.31.255 is US, -# and RIR delegation files say 209.59.32.0-209.59.63.255 is US. -# -KL 2012-11-27 -"209.59.32.0","209.59.63.255","3510312960","3510321151","US","United States" - -# FR, because previous MaxMind entry 217.15.166.0-217.15.166.255 is FR, -# and RIR delegation files contain a block 217.15.160.0-217.15.175.255 -# which, however, is EU, not FR. But merging with next MaxMind entry -# 217.15.176.0-217.15.191.255 which is KZ and which fully matches what -# the RIR delegation files say seems unlikely to be correct. 
-# -KL 2012-11-27 -"217.15.167.0","217.15.175.255","3641681664","3641683967","FR","France" - diff --git a/src/config/mmdb-convert.py b/src/config/mmdb-convert.py new file mode 100644 index 0000000000..cbe9acdc5d --- /dev/null +++ b/src/config/mmdb-convert.py @@ -0,0 +1,466 @@ +#!/usr/bin/python3 + +# This software has been dedicated to the public domain under the CC0 +# public domain dedication. +# +# To the extent possible under law, the person who associated CC0 +# with mmdb-convert.py has waived all copyright and related or +# neighboring rights to mmdb-convert.py. +# +# You should have received a copy of the CC0 legalcode along with this +# work in doc/cc0.txt. If not, see +# <http://creativecommons.org/publicdomain/zero/1.0/>. + +# Nick Mathewson is responsible for this kludge, but takes no +# responsibility for it. + +"""This kludge is meant to + parse mmdb files in sufficient detail to dump out the old format + that Tor expects. It's also meant to be pure-python. + + When given a simplicity/speed tradeoff, it opts for simplicity. + + You will not understand the code without understanding the MaxMind-DB + file format. It is specified at: + https://github.com/maxmind/MaxMind-DB/blob/master/MaxMind-DB-spec.md. + + This isn't so much tested. When it breaks, you get to keep both + pieces. +""" + +import struct +import bisect +import socket +import binascii +import sys +import time + +METADATA_MARKER = b'\xab\xcd\xefMaxMind.com' + +# Here's some python2/python3 junk. Better solutions wanted. +try: + ord(b"1"[0]) +except TypeError: + def byte_to_int(b): + "convert a single element of a bytestring to an integer." + return b +else: + byte_to_int = ord + +# Here's some more python2/python3 junk. Better solutions wanted. +try: + str(b"a", "utf8") +except TypeError: + bytesToStr = str +else: + def bytesToStr(b): + "convert a bytestring in utf8 to a string." + return str(b, 'utf8') + +def to_int(s): + "Parse a big-endian integer from bytestring s."
+ result = 0 + for c in s: + result *= 256 + result += byte_to_int(c) + return result + +def to_int24(s): + "Parse a pair of big-endian 24-bit integers from bytestring s." + a, b, c = struct.unpack("!HHH", s) + return ((a <<8)+(b>>8)), (((b&0xff)<<16)+c) + +def to_int32(s): + "Parse a pair of big-endian 32-bit integers from bytestring s." + a, b = struct.unpack("!LL", s) + return a, b + +def to_int28(s): + "Parse a pair of big-endian 28-bit integers from bytestring s." + a, b = struct.unpack("!LL", s + b'\x00') + return (((a & 0xf0) << 20) + (a >> 8)), ((a & 0x0f) << 24) + (b >> 8) + +class Tree(object): + "Holds a node in the tree" + def __init__(self, left, right): + self.left = left + self.right = right + +def resolve_tree(tree, data): + """Fill in the left_item and right_item fields for all values in the tree + so that they point to another Tree, or to a Datum, or to None.""" + d = Datum(None, None, None, None) + def resolve_item(item): + "Helper: resolve a single index." + if item < len(tree): + return tree[item] + elif item == len(tree): + return None + else: + d.pos = (item - len(tree) - 16) + p = bisect.bisect_left(data, d) + assert data[p].pos == d.pos + return data[p] + + for t in tree: + t.left_item = resolve_item(t.left) + t.right_item = resolve_item(t.right) + +def parse_search_tree(s, record_size): + """Given a bytestring and a record size in bits, parse the tree.
+ Return a list of nodes.""" + record_bytes = (record_size*2) // 8 + nodes = [] + p = 0 + try: + to_leftright = { 24: to_int24, + 28: to_int28, + 32: to_int32 }[ record_size ] + except KeyError: + raise NotImplementedError("Unsupported record size in bits: %d" % + record_size) + while p < len(s): + left, right = to_leftright(s[p:p+record_bytes]) + p += record_bytes + + nodes.append( Tree(left, right ) ) + + return nodes + +class Datum(object): + """Holds a single entry from the Data section""" + def __init__(self, pos, kind, ln, data): + self.pos = pos # Position of this record within data section + self.kind = kind # Type of this record. one of TP_* + self.ln = ln # Length field, which might be overloaded. + self.data = data # Raw bytes data. + self.children = None # Used for arrays and maps. + + def __repr__(self): + return "Datum(%r,%r,%r,%r)" % (self.pos, self.kind, self.ln, self.data) + + # Comparison functions used for bsearch + def __lt__(self, other): + return self.pos < other.pos + + def __gt__(self, other): + return self.pos > other.pos + + def __eq__(self, other): + return self.pos == other.pos + + def build_maps(self): + """If this is a map or array, fill in its 'map' field if it's a map, + and the 'map' field of all its children.""" + + if not hasattr(self, 'nChildren'): + return + + if self.kind == TP_ARRAY: + del self.nChildren + for c in self.children: + c.build_maps() + + elif self.kind == TP_MAP: + del self.nChildren + self.map = {} + for i in range(0, len(self.children), 2): + k = self.children[i].deref() + v = self.children[i+1].deref() + v.build_maps() + if k.kind != TP_UTF8: + raise ValueError("Bad dictionary key type %d"% k.kind) + self.map[bytesToStr(k.data)] = v + + def int_val(self): + """If this is an integer type, return its value""" + assert self.kind in (TP_UINT16, TP_UINT32, TP_UINT64, + TP_UINT128, TP_SINT32) + i = to_int(self.data) + if self.kind == TP_SINT32: + if i & 0x80000000: + i = i - 0x100000000 + return i + + def 
deref(self): + """If this value is a pointer, return its pointed-to-value. Chase + through multiple layers of pointers if need be. If this isn't + a pointer, return it.""" + n = 0 + s = self + while s.kind == TP_PTR: + s = s.ptr + n += 1 + assert n < 100 + return s + +def resolve_pointers(data): + """Fill in the ptr field of every pointer in data.""" + search = Datum(None, None, None, None) + for d in data: + if d.kind == TP_PTR: + search.pos = d.ln + p = bisect.bisect_left(data, search) + assert data[p].pos == d.ln + d.ptr = data[p] + +TP_PTR = 1 +TP_UTF8 = 2 +TP_DBL = 3 +TP_BYTES = 4 +TP_UINT16 = 5 +TP_UINT32 = 6 +TP_MAP = 7 +TP_SINT32 = 8 +TP_UINT64 = 9 +TP_UINT128 = 10 +TP_ARRAY = 11 +TP_DCACHE = 12 +TP_END = 13 +TP_BOOL = 14 +TP_FLOAT = 15 + +def get_type_and_len(s): + """Data parsing helper: decode the type value and much-overloaded 'length' + field for the value starting at s. Return a 3-tuple of type, length, + and number of bytes used to encode type-plus-length.""" + c = byte_to_int(s[0]) + tp = c >> 5 + skip = 1 + if tp == 0: + tp = byte_to_int(s[1])+7 + skip = 2 + ln = c & 31 + + # I'm sure I don't know what they were thinking here... + if tp == TP_PTR: + len_len = (ln >> 3) + 1 + if len_len < 4: + ln &= 7 + ln <<= len_len * 8 + else: + ln = 0 + ln += to_int(s[skip:skip+len_len]) + ln += (0, 0, 2048, 526336, 0)[len_len] + skip += len_len + elif ln >= 29: + len_len = ln - 28 + ln = to_int(s[skip:skip+len_len]) + ln += (0, 29, 285, 65821)[len_len] + skip += len_len + + return tp, ln, skip + +# Set of types for which 'length' doesn't mean length. +IGNORE_LEN_TYPES = set([ + TP_MAP, # Length is number of key-value pairs that follow. + TP_ARRAY, # Length is number of members that follow. + TP_PTR, # Length is index to pointed-to data element. + TP_BOOL, # Length is 0 or 1. 
+ TP_DCACHE, # Length is number of members that follow +]) + +def parse_data_section(s): + """Given a data section encoded in a bytestring, return a list of + Datum items.""" + + # Stack of possibly nested containers. We use the 'nChildren' member of + # the last one to tell how many more items nest directly inside. + stack = [] + + # List of all items, including nested ones. + data = [] + + # Byte index within the data section. + pos = 0 + + while s: + tp, ln, skip = get_type_and_len(s) + if tp in IGNORE_LEN_TYPES: + real_len = 0 + else: + real_len = ln + + d = Datum(pos, tp, ln, s[skip:skip+real_len]) + data.append(d) + pos += skip+real_len + s = s[skip+real_len:] + + if stack: + stack[-1].children.append(d) + stack[-1].nChildren -= 1 + if stack[-1].nChildren == 0: + del stack[-1] + + if d.kind == TP_ARRAY: + d.nChildren = d.ln + d.children = [] + stack.append(d) + elif d.kind == TP_MAP: + d.nChildren = d.ln * 2 + d.children = [] + stack.append(d) + + return data + +def parse_mm_file(s): + """Parse a MaxMind-DB file.""" + try: + metadata_ptr = s.rindex(METADATA_MARKER) + except ValueError: + raise ValueError("No metadata!") + + metadata = parse_data_section(s[metadata_ptr+len(METADATA_MARKER):]) + + if metadata[0].kind != TP_MAP: + raise ValueError("Bad map") + + metadata[0].build_maps() + mm = metadata[0].map + + tree_size = (((mm['record_size'].int_val() * 2) // 8 ) * + mm['node_count'].int_val()) + + if s[tree_size:tree_size+16] != b'\x00'*16: + raise ValueError("Missing section separator!") + + tree = parse_search_tree(s[:tree_size], mm['record_size'].int_val()) + + data = parse_data_section(s[tree_size+16:metadata_ptr]) + + resolve_pointers(data) + resolve_tree(tree, data) + + for d in data: + d.build_maps() + + return metadata, tree, data + +def format_datum(datum): + """Given a Datum at a leaf of the tree, return the string that we should + write as its value.
+ + We first try country->iso_code which is the two-character ISO 3166-1 + country code of the country where MaxMind believes the end user is + located. If there's no such key, we try registered_country->iso_code + which is the country in which the ISP has registered the IP address. + Without falling back to registered_country, we'd leave out all ranges + that MaxMind thinks belong to anonymous proxies, because those ranges + don't contain country but only registered_country. In short: let's + fill all A1 entries with what ARIN et al. think. + """ + try: + return bytesToStr(datum.map['country'].map['iso_code'].data) + except KeyError: + pass + try: + return bytesToStr(datum.map['registered_country'].map['iso_code'].data) + except KeyError: + pass + return None + +IPV4_PREFIX = "0"*96 + +def dump_item_ipv4(entries, prefix, val): + """Dump the information for an IPv4 address to entries, where 'prefix' + is a string holding a binary prefix for the address, and 'val' is the + value to dump. If the prefix is not an IPv4 address (it does not start + with 96 bits of 0), then add nothing.
+ """ + if not prefix.startswith(IPV4_PREFIX): + return + prefix = prefix[96:] + v = int(prefix, 2) + shift = 32 - len(prefix) + lo = v << shift + hi = ((v+1) << shift) - 1 + entries.append((lo, hi, val)) + +def fmt_item_ipv4(entry): + """Format an IPv4 range with lo and hi addresses in decimal form.""" + return "%d,%d,%s\n"%(entry[0], entry[1], entry[2]) + +def fmt_ipv6_addr(v): + """Given a 128-bit integer representing an ipv6 address, return a + string for that ipv6 address.""" + return socket.inet_ntop(socket.AF_INET6, binascii.unhexlify("%032x"%v)) + +def fmt_item_ipv6(entry): + """Format an IPv6 range with lo and hi addresses in hex form.""" + return "%s,%s,%s\n"%(fmt_ipv6_addr(entry[0]), + fmt_ipv6_addr(entry[1]), + entry[2]) + +IPV4_MAPPED_IPV6_PREFIX = "0"*80 + "1"*16 +IPV6_6TO4_PREFIX = "0010000000000010" +TEREDO_IPV6_PREFIX = "0010000000000001" + "0"*16 + +def dump_item_ipv6(entries, prefix, val): + """Dump the information for an IPv6 address prefix to entries, where + 'prefix' is a string holding a binary prefix for the address, + and 'val' is the value to dump. If the prefix is an IPv4 address + (starts with 96 bits of 0), is an IPv4-mapped IPv6 address + (::ffff:0:0/96), or is in the 6to4 mapping subnet (2002::/16), then + print nothing. 
+ """ + if prefix.startswith(IPV4_PREFIX) or \ + prefix.startswith(IPV4_MAPPED_IPV6_PREFIX) or \ + prefix.startswith(IPV6_6TO4_PREFIX) or \ + prefix.startswith(TEREDO_IPV6_PREFIX): + return + v = int(prefix, 2) + shift = 128 - len(prefix) + lo = v << shift + hi = ((v+1) << shift) - 1 + entries.append((lo, hi, val)) + +def dump_tree(entries, node, dump_item, prefix=""): + """Walk the tree rooted at 'node', and call dump_item on the + format_datum output of every leaf of the tree.""" + + if isinstance(node, Tree): + dump_tree(entries, node.left_item, dump_item, prefix+"0") + dump_tree(entries, node.right_item, dump_item, prefix+"1") + elif isinstance(node, Datum): + assert node.kind == TP_MAP + code = format_datum(node) + if code: + dump_item(entries, prefix, code) + else: + assert node == None + +GEOIP_FILE_HEADER = """\ +# Last updated based on %s Maxmind GeoLite2 Country +# wget https://geolite.maxmind.com/download/geoip/database/GeoLite2-Country.mmdb.gz +# gunzip GeoLite2-Country.mmdb.gz +# python mmdb-convert.py GeoLite2-Country.mmdb +""" + +def write_geoip_file(filename, metadata, the_tree, dump_item, fmt_item): + """Write the entries in the_tree to filename.""" + entries = [] + dump_tree(entries, the_tree[0], dump_item) + fobj = open(filename, 'w') + + build_epoch = metadata[0].map['build_epoch'].int_val() + fobj.write(GEOIP_FILE_HEADER % + time.strftime('%B %-d %Y', time.gmtime(build_epoch))) + + unwritten = None + for entry in entries: + if not unwritten: + unwritten = entry + elif unwritten[1] + 1 == entry[0] and unwritten[2] == entry[2]: + unwritten = (unwritten[0], entry[1], unwritten[2]) + else: + fobj.write(fmt_item(unwritten)) + unwritten = entry + if unwritten: + fobj.write(fmt_item(unwritten)) + fobj.close() + +content = open(sys.argv[1], 'rb').read() +metadata, the_tree, _ = parse_mm_file(content) + +write_geoip_file('geoip', metadata, the_tree, dump_item_ipv4, fmt_item_ipv4) +write_geoip_file('geoip6', metadata, the_tree, dump_item_ipv6, 
fmt_item_ipv6) diff --git a/src/config/torrc.sample.in b/src/config/torrc.sample.in index c667efc5c9..d842fbcaf5 100644 --- a/src/config/torrc.sample.in +++ b/src/config/torrc.sample.in @@ -1,5 +1,5 @@ ## Configuration file for a typical Tor user -## Last updated 12 September 2012 for Tor 0.2.4.3-alpha. +## Last updated 9 October 2013 for Tor 0.2.5.2-alpha. ## (may or may not work for much older or much newer versions of Tor.) ## ## Lines that begin with "## " try to explain what's going on. Lines @@ -120,9 +120,12 @@ ## is per month) #AccountingStart month 3 15:00 -## Contact info to be published in the directory, so we can contact you -## if your relay is misconfigured or something else goes wrong. Google -## indexes this, so spammers might also collect it. +## Administrative contact information for this relay or bridge. This line +## can be used to contact you if your relay or bridge is misconfigured or +## something else goes wrong. Note that we archive and publish all +## descriptors containing these lines and that Google indexes them, so +## spammers might also collect them. You may want to obscure the fact that +## it's an email address and/or generate a new address for this purpose. #ContactInfo Random Person <nobody AT example dot com> ## You might also include your PGP or GPG fingerprint if you have one: #ContactInfo 0xFFFFFFFF Random Person <nobody AT example dot com> |