diff options
-rw-r--r-- | tor-metrics/config.py | 9 | ||||
-rw-r--r-- | tor-metrics/countries.py | 8 | ||||
-rwxr-xr-x | tor-metrics/generate.py | 85 | ||||
-rw-r--r-- | tor-metrics/relays.py | 79 | ||||
-rw-r--r-- | tor-metrics/templates/relay-info.html | 2 |
5 files changed, 138 insertions, 45 deletions
diff --git a/tor-metrics/config.py b/tor-metrics/config.py index 4fc8d3d..67a1934 100644 --- a/tor-metrics/config.py +++ b/tor-metrics/config.py @@ -1,3 +1,12 @@ +''' +File: config.py + +Configuration dict used in the generation of HTML documents + +:output_root: path to output directory (created if not exists) +:onionoo_url: url to onionoo details document +''' + CONFIG = { 'output_root': 'www', 'onionoo_url': 'https://onionoo.torproject.org/details' diff --git a/tor-metrics/countries.py b/tor-metrics/countries.py index 373dacd..ba63358 100644 --- a/tor-metrics/countries.py +++ b/tor-metrics/countries.py @@ -1,6 +1,10 @@ -# list of countries that require "The" prefix +''' +File: countries.py -the_prefixed = [ +List of countries which require prefixing with "The" +''' + +THE_PREFIXED = [ 'Dominican Republic', 'Ivory Coast', 'Marshall Islands', diff --git a/tor-metrics/generate.py b/tor-metrics/generate.py index 4bfce2b..79c8102 100755 --- a/tor-metrics/generate.py +++ b/tor-metrics/generate.py @@ -1,16 +1,34 @@ #!/usr/bin/env python3 -from jinja2 import Environment, FileSystemLoader +''' +File: generate.py (executable) + +Generate complete set of relay HTML pages and copy static files to +config.CONFIG['output_root'] defined in config.py + +Default output directory: ./www +''' + +import os from shutil import rmtree, copytree +import config +import countries +from jinja2 import Environment, FileSystemLoader from relays import Relays -import os, json -import config, countries -abs_path = os.path.dirname(os.path.abspath(__file__)) -env = Environment(loader=FileSystemLoader(os.path.join(abs_path, 'templates')), - trim_blocks=True, lstrip_blocks=True) +ABS_PATH = os.path.dirname(os.path.abspath(__file__)) +ENV = Environment(loader=FileSystemLoader(os.path.join(ABS_PATH, 'templates')), + trim_blocks=True, lstrip_blocks=True) def generate_html(relays): + ''' + Render and write complete set of relay pages to disk by calling each group's + respective function + + Files are written to 'www' by default (defined in config.py) + + :relays: relays class object containing relay set (list of dict) + ''' if relays.json is not None: pages_by_key(relays, 'as') pages_by_key(relays, 'country') @@ -19,20 +37,32 @@ def generate_html(relays): unsorted(relays, 'index.html', is_index=True) unsorted(relays.json['relays'], 'all.html', is_index=False) relay_info(relays) - static_src_path = os.path.join(abs_path, 'static') + static_src_path = os.path.join(ABS_PATH, 'static') static_dest_path = os.path.join(config.CONFIG['output_root'], 'static') if not os.path.exists(static_dest_path): copytree(static_src_path, static_dest_path) def unsorted(relays, filename, is_index): - template = env.get_template(filename) + ''' + Render and write unsorted HTML listings to disk + + :relays: relays class object containing relay set (list of dict) + :filename: filename to write unsorted listing (e.g. all.html) + :is_index: whether the file is an index or not (True/False) + ''' + template = ENV.get_template(filename) template_render = template.render(relays=relays, is_index=is_index) output = os.path.join(config.CONFIG['output_root'], filename) with open(output, 'w', encoding='utf8') as html: html.write(template_render) def effective_family(relays): - template = env.get_template('effective_family.html') + ''' + Render and write HTML listings to disk sorted by effective family + + :relays: relays class object containing relay set (list of dict) + ''' + template = ENV.get_template('effective_family.html') output_path = os.path.join(config.CONFIG['output_root'], 'family') if os.path.exists(output_path): rmtree(output_path) @@ -53,14 +83,20 @@ def effective_family(relays): os.makedirs(dir_path) f_bandwidth = round(bandwidth / 1000000, 2) # convert to MB/s rendered = template.render(relays=members, bandwidth=f_bandwidth, - is_index=False, path_prefix='../../', deactivate='family', - family=fingerprint) + is_index=False, path_prefix='../../', + deactivate='family', family=fingerprint) with open(os.path.join(dir_path, 'index.html'), 'w', - encoding='utf8') as html: + encoding='utf8') as html: html.write(rendered) def pages_by_key(relays, key): - template = env.get_template(key + '.html') + ''' + Render and write HTML listings to disk sorted by KEY + + :relays: relays class object containing relay set (list of dict) + :key: onionoo JSON parameter to sort by, e.g. 'platform' + ''' + template = ENV.get_template(key + '.html') output_path = os.path.join(config.CONFIG['output_root'], key) if os.path.exists(output_path): rmtree(output_path) @@ -80,14 +116,20 @@ def pages_by_key(relays, key): os.makedirs(dir_path) f_bandwidth = round(bandwidth / 1000000, 2) # convert to MB/s rendered = template.render(relays=found_relays, - bandwidth=f_bandwidth, is_index=False, path_prefix='../../', - deactivate=key, special_countries=countries.the_prefixed) + bandwidth=f_bandwidth, is_index=False, + path_prefix='../../', deactivate=key, + special_countries=countries.THE_PREFIXED) with open(os.path.join(dir_path, 'index.html'), 'w', - encoding='utf8') as html: + encoding='utf8') as html: html.write(rendered) def relay_info(relays): - template = env.get_template('relay-info.html') + ''' + Render and write per-relay HTML info documents to disk + + :relays: relays class object containing relay set (list of dict) + ''' + template = ENV.get_template('relay-info.html') output_path = os.path.join(config.CONFIG['output_root'], 'relay') if os.path.exists(output_path): rmtree(output_path) @@ -96,11 +138,8 @@ def relay_info(relays): for relay in relay_list: rendered = template.render(relay=relay, path_prefix='../') with open(os.path.join(output_path, '%s.html' % relay['fingerprint']), - 'w', encoding='utf8') as html: + 'w', encoding='utf8') as html: html.write(rendered) - - -relays = Relays() -generate_html(relays) - +RELAY_SET = Relays() +generate_html(RELAY_SET) diff --git a/tor-metrics/relays.py b/tor-metrics/relays.py index 423b02e..923eeb5 100644 --- a/tor-metrics/relays.py +++ b/tor-metrics/relays.py @@ -1,17 +1,39 @@ -import os, json, time, urllib.request +''' +File: relays.py + +Relays class object consisting of relays (list of dict) and onionoo fetch +timestamp +''' + +import os +import json +import time +import urllib.request from urllib.error import URLError, HTTPError import config -abs_path = os.path.dirname(os.path.abspath(__file__)) +ABS_PATH = os.path.dirname(os.path.abspath(__file__)) class Relays: + ''' + Relay class consisting of relays (list of dict) and onionoo fetch timestamp + + :ts_file: absolute path to timestamp file used in setting If-Modified_since + :json: relay listings stored as a list of dict, derived from onionoo JSON + :timestamp: timestamp of onionoo fetch + ''' def __init__(self): self.url = config.CONFIG['onionoo_url'] - self.ts_file = os.path.join(abs_path, "timestamp") - self.json = self.fetch() - self.timestamp = self.timestamp() + self.ts_file = os.path.join(ABS_PATH, "timestamp") + self.json = self.fetch_onionoo_details() + self.timestamp = self.write_timestamp() - def fetch(self): + def fetch_onionoo_details(self): + ''' + Make request to onionoo to retrieve details document, return prepared + JSON response (trimmed platform and sorted by highest observed + bandwidth) + ''' if os.path.isfile(self.ts_file): with open(self.ts_file, 'r') as ts_file: prev_timestamp = ts_file.read() @@ -22,28 +44,47 @@ class Relays: try: api_response = urllib.request.urlopen(conn).read() - except Exception as e: - print(e) - return(None) + except HTTPError as err: + print('HTTPError caught during onionoo fetch: %s' % err) + return None + except URLError as err: + print('URLError caught during onionoo fetch: %s' % err) + return None + except Exception as err: + print('Uncaught exception during onionoo fetch: %s' % err) json_data = json.loads(api_response.decode('utf-8')) sorted_json = self.sort_by_bandwidth(json_data) trimmed_json = self.trim_platform(sorted_json) - return(trimmed_json) + return trimmed_json + + def trim_platform(self, json_data): + ''' + Trim platform to retain base operating system without version number or + unnecessary classification which could affect sorting - def trim_platform(self, json): - for relay in json['relays']: + e.g. "Tor 0.3.4.9 on Linux" -> "Linux" + ''' + for relay in json_data['relays']: relay['platform'] = relay['platform'].split(' on ', 1)[1].split(' ')[0] - return(json) + return json_data - def sort_by_bandwidth(self, json): - json['relays'].sort(key=lambda x: x['observed_bandwidth'], reverse=True) - return(json) + def sort_by_bandwidth(self, json_data): + ''' + Sort full JSON list by highest observed_bandwidth, retain this order + during subsequent sorting (country, AS, etc) + ''' + json_data['relays'].sort(key=lambda x: x['observed_bandwidth'], reverse=True) + return json_data - def timestamp(self): + def write_timestamp(self): + ''' + Store encoded timestamp in a file to retain time of last request, passed + to onionoo via If-Modified-Since header during fetch() if exists + ''' timestamp = time.time() f_timestamp = time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(timestamp)) if self.json is not None: with open(self.ts_file, 'w', encoding='utf8') as ts_file: - ts_file.write(f_timestamp) - return(f_timestamp) + ts_file.write(f_timestamp) + return f_timestamp diff --git a/tor-metrics/templates/relay-info.html b/tor-metrics/templates/relay-info.html index 78af670..cfa974a 100644 --- a/tor-metrics/templates/relay-info.html +++ b/tor-metrics/templates/relay-info.html @@ -147,7 +147,7 @@ <dt>AS Name</dt> <dd> {% if relay['as_name'] -%} - {{ relay['as_name']|escape }} (<a href='https://bgp.he.net/{{ relay['as']|escape }}'>bgp</a>) + {{ relay['as_name']|escape }} (<a href='https://bgp.he.net/{{ relay['as']|escape }}'>BGP</a>) {% else -%} unknown {% endif -%} |