aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan <me@jordan.im>2019-09-05 22:27:24 -0700
committerJordan <me@jordan.im>2019-09-05 22:27:24 -0700
commit3bfdc6c0cb32e10247fc80634653b0683768d088 (patch)
treebd3cd83c08695d70fe968278551caf29c717643c
parent0983cf45cd0494a80f7c9d16e75a240289165e56 (diff)
downloadallium-3bfdc6c0cb32e10247fc80634653b0683768d088.tar.gz
allium-3bfdc6c0cb32e10247fc80634653b0683768d088.zip
update documentation, improve naming, misc non-functional updates
-rw-r--r--tor-metrics/config.py9
-rw-r--r--tor-metrics/countries.py8
-rwxr-xr-xtor-metrics/generate.py85
-rw-r--r--tor-metrics/relays.py79
-rw-r--r--tor-metrics/templates/relay-info.html2
5 files changed, 138 insertions, 45 deletions
diff --git a/tor-metrics/config.py b/tor-metrics/config.py
index 4fc8d3d..67a1934 100644
--- a/tor-metrics/config.py
+++ b/tor-metrics/config.py
@@ -1,3 +1,12 @@
+'''
+File: config.py
+
+Configuration dict used in the generation of HTML documents
+
+:output_root: path to output directory (created if it does not exist)
+:onionoo_url: url to onionoo details document
+'''
+
CONFIG = {
'output_root': 'www',
'onionoo_url': 'https://onionoo.torproject.org/details'
diff --git a/tor-metrics/countries.py b/tor-metrics/countries.py
index 373dacd..ba63358 100644
--- a/tor-metrics/countries.py
+++ b/tor-metrics/countries.py
@@ -1,6 +1,10 @@
-# list of countries that require "The" prefix
+'''
+File: countries.py
-the_prefixed = [
+List of countries which require prefixing with "The"
+'''
+
+THE_PREFIXED = [
'Dominican Republic',
'Ivory Coast',
'Marshall Islands',
diff --git a/tor-metrics/generate.py b/tor-metrics/generate.py
index 4bfce2b..79c8102 100755
--- a/tor-metrics/generate.py
+++ b/tor-metrics/generate.py
@@ -1,16 +1,34 @@
#!/usr/bin/env python3
-from jinja2 import Environment, FileSystemLoader
+'''
+File: generate.py (executable)
+
+Generate complete set of relay HTML pages and copy static files to
+config.CONFIG['output_root'] defined in config.py
+
+Default output directory: ./www
+'''
+
+import os
from shutil import rmtree, copytree
+import config
+import countries
+from jinja2 import Environment, FileSystemLoader
from relays import Relays
-import os, json
-import config, countries
-abs_path = os.path.dirname(os.path.abspath(__file__))
-env = Environment(loader=FileSystemLoader(os.path.join(abs_path, 'templates')),
- trim_blocks=True, lstrip_blocks=True)
+ABS_PATH = os.path.dirname(os.path.abspath(__file__))
+ENV = Environment(loader=FileSystemLoader(os.path.join(ABS_PATH, 'templates')),
+ trim_blocks=True, lstrip_blocks=True)
def generate_html(relays):
+ '''
+ Render and write complete set of relay pages to disk by calling each group's
+ respective function
+
+ Files are written to 'www' by default (defined in config.py)
+
+ :relays: relays class object containing relay set (list of dict)
+ '''
if relays.json is not None:
pages_by_key(relays, 'as')
pages_by_key(relays, 'country')
@@ -19,20 +37,32 @@ def generate_html(relays):
unsorted(relays, 'index.html', is_index=True)
unsorted(relays.json['relays'], 'all.html', is_index=False)
relay_info(relays)
- static_src_path = os.path.join(abs_path, 'static')
+ static_src_path = os.path.join(ABS_PATH, 'static')
static_dest_path = os.path.join(config.CONFIG['output_root'], 'static')
if not os.path.exists(static_dest_path):
copytree(static_src_path, static_dest_path)
def unsorted(relays, filename, is_index):
- template = env.get_template(filename)
+ '''
+ Render and write unsorted HTML listings to disk
+
+ :relays: relays class object containing relay set (list of dict)
+ :filename: filename to write unsorted listing (e.g. all.html)
+ :is_index: whether the file is an index or not (True/False)
+ '''
+ template = ENV.get_template(filename)
template_render = template.render(relays=relays, is_index=is_index)
output = os.path.join(config.CONFIG['output_root'], filename)
with open(output, 'w', encoding='utf8') as html:
html.write(template_render)
def effective_family(relays):
- template = env.get_template('effective_family.html')
+ '''
+ Render and write HTML listings to disk sorted by effective family
+
+ :relays: relays class object containing relay set (list of dict)
+ '''
+ template = ENV.get_template('effective_family.html')
output_path = os.path.join(config.CONFIG['output_root'], 'family')
if os.path.exists(output_path):
rmtree(output_path)
@@ -53,14 +83,20 @@ def effective_family(relays):
os.makedirs(dir_path)
f_bandwidth = round(bandwidth / 1000000, 2) # convert to MB/s
rendered = template.render(relays=members, bandwidth=f_bandwidth,
- is_index=False, path_prefix='../../', deactivate='family',
- family=fingerprint)
+ is_index=False, path_prefix='../../',
+ deactivate='family', family=fingerprint)
with open(os.path.join(dir_path, 'index.html'), 'w',
- encoding='utf8') as html:
+ encoding='utf8') as html:
html.write(rendered)
def pages_by_key(relays, key):
- template = env.get_template(key + '.html')
+ '''
+ Render and write HTML listings to disk sorted by KEY
+
+ :relays: relays class object containing relay set (list of dict)
+ :key: onionoo JSON parameter to sort by, e.g. 'platform'
+ '''
+ template = ENV.get_template(key + '.html')
output_path = os.path.join(config.CONFIG['output_root'], key)
if os.path.exists(output_path):
rmtree(output_path)
@@ -80,14 +116,20 @@ def pages_by_key(relays, key):
os.makedirs(dir_path)
f_bandwidth = round(bandwidth / 1000000, 2) # convert to MB/s
rendered = template.render(relays=found_relays,
- bandwidth=f_bandwidth, is_index=False, path_prefix='../../',
- deactivate=key, special_countries=countries.the_prefixed)
+ bandwidth=f_bandwidth, is_index=False,
+ path_prefix='../../', deactivate=key,
+ special_countries=countries.THE_PREFIXED)
with open(os.path.join(dir_path, 'index.html'), 'w',
- encoding='utf8') as html:
+ encoding='utf8') as html:
html.write(rendered)
def relay_info(relays):
- template = env.get_template('relay-info.html')
+ '''
+ Render and write per-relay HTML info documents to disk
+
+ :relays: relays class object containing relay set (list of dict)
+ '''
+ template = ENV.get_template('relay-info.html')
output_path = os.path.join(config.CONFIG['output_root'], 'relay')
if os.path.exists(output_path):
rmtree(output_path)
@@ -96,11 +138,8 @@ def relay_info(relays):
for relay in relay_list:
rendered = template.render(relay=relay, path_prefix='../')
with open(os.path.join(output_path, '%s.html' % relay['fingerprint']),
- 'w', encoding='utf8') as html:
+ 'w', encoding='utf8') as html:
html.write(rendered)
-
-
-relays = Relays()
-generate_html(relays)
-
+RELAY_SET = Relays()
+generate_html(RELAY_SET)
diff --git a/tor-metrics/relays.py b/tor-metrics/relays.py
index 423b02e..923eeb5 100644
--- a/tor-metrics/relays.py
+++ b/tor-metrics/relays.py
@@ -1,17 +1,39 @@
-import os, json, time, urllib.request
+'''
+File: relays.py
+
+Relays class object consisting of relays (list of dict) and onionoo fetch
+timestamp
+'''
+
+import os
+import json
+import time
+import urllib.request
from urllib.error import URLError, HTTPError
import config
-abs_path = os.path.dirname(os.path.abspath(__file__))
+ABS_PATH = os.path.dirname(os.path.abspath(__file__))
class Relays:
+ '''
+ Relay class consisting of relays (list of dict) and onionoo fetch timestamp
+
+ :ts_file: absolute path to timestamp file used in setting If-Modified-Since
+ :json: relay listings stored as a list of dict, derived from onionoo JSON
+ :timestamp: timestamp of onionoo fetch
+ '''
def __init__(self):
self.url = config.CONFIG['onionoo_url']
- self.ts_file = os.path.join(abs_path, "timestamp")
- self.json = self.fetch()
- self.timestamp = self.timestamp()
+ self.ts_file = os.path.join(ABS_PATH, "timestamp")
+ self.json = self.fetch_onionoo_details()
+ self.timestamp = self.write_timestamp()
- def fetch(self):
+ def fetch_onionoo_details(self):
+ '''
+ Make request to onionoo to retrieve details document, return prepared
+ JSON response (trimmed platform and sorted by highest observed
+ bandwidth)
+ '''
if os.path.isfile(self.ts_file):
with open(self.ts_file, 'r') as ts_file:
prev_timestamp = ts_file.read()
@@ -22,28 +44,47 @@ class Relays:
try:
api_response = urllib.request.urlopen(conn).read()
- except Exception as e:
- print(e)
- return(None)
+ except HTTPError as err:
+ print('HTTPError caught during onionoo fetch: %s' % err)
+ return None
+ except URLError as err:
+ print('URLError caught during onionoo fetch: %s' % err)
+ return None
+ except Exception as err:
+ print('Uncaught exception during onionoo fetch: %s' % err)
json_data = json.loads(api_response.decode('utf-8'))
sorted_json = self.sort_by_bandwidth(json_data)
trimmed_json = self.trim_platform(sorted_json)
- return(trimmed_json)
+ return trimmed_json
+
+ def trim_platform(self, json_data):
+ '''
+ Trim platform to retain base operating system without version number or
+ unnecessary classification which could affect sorting
- def trim_platform(self, json):
- for relay in json['relays']:
+ e.g. "Tor 0.3.4.9 on Linux" -> "Linux"
+ '''
+ for relay in json_data['relays']:
relay['platform'] = relay['platform'].split(' on ', 1)[1].split(' ')[0]
- return(json)
+ return json_data
- def sort_by_bandwidth(self, json):
- json['relays'].sort(key=lambda x: x['observed_bandwidth'], reverse=True)
- return(json)
+ def sort_by_bandwidth(self, json_data):
+ '''
+ Sort full JSON list by highest observed_bandwidth, retain this order
+ during subsequent sorting (country, AS, etc)
+ '''
+ json_data['relays'].sort(key=lambda x: x['observed_bandwidth'], reverse=True)
+ return json_data
- def timestamp(self):
+ def write_timestamp(self):
+ '''
+ Store encoded timestamp in a file to retain time of last request, passed
+ to onionoo via If-Modified-Since header during fetch_onionoo_details() if it exists
+ '''
timestamp = time.time()
f_timestamp = time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(timestamp))
if self.json is not None:
with open(self.ts_file, 'w', encoding='utf8') as ts_file:
- ts_file.write(f_timestamp)
- return(f_timestamp)
+ ts_file.write(f_timestamp)
+ return f_timestamp
diff --git a/tor-metrics/templates/relay-info.html b/tor-metrics/templates/relay-info.html
index 78af670..cfa974a 100644
--- a/tor-metrics/templates/relay-info.html
+++ b/tor-metrics/templates/relay-info.html
@@ -147,7 +147,7 @@
<dt>AS Name</dt>
<dd>
{% if relay['as_name'] -%}
- {{ relay['as_name']|escape }} (<a href='https://bgp.he.net/{{ relay['as']|escape }}'>bgp</a>)
+ {{ relay['as_name']|escape }} (<a href='https://bgp.he.net/{{ relay['as']|escape }}'>BGP</a>)
{% else -%}
unknown
{% endif -%}