aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan <me@jordan.im>2020-07-02 15:19:41 -0700
committerJordan <me@jordan.im>2020-07-02 15:19:41 -0700
commit57f0433f735a3f54e5c6e6f25e2045c9a29af006 (patch)
tree43a17042b9531f451d28ef7795997d962618b21b
parente6f230d7afb0da982e392966cfb5a42c77f948f2 (diff)
downloadallium-57f0433f735a3f54e5c6e6f25e2045c9a29af006.tar.gz
allium-57f0433f735a3f54e5c6e6f25e2045c9a29af006.zip
housekeeping
-rwxr-xr-xtor-metrics/generate.py244
-rw-r--r--tor-metrics/relays.py257
2 files changed, 251 insertions, 250 deletions
diff --git a/tor-metrics/generate.py b/tor-metrics/generate.py
index 21263a2..e0cb0c6 100755
--- a/tor-metrics/generate.py
+++ b/tor-metrics/generate.py
@@ -11,229 +11,13 @@ Default output directory: ./www
import os
import sys
-from shutil import rmtree, copytree
+from shutil import copytree
import config
-import countries
-from jinja2 import Environment, FileSystemLoader
from relays import Relays
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
-ENV = Environment(loader=FileSystemLoader(os.path.join(ABS_PATH, 'templates')),
- trim_blocks=True, lstrip_blocks=True)
-
-def sort_relays(relays):
- '''
- Add a list of dict sorted by unique keys derived from relays as they're
- discovered, referenced by indice to the main set (relays.json['relays'])
-
- :relays: relays class object containing relay set (list of dict)
- '''
- keys = ['as', 'country', 'platform']
- if not relays.json.get('sorted'):
- relays.json['sorted'] = dict()
-
- relay_list = relays.json['relays']
- for idx, relay in enumerate(relay_list):
- for key in keys:
- v = relay.get(key)
- if not v or not v.isalnum(): continue
- if not key in relays.json['sorted']:
- relays.json['sorted'][key] = dict()
- if not v in relays.json['sorted'][key]:
- relays.json['sorted'][key][v] = dict()
- relays.json['sorted'][key][v]['relays'] = list()
- relays.json['sorted'][key][v]['bw'] = 0
- relays.json['sorted'][key][v]['exit_count'] = 0
- relays.json['sorted'][key][v]['middle_count'] = 0
- bw = relay['observed_bandwidth']
- relays.json['sorted'][key][v]['relays'].append(idx)
- relays.json['sorted'][key][v]['bw'] += bw
- if 'Exit' in relay['flags']:
- relays.json['sorted'][key][v]['exit_count'] += 1
- else:
- relays.json['sorted'][key][v]['middle_count'] += 1
-
- flags = relay['flags']
- for flag in flags:
- if not flag.isalnum(): continue
- if not 'flags' in relays.json['sorted']:
- relays.json['sorted']['flags'] = dict()
- if not flag in relays.json['sorted']['flags']:
- relays.json['sorted']['flags'][flag] = dict()
- relays.json['sorted']['flags'][flag]['relays'] = list()
- relays.json['sorted']['flags'][flag]['bw'] = 0
- relays.json['sorted']['flags'][flag]['exit_count'] = 0
- relays.json['sorted']['flags'][flag]['middle_count'] = 0
- bw = relay['observed_bandwidth']
- relays.json['sorted']['flags'][flag]['relays'].append(idx)
- relays.json['sorted']['flags'][flag]['bw'] += bw
- if 'Exit' in relay['flags']:
- relays.json['sorted']['flags'][flag]['exit_count'] += 1
- else:
- relays.json['sorted']['flags'][flag]['middle_count'] += 1
-
- members = relay['effective_family']
- for member in members:
- if not member.isalnum() or len(members) < 2: continue
- if not 'family' in relays.json['sorted']:
- relays.json['sorted']['family'] = dict()
- if not member in relays.json['sorted']['family']:
- relays.json['sorted']['family'][member] = dict()
- relays.json['sorted']['family'][member]['relays'] = list()
- relays.json['sorted']['family'][member]['bw'] = 0
- relays.json['sorted']['family'][member]['exit_count'] = 0
- relays.json['sorted']['family'][member]['middle_count'] = 0
- bw = relay['observed_bandwidth']
- relays.json['sorted']['family'][member]['relays'].append(idx)
- relays.json['sorted']['family'][member]['bw'] += bw
- if 'Exit' in relay['flags']:
- relays.json['sorted']['family'][member]['exit_count'] += 1
- else:
- relays.json['sorted']['family'][member]['middle_count'] += 1
-
-def unsorted(relays, filename, is_index):
- '''
- Render and write unsorted HTML listings to disk
-
- :relays: relays class object containing relay set (list of dict)
- :filename: filename to write unsorted listing (e.g. all.html)
- :is_index: whether the file is an index or not (True/False)
- '''
- template = ENV.get_template(filename)
- relays.json['relay_subset'] = relays.json['relays']
- template_render = template.render(relays=relays, is_index=is_index)
- output = os.path.join(config.CONFIG['output_root'], filename)
- with open(output, 'w', encoding='utf8') as html:
- html.write(template_render)
-
-def effective_family(relays):
- '''
- Render and write HTML listings to disk sorted by effective family
-
- :relays: relays class object containing relay set (list of dict)
- '''
- template = ENV.get_template('effective_family.html')
- output_path = os.path.join(config.CONFIG['output_root'], 'family')
- if os.path.exists(output_path):
- rmtree(output_path)
- relay_list = relays.json['relays']
- for family in relays.json['sorted']['family']:
- members = []
- bandwidth = relays.json['sorted']['family'][family]['bw']
- exit_count = relays.json['sorted']['family'][family]['exit_count']
- middle_count = relays.json['sorted']['family'][family]['middle_count']
- for m_relay in relays.json['sorted']['family'][family]['relays']:
- members.append(relay_list[m_relay])
- dir_path = os.path.join(output_path, family)
- os.makedirs(dir_path)
- f_bandwidth = round(bandwidth / 1000000, 2) # convert to MB/s
- relays.json['relay_subset'] = members
- rendered = template.render(relays=relays,
- bandwidth=f_bandwidth,
- exit_count=exit_count,
- middle_count=middle_count,
- is_index=False,
- path_prefix='../../',
- deactivate='family',
- family=family)
- with open(os.path.join(dir_path, 'index.html'), 'w',
- encoding='utf8') as html:
- html.write(rendered)
-
-def pages_by_key(relays, key):
- '''
- Render and write HTML listings to disk sorted by KEY
-
- :relays: relays class object containing relay set (list of dict)
- :key: relays['sorted'] key (onionoo parameter) containing list of indices
- belonging to key
- '''
- template = ENV.get_template(key + '.html')
- output_path = os.path.join(config.CONFIG['output_root'], key)
- if os.path.exists(output_path):
- rmtree(output_path)
- relay_list = relays.json['relays']
- for v in relays.json['sorted'][key]:
- m_relays = list()
- for idx in relays.json['sorted'][key][v]['relays']:
- m_relays.append(relays.json['relays'][idx])
- bandwidth = relays.json['sorted'][key][v]['bw']
- exit_count = relays.json['sorted'][key][v]['exit_count']
- middle_count = relays.json['sorted'][key][v]['middle_count']
- dir_path = os.path.join(output_path, v)
- os.makedirs(dir_path)
- f_bandwidth = round(bandwidth / 1000000, 2) # convert to MB/s
- relays.json['relay_subset'] = m_relays
- rendered = template.render(relays=relays,
- bandwidth=f_bandwidth,
- exit_count=exit_count,
- middle_count=middle_count,
- is_index=False,
- path_prefix='../../',
- deactivate=key,
- special_countries=countries.THE_PREFIXED)
- with open(os.path.join(dir_path, 'index.html'), 'w',
- encoding='utf8') as html:
- html.write(rendered)
-
-def pages_by_flag(relays):
- '''
- Render and write HTML listings to disk sorted by FLAG
-
- :relays: relays class object containing relay set (list of dict)
- '''
- template = ENV.get_template('flag.html')
- for flag in relays.json['sorted']['flags']:
- output_path = os.path.join(config.CONFIG['output_root'], 'flag',
- flag.lower())
- if os.path.exists(output_path):
- rmtree(output_path)
- relay_list = relays.json['relays']
- m_relays = list()
- for idx in relays.json['sorted']['flags'][flag]['relays']:
- m_relays.append(relays.json['relays'][idx])
- bandwidth = relays.json['sorted']['flags'][flag]['bw']
- exit_count = relays.json['sorted']['flags'][flag]['exit_count']
- middle_count = relays.json['sorted']['flags'][flag]['middle_count']
- os.makedirs(output_path)
- f_bandwidth = round(bandwidth / 1000000, 2) # convert to MB/s
- relays.json['relay_subset'] = m_relays
- rendered = template.render(relays=relays,
- bandwidth=f_bandwidth,
- exit_count=exit_count,
- middle_count=middle_count,
- is_index=False,
- path_prefix='../../',
- deactivate=flag,
- special_countries=countries.THE_PREFIXED,
- flag=flag)
- with open(os.path.join(output_path, 'index.html'), 'w',
- encoding='utf8') as html:
- html.write(rendered)
-
-def relay_info(relays):
- '''
- Render and write per-relay HTML info documents to disk
-
- :relays: relays class object containing relay set (list of dict)
- '''
- relay_list = relays.json['relays']
- template = ENV.get_template('relay-info.html')
- output_path = os.path.join(config.CONFIG['output_root'], 'relay')
- if os.path.exists(output_path):
- rmtree(output_path)
- os.makedirs(output_path)
- for relay in relay_list:
- if not relay['fingerprint'].isalnum():
- continue
- rendered = template.render(relay=relay, path_prefix='../',
- relays=relays)
- with open(os.path.join(output_path, '%s.html' % relay['fingerprint']),
- 'w', encoding='utf8') as html:
- html.write(rendered)
if __name__ == '__main__':
- # make request to onionoo, populate relays object
try:
RELAY_SET = Relays()
except Exception as err:
@@ -242,18 +26,18 @@ if __name__ == '__main__':
sys.exit()
# generate relay HTML documents
- sort_relays(RELAY_SET)
- pages_by_key(RELAY_SET, 'as')
- pages_by_key(RELAY_SET, 'country')
- pages_by_key(RELAY_SET, 'platform')
- pages_by_flag(RELAY_SET)
- effective_family(RELAY_SET)
- unsorted(RELAY_SET, 'index.html', is_index=True)
- unsorted(RELAY_SET, 'all.html', is_index=False)
- relay_info(RELAY_SET)
+ RELAY_SET.create_output_dir()
+ RELAY_SET.write_unsorted('index.html', is_index=True)
+ RELAY_SET.write_unsorted('all.html', is_index=False)
+ RELAY_SET.write_effective_family()
+ RELAY_SET.write_pages_by_key('as')
+ RELAY_SET.write_pages_by_key('country')
+ RELAY_SET.write_pages_by_key('platform')
+ RELAY_SET.write_pages_by_flag()
+ RELAY_SET.write_relay_info()
# copy static directory and its contents
- static_src_path = os.path.join(ABS_PATH, 'static')
- static_dest_path = os.path.join(config.CONFIG['output_root'], 'static')
- if not os.path.exists(static_dest_path):
- copytree(static_src_path, static_dest_path)
+ STATIC_SRC_PATH = os.path.join(ABS_PATH, 'static')
+ STATIC_DEST_PATH = os.path.join(config.CONFIG['output_root'], 'static')
+ if not os.path.exists(STATIC_DEST_PATH):
+ copytree(STATIC_SRC_PATH, STATIC_DEST_PATH)
diff --git a/tor-metrics/relays.py b/tor-metrics/relays.py
index 7693e17..aa55a93 100644
--- a/tor-metrics/relays.py
+++ b/tor-metrics/relays.py
@@ -5,14 +5,18 @@ Relays class object consisting of relays (list of dict) and onionoo fetch
timestamp
'''
-import os
import json
+import os
import time
import urllib.request
-from urllib.error import URLError, HTTPError
+from shutil import rmtree
import config
+import countries
+from jinja2 import Environment, FileSystemLoader
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
+ENV = Environment(loader=FileSystemLoader(os.path.join(ABS_PATH, 'templates')),
+ trim_blocks=True, lstrip_blocks=True)
class Relays:
'''
@@ -25,10 +29,15 @@ class Relays:
def __init__(self):
self.url = config.CONFIG['onionoo_url']
self.ts_file = os.path.join(ABS_PATH, "timestamp")
- self.json = self.fetch_onionoo_details()
- self.timestamp = self.write_timestamp()
+ self.json = self._fetch_onionoo_details()
+ self.timestamp = self._write_timestamp()
+
+ self._fix_missing_observed_bandwidth()
+ self._sort_by_bandwidth()
+ self._trim_platform()
+ self._categorize_relays()
- def fetch_onionoo_details(self):
+ def _fetch_onionoo_details(self):
'''
Make request to onionoo to retrieve details document, return prepared
JSON response (trimmed platform and sorted by highest observed
@@ -45,24 +54,20 @@ class Relays:
api_response = urllib.request.urlopen(conn).read()
json_data = json.loads(api_response.decode('utf-8'))
- fixed_bw = self.fix_missing_observed_bandwidth(json_data)
- sorted_json = self.sort_by_bandwidth(fixed_bw)
- trimmed_json = self.trim_platform(sorted_json)
- return trimmed_json
+ return json_data
- def trim_platform(self, json_data):
+ def _trim_platform(self):
'''
Trim platform to retain base operating system without version number or
unnecessary classification which could affect sorting
e.g. "Tor 0.3.4.9 on Linux" -> "Linux"
'''
- for relay in json_data['relays']:
+ for relay in self.json['relays']:
relay['platform'] = relay['platform'].split(' on ', 1)[1].split(' ')[0]
relay['platform'] = relay['platform'].split('/')[-1] # GNU/*
- return json_data
- def fix_missing_observed_bandwidth(self, json_data):
+ def _fix_missing_observed_bandwidth(self):
'''
Set the observed_bandwidth parameter value for any relay missing the
parameter to 0; the observed_bandwidth parameter is (apparently)
@@ -73,21 +78,19 @@ class Relays:
--https://metrics.torproject.org/onionoo.html#details_relay_observed_bandwidth
'''
- for idx, relay in enumerate(json_data['relays']):
+ for idx, relay in enumerate(self.json['relays']):
if not relay.get('observed_bandwidth'):
- json_data['relays'][idx]['observed_bandwidth'] = 0
- return json_data
+ self.json['relays'][idx]['observed_bandwidth'] = 0
- def sort_by_bandwidth(self, json_data):
+ def _sort_by_bandwidth(self):
'''
Sort full JSON list by highest observed_bandwidth, retain this order
during subsequent sorting (country, AS, etc)
'''
- json_data['relays'].sort(key=lambda x: x['observed_bandwidth'],
+ self.json['relays'].sort(key=lambda x: x['observed_bandwidth'],
reverse=True)
- return json_data
- def write_timestamp(self):
+ def _write_timestamp(self):
'''
Store encoded timestamp in a file to retain time of last request, passed
to onionoo via If-Modified-Since header during fetch() if exists
@@ -99,3 +102,217 @@ class Relays:
with open(self.ts_file, 'w', encoding='utf8') as ts_file:
ts_file.write(f_timestamp)
return f_timestamp
+
+ def _categorize_relays(self):
+ '''
+ Add a list of dict sorted by unique keys derived from relays as they're
+ discovered, referenced by indice to the main set (relays.json['relays'])
+
+ This code looks (is) redundant but it saves us from multiple passes
+ over the entire set... not sure how to generalize it beyond the keys
+ list
+ '''
+ self.json['sorted'] = dict()
+ for idx, relay in enumerate(self.json['relays']):
+ keys = ['as', 'country', 'platform']
+ for key in keys:
+ v = relay.get(key)
+ if not v or not v.isalnum(): continue
+ if not key in self.json['sorted']:
+ self.json['sorted'][key] = dict()
+ if not v in self.json['sorted'][key]:
+ self.json['sorted'][key][v] = dict()
+ self.json['sorted'][key][v]['relays'] = list()
+ self.json['sorted'][key][v]['bw'] = 0
+ self.json['sorted'][key][v]['exit_count'] = 0
+ self.json['sorted'][key][v]['middle_count'] = 0
+ bw = relay['observed_bandwidth']
+ self.json['sorted'][key][v]['relays'].append(idx)
+ self.json['sorted'][key][v]['bw'] += bw
+ if 'Exit' in relay['flags']:
+ self.json['sorted'][key][v]['exit_count'] += 1
+ else:
+ self.json['sorted'][key][v]['middle_count'] += 1
+
+ for flag in relay['flags']:
+ if not flag.isalnum(): continue
+ if not 'flags' in self.json['sorted']:
+ self.json['sorted']['flags'] = dict()
+ if not flag in self.json['sorted']['flags']:
+ self.json['sorted']['flags'][flag] = dict()
+ self.json['sorted']['flags'][flag]['relays'] = list()
+ self.json['sorted']['flags'][flag]['bw'] = 0
+ self.json['sorted']['flags'][flag]['exit_count'] = 0
+ self.json['sorted']['flags'][flag]['middle_count'] = 0
+ bw = relay['observed_bandwidth']
+ self.json['sorted']['flags'][flag]['relays'].append(idx)
+ self.json['sorted']['flags'][flag]['bw'] += bw
+ if 'Exit' in relay['flags']:
+ self.json['sorted']['flags'][flag]['exit_count'] += 1
+ else:
+ self.json['sorted']['flags'][flag]['middle_count'] += 1
+
+ for member in relay['effective_family']:
+ if not member.isalnum() or len(relay['effective_family']) < 2:
+ continue
+ if not 'family' in self.json['sorted']:
+ self.json['sorted']['family'] = dict()
+ if not member in self.json['sorted']['family']:
+ self.json['sorted']['family'][member] = dict()
+ self.json['sorted']['family'][member]['relays'] = list()
+ self.json['sorted']['family'][member]['bw'] = 0
+ self.json['sorted']['family'][member]['exit_count'] = 0
+ self.json['sorted']['family'][member]['middle_count'] = 0
+ bw = relay['observed_bandwidth']
+ self.json['sorted']['family'][member]['relays'].append(idx)
+ self.json['sorted']['family'][member]['bw'] += bw
+ if 'Exit' in relay['flags']:
+ self.json['sorted']['family'][member]['exit_count'] += 1
+ else:
+ self.json['sorted']['family'][member]['middle_count'] += 1
+
+ def create_output_dir(self):
+ '''
+ Ensure config:output_root exists (required for write functions)
+ '''
+ os.makedirs(config.CONFIG['output_root'],exist_ok=True)
+
+ def write_unsorted(self, filename, is_index):
+ '''
+ Render and write unsorted HTML listings to disk
+
+ :filename: filename to write unsorted listing (e.g. all.html)
+ :is_index: whether the file is an index or not (True/False)
+ '''
+ template = ENV.get_template(filename)
+ self.json['relay_subset'] = self.json['relays']
+ template_render = template.render(relays=self, is_index=is_index)
+ output = os.path.join(config.CONFIG['output_root'], filename)
+ with open(output, 'w', encoding='utf8') as html:
+ html.write(template_render)
+
+ def write_effective_family(self):
+ '''
+ Render and write HTML listings to disk sorted by effective family
+ '''
+ template = ENV.get_template('effective_family.html')
+ output_path = os.path.join(config.CONFIG['output_root'], 'family')
+ if os.path.exists(output_path):
+ rmtree(output_path)
+ for family in self.json['sorted']['family']:
+ members = []
+ bandwidth = self.json['sorted']['family'][family]['bw']
+ exit_count = self.json['sorted']['family'][family]['exit_count']
+ middle_count = self.json['sorted']['family'][family]['middle_count']
+ for m_relay in self.json['sorted']['family'][family]['relays']:
+ members.append(self.json['relays'][m_relay])
+ dir_path = os.path.join(output_path, family)
+ os.makedirs(dir_path)
+ f_bandwidth = round(bandwidth / 1000000, 2) # convert to MB/s
+ self.json['relay_subset'] = members
+ rendered = template.render(
+ relays=self,
+ bandwidth=f_bandwidth,
+ exit_count=exit_count,
+ middle_count=middle_count,
+ is_index=False,
+ path_prefix='../../',
+ deactivate='family',
+ family=family
+ )
+ with open(os.path.join(dir_path, 'index.html'), 'w',
+ encoding='utf8') as html:
+ html.write(rendered)
+
+ def write_pages_by_key(self, key):
+ '''
+ Render and write HTML listings to disk sorted by KEY
+
+ :key: relays['sorted'] key (onionoo parameter) containing list of indices
+ belonging to key
+ '''
+ template = ENV.get_template(key + '.html')
+ output_path = os.path.join(config.CONFIG['output_root'], key)
+ if os.path.exists(output_path):
+ rmtree(output_path)
+ for v in self.json['sorted'][key]:
+ m_relays = list()
+ for idx in self.json['sorted'][key][v]['relays']:
+ m_relays.append(self.json['relays'][idx])
+ bandwidth = self.json['sorted'][key][v]['bw']
+ exit_count = self.json['sorted'][key][v]['exit_count']
+ middle_count = self.json['sorted'][key][v]['middle_count']
+ dir_path = os.path.join(output_path, v)
+ os.makedirs(dir_path)
+ f_bandwidth = round(bandwidth / 1000000, 2) # convert to MB/s
+ self.json['relay_subset'] = m_relays
+ rendered = template.render(
+ relays=self,
+ bandwidth=f_bandwidth,
+ exit_count=exit_count,
+ middle_count=middle_count,
+ is_index=False,
+ path_prefix='../../',
+ deactivate=key,
+ special_countries=countries.THE_PREFIXED
+ )
+ with open(os.path.join(dir_path, 'index.html'), 'w',
+ encoding='utf8') as html:
+ html.write(rendered)
+
+ def write_pages_by_flag(self):
+ '''
+ Render and write HTML listings to disk sorted by FLAG
+ '''
+ template = ENV.get_template('flag.html')
+ for flag in self.json['sorted']['flags']:
+ output_path = os.path.join(config.CONFIG['output_root'], 'flag',
+ flag.lower())
+ if os.path.exists(output_path):
+ rmtree(output_path)
+ relay_list = self.json['relays']
+ m_relays = list()
+ for idx in self.json['sorted']['flags'][flag]['relays']:
+ m_relays.append(self.json['relays'][idx])
+ bandwidth = self.json['sorted']['flags'][flag]['bw']
+ exit_count = self.json['sorted']['flags'][flag]['exit_count']
+ middle_count = self.json['sorted']['flags'][flag]['middle_count']
+ os.makedirs(output_path)
+ f_bandwidth = round(bandwidth / 1000000, 2) # convert to MB/s
+ self.json['relay_subset'] = m_relays
+ rendered = template.render(
+ relays=self,
+ bandwidth=f_bandwidth,
+ exit_count=exit_count,
+ middle_count=middle_count,
+ is_index=False,
+ path_prefix='../../',
+ deactivate=flag,
+ special_countries=countries.THE_PREFIXED,
+ flag=flag
+ )
+ with open(os.path.join(output_path, 'index.html'), 'w',
+ encoding='utf8') as html:
+ html.write(rendered)
+
+ def write_relay_info(self):
+ '''
+ Render and write per-relay HTML info documents to disk
+ '''
+ relay_list = self.json['relays']
+ template = ENV.get_template('relay-info.html')
+ output_path = os.path.join(config.CONFIG['output_root'], 'relay')
+ if os.path.exists(output_path):
+ rmtree(output_path)
+ os.makedirs(output_path)
+ for relay in relay_list:
+ if not relay['fingerprint'].isalnum():
+ continue
+ rendered = template.render(
+ relay=relay,
+ path_prefix='../',
+ relays=self
+ )
+ with open(os.path.join(output_path, '%s.html' % relay['fingerprint']),
+ 'w', encoding='utf8') as html:
+ html.write(rendered)