From fa85205fd0499f361b243943777590b348df290b Mon Sep 17 00:00:00 2001 From: Jordan Date: Mon, 11 Jan 2021 22:20:48 -0700 Subject: replace config.py with cli args, cleanup --- .gitignore | 4 +- README | 54 +++++---- allium/allium.py | 91 +++++++++++++++ allium/config.py | 13 --- allium/countries.py | 26 ----- allium/generate.py | 73 ------------ allium/lib/relays.py | 306 +++++++++++++++++++++++++++++++++++++++++++++++++++ allium/relays.py | 285 ----------------------------------------------- 8 files changed, 430 insertions(+), 422 deletions(-) create mode 100755 allium/allium.py delete mode 100644 allium/config.py delete mode 100644 allium/countries.py delete mode 100755 allium/generate.py create mode 100644 allium/lib/relays.py delete mode 100644 allium/relays.py diff --git a/.gitignore b/.gitignore index a7a359c..3437854 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,6 @@ *.swo *.sh sandbox -allium/__pycache__ +__pycache__ +timestamp allium/www -allium/timestamp diff --git a/README b/README index b4cd081..7664e47 100644 --- a/README +++ b/README @@ -1,32 +1,40 @@ -allium: statically generated tor metrics and statistics - https://yui.cat/ +allium: generate static tor relay metrics and statistics - allium generates a set of HTML documents which represent the total set of tor - relays at the time of execution +usage: allium.py [-h] [--out] [--onionoo-url] - allium is heavily inspired by the official tor metrics project[0] and serves - as a javascript-free, statically-generated clean room implementation. the - primary goals of the project are to be fast (static), use few API queries - (one), and to present information in a condensed, readable format +optional arguments: + -h, --help show this help message and exit + --out directory to store rendered files (default "./www") + --onionoo-url onionoo HTTP URL (default + "https://onionoo.torproject.org/details") - INSTALL +ABOUT - $ pip install -r requirements.txt - $ cd allium - $ ./generate.py +allium generates a set of HTML documents which represent the total set of tor +relays at the time of execution - Files will be generated in the ./www directory by default, configurable by - editing config.py; the only non-standard dependency is Jinja2>=2.11.2 +allium is heavily inspired by the official tor metrics project[0] and serves +as a javascript-free, statically-generated clean room implementation. the +primary goals of the project are to be fast (static), use few API queries +(one), and to present information in a condensed, readable format - TODO +REQUIRES - - top exit/guard/relay families (see https://nusenu.github.io/OrNetStats/) - - interesting statistics (ASN exit concentration, IPv6-supporting relays) - - implement something similar to https://metrics.torproject.org/bubbles.html +* python3 +* Jinja2>=2.11.2 - note: this project includes country flags from GoSquared[1] and relay flags - from the Tor Project[2], the licenses of which are included in this project's - root directory +INSTALL - [0] https://metrics.torproject.org/ - [1] https://github.com/gosquared/flags - [2] https://www.torproject.org/ +$ pip install -r requirements.txt +$ cd allium +$ ./allium.py + +LICENSE + +this project includes country flags from GoSquared[1] and relay flags from the +Tor Project[2], the licenses of which are included in this project's root +directory; all code is published under UNLICENSE (public domain) + +[0] https://metrics.torproject.org/ +[1] https://github.com/gosquared/flags +[2] https://www.torproject.org/ diff --git a/allium/allium.py b/allium/allium.py new file mode 100755 index 0000000..9da4c00 --- /dev/null +++ b/allium/allium.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 + +''' +File: allium.py (executable) + +Generate complete set of relay HTML pages and copy static files to the +output_dir + +Default output directory: ./www +''' + +import argparse +import os +import pkg_resources +import sys +from shutil import copytree +from lib.relays import Relays + +jinja_version = pkg_resources.parse_version( + pkg_resources.get_distribution('jinja2').version) + +if jinja_version < pkg_resources.parse_version("2.11.2"): + sys.exit('Jinja2>=2.11.2 required') + +ABS_PATH = os.path.dirname(os.path.abspath(__file__)) + +if __name__ == '__main__': + desc = 'allium: generate static tor relay metrics and statistics' + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--out', dest='output_dir', action='store_true', + default="./www", + help='directory to store rendered files (default "./www")', + required=False) + parser.add_argument('--onionoo-url', dest='onionoo_url', action='store_true', + default="https://onionoo.torproject.org/details", + help='onionoo HTTP URL (default '\ + '"https://onionoo.torproject.org/details")', + required=False) + args = parser.parse_args() + + # object containing onionoo data and processing routines + RELAY_SET = Relays(args.output_dir, args.onionoo_url) + RELAY_SET.create_output_dir() + + # index and "all" HTML relay sets; index set limited to 500 relays + RELAY_SET.write_misc( + template = 'index.html', + path = 'index.html', + path_prefix = './', + is_index = True, + ) + RELAY_SET.write_misc( + template = 'all.html', + path = 'misc/all.html' + ) + + # miscellaneous page filename suffixes and sorted-by keys + misc_pages = { + 'by-bandwidth': '1.bandwidth', + 'by-exit-count': '1.exit_count,1.bandwidth', + 'by-middle-count': '1.middle_count,1.bandwidth', + 'by-first-seen': '1.first_seen,1.bandwidth' + } + + # miscellaneous-sorted (per misc_pages k/v) HTML pages + for k, v in misc_pages.items(): + RELAY_SET.write_misc( + template = 'misc-families.html', + path = 'misc/families-{}.html'.format(k), + sorted_by = v + ) + RELAY_SET.write_misc( + template = 'misc-networks.html', + path = 'misc/networks-{}.html'.format(k), + sorted_by = v + ) + + # onionoo keys used to generate pages by unique value; e.g. AS43350 + keys = ['as', 'contact', 'country', 'family', 'flag', 'platform', + 'first_seen'] + + for k in keys: + RELAY_SET.write_pages_by_key(k) + + # per-relay info pages + RELAY_SET.write_relay_info() + + # copy static directory and its contents if it doesn't exist + if not os.path.exists(os.path.join(args.output_dir, 'static')): + copytree(os.path.join(ABS_PATH, 'static'), + os.path.join(args.output_dir, 'static')) diff --git a/allium/config.py b/allium/config.py deleted file mode 100644 index 67a1934..0000000 --- a/allium/config.py +++ /dev/null @@ -1,13 +0,0 @@ -''' -File: config.py - -Configuration dict used in the generation of HTML documents - -:output_root: path to output directory (created if not exists) -:onionoo_url: url to onionoo details document -''' - -CONFIG = { - 'output_root': 'www', - 'onionoo_url': 'https://onionoo.torproject.org/details' -} diff --git a/allium/countries.py b/allium/countries.py deleted file mode 100644 index ba63358..0000000 --- a/allium/countries.py +++ /dev/null @@ -1,26 +0,0 @@ -''' -File: countries.py - -List of countries which require prefixing with "The" -''' - -THE_PREFIXED = [ - 'Dominican Republic', - 'Ivory Coast', - 'Marshall Islands', - 'Northern Marianas Islands', - 'Solomon Islands', - 'United Arab Emirates', - 'United Kingdom', - 'United States', - 'United States of America', - 'Vatican City', - 'Czech Republic', - 'Bahamas', - 'Gambia', - 'Netherlands', - 'Philippines', - 'Seychelles', - 'Sudan', - 'Ukraine' -] diff --git a/allium/generate.py b/allium/generate.py deleted file mode 100755 index 928b6e6..0000000 --- a/allium/generate.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python3 - -''' -File: generate.py (executable) - -Generate complete set of relay HTML pages and copy static files to -config.CONFIG['output_root'] defined in config.py - -Default output directory: ./www -''' - -import os -import sys -from shutil import copytree -import config -from relays import Relays - -ABS_PATH = os.path.dirname(os.path.abspath(__file__)) - -if __name__ == '__main__': - # object containing onionoo data and processing routines - RELAY_SET = Relays() - - # index and "all" HTML relay sets; index set limited to 500 relays - RELAY_SET.create_output_dir() - RELAY_SET.write_misc( - template = 'index.html', - path = 'index.html', - path_prefix = './', - is_index = True, - ) - RELAY_SET.write_misc( - template = 'all.html', - path = 'misc/all.html' - ) - - # miscellaneous page filename suffixes and sorted-by keys - misc_pages = { - 'by-bandwidth': '1.bandwidth', - 'by-exit-count': '1.exit_count,1.bandwidth', - 'by-middle-count': '1.middle_count,1.bandwidth', - 'by-first-seen': '1.first_seen,1.bandwidth' - } - - # write miscellaneous-sorted (per misc_pages) HTML pages - for k, v in misc_pages.items(): - RELAY_SET.write_misc( - template = 'misc-families.html', - path = 'misc/families-{}.html'.format(k), - sorted_by = v - ) - RELAY_SET.write_misc( - template = 'misc-networks.html', - path = 'misc/networks-{}.html'.format(k), - sorted_by = v - ) - - # onionoo keys to generate pages by unique value - keys = ['as', 'contact', 'country', 'family', 'flag', 'platform', - 'first_seen'] - - for k in keys: - RELAY_SET.write_pages_by_key(k) - - # per-relay info pages - RELAY_SET.write_relay_info() - - STATIC_SRC_PATH = os.path.join(ABS_PATH, 'static') - STATIC_DEST_PATH = os.path.join(config.CONFIG['output_root'], 'static') - - # copy static directory and its contents if it doesn't exist - if not os.path.exists(STATIC_DEST_PATH): - copytree(STATIC_SRC_PATH, STATIC_DEST_PATH) diff --git a/allium/lib/relays.py b/allium/lib/relays.py new file mode 100644 index 0000000..6c28d88 --- /dev/null +++ b/allium/lib/relays.py @@ -0,0 +1,306 @@ +''' +File: relays.py + +Relays class object consisting of relays (list of dict) and onionoo fetch +timestamp +''' + +import hashlib +import json +import os +import re +import time +import urllib.request +from shutil import rmtree +from jinja2 import Environment, FileSystemLoader + +ABS_PATH = os.path.dirname(os.path.abspath(__file__)) +ENV = Environment(loader=FileSystemLoader(os.path.join(ABS_PATH, '../templates')), + trim_blocks=True, lstrip_blocks=True) + +class Relays(): + ''' + Relay class consisting of processing routines and onionoo data + ''' + def __init__(self, output_dir, onionoo_url): + self.output_dir = output_dir + self.onionoo_url = onionoo_url + self.ts_file = os.path.join(os.path.dirname(ABS_PATH), "timestamp") + self.json = self._fetch_onionoo_details() + self.timestamp = self._write_timestamp() + + self._fix_missing_observed_bandwidth() + self._sort_by_bandwidth() + self._trim_platform() + self._add_hashed_contact() + self._categorize() + + def _fetch_onionoo_details(self): + ''' + Make request to onionoo to retrieve details document, return JSON + response + ''' + if os.path.isfile(self.ts_file): + with open(self.ts_file, 'r') as ts_file: + prev_timestamp = ts_file.read() + headers = {"If-Modified-Since": prev_timestamp} + conn = urllib.request.Request(self.onionoo_url, headers=headers) + else: + conn = urllib.request.Request(self.onionoo_url) + + api_response = urllib.request.urlopen(conn).read() + + return json.loads(api_response.decode('utf-8')) + + def _trim_platform(self): + ''' + Trim platform to retain base operating system without version number or + unnecessary classification which could affect sorting + + e.g. "Tor 0.3.4.9 on Linux" -> "Linux" + ''' + for relay in self.json['relays']: + relay['platform'] = relay['platform'].split(' on ', 1)[1].split(' ')[0] + relay['platform'] = relay['platform'].split('/')[-1] # GNU/* + + def _fix_missing_observed_bandwidth(self): + ''' + Set the observed_bandwidth parameter value for any relay missing the + parameter to 0; the observed_bandwidth parameter is (apparently) + optional, I hadn't run into an instance of it missing until 2019-10-03 + + "[...] Missing if router descriptor containing this information cannot be + found." + --https://metrics.torproject.org/onionoo.html#details_relay_observed_bandwidth + + ''' + for idx, relay in enumerate(self.json['relays']): + if not relay.get('observed_bandwidth'): + self.json['relays'][idx]['observed_bandwidth'] = 0 + + def _add_hashed_contact(self): + ''' + Adds a hashed contact key/value for every relay + ''' + for idx, relay in enumerate(self.json['relays']): + c = relay.get('contact', '').encode('utf-8') + self.json['relays'][idx]['contact_md5'] = hashlib.md5(c).hexdigest() + + def _sort_by_bandwidth(self): + ''' + Sort full JSON list by highest observed_bandwidth, retain this order + during subsequent sorting (country, AS, etc) + ''' + self.json['relays'].sort(key=lambda x: x['observed_bandwidth'], + reverse=True) + + def _write_timestamp(self): + ''' + Store encoded timestamp in a file to retain time of last request, passed + to onionoo via If-Modified-Since header during fetch() if exists + ''' + timestamp = time.time() + f_timestamp = time.strftime('%a, %d %b %Y %H:%M:%S GMT', + time.gmtime(timestamp)) + if self.json is not None: + with open(self.ts_file, 'w', encoding='utf8') as ts_file: + ts_file.write(f_timestamp) + + return f_timestamp + + def _sort(self, relay, idx, k, v): + ''' + Populate self.sorted dictionary with values from :relay: + + Args: + relay: relay from which values are derived + idx: index at which the relay can be found in self.json['relays'] + k: the name of the key to use in self.sorted + v: the name of the subkey to use in self.sorted[k] + ''' + if not v or not re.match(r'^[A-Za-z0-9_-]+$', v): + return + + if not k in self.json['sorted']: + self.json['sorted'][k] = dict() + + if not v in self.json['sorted'][k]: + self.json['sorted'][k][v] = { + 'relays': list(), + 'bandwidth': 0, + 'exit_count': 0, + 'middle_count': 0 + } + + bw = relay['observed_bandwidth'] + self.json['sorted'][k][v]['relays'].append(idx) + self.json['sorted'][k][v]['bandwidth'] += bw + + if 'Exit' in relay['flags']: + self.json['sorted'][k][v]['exit_count'] += 1 + else: + self.json['sorted'][k][v]['middle_count'] += 1 + + if k is 'as': + self.json['sorted'][k][v]['country'] = relay.get('country') + self.json['sorted'][k][v]['country_name'] = relay.get('country') + self.json['sorted'][k][v]['as_name'] = relay.get('as_name') + + if k is 'family': + self.json['sorted'][k][v]['contact'] = relay.get('contact') + self.json['sorted'][k][v]['contact_md5'] = relay.get('contact_md5') + + # update the first_seen parameter to always contain the oldest + # relay's first_seen date + if not self.json['sorted'][k][v].get('first_seen'): + self.json['sorted'][k][v]['first_seen'] = relay['first_seen'] + elif self.json['sorted'][k][v]['first_seen'] > relay['first_seen']: + self.json['sorted'][k][v]['first_seen'] = relay['first_seen'] + + def _categorize(self): + ''' + Iterate over self.json['relays'] set and call self._sort() against + discovered relays with attributes we use to generate static sets + ''' + self.json['sorted'] = dict() + + for idx, relay in enumerate(self.json['relays']): + keys = ['as', 'country', 'platform'] + for key in keys: + self._sort(relay, idx, key, relay.get(key)) + + for flag in relay['flags']: + self._sort(relay, idx, 'flag', flag) + + for member in relay['effective_family']: + if not len(relay['effective_family']) > 1: + continue + self._sort(relay, idx, 'family', member) + + self._sort(relay, idx, 'first_seen', relay['first_seen'].split(' ')[0]) + + c_str = relay.get('contact', '').encode('utf-8') + c_hash = hashlib.md5(c_str).hexdigest() + self._sort(relay, idx, 'contact', c_hash) + + def create_output_dir(self): + ''' + Ensure self.output_dir exists (required for write functions) + ''' + os.makedirs(self.output_dir,exist_ok=True) + + def write_misc(self, template, path, path_prefix='../', sorted_by=None, + reverse=True, is_index=False): + ''' + Render and write unsorted HTML listings to disk + + Args: + template: jinja template name + path: path to generate HTML document + path_prefix: path to prefix other docs/includes + sorted_by: key to sort by, used in family and networks pages + reverse: passed to sort() function in family and networks pages + is_index: whether document is main index listing, limits list to 500 + ''' + template = ENV.get_template(template) + self.json['relay_subset'] = self.json['relays'] + template_render = template.render( + relays = self, + sorted_by = sorted_by, + reverse = reverse, + is_index = is_index, + path_prefix = path_prefix + ) + output = os.path.join(self.output_dir, path) + os.makedirs(os.path.dirname(output), exist_ok=True) + + with open(output, 'w', encoding='utf8') as html: + html.write(template_render) + + def write_pages_by_key(self, k): + ''' + Render and write sorted HTML relay listings to disk + + Args: + k: onionoo key to sort by (as, country, platform...) + ''' + template = ENV.get_template(k + '.html') + output_path = os.path.join(self.output_dir, k) + + # the "royal the" must be gramatically recognized + the_prefixed = [ + "Dominican Republic", + "Ivory Coast", + "Marshall Islands", + "Northern Marianas Islands", + "Solomon Islands", + "United Arab Emirates", + "United Kingdom", + "United States", + "United States of America", + "Vatican City", + "Czech Republic", + "Bahamas", + "Gambia", + "Netherlands", + "Philippines", + "Seychelles", + "Sudan", + "Ukraine" + ] + + if os.path.exists(output_path): + rmtree(output_path) + + for v in self.json['sorted'][k]: + i = self.json['sorted'][k][v] + members = [] + + for m_relay in i['relays']: + members.append(self.json['relays'][m_relay]) + if k is 'flag': + dir_path = os.path.join(output_path, v.lower()) + else: + dir_path = os.path.join(output_path, v) + + os.makedirs(dir_path) + self.json['relay_subset'] = members + rendered = template.render( + relays = self, + bandwidth = round(i['bandwidth'] / 1000000, 2), + exit_count = i['exit_count'], + middle_count = i['middle_count'], + is_index = False, + path_prefix = '../../', + key = k, + value = v, + sp_countries = the_prefixed + ) + + with open(os.path.join(dir_path, 'index.html'), 'w', + encoding='utf8') as html: + html.write(rendered) + + def write_relay_info(self): + ''' + Render and write per-relay HTML info documents to disk + ''' + relay_list = self.json['relays'] + template = ENV.get_template('relay-info.html') + output_path = os.path.join(self.output_dir, 'relay') + + if os.path.exists(output_path): + rmtree(output_path) + os.makedirs(output_path) + + for relay in relay_list: + if not relay['fingerprint'].isalnum(): + continue + rendered = template.render( + relay = relay, + path_prefix = '../', + relays = self + ) + with open(os.path.join(output_path, '%s.html' % relay['fingerprint']), + 'w', encoding='utf8') as html: + html.write(rendered) diff --git a/allium/relays.py b/allium/relays.py deleted file mode 100644 index 5c50f66..0000000 --- a/allium/relays.py +++ /dev/null @@ -1,285 +0,0 @@ -''' -File: relays.py - -Relays class object consisting of relays (list of dict) and onionoo fetch -timestamp -''' - -import hashlib -import json -import os -import re -import time -import urllib.request -from shutil import rmtree -import config -import countries -from jinja2 import Environment, FileSystemLoader - -ABS_PATH = os.path.dirname(os.path.abspath(__file__)) -ENV = Environment(loader=FileSystemLoader(os.path.join(ABS_PATH, 'templates')), - trim_blocks=True, lstrip_blocks=True) - -class Relays: - ''' - Relay class consisting of processing routines and onionoo data - ''' - def __init__(self): - self.url = config.CONFIG['onionoo_url'] - self.ts_file = os.path.join(ABS_PATH, "timestamp") - self.json = self._fetch_onionoo_details() - self.timestamp = self._write_timestamp() - - self._fix_missing_observed_bandwidth() - self._sort_by_bandwidth() - self._trim_platform() - self._add_hashed_contact() - self._categorize() - - def _fetch_onionoo_details(self): - ''' - Make request to onionoo to retrieve details document, return JSON - response - ''' - if os.path.isfile(self.ts_file): - with open(self.ts_file, 'r') as ts_file: - prev_timestamp = ts_file.read() - headers = {"If-Modified-Since": prev_timestamp} - conn = urllib.request.Request(self.url, headers=headers) - else: - conn = urllib.request.Request(self.url) - - api_response = urllib.request.urlopen(conn).read() - - return json.loads(api_response.decode('utf-8')) - - def _trim_platform(self): - ''' - Trim platform to retain base operating system without version number or - unnecessary classification which could affect sorting - - e.g. "Tor 0.3.4.9 on Linux" -> "Linux" - ''' - for relay in self.json['relays']: - relay['platform'] = relay['platform'].split(' on ', 1)[1].split(' ')[0] - relay['platform'] = relay['platform'].split('/')[-1] # GNU/* - - def _fix_missing_observed_bandwidth(self): - ''' - Set the observed_bandwidth parameter value for any relay missing the - parameter to 0; the observed_bandwidth parameter is (apparently) - optional, I hadn't run into an instance of it missing until 2019-10-03 - - "[...] Missing if router descriptor containing this information cannot be - found." - --https://metrics.torproject.org/onionoo.html#details_relay_observed_bandwidth - - ''' - for idx, relay in enumerate(self.json['relays']): - if not relay.get('observed_bandwidth'): - self.json['relays'][idx]['observed_bandwidth'] = 0 - - def _add_hashed_contact(self): - ''' - Adds a hashed contact key/value for every relay - ''' - for idx, relay in enumerate(self.json['relays']): - c = relay.get('contact', '').encode('utf-8') - self.json['relays'][idx]['contact_md5'] = hashlib.md5(c).hexdigest() - - def _sort_by_bandwidth(self): - ''' - Sort full JSON list by highest observed_bandwidth, retain this order - during subsequent sorting (country, AS, etc) - ''' - self.json['relays'].sort(key=lambda x: x['observed_bandwidth'], - reverse=True) - - def _write_timestamp(self): - ''' - Store encoded timestamp in a file to retain time of last request, passed - to onionoo via If-Modified-Since header during fetch() if exists - ''' - timestamp = time.time() - f_timestamp = time.strftime('%a, %d %b %Y %H:%M:%S GMT', - time.gmtime(timestamp)) - if self.json is not None: - with open(self.ts_file, 'w', encoding='utf8') as ts_file: - ts_file.write(f_timestamp) - - return f_timestamp - - def _sort(self, relay, idx, k, v): - ''' - Populate self.sorted dictionary with values from :relay: - - Args: - relay: relay from which values are derived - idx: index at which the relay can be found in self.json['relays'] - k: the name of the key to use in self.sorted - v: the name of the subkey to use in self.sorted[k] - ''' - if not v or not re.match(r'^[A-Za-z0-9_-]+$', v): - return - - if not k in self.json['sorted']: - self.json['sorted'][k] = dict() - - if not v in self.json['sorted'][k]: - self.json['sorted'][k][v] = { - 'relays': list(), - 'bandwidth': 0, - 'exit_count': 0, - 'middle_count': 0 - } - - bw = relay['observed_bandwidth'] - self.json['sorted'][k][v]['relays'].append(idx) - self.json['sorted'][k][v]['bandwidth'] += bw - - if 'Exit' in relay['flags']: - self.json['sorted'][k][v]['exit_count'] += 1 - else: - self.json['sorted'][k][v]['middle_count'] += 1 - - if k is 'as': - self.json['sorted'][k][v]['country'] = relay.get('country') - self.json['sorted'][k][v]['country_name'] = relay.get('country') - self.json['sorted'][k][v]['as_name'] = relay.get('as_name') - - if k is 'family': - self.json['sorted'][k][v]['contact'] = relay.get('contact') - self.json['sorted'][k][v]['contact_md5'] = relay.get('contact_md5') - - # update the first_seen parameter to always contain the oldest - # relay's first_seen date - if not self.json['sorted'][k][v].get('first_seen'): - self.json['sorted'][k][v]['first_seen'] = relay['first_seen'] - elif self.json['sorted'][k][v]['first_seen'] > relay['first_seen']: - self.json['sorted'][k][v]['first_seen'] = relay['first_seen'] - - def _categorize(self): - ''' - Iterate over self.json['relays'] set and call self._sort() against - discovered relays with attributes we use to generate static sets - ''' - self.json['sorted'] = dict() - - for idx, relay in enumerate(self.json['relays']): - keys = ['as', 'country', 'platform'] - for key in keys: - self._sort(relay, idx, key, relay.get(key)) - - for flag in relay['flags']: - self._sort(relay, idx, 'flag', flag) - - for member in relay['effective_family']: - if not len(relay['effective_family']) > 1: - continue - self._sort(relay, idx, 'family', member) - - self._sort(relay, idx, 'first_seen', relay['first_seen'].split(' ')[0]) - - c_str = relay.get('contact', '').encode('utf-8') - c_hash = hashlib.md5(c_str).hexdigest() - self._sort(relay, idx, 'contact', c_hash) - - def create_output_dir(self): - ''' - Ensure config:output_root exists (required for write functions) - ''' - os.makedirs(config.CONFIG['output_root'],exist_ok=True) - - def write_misc(self, template, path, path_prefix='../', sorted_by=None, - reverse=True, is_index=False): - ''' - Render and write unsorted HTML listings to disk - - Args: - template: jinja template name - path: path to generate HTML document - path_prefix: path to prefix other docs/includes - sorted_by: key to sort by, used in family and networks pages - reverse: passed to sort() function in family and networks pages - is_index: whether document is main index listing, limits list to 500 - ''' - template = ENV.get_template(template) - self.json['relay_subset'] = self.json['relays'] - template_render = template.render( - relays = self, - sorted_by = sorted_by, - reverse = reverse, - is_index = is_index, - path_prefix = path_prefix - ) - output = os.path.join(config.CONFIG['output_root'], path) - os.makedirs(os.path.dirname(output), exist_ok=True) - - with open(output, 'w', encoding='utf8') as html: - html.write(template_render) - - def write_pages_by_key(self, k): - ''' - Render and write sorted HTML relay listings to disk - - Args: - k: onionoo key to sort by (as, country, platform...) - ''' - template = ENV.get_template(k + '.html') - output_path = os.path.join(config.CONFIG['output_root'], k) - - if os.path.exists(output_path): - rmtree(output_path) - - for v in self.json['sorted'][k]: - i = self.json['sorted'][k][v] - members = [] - - for m_relay in i['relays']: - members.append(self.json['relays'][m_relay]) - if k is 'flag': - dir_path = os.path.join(output_path, v.lower()) - else: - dir_path = os.path.join(output_path, v) - - os.makedirs(dir_path) - self.json['relay_subset'] = members - rendered = template.render( - relays = self, - bandwidth = round(i['bandwidth'] / 1000000, 2), - exit_count = i['exit_count'], - middle_count = i['middle_count'], - is_index = False, - path_prefix = '../../', - key = k, - value = v, - sp_countries = countries.THE_PREFIXED - ) - - with open(os.path.join(dir_path, 'index.html'), 'w', - encoding='utf8') as html: - html.write(rendered) - - def write_relay_info(self): - ''' - Render and write per-relay HTML info documents to disk - ''' - relay_list = self.json['relays'] - template = ENV.get_template('relay-info.html') - output_path = os.path.join(config.CONFIG['output_root'], 'relay') - - if os.path.exists(output_path): - rmtree(output_path) - os.makedirs(output_path) - - for relay in relay_list: - if not relay['fingerprint'].isalnum(): - continue - rendered = template.render( - relay = relay, - path_prefix = '../', - relays = self - ) - with open(os.path.join(output_path, '%s.html' % relay['fingerprint']), - 'w', encoding='utf8') as html: - html.write(rendered) -- cgit v1.2.3-54-g00ecf