diff options
author | Jordan <me@jordan.im> | 2020-09-17 18:55:14 -0700 |
---|---|---|
committer | Jordan <me@jordan.im> | 2020-09-17 18:55:14 -0700 |
commit | caffd07d85a2a181ecc6705c1198036e030da3d2 (patch) | |
tree | 61b0f29fb571b3a08b73d07c01d51e044e12523f /tor-metrics/relays.py | |
parent | 6b2d5db77b3e17ab1eccec9483cfe1659ae56fa0 (diff) | |
download | allium-caffd07d85a2a181ecc6705c1198036e030da3d2.tar.gz allium-caffd07d85a2a181ecc6705c1198036e030da3d2.zip |
change project name to allium, closes #4
Diffstat (limited to 'tor-metrics/relays.py')
-rw-r--r-- | tor-metrics/relays.py | 283 |
1 file changed, 0 insertions, 283 deletions
'''
File: relays.py

Relays class object consisting of relays (list of dict) and onionoo fetch
timestamp
'''

import hashlib
import json
import os
import re
import time
import urllib.request
from shutil import rmtree

import config
import countries
from jinja2 import Environment, FileSystemLoader

# Project root (directory containing this file); templates live beside it
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
ENV = Environment(loader=FileSystemLoader(os.path.join(ABS_PATH, 'templates')),
                  trim_blocks=True, lstrip_blocks=True)

def hash_filter(value, hash_type='md5'):
    '''
    Custom hash filter for jinja; defaults to "md5" if no type specified

    :param value: value to be hashed
    :param hash_type: valid hashlib algorithm name (e.g. 'md5', 'sha256')
    :return: computed hash as a hexadecimal string
    :raises AttributeError: if hash_type does not name a hashlib constructor
    '''
    hash_func = getattr(hashlib, hash_type, None)

    if hash_func:
        computed_hash = hash_func(value.encode('utf-8')).hexdigest()
    else:
        raise AttributeError(
            'No hashing function named {hname}'.format(hname=hash_type)
        )

    return computed_hash

ENV.filters['hash'] = hash_filter

class Relays:
    '''
    Relay class consisting of relays (list of dict) and onionoo fetch timestamp

    :ts_file: absolute path to timestamp file used in setting If-Modified-Since
    :json: relay listings stored as a list of dict, derived from onionoo JSON
    :timestamp: timestamp of onionoo fetch
    '''
    def __init__(self):
        self.url = config.CONFIG['onionoo_url']
        self.ts_file = os.path.join(ABS_PATH, "timestamp")
        self.json = self._fetch_onionoo_details()
        self.timestamp = self._write_timestamp()

        # normalize before sorting so missing bandwidth doesn't break the key
        self._fix_missing_observed_bandwidth()
        self._sort_by_bandwidth()
        self._trim_platform()
        self._categorize()

    def _fetch_onionoo_details(self):
        '''
        Make request to onionoo to retrieve details document, return prepared
        JSON response (trimmed platform and sorted by highest observed
        bandwidth)

        Sends If-Modified-Since when a previous fetch timestamp exists; note
        that a 304 response surfaces as urllib.error.HTTPError to the caller.
        '''
        if os.path.isfile(self.ts_file):
            with open(self.ts_file, 'r', encoding='utf8') as ts_file:
                prev_timestamp = ts_file.read()
            headers = {"If-Modified-Since": prev_timestamp}
            conn = urllib.request.Request(self.url, headers=headers)
        else:
            conn = urllib.request.Request(self.url)

        # close the HTTP response deterministically (was previously leaked)
        with urllib.request.urlopen(conn) as api_response:
            body = api_response.read()

        return json.loads(body.decode('utf-8'))

    def _trim_platform(self):
        '''
        Trim platform to retain base operating system without version number or
        unnecessary classification which could affect sorting

        e.g. "Tor 0.3.4.9 on Linux" -> "Linux"
        '''
        for relay in self.json['relays']:
            relay['platform'] = relay['platform'].split(' on ', 1)[1].split(' ')[0]
            relay['platform'] = relay['platform'].split('/')[-1]  # GNU/*

    def _fix_missing_observed_bandwidth(self):
        '''
        Set the observed_bandwidth parameter value for any relay missing the
        parameter to 0; the observed_bandwidth parameter is (apparently)
        optional, I hadn't run into an instance of it missing until 2019-10-03

        "[...] Missing if router descriptor containing this information cannot be
        found."
        --https://metrics.torproject.org/onionoo.html#details_relay_observed_bandwidth

        '''
        for idx, relay in enumerate(self.json['relays']):
            if not relay.get('observed_bandwidth'):
                self.json['relays'][idx]['observed_bandwidth'] = 0

    def _sort_by_bandwidth(self):
        '''
        Sort full JSON list by highest observed_bandwidth, retain this order
        during subsequent sorting (country, AS, etc)
        '''
        self.json['relays'].sort(key=lambda x: x['observed_bandwidth'],
                                 reverse=True)

    def _write_timestamp(self):
        '''
        Store encoded timestamp in a file to retain time of last request, passed
        to onionoo via If-Modified-Since header during fetch() if exists

        :return: HTTP-date formatted timestamp string
        '''
        timestamp = time.time()
        f_timestamp = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                    time.gmtime(timestamp))
        if self.json is not None:
            with open(self.ts_file, 'w', encoding='utf8') as ts_file:
                ts_file.write(f_timestamp)

        return f_timestamp

    def _sort(self, relay, idx, k, v):
        '''
        Populate self.sorted dictionary with values from :relay:

        :relay: relay from which values are derived
        :idx: index at which the relay can be found in self.json['relays']
        :k: the name of the key to use in self.sorted
        :v: the name of the subkey to use in self.sorted[k]
        '''
        # skip empty values and anything not safe to use as a directory name
        if not v or not re.match(r'^[A-Za-z0-9_-]+$', v):
            return
        if k not in self.json['sorted']:
            self.json['sorted'][k] = dict()
        if v not in self.json['sorted'][k]:
            self.json['sorted'][k][v] = {
                'relays': list(),
                'bandwidth': 0,
                'exit_count': 0,
                'middle_count': 0
            }
        bw = relay['observed_bandwidth']
        self.json['sorted'][k][v]['relays'].append(idx)
        self.json['sorted'][k][v]['bandwidth'] += bw
        if 'Exit' in relay['flags']:
            self.json['sorted'][k][v]['exit_count'] += 1
        else:
            self.json['sorted'][k][v]['middle_count'] += 1

        # BUGFIX: string comparison must use ==, not identity (is); the old
        # `k is 'as'` form depended on CPython string interning and raises
        # SyntaxWarning on Python 3.8+
        if k == 'as':
            self.json['sorted'][k][v]['country'] = relay.get('country')
            # NOTE(review): assigns relay.get('country') to 'country_name';
            # likely intended relay.get('country_name') — confirm against
            # templates before changing, as it would alter rendered output
            self.json['sorted'][k][v]['country_name'] = relay.get('country')
            self.json['sorted'][k][v]['as_name'] = relay.get('as_name')

        if k == 'family':
            self.json['sorted'][k][v]['contact'] = relay.get('contact')

        # update the first_seen parameter to always contain the oldest
        # relay's first_seen date
        if not self.json['sorted'][k][v].get('first_seen'):
            self.json['sorted'][k][v]['first_seen'] = relay['first_seen']
        elif self.json['sorted'][k][v]['first_seen'] > relay['first_seen']:
            self.json['sorted'][k][v]['first_seen'] = relay['first_seen']

    def _categorize(self):
        '''
        Iterate over self.json['relays'] set and call self._sort() against
        discovered relays with attributes we use to generate static sets
        '''
        self.json['sorted'] = dict()
        for idx, relay in enumerate(self.json['relays']):
            keys = ['as', 'country', 'platform']
            for key in keys:
                self._sort(relay, idx, key, relay.get(key))

            for flag in relay['flags']:
                self._sort(relay, idx, 'flag', flag)

            # only index families with more than one effective member
            for member in relay['effective_family']:
                if not len(relay['effective_family']) > 1:
                    continue
                self._sort(relay, idx, 'family', member)

            self._sort(relay, idx, 'first_seen', relay['first_seen'].split(' ')[0])

            # group relays by contact via md5 of the contact string
            c_str = relay.get('contact', '').encode('utf-8')
            c_hash = hashlib.md5(c_str).hexdigest()
            self._sort(relay, idx, 'contact', c_hash)

    def create_output_dir(self):
        '''
        Ensure config:output_root exists (required for write functions)
        '''
        os.makedirs(config.CONFIG['output_root'], exist_ok=True)

    def write_misc(self, template, path, path_prefix='../', sorted_by=None,
                   reverse=True, is_index=False):
        '''
        Render and write unsorted HTML listings to disk

        :template: jinja template name
        :path_prefix: path to prefix other docs/includes
        :path: path to generate HTML document
        :sorted_by: key to sort by, used in family and networks pages
        :reverse: passed to sort() function in family and networks pages
        :is_index: whether document is main index listing, limits list to 500
        '''
        template = ENV.get_template(template)
        self.json['relay_subset'] = self.json['relays']
        template_render = template.render(
            relays       = self,
            sorted_by    = sorted_by,
            reverse      = reverse,
            is_index     = is_index,
            path_prefix  = path_prefix
        )
        output = os.path.join(config.CONFIG['output_root'], path)
        os.makedirs(os.path.dirname(output), exist_ok=True)
        with open(output, 'w', encoding='utf8') as html:
            html.write(template_render)

    def write_pages_by_key(self, k):
        '''
        Render and write HTML listings to disk sorted by :k:

        :k: sorted-set key to paginate ('as', 'country', 'flag', ...)
        '''
        template = ENV.get_template(k + '.html')
        output_path = os.path.join(config.CONFIG['output_root'], k)
        # regenerate the whole subtree from scratch each run
        if os.path.exists(output_path):
            rmtree(output_path)
        for v in self.json['sorted'][k]:
            i = self.json['sorted'][k][v]
            members = []
            for m_relay in i['relays']:
                members.append(self.json['relays'][m_relay])
            # BUGFIX: == instead of identity comparison against a literal
            if k == 'flag':
                dir_path = os.path.join(output_path, v.lower())
            else:
                dir_path = os.path.join(output_path, v)
            os.makedirs(dir_path)
            self.json['relay_subset'] = members
            rendered = template.render(
                relays       = self,
                bandwidth    = round(i['bandwidth'] / 1000000, 2),
                exit_count   = i['exit_count'],
                middle_count = i['middle_count'],
                is_index     = False,
                path_prefix  = '../../',
                key          = k,
                value        = v,
                sp_countries = countries.THE_PREFIXED
            )
            with open(os.path.join(dir_path, 'index.html'), 'w',
                      encoding='utf8') as html:
                html.write(rendered)

    def write_relay_info(self):
        '''
        Render and write per-relay HTML info documents to disk
        '''
        relay_list = self.json['relays']
        template = ENV.get_template('relay-info.html')
        output_path = os.path.join(config.CONFIG['output_root'], 'relay')
        if os.path.exists(output_path):
            rmtree(output_path)
        os.makedirs(output_path)
        for relay in relay_list:
            # fingerprint doubles as the filename; skip anything non-alphanumeric
            if not relay['fingerprint'].isalnum():
                continue
            rendered = template.render(
                relay       = relay,
                path_prefix = '../',
                relays      = self
            )
            with open(os.path.join(output_path, '%s.html' % relay['fingerprint']),
                      'w', encoding='utf8') as html:
                html.write(rendered)