author     Jordan <me@jordan.im>   2021-01-11 22:20:48 -0700
committer  Jordan <me@jordan.im>   2021-01-11 22:20:48 -0700
commit     fa85205fd0499f361b243943777590b348df290b (patch)
tree       7c9d6e2f3b4d5115e7a5eae45257749cf7d2c8b5 /allium/relays.py
parent     7d530a90fe15fe912ffa84424d9d24e20144fb38 (diff)
replace config.py with cli args, cleanup
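
The config.CONFIG lookups removed below ('onionoo_url', 'output_root') give way to
command-line arguments. A minimal sketch of that shape, assuming an argparse-based
entry point; the flag names and defaults here are assumptions for illustration, not
necessarily the ones allium's new entry point uses:

    import argparse

    # hypothetical CLI standing in for config.CONFIG; names are assumptions
    parser = argparse.ArgumentParser(prog='allium')
    parser.add_argument('--out', default='./www',
                        help="directory to write generated HTML to "
                             "(replaces config.CONFIG['output_root'])")
    parser.add_argument('--onionoo-url', dest='onionoo_url',
                        default='https://onionoo.torproject.org/details',
                        help="onionoo details document to fetch "
                             "(replaces config.CONFIG['onionoo_url'])")
    args = parser.parse_args()
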
Diffstat (limited to 'allium/relays.py')
-rw-r--r--  allium/relays.py  285
1 file changed, 0 insertions(+), 285 deletions(-)
diff --git a/allium/relays.py b/allium/relays.py
deleted file mode 100644
index 5c50f66..0000000
--- a/allium/relays.py
+++ /dev/null
@@ -1,285 +0,0 @@
-'''
-File: relays.py
-
-Relays class object consisting of relays (list of dict) and onionoo fetch
-timestamp
-'''
-
-import hashlib
-import json
-import os
-import re
-import time
-import urllib.request
-from shutil import rmtree
-import config
-import countries
-from jinja2 import Environment, FileSystemLoader
-
-ABS_PATH = os.path.dirname(os.path.abspath(__file__))
-ENV = Environment(loader=FileSystemLoader(os.path.join(ABS_PATH, 'templates')),
-                  trim_blocks=True, lstrip_blocks=True)
-
-class Relays:
-    '''
-    Relay class consisting of processing routines and onionoo data
-    '''
-    def __init__(self):
-        self.url = config.CONFIG['onionoo_url']
-        self.ts_file = os.path.join(ABS_PATH, "timestamp")
-        self.json = self._fetch_onionoo_details()
-        self.timestamp = self._write_timestamp()
-
-        self._fix_missing_observed_bandwidth()
-        self._sort_by_bandwidth()
-        self._trim_platform()
-        self._add_hashed_contact()
-        self._categorize()
-
-    def _fetch_onionoo_details(self):
-        '''
-        Make request to onionoo to retrieve details document, return JSON
-        response
-        '''
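-        # if a previous run left a timestamp file, send its contents as
-        # If-Modified-Since so onionoo can indicate when the document is
-        # unchanged since the last fetch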
-        if os.path.isfile(self.ts_file):
-            with open(self.ts_file, 'r') as ts_file:
-                prev_timestamp = ts_file.read()
-            headers = {"If-Modified-Since": prev_timestamp}
-            conn = urllib.request.Request(self.url, headers=headers)
-        else:
-            conn = urllib.request.Request(self.url)
-
-        api_response = urllib.request.urlopen(conn).read()
-
-        return json.loads(api_response.decode('utf-8'))
-
-    def _trim_platform(self):
-        '''
-        Trim platform to retain base operating system without version number or
-        unnecessary classification which could affect sorting
-
-        e.g. "Tor 0.3.4.9 on Linux" -> "Linux"
-        '''
-        for relay in self.json['relays']:
-            relay['platform'] = relay['platform'].split(' on ', 1)[1].split(' ')[0]
-            relay['platform'] = relay['platform'].split('/')[-1] # GNU/*
-
-    def _fix_missing_observed_bandwidth(self):
-        '''
-        Set the observed_bandwidth parameter value for any relay missing the
-        parameter to 0; the observed_bandwidth parameter is (apparently)
-        optional, I hadn't run into an instance of it missing until 2019-10-03
-
-        "[...] Missing if router descriptor containing this information cannot be
-        found."
-        --https://metrics.torproject.org/onionoo.html#details_relay_observed_bandwidth
-
-        '''
-        for idx, relay in enumerate(self.json['relays']):
-            if not relay.get('observed_bandwidth'):
-                self.json['relays'][idx]['observed_bandwidth'] = 0
-
-    def _add_hashed_contact(self):
-        '''
-        Adds a hashed contact key/value for every relay
-        '''
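-        # the md5 gives a filesystem- and URL-safe identifier used later to
-        # group relays by contact in _categorize()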
-        for idx, relay in enumerate(self.json['relays']):
-            c = relay.get('contact', '').encode('utf-8')
-            self.json['relays'][idx]['contact_md5'] = hashlib.md5(c).hexdigest()
-
-    def _sort_by_bandwidth(self):
-        '''
-        Sort full JSON list by highest observed_bandwidth, retain this order
-        during subsequent sorting (country, AS, etc)
-        '''
-        self.json['relays'].sort(key=lambda x: x['observed_bandwidth'],
-                                 reverse=True)
-
-    def _write_timestamp(self):
-        '''
-        Store encoded timestamp in a file to retain time of last request, passed
-        to onionoo via If-Modified-Since header during fetch() if exists
-        '''
-        timestamp = time.time()
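-        # format as an HTTP-date (e.g. 'Mon, 11 Jan 2021 22:20:48 GMT') so it
-        # can be sent back verbatim in the If-Modified-Since request header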
-        f_timestamp = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
-                                    time.gmtime(timestamp))
-        if self.json is not None:
-            with open(self.ts_file, 'w', encoding='utf8') as ts_file:
-                ts_file.write(f_timestamp)
-
-        return f_timestamp
-
-    def _sort(self, relay, idx, k, v):
-        '''
-        Populate self.json['sorted'] dictionary with values from :relay:
-
-        Args:
-            relay: relay from which values are derived
-            idx: index at which the relay can be found in self.json['relays']
-            k: the name of the key to use in self.json['sorted']
-            v: the name of the subkey to use in self.json['sorted'][k]
-        '''
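-        # ignore empty or unexpected values; v is later used verbatim as an
-        # output directory name in write_pages_by_key()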
-        if not v or not re.match(r'^[A-Za-z0-9_-]+$', v):
-            return
-
-        if k not in self.json['sorted']:
-            self.json['sorted'][k] = dict()
-
-        if v not in self.json['sorted'][k]:
-            self.json['sorted'][k][v] = {
-                'relays': list(),
-                'bandwidth': 0,
-                'exit_count': 0,
-                'middle_count': 0
-            }
-
-        bw = relay['observed_bandwidth']
-        self.json['sorted'][k][v]['relays'].append(idx)
-        self.json['sorted'][k][v]['bandwidth'] += bw
-
-        if 'Exit' in relay['flags']:
-            self.json['sorted'][k][v]['exit_count'] += 1
-        else:
-            self.json['sorted'][k][v]['middle_count'] += 1
-
-        if k == 'as':
-            self.json['sorted'][k][v]['country'] = relay.get('country')
-            self.json['sorted'][k][v]['country_name'] = relay.get('country_name')
-            self.json['sorted'][k][v]['as_name'] = relay.get('as_name')
-
-        if k == 'family':
-            self.json['sorted'][k][v]['contact'] = relay.get('contact')
-            self.json['sorted'][k][v]['contact_md5'] = relay.get('contact_md5')
-
-        # update the first_seen parameter to always contain the oldest
-        # relay's first_seen date
-        if not self.json['sorted'][k][v].get('first_seen'):
-            self.json['sorted'][k][v]['first_seen'] = relay['first_seen']
-        elif self.json['sorted'][k][v]['first_seen'] > relay['first_seen']:
-            self.json['sorted'][k][v]['first_seen'] = relay['first_seen']
-
-    def _categorize(self):
-        '''
-        Iterate over self.json['relays'] set and call self._sort() against
-        discovered relays with attributes we use to generate static sets
-        '''
-        self.json['sorted'] = dict()
-
-        for idx, relay in enumerate(self.json['relays']):
-            keys = ['as', 'country', 'platform']
-            for key in keys:
-                self._sort(relay, idx, key, relay.get(key))
-
-            for flag in relay['flags']:
-                self._sort(relay, idx, 'flag', flag)
-
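-            # effective_family always includes the relay itself; only build
-            # family entries when it lists at least one other member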
-            for member in relay['effective_family']:
-                if len(relay['effective_family']) <= 1:
-                    continue
-                self._sort(relay, idx, 'family', member)
-
-            self._sort(relay, idx, 'first_seen', relay['first_seen'].split(' ')[0])
-
-            c_str = relay.get('contact', '').encode('utf-8')
-            c_hash = hashlib.md5(c_str).hexdigest()
-            self._sort(relay, idx, 'contact', c_hash)
-
-    def create_output_dir(self):
-        '''
-        Ensure config:output_root exists (required for write functions)
-        '''
-        os.makedirs(config.CONFIG['output_root'], exist_ok=True)
-
-    def write_misc(self, template, path, path_prefix='../', sorted_by=None,
-                   reverse=True, is_index=False):
-        '''
-        Render and write unsorted HTML listings to disk
-
-        Args:
-            template: jinja template name
-            path: path to generate HTML document
-            path_prefix: path to prefix other docs/includes
-            sorted_by: key to sort by, used in family and networks pages
-            reverse: passed to sort() function in family and networks pages
-            is_index: whether document is main index listing, limits list to 500
-        '''
-        template = ENV.get_template(template)
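-        # templates render whatever relays.json['relay_subset'] points at; for
-        # these unsorted pages that's the full, bandwidth-ordered relay list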
-        self.json['relay_subset'] = self.json['relays']
-        template_render = template.render(
-            relays = self,
-            sorted_by = sorted_by,
-            reverse = reverse,
-            is_index = is_index,
-            path_prefix = path_prefix
-        )
-        output = os.path.join(config.CONFIG['output_root'], path)
-        os.makedirs(os.path.dirname(output), exist_ok=True)
-
-        with open(output, 'w', encoding='utf8') as html:
-            html.write(template_render)
-
-    def write_pages_by_key(self, k):
-        '''
-        Render and write sorted HTML relay listings to disk
-
-        Args:
-            k: onionoo key to sort by (as, country, platform...)
-        '''
-        template = ENV.get_template(k + '.html')
-        output_path = os.path.join(config.CONFIG['output_root'], k)
-
-        if os.path.exists(output_path):
-            rmtree(output_path)
-
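-        # write one page per value of k (one per AS, country, platform, ...)
-        # using the relay indices recorded by _sort()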
-        for v in self.json['sorted'][k]:
-            i = self.json['sorted'][k][v]
-            members = []
-
-            for m_relay in i['relays']:
-                members.append(self.json['relays'][m_relay])
-            if k == 'flag':
-                dir_path = os.path.join(output_path, v.lower())
-            else:
-                dir_path = os.path.join(output_path, v)
-
-            os.makedirs(dir_path)
-            self.json['relay_subset'] = members
-            rendered = template.render(
-                relays = self,
-                bandwidth = round(i['bandwidth'] / 1000000, 2),
-                exit_count = i['exit_count'],
-                middle_count = i['middle_count'],
-                is_index = False,
-                path_prefix = '../../',
-                key = k,
-                value = v,
-                sp_countries = countries.THE_PREFIXED
-            )
-
-            with open(os.path.join(dir_path, 'index.html'), 'w',
-                      encoding='utf8') as html:
-                html.write(rendered)
-
-    def write_relay_info(self):
-        '''
-        Render and write per-relay HTML info documents to disk
-        '''
-        relay_list = self.json['relays']
-        template = ENV.get_template('relay-info.html')
-        output_path = os.path.join(config.CONFIG['output_root'], 'relay')
-
-        if os.path.exists(output_path):
-            rmtree(output_path)
-        os.makedirs(output_path)
-
-        for relay in relay_list:
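-            # skip relays whose fingerprint isn't strictly alphanumeric, since
-            # the fingerprint becomes part of the output filename below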
-            if not relay['fingerprint'].isalnum():
-                continue
-            rendered = template.render(
-                relay = relay,
-                path_prefix = '../',
-                relays = self
-            )
-            with open(os.path.join(output_path, '%s.html' % relay['fingerprint']),
-                      'w', encoding='utf8') as html:
-                html.write(rendered)