aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan <me@jordan.im>2020-09-07 01:43:27 -0700
committerJordan <me@jordan.im>2020-09-07 01:43:27 -0700
commit9f79df795bf8587202cfe3fc0ba9050d9b4993e5 (patch)
treefbdb72fcca6363d66e814b5c74af6038e0d310f6
parent9107ee3a0dce0dc9949b9bcea97d9d5acd52d790 (diff)
downloadallium-9f79df795bf8587202cfe3fc0ba9050d9b4993e5.tar.gz
allium-9f79df795bf8587202cfe3fc0ba9050d9b4993e5.zip
yak shaving - generalize sort-by functions
-rwxr-xr-xtor-metrics/generate.py13
-rw-r--r--tor-metrics/relays.py227
-rw-r--r--tor-metrics/templates/country.html2
-rw-r--r--tor-metrics/templates/effective_family.html4
-rw-r--r--tor-metrics/templates/family.html4
-rw-r--r--tor-metrics/templates/flag.html6
6 files changed, 75 insertions, 181 deletions
diff --git a/tor-metrics/generate.py b/tor-metrics/generate.py
index ae4a748..b7b554b 100755
--- a/tor-metrics/generate.py
+++ b/tor-metrics/generate.py
@@ -18,23 +18,18 @@ from relays import Relays
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
if __name__ == '__main__':
- try:
- RELAY_SET = Relays()
- except Exception as err:
- print('error creating relays object from onionoo response, aborting...')
- print(err)
- sys.exit()
+ RELAY_SET = Relays()
# generate relay HTML documents
RELAY_SET.create_output_dir()
RELAY_SET.write_unsorted('index.html', is_index=True)
RELAY_SET.write_unsorted('all.html', is_index=False)
- RELAY_SET.write_effective_family()
RELAY_SET.write_pages_by_key('as')
+ RELAY_SET.write_pages_by_key('contact')
RELAY_SET.write_pages_by_key('country')
+ RELAY_SET.write_pages_by_key('family')
+ RELAY_SET.write_pages_by_key('flag')
RELAY_SET.write_pages_by_key('platform')
- RELAY_SET.write_pages_by_key('contact')
- RELAY_SET.write_pages_by_flag()
RELAY_SET.write_relay_info()
# copy static directory and its contents
diff --git a/tor-metrics/relays.py b/tor-metrics/relays.py
index 2b79af5..91d0566 100644
--- a/tor-metrics/relays.py
+++ b/tor-metrics/relays.py
@@ -30,10 +30,10 @@ def hash_filter(value, hash_type='md5'):
hash_func = getattr(hashlib, hash_type, None)
if hash_func:
- computed_hash = hash_func(value.encode("utf-8")).hexdigest()
+ computed_hash = hash_func(value.encode('utf-8')).hexdigest()
else:
raise AttributeError(
- "No hashing function named {hname}".format(hname=hash_type)
+ 'No hashing function named {hname}'.format(hname=hash_type)
)
return computed_hash
@@ -57,7 +57,7 @@ class Relays:
self._fix_missing_observed_bandwidth()
self._sort_by_bandwidth()
self._trim_platform()
- self._categorize_relays()
+ self._categorize()
def _fetch_onionoo_details(self):
'''
@@ -75,8 +75,7 @@ class Relays:
api_response = urllib.request.urlopen(conn).read()
- json_data = json.loads(api_response.decode('utf-8'))
- return json_data
+ return json.loads(api_response.decode('utf-8'))
def _trim_platform(self):
'''
@@ -123,98 +122,59 @@ class Relays:
if self.json is not None:
with open(self.ts_file, 'w', encoding='utf8') as ts_file:
ts_file.write(f_timestamp)
+
return f_timestamp
- def _categorize_relays(self):
- '''
- Add a list of dict sorted by unique keys derived from relays as they're
- discovered, referenced by indice to the main set (relays.json['relays'])
+ def _sort(self, relay, idx, k, v):
+ '''
+ Populate self.sorted dictionary with values from :relay:
+
+ :relay: relay from which values are derived
+ :idx: index at which the relay can be found in self.json['relays']
+ :k: the name of the key to use in self.sorted
+ :v: the name of the subkey to use in self.sorted[k]
+ '''
+ if not v or not v.isalnum():
+ return
+ if not k in self.json['sorted']:
+ self.json['sorted'][k] = dict()
+ if not v in self.json['sorted'][k]:
+ self.json['sorted'][k][v] = {
+ 'relays': list(),
+ 'bw': 0,
+ 'exit_count': 0,
+ 'middle_count': 0
+ }
+ bw = relay['observed_bandwidth']
+ self.json['sorted'][k][v]['relays'].append(idx)
+ self.json['sorted'][k][v]['bw'] += bw
+ if 'Exit' in relay['flags']:
+ self.json['sorted'][k][v]['exit_count'] += 1
+ else:
+ self.json['sorted'][k][v]['middle_count'] += 1
- This code looks (is) redundant but it saves us from multiple passes
- over the entire set... not sure how to generalize it beyond the keys
- list
+ def _categorize(self):
+ '''
+ Iterate over self.json['relays'] set and call self._sort() against
+ discovered relays with attributes we use to generate static sets
'''
self.json['sorted'] = dict()
for idx, relay in enumerate(self.json['relays']):
keys = ['as', 'country', 'platform']
for key in keys:
- v = relay.get(key)
- if not v or not v.isalnum(): continue
- if not key in self.json['sorted']:
- self.json['sorted'][key] = dict()
- if not v in self.json['sorted'][key]:
- self.json['sorted'][key][v] = {
- 'relays': list(),
- 'bw': 0,
- 'exit_count': 0,
- 'middle_count': 0
- }
- bw = relay['observed_bandwidth']
- self.json['sorted'][key][v]['relays'].append(idx)
- self.json['sorted'][key][v]['bw'] += bw
- if 'Exit' in relay['flags']:
- self.json['sorted'][key][v]['exit_count'] += 1
- else:
- self.json['sorted'][key][v]['middle_count'] += 1
+ self._sort(relay, idx, key, relay.get(key))
for flag in relay['flags']:
- if not flag.isalnum(): continue
- if not 'flags' in self.json['sorted']:
- self.json['sorted']['flags'] = dict()
- if not flag in self.json['sorted']['flags']:
- self.json['sorted']['flags'][flag] = {
- 'relays': list(),
- 'bw': 0,
- 'exit_count': 0,
- 'middle_count': 0
- }
- bw = relay['observed_bandwidth']
- self.json['sorted']['flags'][flag]['relays'].append(idx)
- self.json['sorted']['flags'][flag]['bw'] += bw
- if 'Exit' in relay['flags']:
- self.json['sorted']['flags'][flag]['exit_count'] += 1
- else:
- self.json['sorted']['flags'][flag]['middle_count'] += 1
+ self._sort(relay, idx, 'flag', flag)
for member in relay['effective_family']:
- if not member.isalnum() or len(relay['effective_family']) < 2:
+ if not len(relay['effective_family']) > 2:
continue
- if not 'family' in self.json['sorted']:
- self.json['sorted']['family'] = dict()
- if not member in self.json['sorted']['family']:
- self.json['sorted']['family'][member] = {
- 'relays': list(),
- 'bw': 0,
- 'exit_count': 0,
- 'middle_count': 0
- }
- bw = relay['observed_bandwidth']
- self.json['sorted']['family'][member]['relays'].append(idx)
- self.json['sorted']['family'][member]['bw'] += bw
- if 'Exit' in relay['flags']:
- self.json['sorted']['family'][member]['exit_count'] += 1
- else:
- self.json['sorted']['family'][member]['middle_count'] += 1
+ self._sort(relay, idx, 'family', member)
c_str = relay.get('contact', '').encode('utf-8')
c_hash = hashlib.md5(c_str).hexdigest()
- if 'contact' not in self.json['sorted']:
- self.json['sorted']['contact'] = dict()
- if not c_hash in self.json['sorted']['contact']:
- self.json['sorted']['contact'][c_hash] = {
- 'relays': list(),
- 'contact': c_str,
- 'bw': 0,
- 'exit_count': 0,
- 'middle_count': 0
- }
- bw = relay['observed_bandwidth']
- self.json['sorted']['contact'][c_hash]['relays'].append(idx)
- self.json['sorted']['contact'][c_hash]['bw'] += bw
- if 'Exit' in relay['flags']:
- self.json['sorted']['contact'][c_hash]['exit_count'] += 1
- else:
- self.json['sorted']['contact'][c_hash]['middle_count'] += 1
+ self._sort(relay, idx, 'contact', c_hash)
def create_output_dir(self):
'''
@@ -236,101 +196,40 @@ class Relays:
with open(output, 'w', encoding='utf8') as html:
html.write(template_render)
- def write_effective_family(self):
+ def write_pages_by_key(self, k):
'''
- Render and write HTML listings to disk sorted by effective family
+ Render and write HTML listings to disk sorted by :k:
'''
- template = ENV.get_template('effective_family.html')
- output_path = os.path.join(config.CONFIG['output_root'], 'family')
+ template = ENV.get_template(k + '.html')
+ output_path = os.path.join(config.CONFIG['output_root'], k)
if os.path.exists(output_path):
rmtree(output_path)
- for family in self.json['sorted']['family']:
- i = self.json['sorted']['family'][family]
+ for v in self.json['sorted'][k]:
+ i = self.json['sorted'][k][v]
members = []
for m_relay in i['relays']:
members.append(self.json['relays'][m_relay])
- dir_path = os.path.join(output_path, family)
+ if k is 'flag':
+ dir_path = os.path.join(output_path, v.lower())
+ else:
+ dir_path = os.path.join(output_path, v)
os.makedirs(dir_path)
self.json['relay_subset'] = members
rendered = template.render(
- relays=self,
- bandwidth=round(i['bw'] / 1000000, 2),
- exit_count=i['exit_count'],
- middle_count=i['middle_count'],
- is_index=False,
- path_prefix='../../',
- deactivate='family',
- family=family
+ relays = self,
+ bandwidth = round(i['bw'] / 1000000, 2),
+ exit_count = i['exit_count'],
+ middle_count = i['middle_count'],
+ is_index = False,
+ path_prefix = '../../',
+ deactivate = k,
+ value = v,
+ sp_countries = countries.THE_PREFIXED
)
with open(os.path.join(dir_path, 'index.html'), 'w',
encoding='utf8') as html:
html.write(rendered)
- def write_pages_by_key(self, key):
- '''
- Render and write HTML listings to disk sorted by KEY
-
- :key: relays['sorted'] key (onionoo parameter) containing list of indices
- belonging to key
- '''
- template = ENV.get_template(key + '.html')
- output_path = os.path.join(config.CONFIG['output_root'], key)
- if os.path.exists(output_path):
- rmtree(output_path)
- for v in self.json['sorted'][key]:
- i = self.json['sorted'][key][v]
- m_relays = list()
- for idx in i['relays']:
- m_relays.append(self.json['relays'][idx])
- dir_path = os.path.join(output_path, v)
- os.makedirs(dir_path)
- self.json['relay_subset'] = m_relays
- rendered = template.render(
- relays=self,
- bandwidth=round(i['bw'] / 1000000, 2),
- exit_count=i['exit_count'],
- middle_count=i['middle_count'],
- is_index=False,
- path_prefix='../../',
- deactivate=key,
- special_countries=countries.THE_PREFIXED
- )
- with open(os.path.join(dir_path, 'index.html'), 'w',
- encoding='utf8') as html:
- html.write(rendered)
-
- def write_pages_by_flag(self):
- '''
- Render and write HTML listings to disk sorted by FLAG
- '''
- template = ENV.get_template('flag.html')
- for flag in self.json['sorted']['flags']:
- i = self.json['sorted']['flags'][flag]
- output_path = os.path.join(config.CONFIG['output_root'], 'flag',
- flag.lower())
- if os.path.exists(output_path):
- rmtree(output_path)
- relay_list = self.json['relays']
- m_relays = list()
- for idx in i['relays']:
- m_relays.append(self.json['relays'][idx])
- os.makedirs(output_path)
- self.json['relay_subset'] = m_relays
- rendered = template.render(
- relays=self,
- bandwidth=round(i['bw'] / 1000000, 2),
- exit_count=i['exit_count'],
- middle_count=i['middle_count'],
- is_index=False,
- path_prefix='../../',
- deactivate=flag,
- special_countries=countries.THE_PREFIXED,
- flag=flag
- )
- with open(os.path.join(output_path, 'index.html'), 'w',
- encoding='utf8') as html:
- html.write(rendered)
-
def write_relay_info(self):
'''
Render and write per-relay HTML info documents to disk
@@ -345,9 +244,9 @@ class Relays:
if not relay['fingerprint'].isalnum():
continue
rendered = template.render(
- relay=relay,
- path_prefix='../',
- relays=self
+ relay = relay,
+ path_prefix = '../',
+ relays = self
)
with open(os.path.join(output_path, '%s.html' % relay['fingerprint']),
'w', encoding='utf8') as html:
diff --git a/tor-metrics/templates/country.html b/tor-metrics/templates/country.html
index 41f97a4..ab55af3 100644
--- a/tor-metrics/templates/country.html
+++ b/tor-metrics/templates/country.html
@@ -1,7 +1,7 @@
{% extends "relay-list.html" %}
{% set country_orig = relays.json['relay_subset'][0]['country_name']|escape %}
{% set country_abbr = relays.json['relay_subset'][0]['country']|escape %}
-{% if country_orig in special_countries %}
+{% if country_orig in sp_countries %}
{% set country_name = 'The ' + country_orig %}
{% else %}
{% set country_name = country_orig %}
diff --git a/tor-metrics/templates/effective_family.html b/tor-metrics/templates/effective_family.html
deleted file mode 100644
index 067ac6d..0000000
--- a/tor-metrics/templates/effective_family.html
+++ /dev/null
@@ -1,4 +0,0 @@
-{% extends "relay-list.html" %}
-{% block title %}Tor Relays :: Family {{ family|escape }}{% endblock %}
-{% block header %}<a href="../../">Home</a> :: Family {{ family|escape }}{% endblock %}
-{% block description %}Relays with effective family member {{ family|escape }} are responsible for ~{{ bandwidth }} MB/s of traffic, with {% if middle_count > 0 %}{{ middle_count }} middle relay{% if middle_count > 1 %}s{% endif %}{% if exit_count > 0 %} and {% endif %}{% endif %}{% if exit_count > 0 %}{{ exit_count }} exit relay{% if exit_count > 1 %}s{% endif %}{% endif %}.{% endblock %}
diff --git a/tor-metrics/templates/family.html b/tor-metrics/templates/family.html
new file mode 100644
index 0000000..9f1d092
--- /dev/null
+++ b/tor-metrics/templates/family.html
@@ -0,0 +1,4 @@
+{% extends "relay-list.html" %}
+{% block title %}Tor Relays :: Family {{ value|escape }}{% endblock %}
+{% block header %}<a href="../../">Home</a> :: Family {{ value|escape }}{% endblock %}
+{% block description %}Relays with effective family member {{ value|escape }} are responsible for ~{{ bandwidth }} MB/s of traffic, with {% if middle_count > 0 %}{{ middle_count }} middle relay{% if middle_count > 1 %}s{% endif %}{% if exit_count > 0 %} and {% endif %}{% endif %}{% if exit_count > 0 %}{{ exit_count }} exit relay{% if exit_count > 1 %}s{% endif %}{% endif %}.{% endblock %}
diff --git a/tor-metrics/templates/flag.html b/tor-metrics/templates/flag.html
index 29d28f5..875c494 100644
--- a/tor-metrics/templates/flag.html
+++ b/tor-metrics/templates/flag.html
@@ -1,4 +1,4 @@
{% extends "relay-list.html" %}
-{% block title %}Tor Relays :: {{ flag|escape }} Relays{% endblock %}
-{% block header %}<a href="../../">Home</a> :: {{ flag|escape }} Relays{% endblock %}
-{% block description %}Relays with the {{ flag }} flag are responsible for ~{{ bandwidth }} MB/s of traffic, with {% if middle_count > 0 %}{{ middle_count }} middle relay{% if middle_count > 1 %}s{% endif %}{% if exit_count > 0 %} and {% endif %}{% endif %}{% if exit_count > 0 %}{{ exit_count }} exit relay{% if exit_count > 1 %}s{% endif %}{% endif %}.{% endblock %}
+{% block title %}Tor Relays :: {{ value|escape }} Relays{% endblock %}
+{% block header %}<a href="../../">Home</a> :: {{ value|escape }} Relays{% endblock %}
+{% block description %}Relays with the {{ value }} flag are responsible for ~{{ bandwidth }} MB/s of traffic, with {% if middle_count > 0 %}{{ middle_count }} middle relay{% if middle_count > 1 %}s{% endif %}{% if exit_count > 0 %} and {% endif %}{% endif %}{% if exit_count > 0 %}{{ exit_count }} exit relay{% if exit_count > 1 %}s{% endif %}{% endif %}.{% endblock %}