aboutsummaryrefslogtreecommitdiff
path: root/tor-metrics/relays.py
blob: c8b94ea0a8e47c0199936d5569d4cad50e144141 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
'''
File: relays.py

Relays class object consisting of relays (list of dict) and onionoo fetch
timestamp
'''

import os
import json
import time
import urllib.request
from urllib.error import URLError, HTTPError
import config

ABS_PATH = os.path.dirname(os.path.abspath(__file__))

class Relays:
    '''
    Relay class consisting of relays (list of dict) and onionoo fetch timestamp

    :url: onionoo details URL, read from config.CONFIG['onionoo_url']
    :ts_file: absolute path to timestamp file used in setting If-Modified-Since
    :json: prepared onionoo details document (a dict whose 'relays' entry is
           a list of dict), or None when the fetch failed
    :timestamp: timestamp of onionoo fetch, formatted as an HTTP date string
    '''
    def __init__(self):
        self.url = config.CONFIG['onionoo_url']
        self.ts_file = os.path.join(ABS_PATH, "timestamp")
        # Order matters: write_timestamp() inspects self.json to decide
        # whether the timestamp file should be updated.
        self.json = self.fetch_onionoo_details()
        self.timestamp = self.write_timestamp()

    def _read_prev_timestamp(self):
        '''
        Return the HTTP date stored in the timestamp file by a previous run,
        or an empty string when the file does not exist or is empty
        '''
        if not os.path.isfile(self.ts_file):
            return ''
        # Same encoding as write_timestamp(); strip so that a stray trailing
        # newline can never end up inside the request header value.
        with open(self.ts_file, 'r', encoding='utf8') as ts_file:
            return ts_file.read().strip()

    def fetch_onionoo_details(self):
        '''
        Make request to onionoo to retrieve details document, return prepared
        JSON response (missing observed bandwidth set to 0, sorted by highest
        observed bandwidth and with trimmed platform)

        Returns None when the request fails or the response body is not
        valid JSON; the error is printed. Any non-2xx HTTP status that
        urllib reports as an HTTPError ends up on this path as well.
        '''
        prev_timestamp = self._read_prev_timestamp()
        if prev_timestamp:
            headers = {"If-Modified-Since": prev_timestamp}
            conn = urllib.request.Request(self.url, headers=headers)
        else:
            conn = urllib.request.Request(self.url)

        try:
            # The context manager closes the connection once the body is read
            with urllib.request.urlopen(conn) as response:
                api_response = response.read()
        except HTTPError as err:
            # HTTPError is a subclass of URLError, so it has to come first
            print('HTTPError caught during onionoo fetch: %s' % err)
            return None
        except URLError as err:
            print('URLError caught during onionoo fetch: %s' % err)
            return None
        except Exception as err:
            print('Uncaught exception during onionoo fetch: %s' % err)
            return None

        try:
            # Both UnicodeDecodeError and json.JSONDecodeError derive from
            # ValueError
            json_data = json.loads(api_response.decode('utf-8'))
        except ValueError as err:
            print('Invalid JSON caught during onionoo fetch: %s' % err)
            return None

        fixed_bw = self.fix_missing_observed_bandwidth(json_data)
        sorted_json = self.sort_by_bandwidth(fixed_bw)
        trimmed_json = self.trim_platform(sorted_json)
        return trimmed_json

    def trim_platform(self, json_data):
        '''
        Trim platform to retain base operating system without version number or
        unnecessary classification which could affect sorting

        e.g. "Tor 0.3.4.9 on Linux" -> "Linux"

        Relays without a platform entry, or whose platform does not contain
        " on ", are left untouched instead of raising. The relay dicts are
        modified in place and json_data is returned.
        '''
        for relay in json_data['relays']:
            platform = relay.get('platform')
            if not isinstance(platform, str):
                continue
            _, separator, operating_system = platform.partition(' on ')
            if separator:
                relay['platform'] = operating_system.split(' ')[0]
        return json_data

    def fix_missing_observed_bandwidth(self, json_data):
        '''
        Set the observed_bandwidth parameter value for any relay missing the
        parameter to 0; the observed_bandwidth parameter is (apparently)
        optional, I hadn't run into an instance of it missing until 2019-10-03

        "[...] Missing if router descriptor containing this information cannot be
        found."
        --https://metrics.torproject.org/onionoo.html#details_relay_observed_bandwidth

        The relay dicts are modified in place and json_data is returned.
        '''
        for relay in json_data['relays']:
            # Any falsy value (missing, None, 0) is normalised to 0
            if not relay.get('observed_bandwidth'):
                relay['observed_bandwidth'] = 0
        return json_data

    def sort_by_bandwidth(self, json_data):
        '''
        Sort full JSON list by highest observed_bandwidth, retain this order
        during subsequent sorting (country, AS, etc)

        The list is sorted in place and json_data is returned. A relay with
        no (or a null) observed_bandwidth is ranked as 0.
        '''
        json_data['relays'].sort(
            key=lambda relay: relay.get('observed_bandwidth') or 0,
            reverse=True)
        return json_data

    def write_timestamp(self):
        '''
        Store encoded timestamp in a file to retain time of last request, passed
        to onionoo via If-Modified-Since header during
        fetch_onionoo_details() if exists

        The file is only written when the fetch produced data (self.json is
        not None), so a failed fetch leaves the previously stored timestamp
        in place. The formatted timestamp is returned either way.
        '''
        timestamp = time.time()
        f_timestamp = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                    time.gmtime(timestamp))
        if self.json is not None:
            with open(self.ts_file, 'w', encoding='utf8') as ts_file:
                ts_file.write(f_timestamp)
        return f_timestamp