searx/engines/torznab.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Torznab WebAPI

An engine that implements the `torznab WebAPI`_.

.. _torznab WebAPI: https://torznab.github.io/spec-1.3-draft/torznab

"""

from datetime import datetime
from urllib.parse import quote
from lxml import etree

from searx.exceptions import SearxEngineAPIException

# about
# Descriptive metadata about the upstream API (presumably consumed by the
# searx engine loader / UI -- confirm against the engine framework).
about = {
    "website": None,
    "wikidata_id": None,
    "official_api_documentation": "https://torznab.github.io/spec-1.3-draft",
    "use_official_api": True,
    "require_api_key": False,
    "results": 'XML',
}

# category the results of this engine are listed under
categories = ['files']
# the request URL built by this engine carries neither a page nor a time-range
# parameter
paging = False
time_range_support = False

# defined in settings.yml
# example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab"
# mandatory: ``init()`` raises a ValueError when this is empty
base_url = ''
# optional: sent as the ``apikey`` URL parameter when non-empty
api_key = ''
# optional: category ids, sent comma-joined as the ``cat`` URL parameter
# https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
torznab_categories = []


def init(engine_settings=None):  # pylint: disable=unused-argument
    """Validate the engine configuration.

    :param engine_settings: accepted for interface compatibility, not used here.
    :raises ValueError: if the module-level ``base_url`` is empty or unset.
    """
    # ``not base_url`` also covers ``None`` (e.g. an empty value in the
    # settings), which ``len()`` would reject with a TypeError instead of the
    # intended error message
    if not base_url:
        raise ValueError('missing torznab base_url')


def request(query, params):
    """Build the torznab search URL for *query* and store it in ``params['url']``.

    The URL is ``base_url`` followed by ``?t=search&q=<query>``.  ``&apikey=``
    and ``&cat=`` are appended only when the module-level ``api_key`` /
    ``torznab_categories`` are set.

    :param query: search terms; percent-encoded before being put in the URL.
    :param params: request parameter dict, updated in place.
    :return: the same *params* dict.
    """
    # Assemble the URL from parts instead of running ``str.format`` over a
    # template that contains ``base_url``: braces in the configured URL would
    # otherwise be interpreted as format fields.
    url_parts = [base_url, '?t=search&q=', quote(query)]

    # truthiness (instead of ``len()``) also accepts ``None`` for unset options
    if api_key:
        # the key is user-configured: escape it so that characters such as
        # '&' or '=' cannot break the query string
        url_parts += ['&apikey=', quote(str(api_key), safe='')]
    if torznab_categories:
        url_parts += ['&cat=', ','.join(str(category) for category in torznab_categories)]

    params['url'] = ''.join(url_parts)

    return params


def response(resp):
    """Turn a torznab XML response into a list of ``torrent.html`` results.

    :param resp: HTTP response; its ``content`` attribute holds the XML document.
    :return: list of result dicts, one per ``item`` element found directly
        below the first child of the document root.
    :raises SearxEngineAPIException: if the document root is an ``error``
        element; the message is its ``description`` attribute.
    """
    search_results = etree.XML(resp.content)

    # handle errors
    # https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes
    if search_results.tag == "error":
        raise SearxEngineAPIException(search_results.get("description"))

    # a root without any child element has no items: return an empty result
    # list instead of failing with an IndexError on ``search_results[0]``
    if len(search_results) == 0:
        return []

    return [_build_result(item) for item in search_results[0].iterfind('item')]


def _build_result(item):
    """Map one ``item`` element to a result dict for the ``torrent.html`` template."""
    result = {'template': 'torrent.html'}

    # The enclosure element and its attributes are treated as optional: a
    # single incomplete item must not abort the parsing of the whole response.
    enclosure = item.find('enclosure')
    enclosure_url = None
    enclosure_length = None
    if enclosure is not None:
        enclosure_url = enclosure.get('url')
        enclosure_length = enclosure.get('length')

    result["filesize"] = _to_int(enclosure_length)
    if result["filesize"] is None:
        # no usable enclosure length: fall back to the torznab ``size`` attribute
        result["filesize"] = _to_int(get_torznab_attr(item, 'size'))

    link = get_property(item, 'link')
    guid = get_property(item, 'guid')
    comments = get_property(item, 'comments')

    # define url: prefer the comments page, then the guid, else the enclosure
    result["url"] = enclosure_url
    if _has_prefix(comments, 'http'):
        result["url"] = comments
    elif _has_prefix(guid, 'http'):
        result["url"] = guid

    # define torrent file url
    result["torrentfile"] = None
    if _has_prefix(enclosure_url, 'http'):
        result["torrentfile"] = enclosure_url
    elif _has_prefix(link, 'http'):
        result["torrentfile"] = link

    # define magnet link: an explicit torznab attribute wins over enclosure/link
    result["magnetlink"] = get_torznab_attr(item, 'magneturl')
    if result["magnetlink"] is None:
        if _has_prefix(enclosure_url, 'magnet'):
            result["magnetlink"] = enclosure_url
        elif _has_prefix(link, 'magnet'):
            result["magnetlink"] = link

    result["title"] = get_property(item, 'title')
    result["files"] = get_property(item, 'files')

    result["publishedDate"] = None
    try:
        # a missing pubDate yields None here, which strptime rejects with a
        # TypeError; both cases leave publishedDate unset
        result["publishedDate"] = datetime.strptime(get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z')
    except (ValueError, TypeError) as e:
        logger.debug("ignore exception (publishedDate): %s", e)

    result["seed"] = get_torznab_attr(item, 'seeders')

    # define leech: derived from peers - seeders when not given explicitly
    result["leech"] = get_torznab_attr(item, 'leechers')
    if result["leech"] is None and result["seed"] is not None:
        peers = get_torznab_attr(item, 'peers')
        if peers is not None:
            try:
                result["leech"] = int(peers) - int(result["seed"])
            except ValueError as e:
                # non-numeric peers / seeders: keep leech unset
                logger.debug("ignore exception (leech): %s", e)

    return result


def _has_prefix(value, prefix):
    """Return True if *value* is a string (not ``None``) starting with *prefix*."""
    return value is not None and value.startswith(prefix)


def _to_int(value):
    """Convert *value* to ``int``; return ``None`` when it is missing or not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


def get_property(item, property_name):
    """Return the text of the first child of *item* matching *property_name*.

    :param item: element whose children are searched.
    :param property_name: tag (path) handed to ``item.find``.
    :return: the ``text`` of the matching element, or ``None`` when no element
        matches.
    """
    node = item.find(property_name)
    return None if node is None else node.text


def get_torznab_attr(item, attr_name):
    """Look up a ``torznab:attr`` element by name and return its ``value``.

    :param item: element searched, at any depth, for ``torznab:attr`` elements.
    :param attr_name: value the ``name`` attribute of the element must have.
    :return: the ``value`` attribute of the first match, or ``None`` when no
        element matches.
    """
    namespaces = {'torznab': 'http://torznab.com/schemas/2015/feed'}
    xpath = './/torznab:attr[@name="{attr_name}"]'.format(attr_name=attr_name)

    match = item.find(xpath, namespaces)
    return None if match is None else match.get("value")