diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2022-02-04 01:11:44 +0100 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2022-02-04 14:53:37 +0100 |
commit | d92b3d96fdfad4dd009cefa3762d70fa76a987c7 (patch) | |
tree | 79c0318d0bfba68451aead691ba18a837ea2ba64 /searx/engines | |
parent | 9fae80facb8efaad22dc5e20f48c001f89e8e567 (diff) | |
download | searxng-d92b3d96fdfad4dd009cefa3762d70fa76a987c7.tar.gz searxng-d92b3d96fdfad4dd009cefa3762d70fa76a987c7.zip |
[fix] solidtorrents engine: JSON API no longer exists
The API endpoint we were using no longer exists. This patch is a
rewrite that parses the HTML page instead.
Related: https://github.com/paulgoio/searxng/issues/17
Closes: https://github.com/searxng/searxng/issues/858
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/solidtorrents.py | 84 |
1 files changed, 59 insertions, 25 deletions
diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py index 614b38277..6a98a1c29 100644 --- a/searx/engines/solidtorrents.py +++ b/searx/engines/solidtorrents.py @@ -1,51 +1,85 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Solid Torrents - +"""SolidTorrents """ -from json import loads +from datetime import datetime from urllib.parse import urlencode +import random + +from lxml import html + +from searx.utils import extract_text, eval_xpath, eval_xpath_getindex about = { "website": 'https://www.solidtorrents.net/', "wikidata_id": None, "official_api_documentation": None, - "use_official_api": True, + "use_official_api": False, "require_api_key": False, - "results": 'JSON', + "results": 'HTML', } categories = ['files'] paging = True -base_url = 'https://www.solidtorrents.net/' -search_url = base_url + 'api/v1/search?{query}' +base_url = '' +base_url_rand = '' + +units = {"B": 1, "KB": 2 ** 10, "MB": 2 ** 20, "GB": 2 ** 30, "TB": 2 ** 40} + + +def size2int(size_str): + n, u = size_str.split() + return int(float(n.strip()) * units[u.strip()]) def request(query, params): - skip = (params['pageno'] - 1) * 20 - query = urlencode({'q': query, 'skip': skip}) + global base_url_rand # pylint: disable=global-statement + if isinstance(base_url, list): + base_url_rand = random.choice(base_url) + else: + base_url_rand = base_url + search_url = base_url_rand + '/search?{query}' + page = (params['pageno'] - 1) * 20 + query = urlencode({'q': query, 'page': page}) params['url'] = search_url.format(query=query) - logger.debug("query_url --> %s", params['url']) return params def response(resp): results = [] - search_results = loads(resp.text) - - for result in search_results["results"]: - results.append( - { - 'infohash': result["infohash"], - 'seed': result["swarm"]["seeders"], - 'leech': result["swarm"]["leechers"], - 'title': result["title"], - 'url': "https://solidtorrents.net/view/" + result["_id"], - 'filesize': result["size"], - 
'magnetlink': result["magnet"], - 'template': "torrent.html", - } - ) + dom = html.fromstring(resp.text) + + for result in eval_xpath(dom, '//div[contains(@class, "search-result")]'): + a = eval_xpath_getindex(result, './div/h5/a', 0, None) + if a is None: + continue + title = extract_text(a) + url = eval_xpath_getindex(a, '@href', 0, None) + stats = eval_xpath(result, './div//div[contains(@class, "stats")]/div') + filesize = size2int(extract_text(stats[1])) + leech = extract_text(stats[2]) + seed = extract_text(stats[3]) + magnet = eval_xpath_getindex(result, './div//a[contains(@class, "dl-magnet")]/@href', 0, None) + + params = { + 'seed': seed, + 'leech': leech, + 'title': title, + 'url': base_url_rand + url, + 'filesize': filesize, + 'magnetlink': magnet, + 'template': "torrent.html", + } + + date_str = extract_text(stats[4]) + + try: + params['publishedDate'] = datetime.strptime(date_str, '%b %d, %Y') + except ValueError: + pass + + results.append(params) + return results |