diff options
author | Émilien Devos <contact@emiliendevos.be> | 2020-08-18 19:02:41 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-18 18:02:41 +0200 |
commit | 27d74826f1e00302336c8720e014cd4cda871981 (patch) | |
tree | 4a7c7fa296d207bdeb1d99d32c8c0c8a82ca1c24 /searx/engines/yggtorrent.py | |
parent | 093dd42bb05feafb2cc77304d3b1a9a1a3016949 (diff) | |
download | searxng-27d74826f1e00302336c8720e014cd4cda871981.tar.gz searxng-27d74826f1e00302336c8720e014cd4cda871981.zip |
[enh] add yggtorrent engine (#2135)
Diffstat (limited to 'searx/engines/yggtorrent.py')
-rw-r--r-- | searx/engines/yggtorrent.py | 124 |
1 files changed, 124 insertions, 0 deletions
diff --git a/searx/engines/yggtorrent.py b/searx/engines/yggtorrent.py new file mode 100644 index 000000000..739574e8d --- /dev/null +++ b/searx/engines/yggtorrent.py @@ -0,0 +1,124 @@ +# Yggtorrent (Videos, Music, Files) +# +# @website https://www2.yggtorrent.si +# @provide-api no (nothing found) +# +# @using-api no +# @results HTML (using search portal) +# @stable no (HTML can change) +# @parse url, title, seed, leech, publishedDate, filesize + +from lxml import html +from operator import itemgetter +from datetime import datetime +from searx.engines.xpath import extract_text +from searx.url_utils import quote +from searx.utils import get_torrent_size +from searx.poolrequests import get as http_get + +# engine dependent config +categories = ['videos', 'music', 'files'] +paging = True + +# search-url +url = 'https://www2.yggtorrent.si/' +search_url = url + 'engine/search?name={search_term}&do=search&page={pageno}&category={search_type}' + +# yggtorrent specific type-definitions +search_types = {'files': 'all', + 'music': '2139', + 'videos': '2145'} + +cookies = dict() + + +def init(engine_settings=None): + global cookies + # initial cookies + resp = http_get(url, allow_redirects=False) + if resp.ok: + for r in resp.history: + cookies.update(r.cookies) + cookies.update(resp.cookies) + + +# do search-request +def request(query, params): + search_type = search_types.get(params['category'], 'all') + pageno = (params['pageno'] - 1) * 50 + + params['url'] = search_url.format(search_term=quote(query), + search_type=search_type, + pageno=pageno) + + params['cookies'] = cookies + + return params + + +# get response from search-request +def response(resp): + results = [] + dom = html.fromstring(resp.text) + + search_res = dom.xpath('//section[@id="#torrents"]/div/table/tbody/tr') + + # return empty array if nothing is found + if not search_res: + return [] + + # parse results + for result in search_res: + link = result.xpath('.//a[@id="torrent_name"]')[0] + href = link.attrib.get('href') + title = extract_text(link) + seed = result.xpath('.//td[8]/text()')[0] + leech = result.xpath('.//td[9]/text()')[0] + + # convert seed to int if possible + if seed.isdigit(): + seed = int(seed) + else: + seed = 0 + + # convert leech to int if possible + if leech.isdigit(): + leech = int(leech) + else: + leech = 0 + + params = {'url': href, + 'title': title, + 'seed': seed, + 'leech': leech, + 'template': 'torrent.html'} + + # let's try to calculate the torrent size + try: + filesize_info = result.xpath('.//td[6]/text()')[0] + filesize = filesize_info[:-2] + filesize_multiplier = filesize_info[-2:].lower() + multiplier_french_to_english = { + 'to': 'TiB', + 'go': 'GiB', + 'mo': 'MiB', + 'ko': 'KiB' + } + filesize = get_torrent_size(filesize, multiplier_french_to_english[filesize_multiplier]) + params['filesize'] = filesize + except: + pass + + # extract and convert creation date + try: + date_ts = result.xpath('.//td[5]/div/text()')[0] + date = datetime.fromtimestamp(float(date_ts)) + params['publishedDate'] = date + except: + pass + + # append result + results.append(params) + + # return results sorted by seeder + return sorted(results, key=itemgetter('seed'), reverse=True) |