diff options
author | Kirill Isakov <ukwt@ya.ru> | 2016-03-25 00:24:37 +0600 |
---|---|---|
committer | Kirill Isakov <ukwt@ya.ru> | 2016-03-25 00:24:37 +0600 |
commit | e5677ae6b6b23c943e64d8e2abcb64c13c0e8bbf (patch) | |
tree | c437375db9a77f84a0eabf69279c31a3b4d3b7ce /searx/engines/nyaa.py | |
parent | d748b8419ad1ef875f34783bbbcf773ebc4cfb5e (diff) | |
download | searxng-e5677ae6b6b23c943e64d8e2abcb64c13c0e8bbf.tar.gz searxng-e5677ae6b6b23c943e64d8e2abcb64c13c0e8bbf.zip |
Add Nyaa.se search engine
Diffstat (limited to 'searx/engines/nyaa.py')
-rw-r--r-- | searx/engines/nyaa.py | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py new file mode 100644 index 000000000..81caed76f --- /dev/null +++ b/searx/engines/nyaa.py @@ -0,0 +1,115 @@ +""" + Nyaa.se (Anime Bittorrent tracker) + + @website http://www.nyaa.se/ + @provide-api no + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, content, seed, leech, torrentfile +""" + +from cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['files', 'images', 'videos', 'music'] +paging = True + +# search-url +base_url = 'http://www.nyaa.se/' +search_url = base_url + '?page=search&{query}&offset={offset}' + +# xpath queries +xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]' +xpath_category = './/td[@class="tlisticon"]/a' +xpath_title = './/td[@class="tlistname"]/a' +xpath_torrent_file = './/td[@class="tlistdownload"]/a' +xpath_filesize = './/td[@class="tlistsize"]/text()' +xpath_seeds = './/td[@class="tlistsn"]/text()' +xpath_leeches = './/td[@class="tlistln"]/text()' +xpath_downloads = './/td[@class="tlistdn"]/text()' + + +# convert a variable to integer or return 0 if it's not a number +def int_or_zero(num): + if isinstance(num, list): + if len(num) < 1: + return 0 + num = num[0] + if num.isdigit(): + return int(num) + return 0 + + +# do search-request +def request(query, params): + query = urlencode({'term': query}) + params['url'] = search_url.format(query=query, offset=params['pageno']) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for result in dom.xpath(xpath_results): + # category in which our torrent belongs + category = result.xpath(xpath_category)[0].attrib.get('title') + + # torrent title + page_a = result.xpath(xpath_title)[0] + title = escape(extract_text(page_a)) + + # link to the page + href = page_a.attrib.get('href') + + # link to the torrent file + torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href') + + # torrent size + try: + file_size, suffix = result.xpath(xpath_filesize)[0].split(' ') + + # convert torrent size to bytes. + # if there is no correct index in this dictionary, + # the try block fails as it should + multiplier = { + 'KIB': 1024, + 'MIB': 1024 ** 2, + 'GIB': 1024 ** 3, + 'TIB': 1024 ** 4 + }[suffix.upper()] + + file_size = int(float(file_size) * multiplier) + except Exception as e: + file_size = None + + # seed count + seed = int_or_zero(result.xpath(xpath_seeds)) + + # leech count + leech = int_or_zero(result.xpath(xpath_leeches)) + + # torrent downloads count + downloads = int_or_zero(result.xpath(xpath_downloads)) + + # content string contains all information not included into template + content = 'Category: "{category}". Downloaded {downloads} times.' + content = content.format(category=category, downloads=downloads) + content = escape(content) + + results.append({'url': href, + 'title': title, + 'content': content, + 'seed': seed, + 'leech': leech, + 'filesize': file_size, + 'torrentfile': torrent_link, + 'template': 'torrent.html'}) + + return results |