summaryrefslogtreecommitdiff
path: root/searx/engines/btdigg.py
diff options
context:
space:
mode:
authorCqoicebordel <Cqoicebordel@users.noreply.github.com>2015-01-21 18:02:29 +0100
committerCqoicebordel <Cqoicebordel@users.noreply.github.com>2015-01-21 18:02:29 +0100
commit85dcfa2c7d66dbfde1c0aa349f01020b02195676 (patch)
tree2c649a35222bfc82c03312528016280bdca2c20b /searx/engines/btdigg.py
parent549dcac588e810090e98cb753fde2828bef66325 (diff)
downloadsearxng-85dcfa2c7d66dbfde1c0aa349f01020b02195676.tar.gz
searxng-85dcfa2c7d66dbfde1c0aa349f01020b02195676.zip
BTDigg and Mixcloud engines
Diffstat (limited to 'searx/engines/btdigg.py')
-rw-r--r--searx/engines/btdigg.py109
1 files changed, 109 insertions, 0 deletions
diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py
new file mode 100644
index 000000000..59556a2ae
--- /dev/null
+++ b/searx/engines/btdigg.py
@@ -0,0 +1,109 @@
+## BTDigg (Videos, Music, Files)
+#
+# @website https://btdigg.org
+# @provide-api yes (on demand)
+#
+# @using-api no
+# @results HTML (using search portal)
+# @stable no (HTML can change)
+# @parse url, title, content, seed, leech, filesize, files, magnetlink
+
+from urlparse import urljoin
+from cgi import escape
+from urllib import quote
+from lxml import html
+from operator import itemgetter
+from searx.engines.xpath import extract_text
+
+# engine dependent config
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True

# search-url
# `search_url` is a str.format template filled in by request():
#   {search_term} - the URL-quoted query string
#   {pageno}      - the page number sent to the site
# It must keep both placeholders; a literal URL here would make every query
# and every page fetch the same result list.
url = 'https://btdigg.org'
search_url = url + '/search?q={search_term}&p={pageno}'

# specific xpath variables
# NOTE(review): none of these three expressions is referenced by the code
# visible in this module; they are kept because other code may read them
# as module attributes - confirm before removing.
magnet_xpath = './/a[@title="Torrent magnet link"]'
torrent_xpath = './/a[@title="Download torrent file"]'
content_xpath = './/span[@class="font11px lightgrey block"]'
+
+
+# do search-request
def request(query, params):
    """Fill in the outgoing request URL for a search.

    :param query: raw search string; it is URL-quoted before use.
    :param params: request parameter dict; ``params['pageno']`` is read
                   and ``params['url']`` is written.
    :returns: the same ``params`` dict, with ``'url'`` set.
    """
    encoded_query = quote(query)

    # the page value sent to the site is one less than searx's page number
    page_index = params['pageno'] - 1

    params['url'] = search_url.format(search_term=encoded_query,
                                      pageno=page_index)
    return params
+
+
+# get response from search-request
# Multiplier that turns a size expressed in the given unit into bytes.
# Keys are upper-case because unit labels are matched case-insensitively.
_FILESIZE_FACTORS = {
    'TB': 1024 ** 4,
    'GB': 1024 ** 3,
    'MB': 1024 ** 2,
    'KB': 1024,
}


def response(resp):
    """Parse a search result page into a list of torrent results.

    :param resp: response object; only ``resp.text`` (the HTML page) is read.
    :returns: list of result dicts using the ``torrent.html`` template, with
              the keys ``url``, ``title``, ``content``, ``seed``, ``leech``,
              ``filesize``, ``files`` and ``magnetlink``, sorted by ``seed``
              in descending order.  An empty list is returned when the page
              holds no result rows.
    :raises IndexError: if a result row lacks one of the expected elements
                        (name link, snippet, three attribute values, or the
                        magnet link cell).
    :raises KeyError: if the name link or the magnet link has no ``href``.
    """
    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//div[@id="search_res"]/table/tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    results = []

    # parse results
    for result in search_res:
        link = result.xpath('.//td[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib['href'])
        title = escape(extract_text(link.xpath('.//text()')))
        content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0]))
        # keep the snippet's line structure when rendered as HTML
        content = "<br />".join(content.split("\n"))

        # evaluate the attribute query once per row:
        # [0] is "<size> <unit>", [1] the file count, [2] the seed count
        attr_vals = result.xpath('.//span[@class="attr_val"]/text()')
        size_parts = attr_vals[0].split()
        filesize = size_parts[0]
        filesize_multiplier = size_parts[1]
        files = attr_vals[1]
        seed = attr_vals[2]

        # convert seed to int if possible
        seed = int(seed) if seed.isdigit() else 0

        # the page parsing above yields no leech count
        leech = 0

        # convert filesize to byte if possible
        try:
            filesize = float(filesize)

            # An unrecognised unit leaves the bare number untouched, as a
            # float; recognised units are matched regardless of case.
            factor = _FILESIZE_FACTORS.get(filesize_multiplier.upper())
            if factor is not None:
                filesize = int(filesize * factor)
        except (ValueError, OverflowError):
            # non-numeric size text, or a value int() cannot represent
            filesize = None

        # convert files to int if possible
        files = int(files) if files.isdigit() else None

        magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': filesize,
                        'files': files,
                        'magnetlink': magnetlink,
                        'template': 'torrent.html'})

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)