[feat] new engine: bt4g added & enabled and disable by default btdigg

Disable btdigg because on most SearXNG instances, SearXNG is blocked by btdigg (Cloudflare responds with "too many requests"). This implementation does not parse the HTML page because there is an XML (RSS) API. The RSS feed provides less data: it lacks the number of seeders/leechers and the list of files in the torrent. It's a tradeoff for a "stable" engine, as the XML of the RSS feed will change far less often than the HTML page. Closes: https://github.com/searxng/searxng/issues/2553
author: Emilien Devos <4016501+unixfox@users.noreply.github.com> 2023-08-04 18:15:13 +0200
committer: Markus Heiser <markus.heiser@darmarIT.de> 2023-08-06 09:30:48 +0200
commit: 0fc8f99ecc94232302b6b196eccf3ea9b132ec43 (patch)
tree: 80a10e089ae846b2da1c979727254815f5d82699 /searx/engines
parent: 5fcc75185620b80ec97d095f6bf88280e0e4d7f8 (diff)
download: searxng-0fc8f99ecc94232302b6b196eccf3ea9b132ec43.tar.gz
searxng-0fc8f99ecc94232302b6b196eccf3ea9b132ec43.zip
1 files changed, 80 insertions, 0 deletions
diff --git a/searx/engines/bt4g.py b/searx/engines/bt4g.py
new file mode 100644
index 000000000..a952e618d
--- /dev/null
+++ b/searx/engines/bt4g.py
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+ BT4G (Videos, Music, Files)
+"""
+
+import re
+from datetime import datetime
+from urllib.parse import quote
+
+from lxml import etree
+
+from searx.utils import get_torrent_size
+
+# about: engine metadata (homepage, API usage and result format)
+about = {
+    "website": 'https://bt4gprx.com',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'XML',
+}
+
+# engine dependent config
+categories = ['files']
+paging = True
+time_range_support = True
+
+# search-url: 'page=rss' selects the RSS (XML) feed, 'p' carries the page number
+url = 'https://bt4gprx.com'
+search_url = url + '/search?q={search_term}&orderby={order_by}&category={category}&p={pageno}&page=rss'
+bt4g_order_by = 'relevance'  # relevance, size, seeders, time
+bt4g_category = 'all'  # all, audio, movie, doc, app, other
+
+
+def request(query, params):
+    # Fill params['url'] with the RSS search URL for `query` and return params.
+    order_by = bt4g_order_by
+    if params['time_range']:  # the range value itself is not sent, it only forces ordering by time
+        order_by = 'time'
+
+    params['url'] = search_url.format(
+        search_term=quote(query),  # percent-encode the user query for the URL
+        order_by=order_by,
+        category=bt4g_category,
+        pageno=params['pageno'],
+    )
+    return params
+
+
+def response(resp):
+    results = []
+    # Parse the response body as XML; result entries are read from ./channel/item below.
+    search_results = etree.XML(resp.content)
+
+    # return empty array if nothing is found (the XML root has no child elements)
+    if len(search_results) == 0:
+        return []
+    # Each <item> becomes one result dict rendered with the 'torrent.html' template.
+    for entry in search_results.xpath('./channel/item'):
+        title = entry.find("title").text
+        link = entry.find("guid").text  # <guid> is used as the result URL
+        fullDescription = entry.find("description").text.split('<br>')  # parts are separated by a literal '<br>'
+        filesize = fullDescription[1]  # second part is the size text; presumably like '1.5GB' -- verify against feed
+        filesizeParsed = re.split(r"([A-Z]+)", filesize)  # [text before first uppercase run, that run, rest...]
+        magnetlink = entry.find("link").text  # <link> is used as the magnet link
+        pubDate = entry.find("pubDate").text  # parsed below; the format expects no space after the weekday comma
+        results.append(
+            {
+                'url': link,
+                'title': title,
+                'magnetlink': magnetlink,
+                'seed': 'N/A',  # fixed placeholder, not read from the feed
+                'leech': 'N/A',  # fixed placeholder, not read from the feed
+                'filesize': get_torrent_size(filesizeParsed[0], filesizeParsed[1]),
+                'publishedDate': datetime.strptime(pubDate, '%a,%d %b %Y %H:%M:%S %z'),
+                'template': 'torrent.html',
+            }
+        )
+
+    return results
author	Emilien Devos <4016501+unixfox@users.noreply.github.com>	2023-08-04 18:15:13 +0200
committer	Markus Heiser <markus.heiser@darmarIT.de>	2023-08-06 09:30:48 +0200
commit	0fc8f99ecc94232302b6b196eccf3ea9b132ec43 (patch)
tree	80a10e089ae846b2da1c979727254815f5d82699 /searx/engines
parent	5fcc75185620b80ec97d095f6bf88280e0e4d7f8 (diff)
download	searxng-0fc8f99ecc94232302b6b196eccf3ea9b132ec43.tar.gz searxng-0fc8f99ecc94232302b6b196eccf3ea9b132ec43.zip