summaryrefslogtreecommitdiff
path: root/searx/engines
diff options
context:
space:
mode:
authorPaolo Basso <12545838+paolobasso99@users.noreply.github.com>2023-06-24 18:58:27 +0200
committerMarkus Heiser <markus.heiser@darmarIT.de>2023-06-28 10:03:44 +0200
commit401561cb580454ef73d08072dbad0da1a5e897aa (patch)
tree906b6489da18995d25a8f620d03f9068fb2a5650 /searx/engines
parentda7c30291dcf53cc5b3d98f9aada5615cd1593a9 (diff)
downloadsearxng-401561cb580454ef73d08072dbad0da1a5e897aa.tar.gz
searxng-401561cb580454ef73d08072dbad0da1a5e897aa.zip
[mod] engine torznab - refactor & option to hide links
- torznab engine using types and clearer code - torznab option to hide torrent and magnet links. - document the torznab engine - add myself to authors Closes: https://github.com/searxng/searxng/issues/1124 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines')
-rw-r--r--searx/engines/torznab.py246
1 files changed, 179 insertions, 67 deletions
diff --git a/searx/engines/torznab.py b/searx/engines/torznab.py
index a48017c13..dc24919b5 100644
--- a/searx/engines/torznab.py
+++ b/searx/engines/torznab.py
@@ -1,21 +1,83 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Torznab WebAPI
+""".. _torznab engine:
-A engine that implements the `torznab WebAPI`_.
+==============
+Torznab WebAPI
+==============
-.. _torznab WebAPI: https://torznab.github.io/spec-1.3-draft/torznab
+.. contents:: Contents
+ :depth: 2
+ :local:
+ :backlinks: entry
+
+Torznab_ is an API specification that provides a standardized way to query
+torrent sites for content. It is used by a number of torrent applications,
+including Prowlarr_ and Jackett_.
+
+Using this engine together with Prowlarr_ or Jackett_ allows you to search
+a huge number of torrent sites which are not directly supported.
+
+Configuration
+=============
+
+The engine has the following settings:
+
+``base_url``:
+ Torznab endpoint URL.
+
+``api_key``:
+ The API key to use for authentication.
+
+``torznab_categories``:
+ The categories to use for searching. This is a list of category IDs. See
+ Prowlarr-categories_ or Jackett-categories_ for more information.
+
+``show_torrent_files``:
+ Whether to show the torrent file in the search results. Be careful, as using
+ this with Prowlarr_ or Jackett_ leaks the API key. This should be used only
+ if you are querying a Torznab endpoint without authentication or if the
+ instance is private. Be aware that private trackers may ban you if you share
+ the torrent file. Defaults to ``false``.
+
+``show_magnet_links``:
+ Whether to show the magnet link in the search results. Be aware that private
+ trackers may ban you if you share the magnet link. Defaults to ``true``.
+
+.. _Torznab:
+ https://torznab.github.io/spec-1.3-draft/index.html
+.. _Prowlarr:
+ https://github.com/Prowlarr/Prowlarr
+.. _Jackett:
+ https://github.com/Jackett/Jackett
+.. _Prowlarr-categories:
+ https://wiki.servarr.com/en/prowlarr/cardigann-yml-definition#categories
+.. _Jackett-categories:
+ https://github.com/Jackett/Jackett/wiki/Jackett-Categories
+
+
+Implementations
+===============
"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from typing import List, Dict, Any
from datetime import datetime
from urllib.parse import quote
-from lxml import etree
+from lxml import etree # type: ignore
from searx.exceptions import SearxEngineAPIException
-# about
-about = {
+if TYPE_CHECKING:
+ import httpx
+ import logging
+
+ logger: logging.Logger
+
+# engine settings
+about: Dict[str, Any] = {
"website": None,
"wikidata_id": None,
"official_api_documentation": "https://torznab.github.io/spec-1.3-draft",
@@ -23,27 +85,30 @@ about = {
"require_api_key": False,
"results": 'XML',
}
-
-categories = ['files']
-paging = False
-time_range_support = False
+categories: List[str] = ['files']
+paging: bool = False
+time_range_support: bool = False
# defined in settings.yml
# example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab"
-base_url = ''
-api_key = ''
+base_url: str = ''
+api_key: str = ''
# https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
-torznab_categories = []
+torznab_categories: List[str] = []
+show_torrent_files: bool = False
+show_magnet_links: bool = True
def init(engine_settings=None): # pylint: disable=unused-argument
+ """Initialize the engine."""
if len(base_url) < 1:
raise ValueError('missing torznab base_url')
-def request(query, params):
+def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
+ """Build the request params."""
+ search_url: str = base_url + '?t=search&q={search_query}'
- search_url = base_url + '?t=search&q={search_query}'
if len(api_key) > 0:
search_url += '&apikey={api_key}'
if len(torznab_categories) > 0:
@@ -56,88 +121,135 @@ def request(query, params):
return params
-def response(resp):
+def response(resp: httpx.Response) -> List[Dict[str, Any]]:
+ """Parse the XML response and return a list of results."""
results = []
-
search_results = etree.XML(resp.content)
- # handle errors
- # https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes
+ # handle errors: https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes
if search_results.tag == "error":
raise SearxEngineAPIException(search_results.get("description"))
- for item in search_results[0].iterfind('item'):
- result = {'template': 'torrent.html'}
+ channel: etree.Element = search_results[0]
- enclosure = item.find('enclosure')
+ item: etree.Element
+ for item in channel.iterfind('item'):
+ result: Dict[str, Any] = build_result(item)
+ results.append(result)
- result["filesize"] = int(enclosure.get('length'))
+ return results
- link = get_property(item, 'link')
- guid = get_property(item, 'guid')
- comments = get_property(item, 'comments')
- # define url
- result["url"] = enclosure.get('url')
- if comments is not None and comments.startswith('http'):
- result["url"] = comments
- elif guid is not None and guid.startswith('http'):
- result["url"] = guid
+def build_result(item: etree.Element) -> Dict[str, Any]:
+ """Build a result from an XML item."""
+
+ # extract attributes from XML
+ # see https://torznab.github.io/spec-1.3-draft/torznab/Specification-v1.3.html#predefined-attributes
+ enclosure: etree.Element | None = item.find('enclosure')
+ enclosure_url: str | None = None
+ if enclosure is not None:
+ enclosure_url = enclosure.get('url')
+
+ size = get_attribute(item, 'size')
+ if not size and enclosure:
+ size = enclosure.get('length')
+ if size:
+ size = int(size)
+
+ guid = get_attribute(item, 'guid')
+ comments = get_attribute(item, 'comments')
+ pubDate = get_attribute(item, 'pubDate')
+ seeders = get_torznab_attribute(item, 'seeders')
+ leechers = get_torznab_attribute(item, 'leechers')
+ peers = get_torznab_attribute(item, 'peers')
+
+ # map attributes to searx result
+ result: Dict[str, Any] = {
+ 'template': 'torrent.html',
+ 'title': get_attribute(item, 'title'),
+ 'filesize': size,
+ 'files': get_attribute(item, 'files'),
+ 'seed': seeders,
+ 'leech': _map_leechers(leechers, seeders, peers),
+ 'url': _map_result_url(guid, comments),
+ 'publishedDate': _map_published_date(pubDate),
+ 'torrentfile': None,
+ 'magnetlink': None,
+ }
+
+ link = get_attribute(item, 'link')
+ if show_torrent_files:
+ result['torrentfile'] = _map_torrent_file(link, enclosure_url)
+ if show_magnet_links:
+ magneturl = get_torznab_attribute(item, 'magneturl')
+ result['magnetlink'] = _map_magnet_link(magneturl, guid, enclosure_url, link)
+ return result
+
+
+def _map_result_url(guid: str | None, comments: str | None) -> str | None:
+ if guid and guid.startswith('http'):
+ return guid
+ if comments and comments.startswith('http'):
+ return comments
+ return None
- # define torrent file url
- result["torrentfile"] = None
- if enclosure.get('url').startswith("http"):
- result["torrentfile"] = enclosure.get('url')
- elif link is not None and link.startswith('http'):
- result["torrentfile"] = link
- # define magnet link
- result["magnetlink"] = get_torznab_attr(item, 'magneturl')
- if result["magnetlink"] is None:
- if enclosure.get('url').startswith("magnet"):
- result["magnetlink"] = enclosure.get('url')
- elif link is not None and link.startswith('magnet'):
- result["magnetlink"] = link
+def _map_leechers(leechers: str | None, seeders: str | None, peers: str | None) -> str | None:
+ if leechers:
+ return leechers
+ if seeders and peers:
+ return str(int(peers) - int(seeders))
+ return None
- result["title"] = get_property(item, 'title')
- result["files"] = get_property(item, 'files')
- result["publishedDate"] = None
+def _map_published_date(pubDate: str | None) -> datetime | None:
+ if pubDate is not None:
try:
- result["publishedDate"] = datetime.strptime(get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z')
+ return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %z')
except (ValueError, TypeError) as e:
logger.debug("ignore exception (publishedDate): %s", e)
+ return None
- result["seed"] = get_torznab_attr(item, 'seeders')
-
- # define leech
- result["leech"] = get_torznab_attr(item, 'leechers')
- if result["leech"] is None and result["seed"] is not None:
- peers = get_torznab_attr(item, 'peers')
- if peers is not None:
- result["leech"] = int(peers) - int(result["seed"])
- results.append(result)
+def _map_torrent_file(link: str | None, enclosure_url: str | None) -> str | None:
+ if link and link.startswith('http'):
+ return link
+ if enclosure_url and enclosure_url.startswith('http'):
+ return enclosure_url
+ return None
- return results
+def _map_magnet_link(
+ magneturl: str | None,
+ guid: str | None,
+ enclosure_url: str | None,
+ link: str | None,
+) -> str | None:
+ if magneturl and magneturl.startswith('magnet'):
+ return magneturl
+ if guid and guid.startswith('magnet'):
+ return guid
+ if enclosure_url and enclosure_url.startswith('magnet'):
+ return enclosure_url
+ if link and link.startswith('magnet'):
+ return link
+ return None
-def get_property(item, property_name):
- property_element = item.find(property_name)
+def get_attribute(item: etree.Element, property_name: str) -> str | None:
+ """Get attribute from item."""
+ property_element: etree.Element | None = item.find(property_name)
if property_element is not None:
return property_element.text
-
return None
-def get_torznab_attr(item, attr_name):
- element = item.find(
- './/torznab:attr[@name="{attr_name}"]'.format(attr_name=attr_name),
+def get_torznab_attribute(item: etree.Element, attribute_name: str) -> str | None:
+ """Get torznab special attribute from item."""
+ element: etree.Element | None = item.find(
+ './/torznab:attr[@name="{attribute_name}"]'.format(attribute_name=attribute_name),
{'torznab': 'http://torznab.com/schemas/2015/feed'},
)
-
if element is not None:
return element.get("value")
-
return None