diff options
author | Paolo Basso <12545838+paolobasso99@users.noreply.github.com> | 2023-06-25 17:24:28 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2023-06-29 09:32:57 +0200 |
commit | 7adb9090e5dbc25b0d120772beca01dc4eb0791e (patch) | |
tree | ebb4dca4e7aa6955b6fbacd91ae4720e3197ce62 /searx/engines/annas_archive.py | |
parent | e5637fe7b98d7fb06cbbe0e0f24deb12a33187ba (diff) | |
download | searxng-7adb9090e5dbc25b0d120772beca01dc4eb0791e.tar.gz searxng-7adb9090e5dbc25b0d120772beca01dc4eb0791e.zip |
[mod] engine: Anna's Archive - add language support
Diffstat (limited to 'searx/engines/annas_archive.py')
-rw-r--r-- | searx/engines/annas_archive.py | 68 |
1 files changed, 68 insertions, 0 deletions
# Reformatted from the flattened "new file" diff hunk:
#   diff --git a/searx/engines/annas_archive.py b/searx/engines/annas_archive.py
#   new file mode 100644, index 000000000..1d5aa41ee
#   --- /dev/null
#   +++ b/searx/engines/annas_archive.py

# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Anna's Archive

Engine that queries the HTML search page of https://annas-archive.org and
scrapes the result list.  The search can be restricted to the language of the
incoming request.
"""
from typing import List, Dict, Any, Optional
from urllib.parse import quote
from lxml import html

from searx.utils import extract_text, eval_xpath

# about
about: Dict[str, Any] = {
    "website": "https://annas-archive.org/",
    "wikidata_id": "Q115288326",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# engine dependent config
categories: List[str] = ["files"]
paging: bool = False

# search-url
base_url: str = "https://annas-archive.org"

# xpath queries
# Each result is an <a> below <main> whose href starts with "/md5"; the
# remaining expressions are evaluated relative to that anchor.
xpath_results: str = '//main//a[starts-with(@href,"/md5")]'
xpath_url: str = ".//@href"
xpath_title: str = ".//h3/text()[1]"
xpath_authors: str = './/div[contains(@class, "italic")]'
xpath_publisher: str = './/div[contains(@class, "text-sm")]'
xpath_file_info: str = './/div[contains(@class, "text-xs")]'


def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
    """Build the search URL and store it in ``params["url"]``.

    :param query: search terms; percent-encoded before being put in the URL.
    :param params: request parameters; ``params["language"]`` is read and
        ``params["url"]`` is written.
    :returns: the same ``params`` dict, with ``"url"`` set.
    """
    search_url: str = base_url + "/search?q={search_query}&lang={lang}"

    # "all" means no language restriction: send an empty ``lang`` value.
    lang: str = ""
    if params["language"] != "all":
        lang = params["language"]

    params["url"] = search_url.format(search_query=quote(query), lang=quote(lang))
    return params


def response(resp) -> List[Dict[str, Optional[str]]]:
    """Parse the HTML result page into a list of result dicts.

    :param resp: HTTP response; only ``resp.text`` is read.
    :returns: one dict per matched result anchor with the keys ``"url"``,
        ``"title"`` and ``"content"``.
    """
    results: List[Dict[str, Optional[str]]] = []
    dom = html.fromstring(resp.text)

    for item in dom.xpath(xpath_results):
        result: Dict[str, Optional[str]] = {}

        # The href is site-relative ("/md5..."), so prefix it with the base URL.
        # xpath_results only matches anchors that carry an href, hence [0] is safe.
        result["url"] = base_url + item.xpath(xpath_url)[0]

        result["title"] = extract_text(eval_xpath(item, xpath_title))

        result["content"] = "{publisher}. {authors}. {file_info}".format(
            authors=extract_text(eval_xpath(item, xpath_authors)),
            publisher=extract_text(eval_xpath(item, xpath_publisher)),
            file_info=extract_text(eval_xpath(item, xpath_file_info)),
        )

        results.append(result)

    return results