summary refs log tree commit diff
diff options
context:
space:
mode:
author Markus Heiser <markus.heiser@darmarit.de> 2023-06-27 16:17:17 +0200
committer Markus Heiser <markus.heiser@darmarit.de> 2023-06-29 09:32:57 +0200
commit eafc2906f1ec6be52e89f5bd364093c5f1e66856 (patch)
tree ba5f6996f5cb8f28ec145bee286ac1931a224629
parent 7adb9090e5dbc25b0d120772beca01dc4eb0791e (diff)
download searxng-eafc2906f1ec6be52e89f5bd364093c5f1e66856.tar.gz
searxng-eafc2906f1ec6be52e89f5bd364093c5f1e66856.zip
[mod] engine: Anna's Archive - fetch search arguments from search form
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
-rw-r--r-- searx/data/engine_traits.json 132
-rw-r--r-- searx/engines/annas_archive.py 53
-rw-r--r-- searx/settings.yml 2
3 files changed, 183 insertions, 4 deletions
diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json
index aef8bae0b..072c9a5c4 100644
--- a/searx/data/engine_traits.json
+++ b/searx/data/engine_traits.json
@@ -1,4 +1,134 @@
{
+ "annas archive": {
+ "all_locale": "",
+ "custom": {
+ "content": [
+ "",
+ "journal_article",
+ "book_any",
+ "book_fiction",
+ "book_unknown",
+ "book_nonfiction",
+ "book_comic",
+ "magazine",
+ "standards_document"
+ ],
+ "ext": [
+ "",
+ "pdf",
+ "epub",
+ "cbr",
+ "fb2",
+ "mobi",
+ "cbz",
+ "djvu",
+ "azw3",
+ "fb2.zip",
+ "txt",
+ "rar",
+ "zip",
+ "doc",
+ "lit",
+ "rtf",
+ "htm",
+ "html",
+ "lrf",
+ "mht",
+ "docx"
+ ],
+ "sort": [
+ "",
+ "newest",
+ "oldest",
+ "largest",
+ "smallest"
+ ]
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "af",
+ "ar": "ar",
+ "az": "az",
+ "be": "be",
+ "bg": "bg",
+ "bn": "bn",
+ "bo": "bo",
+ "bs": "bs",
+ "ca": "ca",
+ "cs": "cs",
+ "da": "da",
+ "de": "de",
+ "el": "el",
+ "en": "en",
+ "eo": "eo",
+ "es": "es",
+ "et": "et",
+ "eu": "eu",
+ "fa": "fa",
+ "fi": "fi",
+ "fil": "tl",
+ "fr": "fr",
+ "gl": "gl",
+ "gu": "gu",
+ "he": "he",
+ "hi": "hi",
+ "hr": "hr",
+ "hu": "hu",
+ "hy": "hy",
+ "id": "id",
+ "is": "is",
+ "it": "it",
+ "ja": "ja",
+ "ka": "ka",
+ "kk": "kk",
+ "kn": "kn",
+ "ko": "ko",
+ "ku": "ku",
+ "ky": "ky",
+ "lo": "lo",
+ "lt": "lt",
+ "lv": "lv",
+ "mk": "mk",
+ "ml": "ml",
+ "mn": "mn",
+ "mr": "mr",
+ "ms": "ms",
+ "my": "my",
+ "nb": "nb",
+ "ne": "ne",
+ "nl": "nl",
+ "no": "no",
+ "pa": "pa",
+ "pl": "pl",
+ "ps": "ps",
+ "pt": "pt",
+ "ro": "ro",
+ "ru": "ru",
+ "sa": "sa",
+ "sd": "sd",
+ "si": "si",
+ "sk": "sk",
+ "sl": "sl",
+ "so": "so",
+ "sq": "sq",
+ "sr": "sr",
+ "sv": "sv",
+ "sw": "sw",
+ "ta": "ta",
+ "te": "te",
+ "tg": "tg",
+ "tr": "tr",
+ "tt": "tt",
+ "ug": "ug",
+ "uk": "uk",
+ "ur": "ur",
+ "uz": "uz",
+ "vi": "vi",
+ "yi": "yi",
+ "zh": "zh"
+ },
+ "regions": {}
+ },
"arch linux wiki": {
"all_locale": null,
"custom": {
@@ -4127,4 +4257,4 @@
},
"regions": {}
}
-}
\ No newline at end of file
+}
diff --git a/searx/engines/annas_archive.py b/searx/engines/annas_archive.py
index 1d5aa41ee..c845d67c6 100644
--- a/searx/engines/annas_archive.py
+++ b/searx/engines/annas_archive.py
@@ -7,7 +7,8 @@ from typing import List, Dict, Any, Optional
from urllib.parse import quote
from lxml import html
-from searx.utils import extract_text, eval_xpath
+from searx.utils import extract_text, eval_xpath, eval_xpath_list
+from searx.enginelib.traits import EngineTraits
# about
about: Dict[str, Any] = {
@@ -42,7 +43,6 @@ def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
lang = params["language"]
params["url"] = search_url.format(search_query=quote(query), lang=lang)
- print(params)
return params
@@ -66,3 +66,52 @@ def response(resp) -> List[Dict[str, Optional[str]]]:
results.append(result)
return results
+
+
+def fetch_traits(engine_traits: EngineTraits):
+ """Fetch languages and other search arguments from Anna's search form."""
+ # pylint: disable=import-outside-toplevel
+ # Fills ``engine_traits`` in place (all_locale, languages, custom); nothing is returned.
+ import babel
+ from searx.network import get # see https://github.com/searxng/searxng/issues/762
+ from searx.locales import language_tag
+ # all_locale is the empty string; the custom lists start empty and are filled from the form below.
+ engine_traits.all_locale = ''
+ engine_traits.custom['content'] = []
+ engine_traits.custom['ext'] = []
+ engine_traits.custom['sort'] = []
+ # One request to the search page; every trait below is read from this single document.
+ resp = get(base_url + '/search')
+ if not resp.ok: # type: ignore
+ raise RuntimeError("Response from Anna's search page is not OK.")
+ dom = html.fromstring(resp.text) # type: ignore
+
+ # supported language codes
+ # lang_map: optional overrides (engine code -> tag babel can parse); it is empty, so codes are parsed as-is.
+ lang_map = {}
+ for x in eval_xpath_list(dom, "//form//select[@name='lang']//option"):
+ eng_lang = x.get("value")
+ if eng_lang in ('', '_empty', 'nl-BE', 'und'): # values never added to the language table; NOTE(review): reason for 'nl-BE' is not visible here
+ continue
+ try:
+ locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
+ except babel.UnknownLocaleError:
+ # silently ignore unknown languages
+ # print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))
+ continue
+ sxng_lang = language_tag(locale)
+ conflict = engine_traits.languages.get(sxng_lang)
+ if conflict: # tag is already mapped: the existing engine code is never overwritten
+ if conflict != eng_lang: # only a *different* engine code for the same tag is reported
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang))
+ continue
+ engine_traits.languages[sxng_lang] = eng_lang
+ # custom search arguments: option values are appended unfiltered, in the order eval_xpath_list yields them
+ for x in eval_xpath_list(dom, "//form//select[@name='content']//option"):
+ engine_traits.custom['content'].append(x.get("value"))
+
+ for x in eval_xpath_list(dom, "//form//select[@name='ext']//option"):
+ engine_traits.custom['ext'].append(x.get("value"))
+
+ for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):
+ engine_traits.custom['sort'].append(x.get("value"))
diff --git a/searx/settings.yml b/searx/settings.yml
index 8877fba54..e42373a82 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -297,7 +297,7 @@ engines:
shortcut: 9g
disabled: true
- - name: anna's archive
+ - name: annas archive
engine: annas_archive
paging: false
categories: files