diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2024-12-29 09:55:39 +0100 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2024-12-29 10:12:45 +0100 |
commit | af3f272b0ba52312c07674b7d98b9380e6d5cb2d (patch) | |
tree | 65e06450ff63a4d25d9d91dec408b1f9412fb25e | |
parent | 05c82d3201ce8ea2858c020663afa8076a38df67 (diff) | |
download | searxng-af3f272b0ba52312c07674b7d98b9380e6d5cb2d.tar.gz searxng-af3f272b0ba52312c07674b7d98b9380e6d5cb2d.zip |
[fix] update_engine_traits.py: annas archive, bing-* and zlibrary engines
The GitHub action "Update data - update_engine_traits" [1] had issues with the
annas archive, bing-* and zlibrary engines:
./manage pyenv.cmd python ./searxng_extra/update/update_engine_traits.py
[1] https://github.com/searxng/searxng/actions/runs/12530827768/job/34953392587
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
-rw-r--r-- | searx/engines/annas_archive.py | 8 | ||||
-rw-r--r-- | searx/engines/bing.py | 15 | ||||
-rw-r--r-- | searx/engines/zlibrary.py | 20 |
3 files changed, 34 insertions, 9 deletions
diff --git a/searx/engines/annas_archive.py b/searx/engines/annas_archive.py index ea1ab42a4..f9b466ed4 100644 --- a/searx/engines/annas_archive.py +++ b/searx/engines/annas_archive.py @@ -169,7 +169,7 @@ def fetch_traits(engine_traits: EngineTraits): lang_map = {} for x in eval_xpath_list(dom, "//form//input[@name='lang']"): eng_lang = x.get("value") - if eng_lang in ('', '_empty', 'nl-BE', 'und'): + if eng_lang in ('', '_empty', 'nl-BE', 'und') or eng_lang.startswith('anti__'): continue try: locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-') @@ -186,10 +186,12 @@ def fetch_traits(engine_traits: EngineTraits): engine_traits.languages[sxng_lang] = eng_lang for x in eval_xpath_list(dom, "//form//input[@name='content']"): - engine_traits.custom['content'].append(x.get("value")) + if not x.get("value").startswith("anti__"): + engine_traits.custom['content'].append(x.get("value")) for x in eval_xpath_list(dom, "//form//input[@name='ext']"): - engine_traits.custom['ext'].append(x.get("value")) + if not x.get("value").startswith("anti__"): + engine_traits.custom['ext'].append(x.get("value")) for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"): engine_traits.custom['sort'].append(x.get("value")) diff --git a/searx/engines/bing.py b/searx/engines/bing.py index d4f46eaaf..da76f771e 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -192,8 +192,21 @@ def fetch_traits(engine_traits: EngineTraits): # pylint: disable=import-outside-toplevel from searx.network import get # see https://github.com/searxng/searxng/issues/762 + from searx.utils import gen_useragent + + headers = { + "User-Agent": gen_useragent(), + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Language": "en-US;q=0.5,en;q=0.3", + "Accept-Encoding": "gzip, deflate, br", + "DNT": "1", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + "Sec-GPC": "1", + "Cache-Control": "max-age=0", + } - resp = 
get("https://www.bing.com/account/general") + resp = get("https://www.bing.com/account/general", headers=headers) if not resp.ok: # type: ignore print("ERROR: response from bing is not OK.") diff --git a/searx/engines/zlibrary.py b/searx/engines/zlibrary.py index 0eed5f621..c9acb28fe 100644 --- a/searx/engines/zlibrary.py +++ b/searx/engines/zlibrary.py @@ -183,17 +183,27 @@ def fetch_traits(engine_traits: EngineTraits) -> None: from searx.network import get # see https://github.com/searxng/searxng/issues/762 from searx.locales import language_tag - resp = get(base_url, verify=False) + def _use_old_values(): + # don't change anything, re-use the existing values + engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"] + engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"] + engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"] + + try: + resp = get(base_url, verify=False) + except SearxException as exc: + print(f"ERROR: zlibrary domain '{base_url}' is seized?") + print(f" --> {exc}") + _use_old_values() + return + if not resp.ok: # type: ignore raise RuntimeError("Response from zlibrary's search page is not OK.") dom = html.fromstring(resp.text) # type: ignore if domain_is_seized(dom): print(f"ERROR: zlibrary domain is seized: {base_url}") - # don't change anything, re-use the existing values - engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"] - engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"] - engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"] + _use_old_values() return engine_traits.all_locale = "" |