summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarit.de> 2024-12-29 09:55:39 +0100
committer: Markus Heiser <markus.heiser@darmarIT.de> 2024-12-29 10:12:45 +0100
commit: af3f272b0ba52312c07674b7d98b9380e6d5cb2d (patch)
tree: 65e06450ff63a4d25d9d91dec408b1f9412fb25e
parent: 05c82d3201ce8ea2858c020663afa8076a38df67 (diff)
download: searxng-af3f272b0ba52312c07674b7d98b9380e6d5cb2d.tar.gz
download: searxng-af3f272b0ba52312c07674b7d98b9380e6d5cb2d.zip
[fix] update_engine_traits.py: annas archive, bing-* and zlibrary engines
The GitHub action "Update data - update_engine_traits" [1] had issues in the Anna's Archive, bing-* and zlibrary engines when running:

    ./manage pyenv.cmd python ./searxng_extra/update/update_engine_traits.py

[1] https://github.com/searxng/searxng/actions/runs/12530827768/job/34953392587

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
-rw-r--r-- searx/engines/annas_archive.py 8
-rw-r--r-- searx/engines/bing.py 15
-rw-r--r-- searx/engines/zlibrary.py 20
3 files changed, 34 insertions, 9 deletions
diff --git a/searx/engines/annas_archive.py b/searx/engines/annas_archive.py
index ea1ab42a4..f9b466ed4 100644
--- a/searx/engines/annas_archive.py
+++ b/searx/engines/annas_archive.py
@@ -169,7 +169,7 @@ def fetch_traits(engine_traits: EngineTraits):
lang_map = {}
for x in eval_xpath_list(dom, "//form//input[@name='lang']"):
eng_lang = x.get("value")
- if eng_lang in ('', '_empty', 'nl-BE', 'und'):
+ if eng_lang in ('', '_empty', 'nl-BE', 'und') or eng_lang.startswith('anti__'):
continue
try:
locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
@@ -186,10 +186,12 @@ def fetch_traits(engine_traits: EngineTraits):
engine_traits.languages[sxng_lang] = eng_lang
for x in eval_xpath_list(dom, "//form//input[@name='content']"):
- engine_traits.custom['content'].append(x.get("value"))
+ if not x.get("value").startswith("anti__"):
+ engine_traits.custom['content'].append(x.get("value"))
for x in eval_xpath_list(dom, "//form//input[@name='ext']"):
- engine_traits.custom['ext'].append(x.get("value"))
+ if not x.get("value").startswith("anti__"):
+ engine_traits.custom['ext'].append(x.get("value"))
for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):
engine_traits.custom['sort'].append(x.get("value"))
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index d4f46eaaf..da76f771e 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -192,8 +192,21 @@ def fetch_traits(engine_traits: EngineTraits):
# pylint: disable=import-outside-toplevel
from searx.network import get # see https://github.com/searxng/searxng/issues/762
+ from searx.utils import gen_useragent
+
+ headers = {
+ "User-Agent": gen_useragent(),
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+ "Accept-Language": "en-US;q=0.5,en;q=0.3",
+ "Accept-Encoding": "gzip, deflate, br",
+ "DNT": "1",
+ "Connection": "keep-alive",
+ "Upgrade-Insecure-Requests": "1",
+ "Sec-GPC": "1",
+ "Cache-Control": "max-age=0",
+ }
- resp = get("https://www.bing.com/account/general")
+ resp = get("https://www.bing.com/account/general", headers=headers)
if not resp.ok: # type: ignore
print("ERROR: response from bing is not OK.")
diff --git a/searx/engines/zlibrary.py b/searx/engines/zlibrary.py
index 0eed5f621..c9acb28fe 100644
--- a/searx/engines/zlibrary.py
+++ b/searx/engines/zlibrary.py
@@ -183,17 +183,27 @@ def fetch_traits(engine_traits: EngineTraits) -> None:
from searx.network import get # see https://github.com/searxng/searxng/issues/762
from searx.locales import language_tag
- resp = get(base_url, verify=False)
+ def _use_old_values():
+ # don't change anything, re-use the existing values
+ engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"]
+ engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"]
+ engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"]
+
+ try:
+ resp = get(base_url, verify=False)
+ except SearxException as exc:
+ print(f"ERROR: zlibrary domain '{base_url}' is seized?")
+ print(f" --> {exc}")
+ _use_old_values()
+ return
+
if not resp.ok: # type: ignore
raise RuntimeError("Response from zlibrary's search page is not OK.")
dom = html.fromstring(resp.text) # type: ignore
if domain_is_seized(dom):
print(f"ERROR: zlibrary domain is seized: {base_url}")
- # don't change anything, re-use the existing values
- engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"]
- engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"]
- engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"]
+ _use_old_values()
return
engine_traits.all_locale = ""