[fix] engine zlibrary: handle seized domain

The domains of zlibrary instances are known to be seized from time to time. This leads to problems when, for example, automated tasks try to update the engine traits (aka fetch_traits). The search function should also generate a suitable error message; currently, either SSL errors or empty result lists are returned [1].

[1] https://github.com/searxng/searxng/issues/3610

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
author: Markus Heiser <markus.heiser@darmarit.de> 2024-06-25 14:15:24 +0200
committer: Markus Heiser <markus.heiser@darmarIT.de> 2024-06-25 14:40:19 +0200
commit: 39ffec87b7f8631bb0ab3ba74971fbd4bcfa7520 (patch)
tree: 08c86b0275eb5d9403ad076ea6106e0ec6c4fc76 /searx
parent: b8fa4d61958c5c78d312376bfcfc5d2c5863ab69 (diff)
download: searxng-39ffec87b7f8631bb0ab3ba74971fbd4bcfa7520.tar.gz
searxng-39ffec87b7f8631bb0ab3ba74971fbd4bcfa7520.zip
1 files changed, 23 insertions, 6 deletions
diff --git a/searx/engines/zlibrary.py b/searx/engines/zlibrary.py
index c29c9135c..0eed5f621 100644
--- a/searx/engines/zlibrary.py
+++ b/searx/engines/zlibrary.py
@@ -43,6 +43,7 @@ from flask_babel import gettext
 from searx.utils import extract_text, eval_xpath, eval_xpath_list
 from searx.enginelib.traits import EngineTraits
 from searx.data import ENGINE_TRAITS
+from searx.exceptions import SearxException
 
 if TYPE_CHECKING:
     import httpx
@@ -108,13 +109,21 @@ def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
         zlib_year_to=zlib_year_to,
         zlib_ext=zlib_ext,
     )
+    params["verify"] = False
     return params
 
 
+def domain_is_seized(dom):
+    return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower())
+
+
 def response(resp: httpx.Response) -> List[Dict[str, Any]]:
     results: List[Dict[str, Any]] = []
     dom = html.fromstring(resp.text)
 
+    if domain_is_seized(dom):
+        raise SearxException(f"zlibrary domain is seized: {base_url}")
+
     for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'):
         results.append(_parse_result(item))
 
@@ -168,22 +177,30 @@ def _parse_result(item) -> Dict[str, Any]:
 
 def fetch_traits(engine_traits: EngineTraits) -> None:
     """Fetch languages and other search arguments from zlibrary's search form."""
-    # pylint: disable=import-outside-toplevel
+    # pylint: disable=import-outside-toplevel, too-many-branches
 
     import babel
     from searx.network import get  # see https://github.com/searxng/searxng/issues/762
     from searx.locales import language_tag
 
+    resp = get(base_url, verify=False)
+    if not resp.ok:  # type: ignore
+        raise RuntimeError("Response from zlibrary's search page is not OK.")
+    dom = html.fromstring(resp.text)  # type: ignore
+
+    if domain_is_seized(dom):
+        print(f"ERROR: zlibrary domain is seized: {base_url}")
+        # don't change anything, re-use the existing values
+        engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"]
+        engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"]
+        engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"]
+        return
+
     engine_traits.all_locale = ""
     engine_traits.custom["ext"] = []
     engine_traits.custom["year_from"] = []
     engine_traits.custom["year_to"] = []
 
-    resp = get(base_url)
-    if not resp.ok:  # type: ignore
-        raise RuntimeError("Response from zlibrary's search page is not OK.")
-    dom = html.fromstring(resp.text)  # type: ignore
-
     for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearFrom']/option"):
         engine_traits.custom["year_from"].append(year.get("value"))
author	Markus Heiser <markus.heiser@darmarit.de>	2024-06-25 14:15:24 +0200
committer	Markus Heiser <markus.heiser@darmarIT.de>	2024-06-25 14:40:19 +0200
commit	39ffec87b7f8631bb0ab3ba74971fbd4bcfa7520 (patch)
tree	08c86b0275eb5d9403ad076ea6106e0ec6c4fc76 /searx
parent	b8fa4d61958c5c78d312376bfcfc5d2c5863ab69 (diff)
download	searxng-39ffec87b7f8631bb0ab3ba74971fbd4bcfa7520.tar.gz searxng-39ffec87b7f8631bb0ab3ba74971fbd4bcfa7520.zip