[fix] engine & network issues / documentation and type annotations

This patch fixes some quirks and issues related to the engines and the network. Each engine has its own network and this network was broken for the following engines[1]: - archlinux - bing - dailymotion - duckduckgo - google - peertube - startpage - wikipedia Since the files have been touched anyway, the type annotations of the engine modules have also been completed so that error messages from the type checker are no longer reported. Related and (partially) fixed issues: - [1] https://github.com/searxng/searxng/issues/762#issuecomment-1605323861 - [2] https://github.com/searxng/searxng/issues/2513 - [3] https://github.com/searxng/searxng/issues/2515 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
author: Markus Heiser <markus.heiser@darmarit.de> 2023-06-25 12:37:31 +0200
committer: Markus Heiser <markus.heiser@darmarIT.de> 2023-06-25 13:58:26 +0200
commit: e8706fb738da9feb21e596f403dddb40e69c8a7b (patch)
tree: 1ddf3dbd2860d65de879d9feecf7df01a7727680 /searx/engines/startpage.py
parent: 2e4a435134e0f677fbe24853dd81453a54770674 (diff)
download: searxng-e8706fb738da9feb21e596f403dddb40e69c8a7b.tar.gz
searxng-e8706fb738da9feb21e596f403dddb40e69c8a7b.zip
1 files changed, 15 insertions, 13 deletions
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 2813d0bf3..92d69867a 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -91,8 +91,8 @@ import dateutil.parser
 import lxml.html
 import babel
 
-from searx import network
 from searx.utils import extract_text, eval_xpath, gen_useragent
+from searx.network import get  # see https://github.com/searxng/searxng/issues/762
 from searx.exceptions import SearxEngineCaptchaException
 from searx.locales import region_tag
 from searx.enginelib.traits import EngineTraits
@@ -211,25 +211,25 @@ def get_sc_code(searxng_locale, params):
     get_sc_url = base_url + '/?sc=%s' % (sc_code)
     logger.debug("query new sc time-stamp ... %s", get_sc_url)
     logger.debug("headers: %s", headers)
-    resp = network.get(get_sc_url, headers=headers)
+    resp = get(get_sc_url, headers=headers)
 
     # ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers)
     # ?? https://www.startpage.com/sp/cdn/images/filter-chevron.svg
     # ?? ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21
 
-    if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
+    if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):  # type: ignore
         raise SearxEngineCaptchaException(
             message="get_sc_code: got redirected to https://www.startpage.com/sp/captcha",
         )
 
-    dom = lxml.html.fromstring(resp.text)
+    dom = lxml.html.fromstring(resp.text)  # type: ignore
 
     try:
         sc_code = eval_xpath(dom, search_form_xpath + '//input[@name="sc"]/@value')[0]
     except IndexError as exc:
         logger.debug("suspend startpage API --> https://github.com/searxng/searxng/pull/695")
         raise SearxEngineCaptchaException(
-            message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url,
+            message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url,  # type: ignore
         ) from exc
 
     sc_code_ts = time()
@@ -350,7 +350,7 @@ def _response_cat_web(dom):
         title = extract_text(link)
 
         if eval_xpath(result, content_xpath):
-            content = extract_text(eval_xpath(result, content_xpath))
+            content: str = extract_text(eval_xpath(result, content_xpath))  # type: ignore
         else:
             content = ''
 
@@ -374,7 +374,7 @@ def _response_cat_web(dom):
             date_string = content[0 : date_pos - 5]
 
             # calculate datetime
-            published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))
+            published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))  # type: ignore
 
             # fix content string
             content = content[date_pos:]
@@ -399,12 +399,12 @@ def fetch_traits(engine_traits: EngineTraits):
         'User-Agent': gen_useragent(),
         'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
     }
-    resp = network.get('https://www.startpage.com/do/settings', headers=headers)
+    resp = get('https://www.startpage.com/do/settings', headers=headers)
 
-    if not resp.ok:
+    if not resp.ok:  # type: ignore
         print("ERROR: response from Startpage is not OK.")
 
-    dom = lxml.html.fromstring(resp.text)
+    dom = lxml.html.fromstring(resp.text)  # type: ignore
 
     # regions
 
@@ -443,8 +443,10 @@ def fetch_traits(engine_traits: EngineTraits):
 
     # get the native name of every language known by babel
 
-    for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()):
-        native_name = babel.Locale(lang_code).get_language_name().lower()
+    for lang_code in filter(
+        lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()  # type: ignore
+    ):
+        native_name = babel.Locale(lang_code).get_language_name().lower()  # type: ignore
         # add native name exactly as it is
         catalog_engine2code[native_name] = lang_code
 
@@ -478,7 +480,7 @@ def fetch_traits(engine_traits: EngineTraits):
         eng_tag = option.get('value')
         if eng_tag in skip_eng_tags:
             continue
-        name = extract_text(option).lower()
+        name = extract_text(option).lower()  # type: ignore
 
         sxng_tag = catalog_engine2code.get(eng_tag)
         if sxng_tag is None:
author	Markus Heiser <markus.heiser@darmarit.de>	2023-06-25 12:37:31 +0200
committer	Markus Heiser <markus.heiser@darmarIT.de>	2023-06-25 13:58:26 +0200
commit	e8706fb738da9feb21e596f403dddb40e69c8a7b (patch)
tree	1ddf3dbd2860d65de879d9feecf7df01a7727680 /searx/engines/startpage.py
parent	2e4a435134e0f677fbe24853dd81453a54770674 (diff)
download	searxng-e8706fb738da9feb21e596f403dddb40e69c8a7b.tar.gz searxng-e8706fb738da9feb21e596f403dddb40e69c8a7b.zip