diff options
author | Alexandre FLAMENT <alexandre.flament@hesge.ch> | 2022-09-02 07:33:20 +0000 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2022-09-04 09:07:28 +0200 |
commit | dd0887be186d208846cdc7c3df13dde020dfa957 (patch) | |
tree | 2d2518e194b905407c396d70b2f8f40864869ec8 /searx/engines/xpath.py | |
parent | a15dfa5ee11228881f7a846f89196cbdb69021fb (diff) | |
download | searxng-dd0887be186d208846cdc7c3df13dde020dfa957.tar.gz searxng-dd0887be186d208846cdc7c3df13dde020dfa957.zip |
xpath engine: change raise_for_httperror to no_result_for_http_status
no_result_for_http_status contains a list of HTTP status codes.
Responses with these HTTP status codes are treated as an empty result list.
In other cases an exception is thrown as usual.
Previously, raise_for_httperror ignored all HTTP errors,
which made defective engines invisible in the stats.
Diffstat (limited to 'searx/engines/xpath.py')
-rw-r--r-- | searx/engines/xpath.py | 23 |
1 files changed, 18 insertions, 5 deletions
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 97656705a..f9528e92d 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -22,6 +22,7 @@ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list +from searx.network import raise_for_httperror search_url = None """ @@ -60,9 +61,14 @@ lang_all = 'en' '''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is selected. ''' -raise_for_httperror = True -'''True by default: raise an exception if the HTTP code of response is ``>= -300``''' + +no_result_for_http_status = [] +'''Return empty result for these HTTP status codes instead of throwing an error. + +.. code:: yaml + + no_result_for_http_status: [] +''' soft_max_redirects = 0 '''Maximum redirects, soft limit. Record an error but don't stop the engine''' @@ -179,12 +185,19 @@ def request(query, params): params['url'] = search_url.format(**fargs) params['soft_max_redirects'] = soft_max_redirects - params['raise_for_httperror'] = raise_for_httperror + + params['raise_for_httperror'] = False + return params -def response(resp): +def response(resp): # pylint: disable=too-many-branches '''Scrap *results* from the response (see :ref:`engine results`).''' + if no_result_for_http_status and resp.status_code in no_result_for_http_status: + return [] + + raise_for_httperror(resp) + results = [] dom = html.fromstring(resp.text) is_onion = 'onions' in categories |