diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2024-10-19 14:19:27 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2024-10-19 14:55:44 +0200 |
commit | 050451347b021d05d26c7a0797c790bbd83442e4 (patch) | |
tree | ea7be5222edf495bb7ca46be0de5b344417ca43d /searx/engines/duckduckgo.py | |
parent | 88caa1d7dbc44aa421f8d8713c3174a501a3f364 (diff) | |
download | searxng-050451347b021d05d26c7a0797c790bbd83442e4.tar.gz searxng-050451347b021d05d26c7a0797c790bbd83442e4.zip |
[fix] engine: duckduckgo - CAPTCHA detection
The previous implementation could not distinguish a CAPTCHA response from an
ordinary result list: a CAPTCHA page was treated as a result list containing
no items.
DDG does not block IPs. Instead, it places a CAPTCHA wall in front of any
request it considers dubious.
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/duckduckgo.py')
-rw-r--r-- | searx/engines/duckduckgo.py | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 27171778d..2a917ed7a 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -25,6 +25,7 @@ from searx.network import get # see https://github.com/searxng/searxng/issues/7 from searx import redisdb from searx.enginelib.traits import EngineTraits from searx.utils import extr +from searx.exceptions import SearxEngineCaptchaException if TYPE_CHECKING: import logging @@ -292,6 +293,15 @@ def request(query, params): return params +def detect_ddg_captcha(dom): + """In case of CAPTCHA ddg open its own *not a Robot* dialog and is + not redirected to CAPTCHA page. + """ + if eval_xpath(dom, "//form[@id='challenge-form']"): + # set suspend time to zero is OK --> ddg does not block the IP + raise SearxEngineCaptchaException(suspended_time=0) + + def response(resp): if resp.status_code == 303: @@ -299,6 +309,7 @@ def response(resp): results = [] doc = lxml.html.fromstring(resp.text) + detect_ddg_captcha(doc) result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table') |