summaryrefslogtreecommitdiff
path: root/searx/engines/duckduckgo.py
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2024-10-19 14:19:27 +0200
committerMarkus Heiser <markus.heiser@darmarIT.de>2024-10-19 14:55:44 +0200
commit050451347b021d05d26c7a0797c790bbd83442e4 (patch)
treeea7be5222edf495bb7ca46be0de5b344417ca43d /searx/engines/duckduckgo.py
parent88caa1d7dbc44aa421f8d8713c3174a501a3f364 (diff)
downloadsearxng-050451347b021d05d26c7a0797c790bbd83442e4.tar.gz
searxng-050451347b021d05d26c7a0797c790bbd83442e4.zip
[fix] engine: duckduckgo - CAPTCHA detection
The previous implementation could not distinguish a CAPTCHA response from an ordinary result list: a CAPTCHA response was treated as a result list with no items in it. DDG does not block IPs. Instead, a CAPTCHA wall is placed in front of any request that DDG considers dubious. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/duckduckgo.py')
-rw-r--r--searx/engines/duckduckgo.py11
1 files changed, 11 insertions, 0 deletions
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 27171778d..2a917ed7a 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -25,6 +25,7 @@ from searx.network import get # see https://github.com/searxng/searxng/issues/7
from searx import redisdb
from searx.enginelib.traits import EngineTraits
from searx.utils import extr
+from searx.exceptions import SearxEngineCaptchaException
if TYPE_CHECKING:
import logging
@@ -292,6 +293,15 @@ def request(query, params):
return params
+def detect_ddg_captcha(dom):
+ """Raise :py:obj:`SearxEngineCaptchaException` if *dom* holds DDG's *not a Robot*
+ dialog (``challenge-form``); DDG shows it in place instead of redirecting to a CAPTCHA page.
+ """
+ if eval_xpath(dom, "//form[@id='challenge-form']"):
+ # a suspend time of zero is OK --> DDG does not block the IP
+ raise SearxEngineCaptchaException(suspended_time=0)
+
+
def response(resp):
if resp.status_code == 303:
@@ -299,6 +309,7 @@ def response(resp):
results = []
doc = lxml.html.fromstring(resp.text)
+ detect_ddg_captcha(doc)
result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')