summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2024-11-17 10:02:15 +0100
committerMarkus Heiser <markus.heiser@darmarIT.de>2024-11-17 18:14:22 +0100
commit10d3af84b833ab2f2d1095efa3a7ba240ffb32fc (patch)
tree808161417c6e17d0e16afbb5d2a6ea6a4ad0ae9d
parent4b57bc3db19b373d28b55db00d728a0c610ba9af (diff)
downloadsearxng-10d3af84b833ab2f2d1095efa3a7ba240ffb32fc.tar.gz
searxng-10d3af84b833ab2f2d1095efa3a7ba240ffb32fc.zip
[fix] engine: duckduckgo - don't quote query string
The query string sent to DDG must not be quoted. The query string was URL-quoted in #4011, but the URL-quoted query string results in unexpected *URL decoded* and other garbage results, as reported in #4019 and #4020. To test, compare the results of queries like:: !ddg Häuser und Straßen :de !ddg Häuser und Straßen :all !ddg 房屋和街道 :all !ddg 房屋和街道 :zh Closed: - [#4019] https://github.com/searxng/searxng/issues/4019 - [#4020] https://github.com/searxng/searxng/issues/4020 Related: - [#4011] https://github.com/searxng/searxng/pull/4011 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
-rw-r--r--searx/engines/duckduckgo.py10
1 files changed, 7 insertions, 3 deletions
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index a7226043b..36125556b 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -6,7 +6,7 @@ DuckDuckGo Lite
from typing import TYPE_CHECKING
import re
-from urllib.parse import urlencode, quote_plus
+from urllib.parse import urlencode
import json
import babel
import lxml.html
@@ -263,7 +263,7 @@ def request(query, params):
params['url'] = url
params['method'] = 'POST'
- params['data']['q'] = quote_plus(query)
+ params['data']['q'] = query
# The API is not documented, so we do some reverse engineering and emulate
# what https://html.duckduckgo.com/html does when you press "next Page" link
@@ -381,7 +381,11 @@ def response(resp):
zero_click_info_xpath = '//div[@id="zero_click_abstract"]'
zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()
- if zero_click and "Your IP address is" not in zero_click and "Your user agent:" not in zero_click:
+ if zero_click and (
+ "Your IP address is" not in zero_click
+ and "Your user agent:" not in zero_click
+ and "URL Decoded:" not in zero_click
+ ):
current_query = resp.search_params["data"].get("q")
results.append(