summaryrefslogtreecommitdiff
path: root/searx/engines
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines')
-rw-r--r--searx/engines/duckduckgo.py63
-rw-r--r--searx/engines/duckduckgo_extra.py9
2 files changed, 53 insertions, 19 deletions
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 3ce629eec..5ae456b04 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -61,17 +61,7 @@ form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
def cache_vqd(query, value):
- """Caches a ``vqd`` value from a query.
-
- The vqd value depends on the query string and is needed for the follow up
- pages or the images loaded by a XMLHttpRequest:
-
- - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
- - DuckDuckGo Images: ``https://duckduckgo.com/i.js??q=...&vqd=...``
- - DuckDuckGo Videos: ``https://duckduckgo.com/v.js??q=...&vqd=...``
- - DuckDuckGo News: ``https://duckduckgo.com/news.js??q=...&vqd=...``
-
- """
+ """Caches a ``vqd`` value from a query."""
c = redisdb.client()
if c:
logger.debug("cache vqd value: %s", value)
@@ -84,13 +74,43 @@ def get_vqd(query):
(:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
response.
+ .. hint::
+
+ If an empty string is returned there are no results for the ``query`` and
+ therefore no ``vqd`` value.
+
+ DDG's bot detection is sensitive to the ``vqd`` value. For some search terms
+ (such as extremely long search terms that are often sent by bots), no ``vqd``
+ value can be determined.
+
+ If SearXNG cannot determine a ``vqd`` value, then no request should go out
+ to DDG:
+
+ A request with a wrong ``vqd`` value leads to DDG temporarily putting
+ SearXNG's IP on a block list.
+
+ Requests from IPs in this block list run into timeouts.
+
+ Not sure, but it seems the block list is a sliding window: to get my IP
+ removed from the block list I had to cool down my IP for 1h (send no
+ requests from that IP to DDG).
+
+ TL;DR: the ``vqd`` value is needed to pass DDG's bot protection and is used
+ by all requests to DDG:
+
+ - DuckDuckGo Lite: ``https://lite.duckduckgo.com/lite`` (POST form data)
+ - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
+ - DuckDuckGo Images: ``https://duckduckgo.com/i.js?q=...&vqd=...``
+ - DuckDuckGo Videos: ``https://duckduckgo.com/v.js?q=...&vqd=...``
+ - DuckDuckGo News: ``https://duckduckgo.com/news.js?q=...&vqd=...``
+
"""
- value = None
+ value = ''
c = redisdb.client()
if c:
key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
value = c.get(key)
- if value:
+ if value or value == b'':
value = value.decode('utf-8')
logger.debug("re-use cached vqd value: %s", value)
return value
@@ -102,9 +122,11 @@ def get_vqd(query):
if value:
value = value[0]
else:
- # some search terms do not have results and therefore no vqd value
+ # Some search terms do not have results and therefore no vqd value. If
+ # no vqd value can be determined for the search term, an empty string is
+ # cached.
value = ''
- logger.debug("new vqd value: %s", value)
+ logger.debug("new vqd value: '%s'", value)
cache_vqd(query, value)
return value
@@ -204,6 +226,13 @@ ddg_lang_map = {
def request(query, params):
+ # request needs a vqd argument
+ vqd = get_vqd(query)
+ if not vqd:
+ # some search terms do not have results and therefore no vqd value
+ params['url'] = None
+ return params
+
# quote ddg bangs
query_parts = []
# for val in re.split(r'(\s+)', query):
@@ -227,6 +256,7 @@ def request(query, params):
# link again and again ..
params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
+ params['data']['vqd'] = vqd
# initial page does not have an offset
if params['pageno'] == 2:
@@ -248,9 +278,6 @@ def request(query, params):
params['data']['api'] = form_data.get('api', 'd.js')
params['data']['nextParams'] = form_data.get('nextParams', '')
params['data']['v'] = form_data.get('v', 'l')
-
- # request needs a vqd argument
- params['data']['vqd'] = get_vqd(query)
params['headers']['Referer'] = 'https://lite.duckduckgo.com/'
params['data']['kl'] = eng_region
diff --git a/searx/engines/duckduckgo_extra.py b/searx/engines/duckduckgo_extra.py
index 7e3a3282d..25692add7 100644
--- a/searx/engines/duckduckgo_extra.py
+++ b/searx/engines/duckduckgo_extra.py
@@ -48,6 +48,13 @@ search_path_map = {'images': 'i', 'videos': 'v', 'news': 'news'}
def request(query, params):
+ # request needs a vqd argument
+ vqd = get_vqd(query)
+ if not vqd:
+ # some search terms do not have results and therefore no vqd value
+ params['url'] = None
+ return params
+
eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
eng_lang = get_ddg_lang(traits, params['searxng_locale'])
@@ -57,7 +64,7 @@ def request(query, params):
# 'u': 'bing',
'l': eng_region,
'f': ',,,,,',
- 'vqd': get_vqd(query),
+ 'vqd': vqd,
}
if params['pageno'] > 1: