summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBnyro <bnyro@tutanota.com>2023-10-07 10:26:04 +0200
committerMarkus Heiser <markus.heiser@darmarIT.de>2023-10-09 06:53:43 +0200
commit48cb58bd2ec4eb9cb4ba416f7ece75c3c6c41e55 (patch)
tree76f9f4e619a3391bc571e9f23ae3f6308c8df54c
parentc3ab49cd903d27905d2da6f70699a55c9a74593e (diff)
downloadsearxng-48cb58bd2ec4eb9cb4ba416f7ece75c3c6c41e55.tar.gz
searxng-48cb58bd2ec4eb9cb4ba416f7ece75c3c6c41e55.zip
[feat] duckduckgo: support for videos and news
-rw-r--r--docs/dev/engines/online/duckduckgo.rst2
-rw-r--r--searx/data/engine_traits.json328
-rw-r--r--searx/engines/duckduckgo.py6
-rw-r--r--searx/engines/duckduckgo_extra.py (renamed from searx/engines/duckduckgo_images.py)71
-rw-r--r--searx/settings.yml19
5 files changed, 405 insertions, 21 deletions
diff --git a/docs/dev/engines/online/duckduckgo.rst b/docs/dev/engines/online/duckduckgo.rst
index a73b38983..0f1258ff9 100644
--- a/docs/dev/engines/online/duckduckgo.rst
+++ b/docs/dev/engines/online/duckduckgo.rst
@@ -12,7 +12,7 @@ DuckDuckGo Engines
.. automodule:: searx.engines.duckduckgo
:members:
-.. automodule:: searx.engines.duckduckgo_images
+.. automodule:: searx.engines.duckduckgo_extra
:members:
.. automodule:: searx.engines.duckduckgo_definitions
diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json
index aee199b30..9cb47ee38 100644
--- a/searx/data/engine_traits.json
+++ b/searx/data/engine_traits.json
@@ -2390,6 +2390,334 @@
"zh-TW": "tw-tzh"
}
},
+ "duckduckgo videos": {
+ "all_locale": "wt-wt",
+ "custom": {
+ "lang_region": {
+ "ar-DZ": "ar_DZ",
+ "ar-JO": "ar_JO",
+ "ar-SA": "ar_SA",
+ "bn-IN": "bn_IN",
+ "de-CH": "de_CH",
+ "en-AU": "en_AU",
+ "en-CA": "en_CA",
+ "en-GB": "en_GB",
+ "es-AR": "es_AR",
+ "es-CL": "es_CL",
+ "es-CO": "es_CO",
+ "es-CR": "es_CR",
+ "es-EC": "es_EC",
+ "es-MX": "es_MX",
+ "es-PE": "es_PE",
+ "es-UY": "es_UY",
+ "es-VE": "es_VE",
+ "fr-BE": "fr_BE",
+ "fr-CA": "fr_CA",
+ "fr-CH": "fr_CH",
+ "nl-BE": "nl_BE",
+ "pt-BR": "pt_BR"
+ }
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "af_ZA",
+ "ar": "ar_EG",
+ "ast": "ast_ES",
+ "az_Latn": "az_AZ",
+ "be": "be_BY",
+ "bg": "bg_BG",
+ "bn": "bn_BD",
+ "br": "br_FR",
+ "bs_Latn": "bs_BA",
+ "ca": "ca_ES",
+ "cs": "cs_CZ",
+ "cy": "cy_GB",
+ "da": "da_DK",
+ "de": "de_DE",
+ "el": "el_GR",
+ "en": "en_US",
+ "eo": "eo_XX",
+ "es": "es_ES",
+ "et": "et_EE",
+ "eu": "eu_ES",
+ "fa": "fa_IR",
+ "fi": "fi_FI",
+ "fil": "tl_PH",
+ "fr": "fr_FR",
+ "ga": "ga_IE",
+ "gd": "gd_GB",
+ "gl": "gl_ES",
+ "he": "he_IL",
+ "hi": "hi_IN",
+ "hr": "hr_HR",
+ "hu": "hu_HU",
+ "hy": "hy_AM",
+ "id": "id_ID",
+ "is": "is_IS",
+ "it": "it_IT",
+ "ja": "ja_JP",
+ "kab": "kab_DZ",
+ "kn": "kn_IN",
+ "ko": "ko_KR",
+ "ku": "ku",
+ "kw": "kw_GB",
+ "lt": "lt_LT",
+ "lv": "lv_LV",
+ "ml": "ml_IN",
+ "mr": "mr_IN",
+ "ms": "ms_MY",
+ "nb": "nb_NO",
+ "nl": "nl_NL",
+ "nn": "nn_NO",
+ "pl": "pl_PL",
+ "pt": "pt_PT",
+ "ro": "ro_RO",
+ "ru": "ru_RU",
+ "sc": "sc_IT",
+ "si": "si_LK",
+ "sk": "sk_SK",
+ "sl": "sl_SI",
+ "sq": "sq_AL",
+ "sr_Cyrl": "sr_RS",
+ "sv": "sv_SE",
+ "ta": "ta_IN",
+ "te": "te_IN",
+ "th": "th_TH",
+ "tr": "tr_TR",
+ "uk": "uk_UA",
+ "ur": "ur_PK",
+ "vi": "vi_VN",
+ "zh_Hans": "zh_CN",
+ "zh_Hant": "zh_TW"
+ },
+ "regions": {
+ "ar-SA": "xa-ar",
+ "bg-BG": "bg-bg",
+ "ca-ES": "es-ca",
+ "cs-CZ": "cz-cs",
+ "da-DK": "dk-da",
+ "de-AT": "at-de",
+ "de-CH": "ch-de",
+ "de-DE": "de-de",
+ "el-GR": "gr-el",
+ "en-AU": "au-en",
+ "en-CA": "ca-en",
+ "en-GB": "uk-en",
+ "en-IE": "ie-en",
+ "en-IL": "il-en",
+ "en-IN": "in-en",
+ "en-MY": "my-en",
+ "en-NZ": "nz-en",
+ "en-PH": "ph-en",
+ "en-PK": "pk-en",
+ "en-SG": "sg-en",
+ "en-US": "us-en",
+ "en-ZA": "za-en",
+ "es-AR": "ar-es",
+ "es-CL": "cl-es",
+ "es-CO": "co-es",
+ "es-ES": "es-es",
+ "es-MX": "mx-es",
+ "es-PE": "pe-es",
+ "es-US": "us-es",
+ "et-EE": "ee-et",
+ "fi-FI": "fi-fi",
+ "fr-BE": "be-fr",
+ "fr-CA": "ca-fr",
+ "fr-CH": "ch-fr",
+ "fr-FR": "fr-fr",
+ "hr-HR": "hr-hr",
+ "hu-HU": "hu-hu",
+ "id-ID": "id-en",
+ "it-IT": "it-it",
+ "ja-JP": "jp-jp",
+ "ko-KR": "kr-kr",
+ "lt-LT": "lt-lt",
+ "lv-LV": "lv-lv",
+ "nb-NO": "no-no",
+ "nl-BE": "be-nl",
+ "nl-NL": "nl-nl",
+ "pl-PL": "pl-pl",
+ "pt-BR": "br-pt",
+ "pt-PT": "pt-pt",
+ "ro-RO": "ro-ro",
+ "ru-RU": "ru-ru",
+ "sk-SK": "sk-sk",
+ "sl-SI": "sl-sl",
+ "sv-SE": "se-sv",
+ "th-TH": "th-en",
+ "tr-TR": "tr-tr",
+ "uk-UA": "ua-uk",
+ "vi-VN": "vn-en",
+ "zh-CN": "cn-zh",
+ "zh-HK": "hk-tzh",
+ "zh-TW": "tw-tzh"
+ }
+ },
+ "duckduckgo news": {
+ "all_locale": "wt-wt",
+ "custom": {
+ "lang_region": {
+ "ar-DZ": "ar_DZ",
+ "ar-JO": "ar_JO",
+ "ar-SA": "ar_SA",
+ "bn-IN": "bn_IN",
+ "de-CH": "de_CH",
+ "en-AU": "en_AU",
+ "en-CA": "en_CA",
+ "en-GB": "en_GB",
+ "es-AR": "es_AR",
+ "es-CL": "es_CL",
+ "es-CO": "es_CO",
+ "es-CR": "es_CR",
+ "es-EC": "es_EC",
+ "es-MX": "es_MX",
+ "es-PE": "es_PE",
+ "es-UY": "es_UY",
+ "es-VE": "es_VE",
+ "fr-BE": "fr_BE",
+ "fr-CA": "fr_CA",
+ "fr-CH": "fr_CH",
+ "nl-BE": "nl_BE",
+ "pt-BR": "pt_BR"
+ }
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "af_ZA",
+ "ar": "ar_EG",
+ "ast": "ast_ES",
+ "az_Latn": "az_AZ",
+ "be": "be_BY",
+ "bg": "bg_BG",
+ "bn": "bn_BD",
+ "br": "br_FR",
+ "bs_Latn": "bs_BA",
+ "ca": "ca_ES",
+ "cs": "cs_CZ",
+ "cy": "cy_GB",
+ "da": "da_DK",
+ "de": "de_DE",
+ "el": "el_GR",
+ "en": "en_US",
+ "eo": "eo_XX",
+ "es": "es_ES",
+ "et": "et_EE",
+ "eu": "eu_ES",
+ "fa": "fa_IR",
+ "fi": "fi_FI",
+ "fil": "tl_PH",
+ "fr": "fr_FR",
+ "ga": "ga_IE",
+ "gd": "gd_GB",
+ "gl": "gl_ES",
+ "he": "he_IL",
+ "hi": "hi_IN",
+ "hr": "hr_HR",
+ "hu": "hu_HU",
+ "hy": "hy_AM",
+ "id": "id_ID",
+ "is": "is_IS",
+ "it": "it_IT",
+ "ja": "ja_JP",
+ "kab": "kab_DZ",
+ "kn": "kn_IN",
+ "ko": "ko_KR",
+ "ku": "ku",
+ "kw": "kw_GB",
+ "lt": "lt_LT",
+ "lv": "lv_LV",
+ "ml": "ml_IN",
+ "mr": "mr_IN",
+ "ms": "ms_MY",
+ "nb": "nb_NO",
+ "nl": "nl_NL",
+ "nn": "nn_NO",
+ "pl": "pl_PL",
+ "pt": "pt_PT",
+ "ro": "ro_RO",
+ "ru": "ru_RU",
+ "sc": "sc_IT",
+ "si": "si_LK",
+ "sk": "sk_SK",
+ "sl": "sl_SI",
+ "sq": "sq_AL",
+ "sr_Cyrl": "sr_RS",
+ "sv": "sv_SE",
+ "ta": "ta_IN",
+ "te": "te_IN",
+ "th": "th_TH",
+ "tr": "tr_TR",
+ "uk": "uk_UA",
+ "ur": "ur_PK",
+ "vi": "vi_VN",
+ "zh_Hans": "zh_CN",
+ "zh_Hant": "zh_TW"
+ },
+ "regions": {
+ "ar-SA": "xa-ar",
+ "bg-BG": "bg-bg",
+ "ca-ES": "es-ca",
+ "cs-CZ": "cz-cs",
+ "da-DK": "dk-da",
+ "de-AT": "at-de",
+ "de-CH": "ch-de",
+ "de-DE": "de-de",
+ "el-GR": "gr-el",
+ "en-AU": "au-en",
+ "en-CA": "ca-en",
+ "en-GB": "uk-en",
+ "en-IE": "ie-en",
+ "en-IL": "il-en",
+ "en-IN": "in-en",
+ "en-MY": "my-en",
+ "en-NZ": "nz-en",
+ "en-PH": "ph-en",
+ "en-PK": "pk-en",
+ "en-SG": "sg-en",
+ "en-US": "us-en",
+ "en-ZA": "za-en",
+ "es-AR": "ar-es",
+ "es-CL": "cl-es",
+ "es-CO": "co-es",
+ "es-ES": "es-es",
+ "es-MX": "mx-es",
+ "es-PE": "pe-es",
+ "es-US": "us-es",
+ "et-EE": "ee-et",
+ "fi-FI": "fi-fi",
+ "fr-BE": "be-fr",
+ "fr-CA": "ca-fr",
+ "fr-CH": "ch-fr",
+ "fr-FR": "fr-fr",
+ "hr-HR": "hr-hr",
+ "hu-HU": "hu-hu",
+ "id-ID": "id-en",
+ "it-IT": "it-it",
+ "ja-JP": "jp-jp",
+ "ko-KR": "kr-kr",
+ "lt-LT": "lt-lt",
+ "lv-LV": "lv-lv",
+ "nb-NO": "no-no",
+ "nl-BE": "be-nl",
+ "nl-NL": "nl-nl",
+ "pl-PL": "pl-pl",
+ "pt-BR": "br-pt",
+ "pt-PT": "pt-pt",
+ "ro-RO": "ro-ro",
+ "ru-RU": "ru-ru",
+ "sk-SK": "sk-sk",
+ "sl-SI": "sl-sl",
+ "sv-SE": "se-sv",
+ "th-TH": "th-en",
+ "tr-TR": "tr-tr",
+ "uk-UA": "ua-uk",
+ "vi-VN": "vn-en",
+ "zh-CN": "cn-zh",
+ "zh-HK": "hk-tzh",
+ "zh-TW": "tw-tzh"
+ }
+ },
"duckduckgo weather": {
"all_locale": "wt-wt",
"custom": {
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index ebb4745b9..d0e818faf 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -66,8 +66,10 @@ def cache_vqd(query, value):
The vqd value depends on the query string and is needed for the follow up
pages or the images loaded by a XMLHttpRequest:
- - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
- - DuckDuckGo Images: `https://duckduckgo.com/i.js??q=...&vqd=...`
+ - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
+ - DuckDuckGo Images: ``https://duckduckgo.com/i.js?q=...&vqd=...``
+ - DuckDuckGo Videos: ``https://duckduckgo.com/v.js?q=...&vqd=...``
+ - DuckDuckGo News: ``https://duckduckgo.com/news.js?q=...&vqd=...``
"""
c = redisdb.client()
diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_extra.py
index 7e7f133b1..7e3a3282d 100644
--- a/searx/engines/duckduckgo_images.py
+++ b/searx/engines/duckduckgo_extra.py
@@ -1,9 +1,10 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
-DuckDuckGo Images
-~~~~~~~~~~~~~~~~~
+DuckDuckGo Extra (images, videos, news)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
+from datetime import datetime
from typing import TYPE_CHECKING
from urllib.parse import urlencode
@@ -32,6 +33,9 @@ about = {
# engine dependent config
categories = ['images', 'web']
+ddg_category = 'images'
+"""The category must be one of ``images``, ``videos`` or ``news``.
+"""
paging = True
safesearch = True
send_accept_language_header = True
@@ -39,6 +43,8 @@ send_accept_language_header = True
safesearch_cookies = {0: '-2', 1: None, 2: '1'}
safesearch_args = {0: '1', 1: None, 2: '1'}
+search_path_map = {'images': 'i', 'videos': 'v', 'news': 'news'}
+
def request(query, params):
@@ -69,28 +75,61 @@ def request(query, params):
args['p'] = safe_search # "-1", "1"
logger.debug("cookies: %s", params['cookies'])
- args = urlencode(args)
- params['url'] = 'https://duckduckgo.com/i.js?{args}'.format(args=args)
+
+ params['url'] = f'https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}'
return params
+def _image_result(result):
+ return {
+ 'template': 'images.html',
+ 'url': result['url'],
+ 'title': result['title'],
+ 'content': '',
+ 'thumbnail_src': result['thumbnail'],
+ 'img_src': result['image'],
+ 'img_format': '%s x %s' % (result['width'], result['height']),
+ 'source': result['source'],
+ }
+
+
+def _video_result(result):
+ return {
+ 'template': 'videos.html',
+ 'url': result['content'],
+ 'title': result['title'],
+ 'content': result['description'],
+ 'thumbnail': result['images'].get('small') or result['images'].get('medium'),
+ 'iframe_src': result['embed_url'],
+ 'source': result['provider'],
+ 'length': result['duration'],
+ 'metadata': result.get('uploader'),
+ }
+
+
+def _news_result(result):
+ return {
+ 'url': result['url'],
+ 'title': result['title'],
+ 'content': result['excerpt'],
+ 'source': result['source'],
+ 'publishedDate': datetime.utcfromtimestamp(result['date']),
+ }
+
+
def response(resp):
results = []
res_json = resp.json()
for result in res_json['results']:
- results.append(
- {
- 'template': 'images.html',
- 'title': result['title'],
- 'content': '',
- 'thumbnail_src': result['thumbnail'],
- 'img_src': result['image'],
- 'url': result['url'],
- 'img_format': '%s x %s' % (result['width'], result['height']),
- 'source': result['source'],
- }
- )
+ if ddg_category == 'images':
+ results.append(_image_result(result))
+ elif ddg_category == 'videos':
+ results.append(_video_result(result))
+ elif ddg_category == 'news':
+ results.append(_news_result(result))
+ else:
+ raise ValueError(f"Invalid duckduckgo category: {ddg_category}")
return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 5012f07ad..8b6d32301 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -603,9 +603,24 @@ engines:
shortcut: ddg
- name: duckduckgo images
- engine: duckduckgo_images
+ engine: duckduckgo_extra
+ categories: [images, web]
+ ddg_category: images
shortcut: ddi
- timeout: 3.0
+ disabled: true
+
+ - name: duckduckgo videos
+ engine: duckduckgo_extra
+ categories: [videos, web]
+ ddg_category: videos
+ shortcut: ddv
+ disabled: true
+
+ - name: duckduckgo news
+ engine: duckduckgo_extra
+ categories: [news, web]
+ ddg_category: news
+ shortcut: ddn
disabled: true
- name: duckduckgo weather