author    Markus Heiser <markus.heiser@darmarIT.de>   2022-08-01 17:59:49 +0200
committer GitHub <noreply@github.com>                 2022-08-01 17:59:49 +0200
commit    7c9c112484ff091a7c11df1dbb645616b57d662f (patch)
tree      844dc7ca4d31f0ff97c07d1817dbfba591420b30 /searx
parent    1fbb514a4ead209c95b4ddca0430f754a4c11554 (diff)
parent    8df1f0c47e03fe7525c40a2856dba950bab8998b (diff)
Merge pull request #1560 from return42/http-accept-language
[mod] add 'Accept-Language' HTTP header to online processors
Diffstat (limited to 'searx')
 searx/engines/__init__.py                    |  1
 searx/engines/bing.py                        |  2
 searx/engines/bing_images.py                 |  1
 searx/engines/bing_news.py                   |  1
 searx/engines/bing_videos.py                 |  5
 searx/engines/demo_online.py                 |  1
 searx/engines/duckduckgo.py                  |  1
 searx/engines/duckduckgo_definitions.py      |  3
 searx/engines/duckduckgo_images.py           |  1
 searx/engines/google.py                      | 11
 searx/engines/google_images.py               |  2
 searx/engines/google_news.py                 |  2
 searx/engines/google_play_apps.py            |  2
 searx/engines/google_scholar.py              |  2
 searx/engines/google_videos.py               |  2
 searx/engines/openstreetmap.py               |  6
 searx/engines/wikipedia.py                   |  6
 searx/search/models.py                       |  9
 searx/search/processors/abstract.py          |  7
 searx/search/processors/online.py            | 11
 searx/search/processors/online_currency.py   |  3
 searx/search/processors/online_dictionary.py |  2
 searx/search/processors/online_url_search.py |  3
 searx/settings.yml                           |  1
 24 files changed, 59 insertions(+), 26 deletions(-)
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 3fb0bcfb1..07d5b226c 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -44,6 +44,7 @@ ENGINE_DEFAULT_ARGS = {
"enable_http": False,
"using_tor_proxy": False,
"display_error_messages": True,
+ "send_accept_language_header": False,
"tokens": [],
"about": {},
}
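Note: the new 'send_accept_language_header' argument defaults to False, so only engines that opt in (see the per-engine hunks below) get the header. A hedged sketch of how such defaults are typically merged into an engine module at load time -- the helper name is illustrative, not the actual loader code:

    # illustrative only: copy missing attributes from ENGINE_DEFAULT_ARGS
    # onto an engine module, so engines that never mention the flag
    # silently fall back to send_accept_language_header = False
    def apply_engine_defaults(engine_module, defaults):
        for arg_name, arg_value in defaults.items():
            if not hasattr(engine_module, arg_name):
                setattr(engine_module, arg_name, arg_value)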
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 3d4ac08bd..8d024fed0 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -25,6 +25,7 @@ categories = ['general', 'web']
paging = True
time_range_support = False
safesearch = False
+send_accept_language_header = True
supported_languages_url = 'https://www.bing.com/account/general'
language_aliases = {}
@@ -68,7 +69,6 @@ def request(query, params):
logger.debug("headers.Referer --> %s", referer)
params['url'] = base_url + search_path
- params['headers']['Accept-Language'] = "en-US,en;q=0.5"
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
return params
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index cb69dc172..107ce3cff 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -31,6 +31,7 @@ categories = ['images', 'web']
paging = True
safesearch = True
time_range_support = True
+send_accept_language_header = True
supported_languages_url = 'https://www.bing.com/account/general'
number_of_results = 28
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 22856541b..7eea17bb4 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -34,6 +34,7 @@ about = {
categories = ['news']
paging = True
time_range_support = True
+send_accept_language_header = True
# search-url
base_url = 'https://www.bing.com/'
diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py
index ae8e8d49a..9be8eeaef 100644
--- a/searx/engines/bing_videos.py
+++ b/searx/engines/bing_videos.py
@@ -30,6 +30,7 @@ categories = ['videos', 'web']
paging = True
safesearch = True
time_range_support = True
+send_accept_language_header = True
number_of_results = 28
base_url = 'https://www.bing.com/'
@@ -70,10 +71,6 @@ def request(query, params):
if params['time_range'] in time_range_dict:
params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
- # bing videos did not like "older" versions < 70.0.1 when selectin other
- # languages then 'en' .. very strange ?!?!
- params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0.1) Gecko/20100101 Firefox/73.0.1'
-
return params
diff --git a/searx/engines/demo_online.py b/searx/engines/demo_online.py
index ee39a2f5a..08add5371 100644
--- a/searx/engines/demo_online.py
+++ b/searx/engines/demo_online.py
@@ -20,6 +20,7 @@ from json import loads
from urllib.parse import urlencode
engine_type = 'online'
+send_accept_language_header = True
categories = ['general']
disabled = True
timeout = 2.0
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 71da72677..17f0fae1c 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -31,6 +31,7 @@ categories = ['general', 'web']
paging = True
supported_languages_url = 'https://duckduckgo.com/util/u588.js'
time_range_support = True
+send_accept_language_header = True
language_aliases = {
'ar-SA': 'ar-XA',
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index ad3c92169..a73ee55ff 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -27,6 +27,8 @@ about = {
"results": 'JSON',
}
+send_accept_language_header = True
+
URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/']
@@ -62,7 +64,6 @@ def request(query, params):
params['url'] = URL.format(query=urlencode({'q': query}))
language = match_language(params['language'], supported_languages, language_aliases)
language = language.split('-')[0]
- params['headers']['Accept-Language'] = language
return params
diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py
index 7d844b543..19f649ef4 100644
--- a/searx/engines/duckduckgo_images.py
+++ b/searx/engines/duckduckgo_images.py
@@ -30,6 +30,7 @@ about = {
categories = ['images', 'web']
paging = True
safesearch = True
+send_accept_language_header = True
# search-url
images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}'
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 0d116db9f..5e80f6dcc 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -45,6 +45,7 @@ categories = ['general', 'web']
paging = True
time_range_support = True
safesearch = True
+send_accept_language_header = True
use_mobile_ui = False
supported_languages_url = 'https://www.google.com/preferences?#languages'
@@ -241,16 +242,6 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
# language.
ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language)
- # Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5
- ret_val['headers']['Accept-Language'] = ','.join(
- [
- lang_country,
- language + ';q=0.8,',
- 'en;q=0.6',
- '*;q=0.5',
- ]
- )
-
return ret_val
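For comparison, the removed per-engine construction above produces a value like the following (evaluated for lang_country='fr-CH', language='fr'; note the stray double comma caused by the trailing ',' in the q=0.8 entry). The replacement is built centrally in searx/search/processors/online.py later in this diff:

    # what the removed join evaluated to
    >>> ','.join(['fr-CH', 'fr' + ';q=0.8,', 'en;q=0.6', '*;q=0.5'])
    'fr-CH,fr;q=0.8,,en;q=0.6,*;q=0.5'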
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index a65c0ce37..e1f676dd6 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -51,6 +51,7 @@ paging = False
use_locale_domain = True
time_range_support = True
safesearch = True
+send_accept_language_header = True
filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
@@ -125,7 +126,6 @@ def request(query, params):
"""Google-Video search request"""
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
- logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
query_url = (
'https://'
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 0f97f9289..8f5a4b104 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -70,13 +70,13 @@ time_range_support = True
#
# safesearch : results are identitical for safesearch=0 and safesearch=2
safesearch = False
+send_accept_language_header = True
def request(query, params):
"""Google-News search request"""
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
- logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
# google news has only one domain
lang_info['subdomain'] = 'news.google.com'
diff --git a/searx/engines/google_play_apps.py b/searx/engines/google_play_apps.py
index 226e48dab..6506a446a 100644
--- a/searx/engines/google_play_apps.py
+++ b/searx/engines/google_play_apps.py
@@ -22,6 +22,8 @@ about = {
}
categories = ["files", "apps"]
+send_accept_language_header = True
+
search_url = "https://play.google.com/store/search?{query}&c=apps"
diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py
index f9c73097d..41c62886b 100644
--- a/searx/engines/google_scholar.py
+++ b/searx/engines/google_scholar.py
@@ -52,6 +52,7 @@ language_support = True
use_locale_domain = True
time_range_support = True
safesearch = False
+send_accept_language_header = True
def time_range_url(params):
@@ -75,7 +76,6 @@ def request(query, params):
offset = (params['pageno'] - 1) * 10
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
- logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
# subdomain is: scholar.google.xy
lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index 6eb051e0a..26dbcdd3c 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -60,6 +60,7 @@ language_support = True
use_locale_domain = True
time_range_support = True
safesearch = True
+send_accept_language_header = True
RE_CACHE = {}
@@ -111,7 +112,6 @@ def request(query, params):
"""Google-Video search request"""
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
- logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
query_url = (
'https://'
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index d44792077..4f799fce7 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -30,6 +30,7 @@ about = {
categories = ['map']
paging = False
language_support = True
+send_accept_language_header = True
# search-url
base_url = 'https://nominatim.openstreetmap.org/'
@@ -142,9 +143,8 @@ def request(query, params):
params['url'] = base_url + search_string.format(query=urlencode({'q': query}))
params['route'] = route_re.match(query)
params['headers']['User-Agent'] = searx_useragent()
-
- accept_language = 'en' if params['language'] == 'all' else params['language']
- params['headers']['Accept-Language'] = accept_language
+ if 'Accept-Language' not in params['headers']:
+ params['headers']['Accept-Language'] = 'en'
return params
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index cc806a8de..52b1053ed 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -19,6 +19,9 @@ about = {
"results": 'JSON',
}
+
+send_accept_language_header = True
+
# search-url
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
@@ -41,9 +44,6 @@ def request(query, params):
language = url_lang(params['language'])
params['url'] = search_url.format(title=quote(query), language=language)
- if params['language'].lower() in language_variants.get(language, []):
- params['headers']['Accept-Language'] = params['language'].lower()
-
params['headers']['User-Agent'] = searx_useragent()
params['raise_for_httperror'] = False
params['soft_max_redirects'] = 2
diff --git a/searx/search/models.py b/searx/search/models.py
index ff5897966..bbca1cd1d 100644
--- a/searx/search/models.py
+++ b/searx/search/models.py
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
import typing
+import babel
class EngineRef:
@@ -29,6 +30,7 @@ class SearchQuery:
'query',
'engineref_list',
'lang',
+ 'locale',
'safesearch',
'pageno',
'time_range',
@@ -59,6 +61,13 @@ class SearchQuery:
self.external_bang = external_bang
self.engine_data = engine_data or {}
+ self.locale = None
+ if self.lang:
+ try:
+ self.locale = babel.Locale.parse(self.lang, sep='-')
+ except babel.core.UnknownLocaleError:
+ pass
+
@property
def categories(self):
return list(set(map(lambda engineref: engineref.category, self.engineref_list)))
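A hedged sketch of what the new babel-based parsing yields for typical 'lang' values (assuming babel is installed; the outputs below reflect babel's documented behaviour):

    import babel

    # a full tag parses into language + territory
    loc = babel.Locale.parse('fr-CH', sep='-')
    print(loc.language, loc.territory)                   # fr CH

    # a bare language tag has no territory
    print(babel.Locale.parse('fr', sep='-').territory)   # None

    # unknown tags (e.g. searx's special value 'all') raise
    # UnknownLocaleError, so SearchQuery.locale stays None
    try:
        babel.Locale.parse('all', sep='-')
    except babel.core.UnknownLocaleError:
        pass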
diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py
index d4822fc56..d74616db0 100644
--- a/searx/search/processors/abstract.py
+++ b/searx/search/processors/abstract.py
@@ -138,6 +138,13 @@ class EngineProcessor(ABC):
return False
def get_params(self, search_query, engine_category):
+ """Returns a set of *request params* or ``None`` if request is not supported.
+
+ Not supported conditions (``None`` is returned):
+
+ - A page-number > 1 when engine does not support paging.
+ - A time range when the engine does not support time range.
+ """
# if paging is not supported, skip
if search_query.pageno > 1 and not self.engine.paging:
return None
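The docstring added above spells out the contract callers rely on; a hedged usage sketch (the surrounding variable names are illustrative):

    # illustrative caller: a None result means "skip this engine for this query"
    params = processor.get_params(search_query, engine_category)
    if params is None:
        return   # e.g. pageno > 1 without paging, or an unsupported time range
    # otherwise the params dict is handed on to the engine's request() hook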
diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py
index 0cfe6e123..17e9b6a96 100644
--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py
@@ -60,6 +60,17 @@ class OnlineProcessor(EngineProcessor):
# add an user agent
params['headers']['User-Agent'] = gen_useragent()
+ # add Accept-Language header
+ if self.engine.send_accept_language_header and search_query.locale:
+ ac_lang = search_query.locale.language
+ if search_query.locale.territory:
+ ac_lang = "%s-%s,%s;q=0.9,*;q=0.5" % (
+ search_query.locale.language,
+ search_query.locale.territory,
+ search_query.locale.language,
+ )
+ params['headers']['Accept-Language'] = ac_lang
+
return params
def _send_http_request(self, params):
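For illustration, a hedged sketch of the header values the new block produces; the helper below merely mirrors the logic above and is not part of the patch:

    import babel

    def accept_language_for(locale):
        # full tag first, bare language at q=0.9, anything else at q=0.5
        if locale.territory:
            return "%s-%s,%s;q=0.9,*;q=0.5" % (locale.language, locale.territory, locale.language)
        return locale.language

    print(accept_language_for(babel.Locale.parse('fr-CH', sep='-')))  # fr-CH,fr;q=0.9,*;q=0.5
    print(accept_language_for(babel.Locale.parse('de', sep='-')))     # de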
diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py
index 6bd891b1d..92398239f 100644
--- a/searx/search/processors/online_currency.py
+++ b/searx/search/processors/online_currency.py
@@ -38,6 +38,9 @@ class OnlineCurrencyProcessor(OnlineProcessor):
engine_type = 'online_currency'
def get_params(self, search_query, engine_category):
+ """Returns a set of *request params* or ``None`` if search query does not match
+ to :py:obj:`parser_re`."""
+
params = super().get_params(search_query, engine_category)
if params is None:
return None
diff --git a/searx/search/processors/online_dictionary.py b/searx/search/processors/online_dictionary.py
index 77540de9a..fbfc9df8e 100644
--- a/searx/search/processors/online_dictionary.py
+++ b/searx/search/processors/online_dictionary.py
@@ -18,6 +18,8 @@ class OnlineDictionaryProcessor(OnlineProcessor):
engine_type = 'online_dictionary'
def get_params(self, search_query, engine_category):
+ """Returns a set of *request params* or ``None`` if search query does not match
+ to :py:obj:`parser_re`."""
params = super().get_params(search_query, engine_category)
if params is None:
return None
diff --git a/searx/search/processors/online_url_search.py b/searx/search/processors/online_url_search.py
index 2863be28e..6383fa37f 100644
--- a/searx/search/processors/online_url_search.py
+++ b/searx/search/processors/online_url_search.py
@@ -20,6 +20,9 @@ class OnlineUrlSearchProcessor(OnlineProcessor):
engine_type = 'online_url_search'
def get_params(self, search_query, engine_category):
+ """Returns a set of *request params* or ``None`` if search query does not match
+ to at least one of :py:obj:`re_search_urls`.
+ """
params = super().get_params(search_query, engine_category)
if params is None:
return None
diff --git a/searx/settings.yml b/searx/settings.yml
index 949550831..d98828ae1 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -748,6 +748,7 @@ engines:
- name: google play movies
engine: xpath
+ send_accept_language_header: true
search_url: https://play.google.com/store/search?q={query}&c=movies
results_xpath: '//div[@class="ImZGtf mpg5gc"]'
title_xpath: './/div[@class="RZEgze"]//div[@class="kCSSQe"]//a'
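Putting it together, a hedged end-to-end sketch of what the online processor now contributes to a request for an engine that sets (or, as above, is configured with) send_accept_language_header; the header values are illustrative:

    # illustrative: headers for a query in 'pt-BR' against an opted-in engine
    params = {
        'headers': {
            'User-Agent': '<generated by gen_useragent()>',
            'Accept-Language': 'pt-BR,pt;q=0.9,*;q=0.5',
        },
    }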