summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarIT.de>2023-04-25 15:46:26 +0200
committerGitHub <noreply@github.com>2023-04-25 15:46:26 +0200
commit45529f51a11c30e484dcce566108681003ea076d (patch)
tree38cbf0fddd3adcf85d67eceb97045605ba736fc0
parent12df30070be65d655611365042cccb7dabc0cd5e (diff)
parent8adbc4fcecd3c3acb50e719bfe3794615649c793 (diff)
downloadsearxng-45529f51a11c30e484dcce566108681003ea076d.tar.gz
searxng-45529f51a11c30e484dcce566108681003ea076d.zip
Merge pull request #2347 from return42/mod-lang-detection
If language recognition fails use the Accept-Language
-rw-r--r--searx/preferences.py61
-rw-r--r--searx/search/__init__.py43
-rw-r--r--searx/settings.yml2
-rw-r--r--searx/webadapter.py32
-rwxr-xr-xsearx/webapp.py39
5 files changed, 106 insertions, 71 deletions
diff --git a/searx/preferences.py b/searx/preferences.py
index 3da6d5d16..8552305a7 100644
--- a/searx/preferences.py
+++ b/searx/preferences.py
@@ -8,9 +8,10 @@
from base64 import urlsafe_b64encode, urlsafe_b64decode
from zlib import compress, decompress
from urllib.parse import parse_qs, urlencode
-from typing import Iterable, Dict, List
+from typing import Iterable, Dict, List, Optional
import flask
+import babel
from searx import settings, autocomplete
from searx.enginelib import Engine
@@ -287,10 +288,65 @@ class PluginsSetting(BooleanChoices):
return [item[len('plugin_') :] for item in items]
+class ClientPref:
+    """Container to assemble client preferences and settings."""
+
+    # hint: searx.webapp.get_client_settings should be moved into this class
+
+    locale: Optional[babel.Locale]
+    """Locale preferred by the client, ``None`` if no locale is known."""
+
+    def __init__(self, locale: Optional[babel.Locale] = None):
+        self.locale = locale
+
+    @property
+    def locale_tag(self) -> Optional[str]:
+        """Language tag of the client's locale in the form ``<language>`` or
+        ``<language>-<territory>``; ``None`` if no locale is set."""
+        if self.locale is None:
+            return None
+        tag = self.locale.language
+        if self.locale.territory:
+            tag += '-' + self.locale.territory
+        return tag
+
+    @classmethod
+    def from_http_request(cls, http_request: flask.Request):
+        """Build ClientPref object from HTTP request.
+
+        - `Accept-Language used for locale setting
+          <https://www.w3.org/International/questions/qa-accept-lang-locales.en>`__
+
+        The locale with the highest q-value is selected.  Entries with an
+        invalid q-value or with a language tag that can't be parsed are
+        ignored.  If the header is missing or none of its entries is usable,
+        the ``locale`` of the returned object is ``None``.
+        """
+        al_header = http_request.headers.get("Accept-Language")
+        if not al_header:
+            return cls(locale=None)
+
+        pairs = []
+        for item in al_header.split(','):
+            # an entry without a q-value gets the implicit weight ``q=1``
+            lang, qvalue = [part.strip() for part in (item.split(';') + ['q=1'])[:2]]
+            try:
+                qvalue = float(qvalue.split('=')[-1])
+                locale = babel.Locale.parse(lang, sep='-')
+            except (ValueError, babel.core.UnknownLocaleError):
+                continue
+            pairs.append((locale, qvalue))
+
+        if not pairs:
+            # header without any usable entry (e.g. ``*``): the client has no
+            # locale preference, don't fail with an IndexError below
+            return cls(locale=None)
+
+        # the sort is stable: for equal q-values the first entry in the header wins
+        pairs.sort(reverse=True, key=lambda x: x[1])
+        return cls(locale=pairs[0][0])
+
+
class Preferences:
"""Validates and saves preferences to cookies"""
- def __init__(self, themes: List[str], categories: List[str], engines: Dict[str, Engine], plugins: Iterable[Plugin]):
+ def __init__(
+ self,
+ themes: List[str],
+ categories: List[str],
+ engines: Dict[str, Engine],
+ plugins: Iterable[Plugin],
+ client: Optional[ClientPref] = None,
+ ):
+
super().__init__()
self.key_value_settings: Dict[str, Setting] = {
@@ -414,6 +470,7 @@ class Preferences:
self.engines = EnginesSetting('engines', engines=engines.values())
self.plugins = PluginsSetting('plugins', plugins=plugins)
self.tokens = SetSetting('tokens')
+ self.client = client or ClientPref()
self.unknown_params: Dict[str, str] = {}
def get_as_url_params(self):
diff --git a/searx/search/__init__.py b/searx/search/__init__.py
index e5465880c..77121c426 100644
--- a/searx/search/__init__.py
+++ b/searx/search/__init__.py
@@ -22,7 +22,6 @@ from searx.network import initialize as initialize_network, check_network_config
from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
from searx.search.processors import PROCESSORS, initialize as initialize_processors
from searx.search.checker import initialize as initialize_checker
-from searx.utils import detect_language
logger = logger.getChild('search')
@@ -40,57 +39,19 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False,
initialize_checker()
-def replace_auto_language(search_query: SearchQuery):
- """
- Do nothing except if `search_query.lang` is "auto".
- In this case:
- * the value "auto" is replaced by the detected language of the query.
- The default value is "all" when no language is detected.
- * `search_query.locale` is updated accordingly
-
- Use :py:obj:`searx.utils.detect_language` with `only_search_languages=True` to keep
- only languages supported by the engines.
- """
- if search_query.lang != 'auto':
- return
-
- detected_lang = detect_language(search_query.query, threshold=0.3, only_search_languages=True)
- if detected_lang is None:
- # fallback to 'all' if no language has been detected
- search_query.lang = 'all'
- search_query.locale = None
- return
- search_query.lang = detected_lang
- try:
- search_query.locale = babel.Locale.parse(search_query.lang)
- except babel.core.UnknownLocaleError:
- search_query.locale = None
-
-
class Search:
"""Search information container"""
__slots__ = "search_query", "result_container", "start_time", "actual_timeout"
def __init__(self, search_query: SearchQuery):
- """Initialize the Search
-
- search_query is copied
- """
+ """Initialize the Search"""
# init vars
super().__init__()
+ self.search_query = search_query
self.result_container = ResultContainer()
self.start_time = None
self.actual_timeout = None
- self.search_query = copy(search_query)
- self.update_search_query(self.search_query)
-
- def update_search_query(self, search_query: SearchQuery):
- """Update search_query.
-
- call replace_auto_language to replace the "auto" language
- """
- replace_auto_language(search_query)
def search_external_bang(self):
"""
diff --git a/searx/settings.yml b/searx/settings.yml
index e63ded9ad..5e593dc40 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -31,7 +31,7 @@ search:
autocomplete_min: 4
# Default search language - leave blank to detect from browser information or
# use codes from 'languages.py'
- default_lang: ""
+ default_lang: "auto"
# Available languages
# languages:
# - all
diff --git a/searx/webadapter.py b/searx/webadapter.py
index dbcf25058..121319eeb 100644
--- a/searx/webadapter.py
+++ b/searx/webadapter.py
@@ -6,6 +6,7 @@ from searx.query import RawTextQuery
from searx.engines import categories, engines
from searx.search import SearchQuery, EngineRef
from searx.preferences import Preferences, is_locked
+from searx.utils import detect_language
# remove duplicate queries.
@@ -214,7 +215,27 @@ def parse_engine_data(form):
def get_search_query_from_webapp(
preferences: Preferences, form: Dict[str, str]
-) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]:
+) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef], str]:
+ """Assemble data from preferences and request.form (from the HTML form) needed
+ in a search query.
+
+ The returned tuple consists of:
+
+ 1. instance of :py:obj:`searx.search.SearchQuery`
+ 2. instance of :py:obj:`searx.query.RawTextQuery`
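+ 3. list of :py:obj:`searx.search.EngineRef` instances (``query_engineref_list_unknown``)
+ 4. list of :py:obj:`searx.search.EngineRef` instances (``query_engineref_list_notoken``)
+ 5. string with the *selected locale* of the query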
+
+ About language/locale: if the client selects the alias ``auto`` the
+ ``SearchQuery`` object is built from the :py:obj:`detected language
+ <searx.utils.detect_language>`. If language recognition does not have a
+ match the language preferred by the :py:obj:`Preferences.client` is used.
+ If client does not have a preference, the default ``all`` is used.
+
+ The *selected locale* in the tuple always represents the selected
+ language/locale and might differ from the language recognition.
+
+ """
# no text for the query ?
if not form.get('q'):
raise SearxParameterException('q', '')
@@ -229,13 +250,19 @@ def get_search_query_from_webapp(
# set query
query = raw_text_query.getQuery()
query_pageno = parse_pageno(form)
- query_lang = parse_lang(preferences, form, raw_text_query)
query_safesearch = parse_safesearch(preferences, form)
query_time_range = parse_time_range(form)
query_timeout = parse_timeout(form, raw_text_query)
external_bang = raw_text_query.external_bang
engine_data = parse_engine_data(form)
+ query_lang = parse_lang(preferences, form, raw_text_query)
+ selected_locale = query_lang
+
+ if query_lang == 'auto':
+ query_lang = detect_language(query, threshold=0.8, only_search_languages=True)
+ query_lang = query_lang or preferences.client.locale_tag or 'all'
+
if not is_locked('categories') and raw_text_query.specific:
# if engines are calculated from query,
# set categories by using that information
@@ -265,4 +292,5 @@ def get_search_query_from_webapp(
raw_text_query,
query_engineref_list_unknown,
query_engineref_list_notoken,
+ selected_locale,
)
diff --git a/searx/webapp.py b/searx/webapp.py
index 388b28c38..79255652f 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -84,6 +84,7 @@ from searx.webutils import (
from searx.webadapter import (
get_search_query_from_webapp,
get_selected_categories,
+ parse_lang,
)
from searx.utils import (
html_to_text,
@@ -96,6 +97,7 @@ from searx.plugins import Plugin, plugins, initialize as plugin_initialize
from searx.plugins.oa_doi_rewrite import get_doi_resolver
from searx.preferences import (
Preferences,
+ ClientPref,
ValidationException,
)
from searx.answerers import (
@@ -221,16 +223,9 @@ babel = Babel(app, locale_selector=get_locale)
def _get_browser_language(req, lang_list):
- for lang in req.headers.get("Accept-Language", "en").split(","):
- if ';' in lang:
- lang = lang.split(';')[0]
- if '-' in lang:
- lang_parts = lang.split('-')
- lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper())
- locale = match_locale(lang, lang_list, fallback=None)
- if locale is not None:
- return locale
- return 'en'
+ client = ClientPref.from_http_request(req)
+ locale = match_locale(client.locale_tag, lang_list, fallback='en')
+ return locale
def _get_locale_rfc5646(locale):
@@ -446,11 +441,7 @@ def render(template_name: str, **kwargs):
kwargs['rtl'] = True
if 'current_language' not in kwargs:
- _locale = request.preferences.get_value('language')
- if _locale in ('auto', 'all'):
- kwargs['current_language'] = _locale
- else:
- kwargs['current_language'] = match_locale(_locale, settings['search']['languages'])
+ kwargs['current_language'] = parse_lang(request.preferences, {}, RawTextQuery('', []))
# values from settings
kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
@@ -512,7 +503,10 @@ def pre_request():
request.timings = [] # pylint: disable=assigning-non-slot
request.errors = [] # pylint: disable=assigning-non-slot
- preferences = Preferences(themes, list(categories.keys()), engines, plugins) # pylint: disable=redefined-outer-name
+ client_pref = ClientPref.from_http_request(request)
+ # pylint: disable=redefined-outer-name
+ preferences = Preferences(themes, list(categories.keys()), engines, plugins, client_pref)
+
user_agent = request.headers.get('User-Agent', '').lower()
if 'webkit' in user_agent and 'android' in user_agent:
preferences.key_value_settings['method'].value = 'GET'
@@ -681,7 +675,9 @@ def search():
raw_text_query = None
result_container = None
try:
- search_query, raw_text_query, _, _ = get_search_query_from_webapp(request.preferences, request.form)
+ search_query, raw_text_query, _, _, selected_locale = get_search_query_from_webapp(
+ request.preferences, request.form
+ )
# search = Search(search_query) # without plugins
search = SearchWithPlugins(search_query, request.user_plugins, request) # pylint: disable=redefined-outer-name
@@ -812,13 +808,6 @@ def search():
)
)
- if search_query.lang in ('auto', 'all'):
- current_language = search_query.lang
- else:
- current_language = match_locale(
- search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language")
- )
-
# search_query.lang contains the user choice (all, auto, en, ...)
# when the user choice is "auto", search.search_query.lang contains the detected language
# otherwise it is equal to search_query.lang
@@ -841,7 +830,7 @@ def search():
result_container.unresponsive_engines
),
current_locale = request.preferences.get_value("locale"),
- current_language = current_language,
+ current_language = selected_locale,
search_language = match_locale(
search.search_query.lang,
settings['search']['languages'],