diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2024-05-09 10:40:55 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2024-05-09 17:29:15 +0200 |
commit | fb32425d784bbc8e2a958ed3062888b75cab230d (patch) | |
tree | f310560577a7b7789b2ae8c9015a8107a81297cf /searx/engines | |
parent | 72be98e12f5b8454f03cf3cb44a920fce75d4f7b (diff) | |
download | searxng-fb32425d784bbc8e2a958ed3062888b75cab230d.tar.gz searxng-fb32425d784bbc8e2a958ed3062888b75cab230d.zip |
[mod] yacy engine: pick base_url randomly from a list of instances
Inspired by post [1] in the discussion we had while yacy.searchlab.eu was
broken.
[1] https://github.com/searxng/searxng/issues/3428#issuecomment-2101080101
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/yacy.py | 59 |
1 files changed, 37 insertions, 22 deletions
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index 3a0414787..8f0718038 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -22,20 +22,26 @@ The engine has the following (additional) settings: - :py:obj:`search_mode` - :py:obj:`search_type` +The :py:obj:`base_url` has to be set in the engine named `yacy` and is used by +all yacy engines. + .. code:: yaml - name: yacy engine: yacy categories: general search_type: text - base_url: https://yacy.searchlab.eu shortcut: ya + base_url: + - https://yacy.searchlab.eu + - https://search.lomig.me + - https://yacy.ecosys.eu + - https://search.webproject.link - name: yacy images engine: yacy categories: images search_type: image - base_url: https://yacy.searchlab.eu shortcut: yai disabled: true @@ -45,6 +51,9 @@ Implementations """ # pylint: disable=fixme +from __future__ import annotations + +import random from json import loads from urllib.parse import urlencode from dateutil import parser @@ -87,15 +96,10 @@ search_type = 'text' ``video`` are not yet implemented (Pull-Requests are welcome). """ -# search-url -base_url = 'https://yacy.searchlab.eu' -search_url = ( - '/yacysearch.json?{query}' - '&startRecord={offset}' - '&maximumRecords={limit}' - '&contentdom={search_type}' - '&resource={resource}' -) +base_url: list | str = 'https://yacy.searchlab.eu' +"""The value is an URL or a list of URLs. In the latter case instance will be +selected randomly. 
+""" def init(_): @@ -108,24 +112,35 @@ def init(_): raise ValueError('search_type "%s" is not one of %s' % (search_type, valid_types)) +def _base_url() -> str: + from searx.engines import engines # pylint: disable=import-outside-toplevel + + url = engines['yacy'].base_url # type: ignore + if isinstance(url, list): + url = random.choice(url) + return url + + def request(query, params): + offset = (params['pageno'] - 1) * number_of_results + args = { + 'query': query, + 'startRecord': offset, + 'maximumRecords': number_of_results, + 'contentdom': search_type, + 'resource': search_mode, + } - params['url'] = base_url + search_url.format( - query=urlencode({'query': query}), - offset=offset, - limit=number_of_results, - search_type=search_type, - resource=search_mode, - ) + # add language tag if specified + if params['language'] != 'all': + args['lr'] = 'lang_' + params['language'].split('-')[0] + + params["url"] = f"{_base_url()}/yacysearch.json?{urlencode(args)}" if http_digest_auth_user and http_digest_auth_pass: params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass) - # add language tag if specified - if params['language'] != 'all': - params['url'] += '&lr=lang_' + params['language'].split('-')[0] - return params |