diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2023-08-14 18:30:11 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2023-08-15 16:17:42 +0200 |
commit | 9100a48541f469df2973d3d42e8ed8f6bba4fac1 (patch) | |
tree | 0097b1e13bbc658afaec856389e922b077b62a3d /searx/engines/seekr.py | |
parent | 2bab658d390065022d81edd5fc1d422388042788 (diff) | |
download | searxng-9100a48541f469df2973d3d42e8ed8f6bba4fac1.tar.gz searxng-9100a48541f469df2973d3d42e8ed8f6bba4fac1.zip |
[mod] improve seekr engines and add documentation
This patch adds some more fields to the result items and changes paging to use the
``nextResultSet`` given in Seekr's JSON response.
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/seekr.py')
-rw-r--r-- | searx/engines/seekr.py | 156 |
1 files changed, 135 insertions, 21 deletions
diff --git a/searx/engines/seekr.py b/searx/engines/seekr.py index c87d21d80..9250ac991 100644 --- a/searx/engines/seekr.py +++ b/searx/engines/seekr.py @@ -1,50 +1,120 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Seekr (images, videos, news) +"""seekr.com Seeker Score + +Seekr is a privately held search and content evaluation engine that prioritizes +credibility over popularity. + +Configuration +============= + +The engine has the following additional settings: + +- :py:obj:`seekr_category` +- :py:obj:`api_key` + +This implementation is used by seekr engines in the :ref:`settings.yml +<settings engine>`: + +.. code:: yaml + + - name: seekr news + seekr_category: news + ... + - name: seekr images + seekr_category: images + ... + - name: seekr videos + seekr_category: videos + ... + +Known Quirks +============ + +The implementation to support :py:obj:`paging <searx.enginelib.Engine.paging>` +is based on the *nextpage* method of Seekr's REST API. This feature is *next +page driven* and plays well with the :ref:`infinite_scroll <settings ui>` +setting in SearXNG but it does not really fit into SearXNG's UI to select a page +by number. 
+ +Implementations +=============== + """ from datetime import datetime from json import loads from urllib.parse import urlencode +from flask_babel import gettext about = { "website": 'https://seekr.com/', "official_api_documentation": None, - "use_official_api": True, + "use_official_api": False, "require_api_key": True, "results": 'JSON', + "language": 'en', } -paging = True # news search doesn't support paging base_url = "https://api.seekr.com" -# v2/newssearch, v1/imagetab, v1/videotab -seekr_path = "newssearch" -seekr_api_version = "v2" +paging = True + api_key = "srh1-22fb-sekr" -results_per_page = 10 +"""API key / reversed engineered / is still the same one since 2022.""" + +seekr_category: str = 'unset' +"""Search category, any of ``news``, ``videos`` or ``images``.""" + + +def init(engine_settings): + + # global paging + if engine_settings['seekr_category'] not in ['news', 'videos', 'images']: + raise ValueError(f"Unsupported seekr category: {engine_settings['seekr_category']}") def request(query, params): + + if not query: + return None + args = { 'query': query, 'apiKey': api_key, - 'limit': results_per_page, - 'offset': (params['pageno'] - 1) * results_per_page, } - path = f"{seekr_api_version}/{seekr_path}" - if seekr_api_version == "v1": - path = seekr_path + api_url = base_url + '/engine' + if seekr_category == 'news': + api_url += '/v2/newssearch' + + elif seekr_category == 'images': + api_url += '/imagetab' + + elif seekr_category == 'videos': + api_url += '/videotab' - params['url'] = f"{base_url}/engine/{path}?{urlencode(args)}" + params['url'] = f"{api_url}?{urlencode(args)}" + if params['pageno'] > 1: + nextpage = params['engine_data'].get('nextpage') + if nextpage: + params['url'] = nextpage return params def _images_response(json): + + search_results = json.get('expertResponses') + if search_results: + search_results = search_results[0].get('advice') + else: # response from a 'nextResultSet' + search_results = json.get('advice') + results = 
[] + if not search_results: + return results - for result in json['expertResponses'][0]['advice']['results']: + for result in search_results['results']: summary = loads(result['summary']) results.append( { @@ -53,52 +123,96 @@ def _images_response(json): 'title': result['title'], 'img_src': result['url'], 'img_format': f"{summary['width']}x{summary['height']}", + 'thumbnail_src': 'https://media.seekr.com/engine/rp/' + summary['tg'] + '/?src= ' + result['thumbnail'], } ) + if search_results.get('nextResultSet'): + results.append( + { + "engine_data": search_results.get('nextResultSet'), + "key": "nextpage", + } + ) return results def _videos_response(json): + + search_results = json.get('expertResponses') + if search_results: + search_results = search_results[0].get('advice') + else: # response from a 'nextResultSet' + search_results = json.get('advice') + results = [] + if not search_results: + return results - for result in json['expertResponses'][0]['advice']['results']: + for result in search_results['results']: + summary = loads(result['summary']) results.append( { 'template': 'videos.html', 'url': result['url'], 'title': result['title'], + 'thumbnail': 'https://media.seekr.com/engine/rp/' + summary['tg'] + '/?src= ' + result['thumbnail'], } ) + if search_results.get('nextResultSet'): + results.append( + { + "engine_data": search_results.get('nextResultSet'), + "key": "nextpage", + } + ) return results def _news_response(json): + + search_results = json.get('expertResponses') + if search_results: + search_results = search_results[0]['advice']['categorySearchResult']['searchResult'] + else: # response from a 'nextResultSet' + search_results = json.get('advice') + results = [] + if not search_results: + return results + + for result in search_results['results']: - for result in json['expertResponses'][0]['advice']['categorySearchResult']['searchResult']['results']: results.append( { 'url': result['url'], 'title': result['title'], - 'content': result['summary'], + 
'content': result['summary'] or result["topCategory"] or result["displayUrl"] or '', 'thumbnail': result.get('thumbnail', ''), 'publishedDate': datetime.strptime(result['pubDate'][:19], '%Y-%m-%d %H:%M:%S'), + 'metadata': gettext("Language") + ': ' + result.get('language', ''), } ) + if search_results.get('nextResultSet'): + results.append( + { + "engine_data": search_results.get('nextResultSet'), + "key": "nextpage", + } + ) return results def response(resp): json = resp.json() - if seekr_path == "videotab": + if seekr_category == "videos": return _videos_response(json) - if seekr_path == "imagetab": + if seekr_category == "images": return _images_response(json) - if seekr_path == "newssearch": + if seekr_category == "news": return _news_response(json) - raise ValueError(f"Unsupported seekr path: {seekr_path}") + raise ValueError(f"Unsupported seekr category: {seekr_category}") |