diff options
author | Allen <64094914+allendema@users.noreply.github.com> | 2022-05-30 00:53:26 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2022-06-02 14:32:37 +0200 |
commit | 43dc9eb7d64ddcdc07639129b9dbbc96c92d3caa (patch) | |
tree | bca1c898622e430a8e8b4fdedd246379d7e2af91 /searx/engines | |
parent | 51ba817e06bb15ca1768010d6873d1d7bf48b0b6 (diff) | |
download | searxng-43dc9eb7d64ddcdc07639129b9dbbc96c92d3caa.tar.gz searxng-43dc9eb7d64ddcdc07639129b9dbbc96c92d3caa.zip |
[enh] Initial Petalsearch Images support
Upstream example query:
https://petalsearch.com/search?query=test&channel=image&ps=50&pn=1&region=de-de&ss_mode=off&ss_type=normal
Depending on locale it will internally use some/all results from other
engines. See:
https://seirdy.one/posts/2021/03/10/search-engines-with-own-indexes/#general-indexing-search-engines
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/petal_images.py | 94 |
1 files changed, 94 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Petalsearch Images

Engine scraping the image search of petalsearch.com.  The result payload is
a JSON object embedded in the third ``<script>`` tag of the returned HTML
page, which is extracted and decoded in :py:func:`response`.
"""

from json import loads
from urllib.parse import urlencode
from datetime import datetime

from lxml import html

from searx.utils import extract_text

# Engine metadata (SearXNG engine convention).
about = {
    "website": 'https://petalsearch.com/',
    "wikidata_id": 'Q104399280',
    "official_api_documentation": False,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['images']
paging = True
time_range_support = False

safesearch = True
# Map SearXNG's safesearch levels (0 / 1 / 2) to Petalsearch's ss_mode values.
safesearch_table = {0: 'off', 1: 'moderate', 2: 'on'}

base_url = 'https://petalsearch.com/'
# NOTE: the region parameter was mojibake ("®ion=") in the original text —
# an HTML-entity mangling of "&region=" (cf. the upstream example query).
search_string = 'search?{query}&channel=image&ps=50&pn={page}&region={lang}&ss_mode={safesearch}&ss_type=normal'


def request(query, params):
    """Build the Petalsearch image-search URL for *query*.

    Fills ``params['url']`` from the engine's ``search_string`` template
    using the page number, lowercased language tag and safesearch level
    found in *params*, then returns *params* (SearXNG engine contract).
    """
    search_path = search_string.format(
        query=urlencode({'query': query}),
        page=params['pageno'],
        lang=params['language'].lower(),
        safesearch=safesearch_table[params['safesearch']],
    )

    params['url'] = base_url + search_path

    return params


def response(resp):
    """Parse the HTML response and return a list of image results.

    The JSON result data lives inside the third ``<script>`` element of the
    page; it is extracted as text, decoded, and each entry of ``newImages``
    is mapped onto SearXNG's ``images.html`` result template.
    """
    results = []

    tree = html.fromstring(resp.text)
    # The result data is a JSON blob inside the third <script> element.
    root = tree.findall('.//script[3]')
    json_content = extract_text(root)
    data = loads(json_content)

    for result in data['newImages']:
        url = result['url']
        title = result['title']
        thumbnail_src = result['image']

        # 'extrainfo' may be absent; fall back to an empty dict so the
        # .get() lookups below cannot raise AttributeError on None.
        pic_dict = result.get('extrainfo') or {}

        date_from_api = pic_dict.get('publish_time')
        width = pic_dict.get('width')
        height = pic_dict.get('height')
        img_src = pic_dict.get('real_url')

        # Skip results without a full-size image URL.  The original check
        # ``img_src is None or ''`` only tested for None ('' is falsy on
        # its own), so empty strings slipped through.
        if not img_src:
            continue

        # Convert the publication date; keep None when the API omits it so
        # a value from a previous loop iteration is never reused.
        publishedDate = None
        if date_from_api is not None:
            publishedDate = datetime.fromtimestamp(int(date_from_api))

        results.append(
            {
                'template': 'images.html',
                'url': url,
                'title': title,
                'img_src': img_src,
                'thumbnail_src': thumbnail_src,
                'width': width,
                'height': height,
                'publishedDate': publishedDate,
            }
        )

    return results