diff options
-rw-r--r-- | docs/dev/engines/online/adobe_stock.rst | 13 | ||||
-rw-r--r-- | requirements.txt | 1 | ||||
-rw-r--r-- | searx/engines/adobe_stock.py | 232 | ||||
-rw-r--r-- | searx/settings.yml | 29 |
4 files changed, 237 insertions, 38 deletions
diff --git a/docs/dev/engines/online/adobe_stock.rst b/docs/dev/engines/online/adobe_stock.rst new file mode 100644 index 000000000..48a6511c0 --- /dev/null +++ b/docs/dev/engines/online/adobe_stock.rst @@ -0,0 +1,13 @@ +.. _adobe stock engine: + +=========== +Adobe Stock +=========== + +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + +.. automodule:: searx.engines.adobe_stock + :members: diff --git a/requirements.txt b/requirements.txt index 5225565e6..03babb7a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,3 +19,4 @@ tomli==2.0.2; python_version < '3.11' msgspec==0.18.6 eval_type_backport; python_version < '3.9' typer-slim==0.13.1 +isodate==0.7.2 diff --git a/searx/engines/adobe_stock.py b/searx/engines/adobe_stock.py index f1b8e13ef..e6664d273 100644 --- a/searx/engines/adobe_stock.py +++ b/searx/engines/adobe_stock.py @@ -1,67 +1,229 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""Adobe Stock (images) +"""`Adobe Stock`_ is a service that gives access to millions of royalty-free +assets. Assets types include photos, vectors, illustrations, templates, 3D +assets, videos, motion graphics templates and audio tracks. + +.. Adobe Stock: https://stock.adobe.com/ + +Configuration +============= + +The engine has the following mandatory setting: + +- SearXNG's :ref:`engine categories` +- Adobe-Stock's :py:obj:`adobe_order` +- Adobe-Stock's :py:obj:`adobe_content_types` + +.. code:: yaml + + - name: adobe stock + engine: adobe_stock + shortcut: asi + categories: [images] + adobe_order: relevance + adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"] + + - name: adobe stock video + engine: adobe_stock + network: adobe stock + shortcut: asi + categories: [videos] + adobe_order: relevance + adobe_content_types: ["video"] + +Implementation +============== + """ +from __future__ import annotations +from typing import TYPE_CHECKING +from datetime import datetime, timedelta from urllib.parse import urlencode -from searx.utils import gen_useragent + +import isodate + +if TYPE_CHECKING: + import logging + + logger: logging.Logger about = { - "website": 'https://stock.adobe.com/', - "wikidata_id": 'Q5977430', + "website": "https://stock.adobe.com/", + "wikidata_id": "Q5977430", "official_api_documentation": None, "use_official_api": False, "require_api_key": False, - "results": 'JSON', + "results": "JSON", } -categories = ['images'] +categories = [] paging = True +send_accept_language_header = True +results_per_page = 10 -base_url = 'https://stock.adobe.com' +base_url = "https://stock.adobe.com" + +adobe_order: str = "" +"""Sort order, can be one of: + +- ``relevance`` or +- ``featured`` or +- ``creation`` (most recent) or +- ``nb_downloads`` (number of downloads) +""" + +ADOBE_VALID_TYPES = ["photo", "illustration", "zip_vector", "video", "template", "3d", "audio", "image"] +adobe_content_types: list = [] +"""A list of of content types. The following content types are offered: + +- Images: ``image`` +- Videos: ``video`` +- Templates: ``template`` +- 3D: ``3d`` +- Audio ``audio`` + +Additional subcategories: + +- Photos: ``photo`` +- Illustrations: ``illustration`` +- Vectors: ``zip_vector`` (Vectors), +""" + +# Do we need support for "free_collection" and "include_stock_enterprise"? -results_per_page = 10 -adobe_order = "relevance" # one of 'relevant', 'featured', 'creation' or 'nb_downloads' + +def init(_): + if not categories: + raise ValueError("adobe_stock engine: categories is unset") + + # adobe_order + if not adobe_order: + raise ValueError("adobe_stock engine: adobe_order is unset") + if adobe_order not in ["relevance", "featured", "creation", "nb_downloads"]: + raise ValueError(f"unsupported adobe_order: {adobe_order}") + + # adobe_content_types + if not adobe_content_types: + raise ValueError("adobe_stock engine: adobe_content_types is unset") + + if isinstance(adobe_content_types, list): + for t in adobe_content_types: + if t not in ADOBE_VALID_TYPES: + raise ValueError("adobe_stock engine: adobe_content_types: '%s' is invalid" % t) + else: + raise ValueError( + "adobe_stock engine: adobe_content_types must be a list of strings not %s" % type(adobe_content_types) + ) def request(query, params): + args = { - 'k': query, - 'limit': results_per_page, - 'order': adobe_order, - 'search_page': params['pageno'], - 'search_type': 'pagination', - 'filters[content_type:video]': 0, - 'filters[content_type:audio]': 0, + "k": query, + "limit": results_per_page, + "order": adobe_order, + "search_page": params["pageno"], + "search_type": "pagination", } - params['url'] = f"{base_url}/de/Ajax/Search?{urlencode(args)}" + + for content_type in ADOBE_VALID_TYPES: + args[f"filters[content_type:{content_type}]"] = 1 if content_type in adobe_content_types else 0 + + params["url"] = f"{base_url}/de/Ajax/Search?{urlencode(args)}" # headers required to bypass bot-detection - params['headers'] = { - "User-Agent": gen_useragent(), - "Accept-Language": "en-US,en;q=0.5", - } + if params["searxng_locale"] == "all": + params["headers"]["Accept-Language"] = "en-US,en;q=0.5" return params +def parse_image_item(item): + return { + "template": "images.html", + "url": item["content_url"], + "title": item["title"], + "content": item["asset_type"], + "img_src": item["content_thumb_extra_large_url"], + "thumbnail_src": item["thumbnail_url"], + "resolution": f"{item['content_original_width']}x{item['content_original_height']}", + "img_format": item["format"], + "author": item["author"], + } + + +def parse_video_item(item): + + # in video items, the title is more or less a "content description", we try + # to reduce the lenght of the title .. + + title = item["title"] + content = "" + if "." in title.strip()[:-1]: + content = title + title = title.split(".", 1)[0] + elif "," in title: + content = title + title = title.split(",", 1)[0] + elif len(title) > 50: + content = title + title = "" + for w in content.split(" "): + title += f" {w}" + if len(title) > 50: + title = title.strip() + "\u2026" + break + + return { + "template": "videos.html", + "url": item["content_url"], + "title": title, + "content": content, + # https://en.wikipedia.org/wiki/ISO_8601#Durations + "length": isodate.parse_duration(item["time_duration"]), + "publishedDate": datetime.strptime(item["creation_date"], "%Y-%m-%d"), + "thumbnail": item["thumbnail_url"], + "iframe_src": item["video_small_preview_url"], + "metadata": item["asset_type"], + } + + +def parse_audio_item(item): + audio_data = item["audio_data"] + content = audio_data.get("description") or "" + if audio_data.get("album"): + content = audio_data["album"] + " - " + content + + return { + "url": item["content_url"], + "title": item["title"], + "content": content, + # "thumbnail": base_url + item["thumbnail_url"], + "iframe_src": audio_data["preview"]["url"], + "publishedDate": datetime.fromisoformat(audio_data["release_date"]) if audio_data["release_date"] else None, + "length": timedelta(seconds=round(audio_data["duration"] / 1000)) if audio_data["duration"] else None, + "author": item.get("artist_name"), + } + + def response(resp): results = [] json_resp = resp.json() - for item in json_resp['items'].values(): - results.append( - { - 'template': 'images.html', - 'url': item['content_url'], - 'title': item['title'], - 'content': '', - 'img_src': item['content_thumb_extra_large_url'], - 'thumbnail_src': item['thumbnail_url'], - 'resolution': f"{item['content_original_width']}x{item['content_original_height']}", - 'img_format': item['format'], - 'author': item['author'], - } - ) + if isinstance(json_resp["items"], list): + return None + for item in json_resp["items"].values(): + if item["asset_type"].lower() in ["image", "premium-image", "illustration", "vector"]: + result = parse_image_item(item) + elif item["asset_type"].lower() == "video": + result = parse_video_item(item) + elif item["asset_type"].lower() == "audio": + result = parse_audio_item(item) + else: + logger.error("no handle for %s --> %s", item["asset_type"], item) + continue + results.append(result) return results diff --git a/searx/settings.yml b/searx/settings.yml index 0084db902..d27172aef 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -327,9 +327,32 @@ engines: - name: adobe stock engine: adobe_stock - # available search orders: 'relevant', 'featured', 'creation', 'nb_downloads' - # adobe_order: relevance - shortcut: as + shortcut: asi + categories: ["images"] + # https://docs.searxng.org/dev/engines/online/adobe_stock.html + adobe_order: relevance + adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"] + timeout: 6 + disabled: true + + - name: adobe stock video + engine: adobe_stock + shortcut: asv + network: adobe stock + categories: ["videos"] + adobe_order: relevance + adobe_content_types: ["video"] + timeout: 6 + disabled: true + + - name: adobe stock audio + engine: adobe_stock + shortcut: asa + network: adobe stock + categories: ["music"] + adobe_order: relevance + adobe_content_types: ["audio"] + timeout: 6 disabled: true - name: alpine linux packages |