summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/dev/engines/online/adobe_stock.rst13
-rw-r--r--requirements.txt1
-rw-r--r--searx/engines/adobe_stock.py232
-rw-r--r--searx/settings.yml29
4 files changed, 237 insertions, 38 deletions
diff --git a/docs/dev/engines/online/adobe_stock.rst b/docs/dev/engines/online/adobe_stock.rst
new file mode 100644
index 000000000..48a6511c0
--- /dev/null
+++ b/docs/dev/engines/online/adobe_stock.rst
@@ -0,0 +1,13 @@
+.. _adobe stock engine:
+
+===========
+Adobe Stock
+===========
+
+.. contents:: Contents
+ :depth: 2
+ :local:
+ :backlinks: entry
+
+.. automodule:: searx.engines.adobe_stock
+ :members:
diff --git a/requirements.txt b/requirements.txt
index 5225565e6..03babb7a8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,3 +19,4 @@ tomli==2.0.2; python_version < '3.11'
msgspec==0.18.6
eval_type_backport; python_version < '3.9'
typer-slim==0.13.1
+isodate==0.7.2
diff --git a/searx/engines/adobe_stock.py b/searx/engines/adobe_stock.py
index f1b8e13ef..e6664d273 100644
--- a/searx/engines/adobe_stock.py
+++ b/searx/engines/adobe_stock.py
@@ -1,67 +1,229 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""Adobe Stock (images)
+"""`Adobe Stock`_ is a service that gives access to millions of royalty-free
+assets. Asset types include photos, vectors, illustrations, templates, 3D
+assets, videos, motion graphics templates and audio tracks.
+
+.. _Adobe Stock: https://stock.adobe.com/
+
+Configuration
+=============
+
+The engine has the following mandatory settings:
+
+- SearXNG's :ref:`engine categories`
+- Adobe-Stock's :py:obj:`adobe_order`
+- Adobe-Stock's :py:obj:`adobe_content_types`
+
+.. code:: yaml
+
+ - name: adobe stock
+ engine: adobe_stock
+ shortcut: asi
+ categories: [images]
+ adobe_order: relevance
+ adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
+
+ - name: adobe stock video
+ engine: adobe_stock
+ network: adobe stock
+ shortcut: asv
+ categories: [videos]
+ adobe_order: relevance
+ adobe_content_types: ["video"]
+
+Implementation
+==============
+
"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from datetime import datetime, timedelta
from urllib.parse import urlencode
-from searx.utils import gen_useragent
+
+import isodate
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
about = {
- "website": 'https://stock.adobe.com/',
- "wikidata_id": 'Q5977430',
+ "website": "https://stock.adobe.com/",
+ "wikidata_id": "Q5977430",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
- "results": 'JSON',
+ "results": "JSON",
}
-categories = ['images']
+categories = []
paging = True
+send_accept_language_header = True
+results_per_page = 10
-base_url = 'https://stock.adobe.com'
+base_url = "https://stock.adobe.com"
+
+adobe_order: str = ""
+"""Sort order, can be one of:
+
+- ``relevance`` or
+- ``featured`` or
+- ``creation`` (most recent) or
+- ``nb_downloads`` (number of downloads)
+"""
+
+ADOBE_VALID_TYPES = ["photo", "illustration", "zip_vector", "video", "template", "3d", "audio", "image"]
+adobe_content_types: list = []
+"""A list of content types. The following content types are offered:
+
+- Images: ``image``
+- Videos: ``video``
+- Templates: ``template``
+- 3D: ``3d``
+- Audio: ``audio``
+
+Additional subcategories:
+
+- Photos: ``photo``
+- Illustrations: ``illustration``
+- Vectors: ``zip_vector``
+"""
+
+# Do we need support for "free_collection" and "include_stock_enterprise"?
-results_per_page = 10
-adobe_order = "relevance" # one of 'relevant', 'featured', 'creation' or 'nb_downloads'
+
+def init(_):
+ if not categories:
+ raise ValueError("adobe_stock engine: categories is unset")
+
+ # adobe_order
+ if not adobe_order:
+ raise ValueError("adobe_stock engine: adobe_order is unset")
+ if adobe_order not in ["relevance", "featured", "creation", "nb_downloads"]:
+ raise ValueError(f"unsupported adobe_order: {adobe_order}")
+
+ # adobe_content_types
+ if not adobe_content_types:
+ raise ValueError("adobe_stock engine: adobe_content_types is unset")
+
+ if isinstance(adobe_content_types, list):
+ for t in adobe_content_types:
+ if t not in ADOBE_VALID_TYPES:
+ raise ValueError("adobe_stock engine: adobe_content_types: '%s' is invalid" % t)
+ else:
+ raise ValueError(
+ "adobe_stock engine: adobe_content_types must be a list of strings not %s" % type(adobe_content_types)
+ )
def request(query, params):
+
args = {
- 'k': query,
- 'limit': results_per_page,
- 'order': adobe_order,
- 'search_page': params['pageno'],
- 'search_type': 'pagination',
- 'filters[content_type:video]': 0,
- 'filters[content_type:audio]': 0,
+ "k": query,
+ "limit": results_per_page,
+ "order": adobe_order,
+ "search_page": params["pageno"],
+ "search_type": "pagination",
}
- params['url'] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
+
+ for content_type in ADOBE_VALID_TYPES:
+ args[f"filters[content_type:{content_type}]"] = 1 if content_type in adobe_content_types else 0
+
+ params["url"] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
# headers required to bypass bot-detection
- params['headers'] = {
- "User-Agent": gen_useragent(),
- "Accept-Language": "en-US,en;q=0.5",
- }
+ if params["searxng_locale"] == "all":
+ params["headers"]["Accept-Language"] = "en-US,en;q=0.5"
return params
+def parse_image_item(item):
+ return {
+ "template": "images.html",
+ "url": item["content_url"],
+ "title": item["title"],
+ "content": item["asset_type"],
+ "img_src": item["content_thumb_extra_large_url"],
+ "thumbnail_src": item["thumbnail_url"],
+ "resolution": f"{item['content_original_width']}x{item['content_original_height']}",
+ "img_format": item["format"],
+ "author": item["author"],
+ }
+
+
+def parse_video_item(item):
+
+ # in video items, the title is more or less a "content description", we try
+ # to reduce the length of the title ..
+
+ title = item["title"]
+ content = ""
+ if "." in title.strip()[:-1]:
+ content = title
+ title = title.split(".", 1)[0]
+ elif "," in title:
+ content = title
+ title = title.split(",", 1)[0]
+ elif len(title) > 50:
+ content = title
+ title = ""
+ for w in content.split(" "):
+ title += f" {w}"
+ if len(title) > 50:
+ title = title.strip() + "\u2026"
+ break
+
+ return {
+ "template": "videos.html",
+ "url": item["content_url"],
+ "title": title,
+ "content": content,
+ # https://en.wikipedia.org/wiki/ISO_8601#Durations
+ "length": isodate.parse_duration(item["time_duration"]),
+ "publishedDate": datetime.strptime(item["creation_date"], "%Y-%m-%d"),
+ "thumbnail": item["thumbnail_url"],
+ "iframe_src": item["video_small_preview_url"],
+ "metadata": item["asset_type"],
+ }
+
+
+def parse_audio_item(item):
+ audio_data = item["audio_data"]
+ content = audio_data.get("description") or ""
+ if audio_data.get("album"):
+ content = audio_data["album"] + " - " + content
+
+ return {
+ "url": item["content_url"],
+ "title": item["title"],
+ "content": content,
+ # "thumbnail": base_url + item["thumbnail_url"],
+ "iframe_src": audio_data["preview"]["url"],
+ "publishedDate": datetime.fromisoformat(audio_data["release_date"]) if audio_data["release_date"] else None,
+ "length": timedelta(seconds=round(audio_data["duration"] / 1000)) if audio_data["duration"] else None,
+ "author": item.get("artist_name"),
+ }
+
+
def response(resp):
results = []
json_resp = resp.json()
- for item in json_resp['items'].values():
- results.append(
- {
- 'template': 'images.html',
- 'url': item['content_url'],
- 'title': item['title'],
- 'content': '',
- 'img_src': item['content_thumb_extra_large_url'],
- 'thumbnail_src': item['thumbnail_url'],
- 'resolution': f"{item['content_original_width']}x{item['content_original_height']}",
- 'img_format': item['format'],
- 'author': item['author'],
- }
- )
+ if isinstance(json_resp["items"], list):
+ return None
+ for item in json_resp["items"].values():
+ if item["asset_type"].lower() in ["image", "premium-image", "illustration", "vector"]:
+ result = parse_image_item(item)
+ elif item["asset_type"].lower() == "video":
+ result = parse_video_item(item)
+ elif item["asset_type"].lower() == "audio":
+ result = parse_audio_item(item)
+ else:
+ logger.error("no handle for %s --> %s", item["asset_type"], item)
+ continue
+ results.append(result)
return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 0084db902..d27172aef 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -327,9 +327,32 @@ engines:
- name: adobe stock
engine: adobe_stock
- # available search orders: 'relevant', 'featured', 'creation', 'nb_downloads'
- # adobe_order: relevance
- shortcut: as
+ shortcut: asi
+ categories: ["images"]
+ # https://docs.searxng.org/dev/engines/online/adobe_stock.html
+ adobe_order: relevance
+ adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
+ timeout: 6
+ disabled: true
+
+ - name: adobe stock video
+ engine: adobe_stock
+ shortcut: asv
+ network: adobe stock
+ categories: ["videos"]
+ adobe_order: relevance
+ adobe_content_types: ["video"]
+ timeout: 6
+ disabled: true
+
+ - name: adobe stock audio
+ engine: adobe_stock
+ shortcut: asa
+ network: adobe stock
+ categories: ["music"]
+ adobe_order: relevance
+ adobe_content_types: ["audio"]
+ timeout: 6
disabled: true
- name: alpine linux packages