4 files changed, 237 insertions, 38 deletions
diff --git a/docs/dev/engines/online/adobe_stock.rst b/docs/dev/engines/online/adobe_stock.rst
new file mode 100644
index 000000000..48a6511c0
--- /dev/null
+++ b/docs/dev/engines/online/adobe_stock.rst
@@ -0,0 +1,13 @@
+.. _adobe stock engine:
+
+===========
+Adobe Stock
+===========
+
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+.. automodule:: searx.engines.adobe_stock
+   :members:
diff --git a/requirements.txt b/requirements.txt
index 5225565e6..03babb7a8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,3 +19,4 @@ tomli==2.0.2; python_version < '3.11'
 msgspec==0.18.6
 eval_type_backport; python_version < '3.9'
 typer-slim==0.13.1
+isodate==0.7.2
diff --git a/searx/engines/adobe_stock.py b/searx/engines/adobe_stock.py
index f1b8e13ef..e6664d273 100644
--- a/searx/engines/adobe_stock.py
+++ b/searx/engines/adobe_stock.py
@@ -1,67 +1,229 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""Adobe Stock (images)
+"""`Adobe Stock`_ is a service that gives access to millions of royalty-free
+assets. Asset types include photos, vectors, illustrations, templates, 3D
+assets, videos, motion graphics templates and audio tracks.
+
+.. _Adobe Stock: https://stock.adobe.com/
+
+Configuration
+=============
+
+The engine has the following mandatory settings:
+
+- SearXNG's :ref:`engine categories`
+- Adobe-Stock's :py:obj:`adobe_order`
+- Adobe-Stock's :py:obj:`adobe_content_types`
+
+.. code:: yaml
+
+  - name: adobe stock
+    engine: adobe_stock
+    shortcut: asi
+    categories: [images]
+    adobe_order: relevance
+    adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
+
+  - name: adobe stock video
+    engine: adobe_stock
+    network: adobe stock
+    shortcut: asv
+    categories: [videos]
+    adobe_order: relevance
+    adobe_content_types: ["video"]
+
+Implementation
+==============
+
 """
+from __future__ import annotations
 
+from typing import TYPE_CHECKING
+from datetime import datetime, timedelta
 from urllib.parse import urlencode
-from searx.utils import gen_useragent
+
+import isodate
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
 
 about = {
-    "website": 'https://stock.adobe.com/',
-    "wikidata_id": 'Q5977430',
+    "website": "https://stock.adobe.com/",
+    "wikidata_id": "Q5977430",
     "official_api_documentation": None,
     "use_official_api": False,
     "require_api_key": False,
-    "results": 'JSON',
+    "results": "JSON",
 }
 
-categories = ['images']
+categories = []
 paging = True
+send_accept_language_header = True
+results_per_page = 10
 
-base_url = 'https://stock.adobe.com'
+base_url = "https://stock.adobe.com"
+
+adobe_order: str = ""
+"""Sort order, can be one of:
+
+- ``relevance`` or
+- ``featured`` or
+- ``creation`` (most recent) or
+- ``nb_downloads`` (number of downloads)
+"""
+
+ADOBE_VALID_TYPES = ["photo", "illustration", "zip_vector", "video", "template", "3d", "audio", "image"]
+adobe_content_types: list = []
+"""A list of content types.  The following content types are offered:
+
+- Images: ``image``
+- Videos: ``video``
+- Templates: ``template``
+- 3D: ``3d``
+- Audio: ``audio``
+
+Additional subcategories:
+
+- Photos: ``photo``
+- Illustrations: ``illustration``
+- Vectors: ``zip_vector``
+"""
+
+# Do we need support for "free_collection" and "include_stock_enterprise"?
 
-results_per_page = 10
-adobe_order = "relevance"  # one of 'relevant', 'featured', 'creation' or 'nb_downloads'
+
+def init(_):
+    if not categories:
+        raise ValueError("adobe_stock engine: categories is unset")
+
+    # adobe_order
+    if not adobe_order:
+        raise ValueError("adobe_stock engine: adobe_order is unset")
+    if adobe_order not in ["relevance", "featured", "creation", "nb_downloads"]:
+        raise ValueError(f"unsupported adobe_order: {adobe_order}")
+
+    # adobe_content_types
+    if not adobe_content_types:
+        raise ValueError("adobe_stock engine: adobe_content_types is unset")
+
+    if isinstance(adobe_content_types, list):
+        for t in adobe_content_types:
+            if t not in ADOBE_VALID_TYPES:
+                raise ValueError("adobe_stock engine: adobe_content_types: '%s' is invalid" % t)
+    else:
+        raise ValueError(
+            "adobe_stock engine: adobe_content_types must be a list of strings not %s" % type(adobe_content_types)
+        )
 
 
 def request(query, params):
+
     args = {
-        'k': query,
-        'limit': results_per_page,
-        'order': adobe_order,
-        'search_page': params['pageno'],
-        'search_type': 'pagination',
-        'filters[content_type:video]': 0,
-        'filters[content_type:audio]': 0,
+        "k": query,
+        "limit": results_per_page,
+        "order": adobe_order,
+        "search_page": params["pageno"],
+        "search_type": "pagination",
     }
-    params['url'] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
+
+    for content_type in ADOBE_VALID_TYPES:
+        args[f"filters[content_type:{content_type}]"] = 1 if content_type in adobe_content_types else 0
+
+    params["url"] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
 
     # headers required to bypass bot-detection
-    params['headers'] = {
-        "User-Agent": gen_useragent(),
-        "Accept-Language": "en-US,en;q=0.5",
-    }
+    if params["searxng_locale"] == "all":
+        params["headers"]["Accept-Language"] = "en-US,en;q=0.5"
 
     return params
 
 
+def parse_image_item(item):
+    return {
+        "template": "images.html",
+        "url": item["content_url"],
+        "title": item["title"],
+        "content": item["asset_type"],
+        "img_src": item["content_thumb_extra_large_url"],
+        "thumbnail_src": item["thumbnail_url"],
+        "resolution": f"{item['content_original_width']}x{item['content_original_height']}",
+        "img_format": item["format"],
+        "author": item["author"],
+    }
+
+
+def parse_video_item(item):
+
+    # in video items, the title is more or less a "content description", we try
+    # to reduce the length of the title ..
+
+    title = item["title"]
+    content = ""
+    if "." in title.strip()[:-1]:
+        content = title
+        title = title.split(".", 1)[0]
+    elif "," in title:
+        content = title
+        title = title.split(",", 1)[0]
+    elif len(title) > 50:
+        content = title
+        title = ""
+        for w in content.split(" "):
+            title += f" {w}"
+            if len(title) > 50:
+                title = title.strip() + "\u2026"
+                break
+
+    return {
+        "template": "videos.html",
+        "url": item["content_url"],
+        "title": title,
+        "content": content,
+        # https://en.wikipedia.org/wiki/ISO_8601#Durations
+        "length": isodate.parse_duration(item["time_duration"]),
+        "publishedDate": datetime.strptime(item["creation_date"], "%Y-%m-%d"),
+        "thumbnail": item["thumbnail_url"],
+        "iframe_src": item["video_small_preview_url"],
+        "metadata": item["asset_type"],
+    }
+
+
+def parse_audio_item(item):
+    audio_data = item["audio_data"]
+    content = audio_data.get("description") or ""
+    if audio_data.get("album"):
+        content = audio_data["album"] + " - " + content
+
+    return {
+        "url": item["content_url"],
+        "title": item["title"],
+        "content": content,
+        # "thumbnail": base_url + item["thumbnail_url"],
+        "iframe_src": audio_data["preview"]["url"],
+        "publishedDate": datetime.fromisoformat(audio_data["release_date"]) if audio_data["release_date"] else None,
+        "length": timedelta(seconds=round(audio_data["duration"] / 1000)) if audio_data["duration"] else None,
+        "author": item.get("artist_name"),
+    }
+
+
 def response(resp):
     results = []
 
     json_resp = resp.json()
 
-    for item in json_resp['items'].values():
-        results.append(
-            {
-                'template': 'images.html',
-                'url': item['content_url'],
-                'title': item['title'],
-                'content': '',
-                'img_src': item['content_thumb_extra_large_url'],
-                'thumbnail_src': item['thumbnail_url'],
-                'resolution': f"{item['content_original_width']}x{item['content_original_height']}",
-                'img_format': item['format'],
-                'author': item['author'],
-            }
-        )
+    if isinstance(json_resp["items"], list):
+        return None
+    for item in json_resp["items"].values():
+        if item["asset_type"].lower() in ["image", "premium-image", "illustration", "vector"]:
+            result = parse_image_item(item)
+        elif item["asset_type"].lower() == "video":
+            result = parse_video_item(item)
+        elif item["asset_type"].lower() == "audio":
+            result = parse_audio_item(item)
+        else:
+            logger.error("no handle for %s --> %s", item["asset_type"], item)
+            continue
+        results.append(result)
 
     return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 0084db902..d27172aef 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -327,9 +327,32 @@ engines:
 
   - name: adobe stock
     engine: adobe_stock
-    # available search orders: 'relevant', 'featured', 'creation', 'nb_downloads'
-    # adobe_order: relevance
-    shortcut: as
+    shortcut: asi
+    categories: ["images"]
+    # https://docs.searxng.org/dev/engines/online/adobe_stock.html
+    adobe_order: relevance
+    adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
+    timeout: 6
+    disabled: true
+
+  - name: adobe stock video
+    engine: adobe_stock
+    shortcut: asv
+    network: adobe stock
+    categories: ["videos"]
+    adobe_order: relevance
+    adobe_content_types: ["video"]
+    timeout: 6
+    disabled: true
+
+  - name: adobe stock audio
+    engine: adobe_stock
+    shortcut: asa
+    network: adobe stock
+    categories: ["music"]
+    adobe_order: relevance
+    adobe_content_types: ["audio"]
+    timeout: 6
     disabled: true
 
   - name: alpine linux packages