authorBnyro <bnyro@tutanota.com>2023-08-08 10:54:48 +0200
committerMarkus Heiser <markus.heiser@darmarIT.de>2023-08-10 20:27:54 +0200
commite25d1c7288417a5a3773e0b592900156b246234b (patch)
tree6a36c7bcf30ff89b128a4678b73d0fc74c7d2556 /searx/engines
parent2256ba2ffbf625b03432c54352e5e05f6eeb9150 (diff)
downloadsearxng-e25d1c7288417a5a3773e0b592900156b246234b.tar.gz
searxng-e25d1c7288417a5a3773e0b592900156b246234b.zip
[feat] engine: implementation of German news, Tagesschau
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines')
-rw-r--r--searx/engines/tagesschau.py106
1 files changed, 106 insertions, 0 deletions
diff --git a/searx/engines/tagesschau.py b/searx/engines/tagesschau.py
new file mode 100644
index 000000000..4a36747c8
--- /dev/null
+++ b/searx/engines/tagesschau.py
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""ARD: `Tagesschau API`_
+
+The Tagesschau is a news program of the ARD. Via the `Tagesschau API`_, current
+news and media reports are available in JSON format. The `Bundesstelle für Open
+Data`_ offers an `OpenAPI`_ portal at bundDEV_ where APIs are documented and can
+be tested.
+
+This SearXNG engine uses the `/api2u/search`_ API.
+
+.. _/api2u/search: http://tagesschau.api.bund.dev/
+.. _bundDEV: https://bund.dev/apis
+.. _Bundesstelle für Open Data: https://github.com/bundesAPI
+.. _Tagesschau API: https://github.com/AndreasFischer1985/tagesschau-api/blob/main/README_en.md
+.. _OpenAPI: https://swagger.io/specification/
+
+"""
+from typing import TYPE_CHECKING
+
+from datetime import datetime
+from urllib.parse import urlencode
+import re
+
+if TYPE_CHECKING:
+ import logging
+
+ logger: logging.Logger
+
+about = {
+ 'website': "https://tagesschau.de",
+ 'wikidata_id': "Q703907",
+ 'official_api_documentation': None,
+ 'use_official_api': True,
+ 'require_api_key': False,
+ 'results': 'JSON',
+ 'language': 'de',
+}
+categories = ['general', 'news']
+paging = True
+
+results_per_page = 10
+base_url = "https://www.tagesschau.de"
+
+
+def request(query, params):
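+    """Assemble the paged ``/api2u/search`` request in ``params['url']``."""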
+ args = {
+ 'searchText': query,
+ 'pageSize': results_per_page,
+        'resultPage': params['pageno'] - 1,  # the API counts result pages from zero, SearXNG from one
+ }
+
+ params['url'] = f"{base_url}/api2u/search?{urlencode(args)}"
+
+ return params
+
+
+def response(resp):
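+    """Parse the JSON answer and map each item in ``searchResults`` onto a SearXNG result."""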
+ results = []
+
+ json = resp.json()
+
+ for item in json['searchResults']:
+ item_type = item.get('type')
+ if item_type in ('story', 'webview'):
+ results.append(_story(item))
+ elif item_type == 'video':
+ results.append(_video(item))
+ else:
+            logger.error("unknown result type: %s", item_type)
+
+ return results
+
+
+def _story(item):
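+    """Map a ``story`` or ``webview`` item onto a default (article) result."""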
+ return {
+ 'title': item['title'],
+ 'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
+ 'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
+ 'content': item['firstSentence'],
+ 'url': item['shareURL'],
+ }
+
+
+def _video(item):
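+    """Map a ``video`` item onto a result for the ``videos.html`` template."""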
+ video_url = item['streams']['h264s']
+ title = item['title']
+
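+    # Raw video titles may carry production artifacts ("_vapp.mxf" suffix, "APP<n> FC-" prefix).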
+ if "_vapp.mxf" in title:
+ title = title.replace("_vapp.mxf", "")
+ title = re.sub(r"APP\d+ (FC-)?", "", title, count=1)
+
+ return {
+ 'template': 'videos.html',
+ 'title': title,
+ 'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
+ 'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
+ 'content': item.get('firstSentence', ''),
+ 'iframe_src': video_url,
+ 'url': video_url,
+ }
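
A minimal usage sketch (not part of the patch): it loads the new module straight from the file and drives the two engine hooks by hand, assuming it is run from a SearXNG checkout. The query string and the mocked API item are made up for illustration only; SearXNG itself calls these hooks through its engine framework.

import importlib.util
from types import SimpleNamespace

# Load the module directly from the patched file; it only needs the stdlib.
spec = importlib.util.spec_from_file_location("tagesschau", "searx/engines/tagesschau.py")
tagesschau = importlib.util.module_from_spec(spec)
spec.loader.exec_module(tagesschau)

# Request phase: the engine only fills in params['url'].
params = {'pageno': 1}
tagesschau.request("olympia", params)  # "olympia" is an arbitrary example query
print(params['url'])
# https://www.tagesschau.de/api2u/search?searchText=olympia&pageSize=10&resultPage=0

# Response phase, fed with a mocked HTTP response holding one invented story item.
mocked = SimpleNamespace(json=lambda: {'searchResults': [{
    'type': 'story',
    'title': 'Beispielmeldung',
    'date': '2023-08-08T10:54:48.000+02:00',
    'firstSentence': 'Erster Satz der Meldung.',
    'shareURL': 'https://www.tagesschau.de/inland/beispiel-100.html',
}]})
for result in tagesschau.response(mocked):
    print(result['title'], result['url'], result['publishedDate'])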