diff options
author | Bnyro <bnyro@tutanota.com> | 2023-08-08 10:54:48 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2023-08-10 20:27:54 +0200 |
commit | e25d1c7288417a5a3773e0b592900156b246234b (patch) | |
tree | 6a36c7bcf30ff89b128a4678b73d0fc74c7d2556 /searx/engines | |
parent | 2256ba2ffbf625b03432c54352e5e05f6eeb9150 (diff) | |
download | searxng-e25d1c7288417a5a3773e0b592900156b246234b.tar.gz searxng-e25d1c7288417a5a3773e0b592900156b246234b.zip |
[feat] engine: implementation of German news, Tagesschau
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/tagesschau.py | 101 |
1 files changed, 101 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""ARD: `Tagesschau API`_

The Tagesschau is a news program of the ARD. Via the `Tagesschau API`_, current
news and media reports are available in JSON format. The `Bundesstelle für Open
Data`_ offers a `OpenAPI`_ portal at bundDEV_ where APIs are documented and can
be tested.

This SearXNG engine uses the `/api2u/search`_ API.

.. _/api2u/search: http://tagesschau.api.bund.dev/
.. _bundDEV: https://bund.dev/apis
.. _Bundesstelle für Open Data: https://github.com/bundesAPI
.. _Tagesschau API: https://github.com/AndreasFischer1985/tagesschau-api/blob/main/README_en.md
.. _OpenAPI: https://swagger.io/specification/

"""
from typing import TYPE_CHECKING

from datetime import datetime
from urllib.parse import urlencode
import re

if TYPE_CHECKING:
    import logging

    logger: logging.Logger

about = {
    'website': "https://tagesschau.de",
    'wikidata_id': "Q703907",
    'official_api_documentation': None,
    'use_official_api': True,
    'require_api_key': False,
    'results': 'JSON',
    'language': 'de',
}
categories = ['general', 'news']
paging = True

results_per_page = 10
base_url = "https://www.tagesschau.de"

# Keys looked up in a video item's ``streams`` mapping, in order of preference.
# 'h264s' is the variant this engine has always used; the other keys are
# fallbacks for items that do not offer it.
# NOTE(review): fallback key names are taken from the Tagesschau API
# description -- confirm against live responses.
_video_stream_keys = ('h264s', 'h264m', 'h264l', 'h264xl')


def request(query, params):
    """Build the search URL for ``query`` and store it in ``params['url']``.

    ``params['pageno']`` is decremented by one before it is sent as
    ``resultPage``.  The (mutated) ``params`` dict is returned.
    """
    args = {
        'searchText': query,
        'pageSize': results_per_page,
        'resultPage': params['pageno'] - 1,
    }

    params['url'] = f"{base_url}/api2u/search?{urlencode(args)}"

    return params


def response(resp):
    """Convert the JSON payload of ``resp`` into a list of result dicts.

    Items of type ``story`` / ``webview`` become default results, items of
    type ``video`` become ``videos.html`` results.  Video items without any
    usable stream are skipped; items of any other type are logged and skipped.
    """
    results = []

    data = resp.json()

    for item in data['searchResults']:
        item_type = item.get('type')
        if item_type in ('story', 'webview'):
            results.append(_story(item))
        elif item_type == 'video':
            video = _video(item)
            # A video without a stream URL has nothing to link to.
            if video is not None:
                results.append(video)
        else:
            logger.error("unknow result type: %s", item_type)

    return results


def _thumbnail(item):
    """Return the ``16x9-256`` teaser image URL of ``item`` or ``None``.

    Tolerates ``teaserImage`` / ``imageVariants`` being absent or null.
    """
    teaser = item.get('teaserImage') or {}
    variants = teaser.get('imageVariants') or {}
    return variants.get('16x9-256')


def _published_date(item):
    """Parse ``item['date']`` into a naive :py:class:`datetime`.

    Only the first 19 characters (``YYYY-MM-DDTHH:MM:SS``) are parsed; whatever
    follows in the string is ignored.
    """
    return datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S')


def _story(item):
    """Build a result dict from a ``story`` / ``webview`` item."""
    return {
        'title': item['title'],
        'thumbnail': _thumbnail(item),
        'publishedDate': _published_date(item),
        # Same lenient lookup as in _video(): a missing teaser sentence must
        # not abort the whole result page.
        'content': item.get('firstSentence', ''),
        'url': item['shareURL'],
    }


def _video(item):
    """Build a ``videos.html`` result dict from a ``video`` item.

    Returns ``None`` when the item offers none of the known stream variants.
    """
    streams = item.get('streams') or {}
    video_url = next((streams[key] for key in _video_stream_keys if streams.get(key)), None)
    if video_url is None:
        return None

    title = item['title']

    # Titles containing "_vapp.mxf" get that marker and the first
    # "APP<digits> " / "APP<digits> FC-" occurrence removed.
    if "_vapp.mxf" in title:
        title = title.replace("_vapp.mxf", "")
        title = re.sub(r"APP\d+ (FC-)?", "", title, count=1)

    return {
        'template': 'videos.html',
        'title': title,
        'thumbnail': _thumbnail(item),
        'publishedDate': _published_date(item),
        'content': item.get('firstSentence', ''),
        'iframe_src': video_url,
        'url': video_url,
    }