diff options
author | Bnyro <bnyro@tutanota.com> | 2024-10-02 18:39:21 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2024-10-15 13:06:00 +0200 |
commit | 9f48d5f84fbc7c48ebe9513cebf0d6e02049ca60 (patch) | |
tree | 54dcdb5340c604facd4eaa46f8d2b23209414e08 /searx/engines/openlibrary.py | |
parent | 3e87354f0ea62d5e2cf22bba136f0e5fdf71cb81 (diff) | |
download | searxng-9f48d5f84fbc7c48ebe9513cebf0d6e02049ca60.tar.gz searxng-9f48d5f84fbc7c48ebe9513cebf0d6e02049ca60.zip |
[feat] engine: support for openlibrary
Diffstat (limited to 'searx/engines/openlibrary.py')
-rw-r--r-- | searx/engines/openlibrary.py | 71 |
1 files changed, 71 insertions, 0 deletions
diff --git a/searx/engines/openlibrary.py b/searx/engines/openlibrary.py new file mode 100644 index 000000000..cc1f53541 --- /dev/null +++ b/searx/engines/openlibrary.py @@ -0,0 +1,71 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Open library (books) +""" +from urllib.parse import urlencode +import re + +from dateutil import parser + +about = { + 'website': 'https://openlibrary.org', + 'wikidata_id': 'Q1201876', + 'require_api_key': False, + 'use_official_api': False, + 'official_api_documentation': 'https://openlibrary.org/developers/api', +} + +paging = True +categories = [] + +base_url = "https://openlibrary.org" +results_per_page = 10 + + +def request(query, params): + args = { + 'q': query, + 'page': params['pageno'], + 'limit': results_per_page, + } + params['url'] = f"{base_url}/search.json?{urlencode(args)}" + return params + + +def _parse_date(date): + try: + return parser.parse(date) + except parser.ParserError: + return None + + +def response(resp): + results = [] + + for item in resp.json().get("docs", []): + cover = None + if 'lending_identifier_s' in item: + cover = f"https://archive.org/services/img/{item['lending_identifier_s']}" + + published = item.get('publish_date') + if published: + published_dates = [date for date in map(_parse_date, published) if date] + if published_dates: + published = min(published_dates) + + if not published: + published = parser.parse(str(item.get('first_published_year'))) + + result = { + 'template': 'paper.html', + 'url': f"{base_url}{item['key']}", + 'title': item['title'], + 'content': re.sub(r"\{|\}", "", item['first_sentence'][0]) if item.get('first_sentence') else '', + 'isbn': item.get('isbn', [])[:5], + 'authors': item.get('author_name', []), + 'thumbnail': cover, + 'publishedDate': published, + 'tags': item.get('subject', [])[:10] + item.get('place', [])[:10], + } + results.append(result) + + return results |