diff options
author | Alexandre FLAMENT <alexandre.flament@hesge.ch> | 2022-08-26 16:10:12 +0000 |
---|---|---|
committer | Alexandre Flament <alex@al-f.net> | 2022-09-23 20:45:58 +0200 |
commit | e36f85b8365e5d6a9263dd78242a10a305a9000c (patch) | |
tree | fce8f3e33d26847b004c20c378fb3fa35ca2c8eb /searx/engines/crossref.py | |
parent | 593026ad9cd024fd7b3182d48f274aa41b374c74 (diff) | |
download | searxng-e36f85b8365e5d6a9263dd78242a10a305a9000c.tar.gz searxng-e36f85b8365e5d6a9263dd78242a10a305a9000c.zip |
Science category: update the engines
* use the paper.html template
* fetch more data from the engines
* add crossref.py
Diffstat (limited to 'searx/engines/crossref.py')
-rw-r--r-- | searx/engines/crossref.py | 59 |
1 files changed, 59 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Crossref (Science)

Sends the user query to the Crossref ``/works`` endpoint and converts every
record of the JSON answer into a ``paper.html`` result.
"""

from urllib.parse import urlencode
from searx.utils import html_to_text

about = {
    "website": 'https://www.crossref.org/',
    "wikidata_id": 'Q5188229',
    "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['science', 'scientific publications']
paging = True
search_url = 'https://api.crossref.org/works'


def request(query, params):
    """Build the request URL for ``query`` and the page ``params['pageno']``.

    The URL is stored in ``params['url']`` and ``params`` is returned.
    """
    # No page size is sent with the request, so the offset advances by a fixed
    # 20 records per page.
    # NOTE(review): 20 is presumably the API's default page size -- confirm
    # against the Crossref documentation.
    args = {'query': query, 'offset': 20 * (params['pageno'] - 1)}
    params['url'] = search_url + '?' + urlencode(args)
    return params


def _first(record, key):
    """Return the first entry of the list stored under ``key`` in ``record``,
    or ``None`` when the key is missing or the list is empty."""
    values = record.get(key)
    return values[0] if values else None


def _author_names(record):
    """Return the display names ("given family") of the authors of ``record``.

    Missing name parts are left out instead of producing stray blanks, and
    authors without any of the two parts are skipped.
    """
    names = []
    for author in record.get('author', []):
        name = ' '.join(part for part in (author.get('given'), author.get('family')) if part)
        if name:
            names.append(name)
    return names


def response(resp):
    """Convert the JSON answer ``resp`` into a list of ``paper.html`` results.

    Records that carry neither a title nor a container title are skipped, so a
    single incomplete record does not abort the whole result page.
    """
    res = resp.json()
    results = []
    for record in res['message']['items']:
        record_type = record['type']
        item_title = _first(record, 'title')
        container_title = _first(record, 'container-title')

        if record_type == 'book-chapter' and container_title:
            # A chapter is listed under the title of its book; the chapter
            # title is appended unless it merely repeats the book title.
            title = container_title
            if item_title and item_title.lower().strip() != title.lower().strip():
                title = title + ' (' + item_title + ')'
            journal = None
        elif item_title:
            title = item_title
            journal = container_title
        elif container_title:
            # No title of its own: fall back to the container title.
            title = container_title
            journal = None
        else:
            # Nothing to display for this record.
            continue

        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]
        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': title,
                'journal': journal,
                'volume': record.get('volume'),
                'type': record_type,
                'content': html_to_text(record.get('abstract', '')),
                'publisher': record.get('publisher'),
                'authors': _author_names(record),
                'doi': record['DOI'],
                'isbn': isbn,
            }
        )
    return results