summaryrefslogtreecommitdiff
path: root/searx/engines/crossref.py
diff options
context:
space:
mode:
authorAlexandre FLAMENT <alexandre.flament@hesge.ch>2022-08-26 16:10:12 +0000
committerAlexandre Flament <alex@al-f.net>2022-09-23 20:45:58 +0200
commite36f85b8365e5d6a9263dd78242a10a305a9000c (patch)
treefce8f3e33d26847b004c20c378fb3fa35ca2c8eb /searx/engines/crossref.py
parent593026ad9cd024fd7b3182d48f274aa41b374c74 (diff)
downloadsearxng-e36f85b8365e5d6a9263dd78242a10a305a9000c.tar.gz
searxng-e36f85b8365e5d6a9263dd78242a10a305a9000c.zip
Science category: update the engines
* use the paper.html template * fetch more data from the engines * add crossref.py
Diffstat (limited to 'searx/engines/crossref.py')
-rw-r--r--searx/engines/crossref.py59
1 files changed, 59 insertions, 0 deletions
diff --git a/searx/engines/crossref.py b/searx/engines/crossref.py
new file mode 100644
index 000000000..d61318146
--- /dev/null
+++ b/searx/engines/crossref.py
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""CrossRef (Science)
+"""
+
+from urllib.parse import urlencode
+from searx.utils import html_to_text
+
+# Engine metadata.
+# NOTE(review): request() below queries api.crossref.org, which looks like the
+# official REST API -- confirm whether 'use_official_api' should really be False.
+about = {
+    'website': 'https://www.crossref.org/',
+    'wikidata_id': 'Q5188229',
+    'official_api_documentation': 'https://github.com/CrossRef/rest-api-doc',
+    'use_official_api': False,
+    'require_api_key': False,
+    'results': 'JSON',
+}
+
+# Base URL of the works endpoint; request() appends the query string to it.
+search_url = 'https://api.crossref.org/works'
+
+# request() reads params['pageno'] to compute the result offset.
+paging = True
+categories = ['science', 'scientific publications']
+
+
+def request(query, params):
+    """Store the CrossRef search URL for *query* in ``params['url']``.
+
+    The result offset advances by 20 for every page after the first, based on
+    the 1-based ``params['pageno']`` (presumably matching the API's default page
+    size -- confirm against the CrossRef documentation).
+
+    Returns the same *params* dict that was passed in.
+    """
+    page_index = params['pageno'] - 1
+    query_string = urlencode({'query': query, 'offset': 20 * page_index})
+    params['url'] = '?'.join((search_url, query_string))
+    return params
+
+
+def _first(values):
+    """Return the first item of *values*, or ``None`` when it is missing or empty."""
+    return values[0] if values else None
+
+
+def _author_name(author):
+    """Build a display name from one entry of a record's ``author`` list."""
+    name_parts = (author.get('given'), author.get('family'))
+    full_name = ' '.join(part for part in name_parts if part)
+    # Entries without given/family parts may carry a single 'name' instead
+    # (presumably organisations -- confirm against the CrossRef API docs).
+    return full_name or author.get('name', '')
+
+
+def response(resp):
+    """Convert a CrossRef ``/works`` JSON response into ``paper.html`` results.
+
+    Iterates over ``message.items`` of the decoded JSON body and returns a list
+    with one result dict per record.  Records that have neither a title nor a
+    container title are skipped instead of aborting the whole page.
+
+    Raises ``KeyError`` if the body lacks ``message.items`` or a record lacks
+    ``type``, ``DOI`` or (when no primary resource URL is present) ``URL``.
+    """
+    results = []
+    for record in resp.json()['message']['items']:
+        record_type = record['type']
+        # 'title' and 'container-title' are lists that can be absent or empty.
+        record_title = _first(record.get('title'))
+        container_title = _first(record.get('container-title'))
+
+        if record_type == 'book-chapter':
+            # Lead with the book title and append the chapter title in
+            # parentheses, unless both are the same text.
+            title = container_title or record_title
+            if title and record_title and record_title.lower().strip() != title.lower().strip():
+                title = title + ' (' + record_title + ')'
+            journal = None
+        else:
+            # Untitled records fall back to the title of their container.
+            title = record_title or container_title
+            journal = container_title
+
+        if not title:
+            # Nothing to show as a headline for this record.
+            continue
+
+        # Prefer the primary resource URL; fall back to the record's own URL.
+        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
+        authors = [name for name in map(_author_name, record.get('author', [])) if name]
+        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]
+        results.append(
+            {
+                'template': 'paper.html',
+                'url': url,
+                'title': title,
+                'journal': journal,
+                'volume': record.get('volume'),
+                'type': record_type,
+                'content': html_to_text(record.get('abstract', '')),
+                'publisher': record.get('publisher'),
+                'authors': authors,
+                'doi': record['DOI'],
+                'isbn': isbn,
+            }
+        )
+    return results