diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2022-03-04 22:00:59 +0100 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2022-03-05 11:53:41 +0100 |
commit | 20f4538e1369cb4933c579dceccc56439b9bf7a6 (patch) | |
tree | 43fce7b6cef0ea9c3d398cbaa5204880d78c8ad5 /searx/engines/semantic_scholar.py | |
parent | 927aa71133839ce5713b742b43285953301adf36 (diff) | |
download | searxng-20f4538e1369cb4933c579dceccc56439b9bf7a6.tar.gz searxng-20f4538e1369cb4933c579dceccc56439b9bf7a6.zip |
[fix] engine: Semantic Scholar (Science) // rework & fix
Closes: https://github.com/searxng/searxng/issues/939
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/semantic_scholar.py')
-rw-r--r-- | searx/engines/semantic_scholar.py | 53 |
1 file changed, 44 insertions, 9 deletions
```diff
diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py
index 5d9d1a8e9..bda731047 100644
--- a/searx/engines/semantic_scholar.py
+++ b/searx/engines/semantic_scholar.py
@@ -1,12 +1,23 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Semantic Scholar (Science)
+# lint: pylint
+"""Semantic Scholar (Science)
 """
 
 from json import dumps, loads
+from datetime import datetime
 
+about = {
+    "website": 'https://www.semanticscholar.org/',
+    "wikidata_id": 'Q22908627',
+    "official_api_documentation": 'https://api.semanticscholar.org/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
 
+paging = True
 search_url = 'https://www.semanticscholar.org/api/1/search'
+paper_url = 'https://www.semanticscholar.org/paper'
 
 
 def request(query, params):
@@ -34,13 +45,37 @@ def request(query, params):
 def response(resp):
     res = loads(resp.text)
     results = []
+
     for result in res['results']:
-        results.append(
-            {
-                'url': result['primaryPaperLink']['url'],
-                'title': result['title']['text'],
-                'content': result['paperAbstractTruncated'],
-            }
-        )
+        item = {}
+        metadata = []
+
+        url = result.get('primaryPaperLink', {}).get('url')
+        if not url and result.get('links'):
+            url = result.get('links')[0]
+        if not url:
+            alternatePaperLinks = result.get('alternatePaperLinks')
+            if alternatePaperLinks:
+                url = alternatePaperLinks[0].get('url')
+        if not url:
+            url = paper_url + '/%s' % result['id']
+
+        item['url'] = url
+
+        item['title'] = result['title']['text']
+        item['content'] = result['paperAbstract']['text']
+
+        metadata = result.get('fieldsOfStudy') or []
+        venue = result.get('venue', {}).get('text')
+        if venue:
+            metadata.append(venue)
+        if metadata:
+            item['metadata'] = ', '.join(metadata)
+
+        pubDate = result.get('pubDate')
+        if pubDate:
+            item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
+
+        results.append(item)
 
     return results
```