summaryrefslogtreecommitdiff
path: root/searx/engines/semantic_scholar.py
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2022-03-04 22:00:59 +0100
committerMarkus Heiser <markus.heiser@darmarit.de>2022-03-05 11:53:41 +0100
commit20f4538e1369cb4933c579dceccc56439b9bf7a6 (patch)
tree43fce7b6cef0ea9c3d398cbaa5204880d78c8ad5 /searx/engines/semantic_scholar.py
parent927aa71133839ce5713b742b43285953301adf36 (diff)
downloadsearxng-20f4538e1369cb4933c579dceccc56439b9bf7a6.tar.gz
searxng-20f4538e1369cb4933c579dceccc56439b9bf7a6.zip
[fix] engine: Semantic Scholar (Science) // rework & fix
Closes: https://github.com/searxng/searxng/issues/939

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/semantic_scholar.py')
-rw-r--r-- searx/engines/semantic_scholar.py | 53
1 file changed, 44 insertions(+), 9 deletions(-)
diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py
index 5d9d1a8e9..bda731047 100644
--- a/searx/engines/semantic_scholar.py
+++ b/searx/engines/semantic_scholar.py
@@ -1,12 +1,23 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Semantic Scholar (Science)
+# lint: pylint
+"""Semantic Scholar (Science)
"""
from json import dumps, loads
+from datetime import datetime
+about = {
+ "website": 'https://www.semanticscholar.org/',
+ "wikidata_id": 'Q22908627',
+ "official_api_documentation": 'https://api.semanticscholar.org/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+paging = True
search_url = 'https://www.semanticscholar.org/api/1/search'
+paper_url = 'https://www.semanticscholar.org/paper'
def request(query, params):
@@ -34,13 +45,37 @@ def request(query, params):
def response(resp):
res = loads(resp.text)
results = []
+
for result in res['results']:
- results.append(
- {
- 'url': result['primaryPaperLink']['url'],
- 'title': result['title']['text'],
- 'content': result['paperAbstractTruncated'],
- }
- )
+ item = {}
+ metadata = []
+
+ url = result.get('primaryPaperLink', {}).get('url')
+ if not url and result.get('links'):
+ url = result.get('links')[0]
+ if not url:
+ alternatePaperLinks = result.get('alternatePaperLinks')
+ if alternatePaperLinks:
+ url = alternatePaperLinks[0].get('url')
+ if not url:
+ url = paper_url + '/%s' % result['id']
+
+ item['url'] = url
+
+ item['title'] = result['title']['text']
+ item['content'] = result['paperAbstract']['text']
+
+ metadata = result.get('fieldsOfStudy') or []
+ venue = result.get('venue', {}).get('text')
+ if venue:
+ metadata.append(venue)
+ if metadata:
+ item['metadata'] = ', '.join(metadata)
+
+ pubDate = result.get('pubDate')
+ if pubDate:
+ item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
+
+ results.append(item)
return results