summaryrefslogtreecommitdiff
path: root/searx/engines/semantic_scholar.py
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2022-03-04 22:00:59 +0100
committerMarkus Heiser <markus.heiser@darmarit.de>2022-03-05 11:53:41 +0100
commit20f4538e1369cb4933c579dceccc56439b9bf7a6 (patch)
tree43fce7b6cef0ea9c3d398cbaa5204880d78c8ad5 /searx/engines/semantic_scholar.py
parent927aa71133839ce5713b742b43285953301adf36 (diff)
downloadsearxng-20f4538e1369cb4933c579dceccc56439b9bf7a6.tar.gz
searxng-20f4538e1369cb4933c579dceccc56439b9bf7a6.zip
[fix] engine: Semantic Scholar (Science) // rework & fix
Closes: https://github.com/searxng/searxng/issues/939

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/semantic_scholar.py')
-rw-r--r-- searx/engines/semantic_scholar.py | 53
1 file changed, 44 insertions(+), 9 deletions(-)
diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py
index 5d9d1a8e9..bda731047 100644
--- a/searx/engines/semantic_scholar.py
+++ b/searx/engines/semantic_scholar.py
@@ -1,12 +1,23 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Semantic Scholar (Science)
+# lint: pylint
+"""Semantic Scholar (Science)
"""
from json import dumps, loads
+from datetime import datetime
+about = {
+ "website": 'https://www.semanticscholar.org/',
+ "wikidata_id": 'Q22908627',
+ "official_api_documentation": 'https://api.semanticscholar.org/',
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+}
+paging = True
search_url = 'https://www.semanticscholar.org/api/1/search'
+paper_url = 'https://www.semanticscholar.org/paper'
def request(query, params):
@@ -34,13 +45,37 @@ def request(query, params):
def response(resp):
res = loads(resp.text)
results = []
+
for result in res['results']:
- results.append(
- {
- 'url': result['primaryPaperLink']['url'],
- 'title': result['title']['text'],
- 'content': result['paperAbstractTruncated'],
- }
- )
+ item = {}
+ metadata = []
+
+ url = result.get('primaryPaperLink', {}).get('url')
+ if not url and result.get('links'):
+ url = result.get('links')[0]
+ if not url:
+ alternatePaperLinks = result.get('alternatePaperLinks')
+ if alternatePaperLinks:
+ url = alternatePaperLinks[0].get('url')
+ if not url:
+ url = paper_url + '/%s' % result['id']
+
+ item['url'] = url
+
+ item['title'] = result['title']['text']
+ item['content'] = result['paperAbstract']['text']
+
+ metadata = result.get('fieldsOfStudy') or []
+ venue = result.get('venue', {}).get('text')
+ if venue:
+ metadata.append(venue)
+ if metadata:
+ item['metadata'] = ', '.join(metadata)
+
+ pubDate = result.get('pubDate')
+ if pubDate:
+ item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
+
+ results.append(item)
return results