diff options
Diffstat (limited to 'searx/engines/semantic_scholar.py')
-rw-r--r-- | searx/engines/semantic_scholar.py | 57 |
1 files changed, 41 insertions, 16 deletions
diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py index bda731047..b2701c333 100644 --- a/searx/engines/semantic_scholar.py +++ b/searx/engines/semantic_scholar.py @@ -6,6 +6,8 @@ from json import dumps, loads from datetime import datetime +from flask_babel import gettext + about = { "website": 'https://www.semanticscholar.org/', "wikidata_id": 'Q22908627', @@ -15,6 +17,7 @@ about = { "results": 'JSON', } +categories = ['science', 'scientific publications'] paging = True search_url = 'https://www.semanticscholar.org/api/1/search' paper_url = 'https://www.semanticscholar.org/paper' @@ -47,9 +50,6 @@ def response(resp): results = [] for result in res['results']: - item = {} - metadata = [] - url = result.get('primaryPaperLink', {}).get('url') if not url and result.get('links'): url = result.get('links')[0] @@ -60,22 +60,47 @@ def response(resp): if not url: url = paper_url + '/%s' % result['id'] - item['url'] = url + # publishedDate + if 'pubDate' in result: + publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d") + else: + publishedDate = None - item['title'] = result['title']['text'] - item['content'] = result['paperAbstract']['text'] + # authors + authors = [author[0]['name'] for author in result.get('authors', [])] - metadata = result.get('fieldsOfStudy') or [] - venue = result.get('venue', {}).get('text') - if venue: - metadata.append(venue) - if metadata: - item['metadata'] = ', '.join(metadata) + # pick for the first alternate link, but not from the crawler + pdf_url = None + for doc in result.get('alternatePaperLinks', []): + if doc['linkType'] != 'crawler': + pdf_url = doc['url'] + break - pubDate = result.get('pubDate') - if pubDate: - item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d") + # comments + comments = None + if 'citationStats' in result: + comments = gettext( + '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}' + ).format( + numCitations=result['citationStats']['numCitations'], + firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'], + lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'], + ) - results.append(item) + results.append( + { + 'template': 'paper.html', + 'url': url, + 'title': result['title']['text'], + 'content': result['paperAbstract']['text'], + 'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'), + 'doi': result.get('doiInfo', {}).get('doi'), + 'tags': result.get('fieldsOfStudy'), + 'authors': authors, + 'pdf_url': pdf_url, + 'publishedDate': publishedDate, + 'comments': comments, + } + ) return results |