summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
author: Alexandre Flament <alex@al-f.net> 2022-09-23 19:58:14 +0200
committer: Alexandre Flament <alex@al-f.net> 2022-09-23 20:52:55 +0200
commit: d6446be38f3f858c09887a89c8fc490a3c300b95 (patch)
tree: 1b911cba1b96970455b115d1cc706db52d5613da /searx/engines
parent: 08b88597052dfdf17e947289d79510fdadad51e3 (diff)
download: searxng-d6446be38f3f858c09887a89c8fc490a3c300b95.tar.gz
searxng-d6446be38f3f858c09887a89c8fc490a3c300b95.zip
[mod] science category: various updates related to PR 1705
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/crossref.py 4
-rw-r--r-- searx/engines/semantic_scholar.py 3
-rw-r--r-- searx/engines/springer.py 3
3 files changed, 4 insertions, 6 deletions
diff --git a/searx/engines/crossref.py b/searx/engines/crossref.py
index d61318146..fbe2f0c2a 100644
--- a/searx/engines/crossref.py
+++ b/searx/engines/crossref.py
@@ -33,10 +33,10 @@ def response(resp):
if record_type == 'book-chapter':
title = record['container-title'][0]
if record['title'][0].lower().strip() != title.lower().strip():
- title = title + ' (' + record['title'][0] + ')'
+ title = html_to_text(title) + ' (' + html_to_text(record['title'][0]) + ')'
journal = None
else:
- title = record['title'][0]
+ title = html_to_text(record['title'][0])
journal = record.get('container-title', [None])[0]
url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])]
diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py
index b2701c333..7a1b5b231 100644
--- a/searx/engines/semantic_scholar.py
+++ b/searx/engines/semantic_scholar.py
@@ -48,7 +48,6 @@ def request(query, params):
def response(resp):
res = loads(resp.text)
results = []
-
for result in res['results']:
url = result.get('primaryPaperLink', {}).get('url')
if not url and result.get('links'):
@@ -72,7 +71,7 @@ def response(resp):
# pick for the first alternate link, but not from the crawler
pdf_url = None
for doc in result.get('alternatePaperLinks', []):
- if doc['linkType'] != 'crawler':
+ if doc['linkType'] not in ('crawler', 'doi'):
pdf_url = doc['url']
break
diff --git a/searx/engines/springer.py b/searx/engines/springer.py
index 2711fa807..e5255b794 100644
--- a/searx/engines/springer.py
+++ b/searx/engines/springer.py
@@ -58,8 +58,7 @@ def response(resp):
'authors': authors,
'doi': record.get('doi'),
'journal': record.get('publicationName'),
- 'start_page': record.get('start_page'),
- 'end_page': record.get('end_page'),
+ 'pages': record.get('start_page') + '-' + record.get('end_page'),
'tags': tags,
'issn': [record.get('issn')],
'isbn': [record.get('isbn')],