author    Markus Heiser <markus@darmarit.de>  2021-04-04 12:48:24 +0200
committer Markus Heiser <markus@darmarit.de>  2021-04-24 09:00:53 +0200
commit    8efabd3ab75085402dc40911425b6db6e1bbdab3 (patch)
tree      e6f7aecf7d9f9d2f5f5f87d9079d10fa30249065 /searx
parent    7528e38c8a412d9f56eba05b34d2f24f3471f21d (diff)
download  searxng-8efabd3ab75085402dc40911425b6db6e1bbdab3.tar.gz
          searxng-8efabd3ab75085402dc40911425b6db6e1bbdab3.zip
[mod] core.ac.uk engine
- add to list of pylint scripts
- add debug log messages
- move API key into `settings.yml`
- improve readability
- add some metadata to results

Signed-off-by: Markus Heiser <markus@darmarit.de>
Diffstat (limited to 'searx')
-rw-r--r--  searx/engines/core.py | 67
1 file changed, 43 insertions(+), 24 deletions(-)
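The "move API key into `settings.yml`" item works because searx's engine loader copies per-engine keys from `settings.yml` onto the engine module at startup, overriding module-level defaults such as the new `api_key = 'unset'` below. A minimal sketch of that mechanism, not the actual loader (the real one lives in searx/engines/__init__.py and does more validation; the `load_engine` signature here is assumed for illustration):

    # Sketch only: settings.yml engine entries override module-level defaults.
    import importlib

    def load_engine(engine_setting):
        # e.g. engine_setting = {'name': 'core.ac.uk', 'engine': 'core',
        #                        'api_key': 'YOUR-CORE-API-KEY'}  # key value is hypothetical
        module = importlib.import_module('searx.engines.' + engine_setting['engine'])
        for key, value in engine_setting.items():
            if key not in ('name', 'engine'):
                setattr(module, key, value)  # e.g. module.api_key = 'YOUR-CORE-API-KEY'
        return module

With the key supplied that way, the hard-coded key below can be deleted and request() can refuse to run while api_key is still 'unset'.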
diff --git a/searx/engines/core.py b/searx/engines/core.py
index 99b4b524b..3a1147f35 100644
--- a/searx/engines/core.py
+++ b/searx/engines/core.py
@@ -1,14 +1,18 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
-
-Core Engine (science)
+"""CORE (science)
 """
+# pylint: disable=missing-function-docstring

 from json import loads
 from datetime import datetime
 from urllib.parse import urlencode

+from searx import logger
+from searx.exceptions import SearxEngineAPIException
+
+logger = logger.getChild('CORE engine')
+
 about = {
     "website": 'https://core.ac.uk',
     "wikidata_id": 'Q22661180',
@@ -19,45 +23,60 @@ about = {
 }
 categories = ['science']
-
 paging = True
-nb_per_page = 20
+nb_per_page = 10
+api_key = 'unset'
-# apikey = ''
-apikey = 'MVBozuTX8QF9I1D0GviL5bCn2Ueat6NS'
-
 base_url = 'https://core.ac.uk:443/api-v2/search/'
 search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}'
-
 def request(query, params):
-    search_path = search_string.format(
-        query=urlencode({'q': query}),
-        nb_per_page=nb_per_page,
-        page=params['pageno'],
-        apikey=apikey)
+    if api_key == 'unset':
+        raise SearxEngineAPIException('missing CORE API key')
+    search_path = search_string.format(
+        query=urlencode({'q': query}),
+        nb_per_page=nb_per_page,
+        page=params['pageno'],
+        apikey=api_key,
+    )
     params['url'] = base_url + search_path
-    return params
+    logger.debug("query_url --> %s", params['url'])
+    return params
 def response(resp):
     results = []
-
     json_data = loads(resp.text)
+
     for result in json_data['data']:
-        time = result['_source']['publishedDate']
-        if time is None:
-            date = datetime.now()
-        else:
+
+        source = result['_source']
+        time = source['publishedDate'] or source['depositedDate']
+        if time:
             date = datetime.fromtimestamp(time / 1000)
+        else:
+            date = None
+
+        metadata = []
+        if source['publisher'] and len(source['publisher']) > 3:
+            metadata.append(source['publisher'])
+        if source['topics']:
+            metadata.append(source['topics'][0])
+        if source['doi']:
+            metadata.append(source['doi'])
+        metadata = ' / '.join(metadata)
+
         results.append({
-            'url': result['_source']['urls'][0],
-            'title': result['_source']['title'],
-            'content': result['_source']['description'],
-            'publishedDate': date})
+            'url': source['urls'][0].replace('http://', 'https://', 1),
+            'title': source['title'],
+            'content': source['description'],
+            'publishedDate': date,
+            'metadata': metadata,
+        })
     return results
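A quick manual check of the reworked engine, assuming a searx checkout on PYTHONPATH; FakeResponse and its sample JSON are invented stand-ins for the HTTP response object that searx normally passes to response():

    from searx.engines import core

    # Invented stand-in for the real HTTP response object.
    class FakeResponse:
        text = '''{"data": [{"_source": {
            "publishedDate": null,
            "depositedDate": 1609459200000,
            "publisher": "Example Press",
            "topics": ["computer science"],
            "doi": "10.1000/example",
            "urls": ["http://core.ac.uk/display/123"],
            "title": "An example record",
            "description": "An abstract ..."}}]}'''

    core.api_key = 'MY-TEST-KEY'  # normally injected from settings.yml
    params = core.request('searx', {'pageno': 1})
    print(params['url'])
    # https://core.ac.uk:443/api-v2/search/q=searx?page=1&pageSize=10&apiKey=MY-TEST-KEY

    for res in core.response(FakeResponse()):
        print(res['url'], '|', res['metadata'])
    # https://core.ac.uk/display/123 | Example Press / computer science / 10.1000/example

Note the fallback chain: publishedDate is null here, so depositedDate supplies the date (epoch milliseconds, hence the division by 1000 before datetime.fromtimestamp()), and the three metadata fields are joined with ' / '.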