summaryrefslogtreecommitdiff
path: root/searx/engines
diff options
context:
space:
mode:
authorMohamed Elashri <muhammadelashri@gmail.com>2022-09-30 23:06:54 +0000
committerGitHub <noreply@github.com>2022-09-30 23:06:54 +0000
commit8d5653e60d5299979c0de5e55b1c5ca0bee8190c (patch)
tree8dc02b7663a5c9c91b09483e4499a612d9823698 /searx/engines
parent212c98c9f55dc602f57b4f01a73192450e9782b7 (diff)
parent62324655ff0d2e6f234b3e31413877b4b4a7a9fa (diff)
downloadsearxng-8d5653e60d5299979c0de5e55b1c5ca0bee8190c.tar.gz
searxng-8d5653e60d5299979c0de5e55b1c5ca0bee8190c.zip
Merge branch 'searxng:master' into master
Diffstat (limited to 'searx/engines')
-rw-r--r--searx/engines/__init__.py4
-rw-r--r--searx/engines/core.py5
-rw-r--r--searx/engines/duckduckgo_definitions.py2
-rw-r--r--searx/engines/github.py2
-rw-r--r--searx/engines/google.py6
-rw-r--r--searx/engines/google_news.py2
-rw-r--r--searx/engines/google_videos.py2
-rw-r--r--searx/engines/photon.py2
-rw-r--r--searx/engines/springer.py21
-rw-r--r--searx/engines/startpage.py2
-rw-r--r--searx/engines/wikidata.py4
-rw-r--r--searx/engines/wolframalpha_api.py2
-rw-r--r--searx/engines/xpath.py4
13 files changed, 31 insertions, 27 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 57b090add..c61f50d4b 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -275,12 +275,12 @@ def is_engine_active(engine: Engine):
def register_engine(engine: Engine):
if engine.name in engines:
- logger.error('Engine config error: ambigious name: {0}'.format(engine.name))
+ logger.error('Engine config error: ambiguous name: {0}'.format(engine.name))
sys.exit(1)
engines[engine.name] = engine
if engine.shortcut in engine_shortcuts:
- logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut))
+ logger.error('Engine config error: ambiguous shortcut: {0}'.format(engine.shortcut))
sys.exit(1)
engine_shortcuts[engine.shortcut] = engine.name
diff --git a/searx/engines/core.py b/searx/engines/core.py
index 2a71a216c..2fa66e226 100644
--- a/searx/engines/core.py
+++ b/searx/engines/core.py
@@ -77,6 +77,7 @@ def response(resp):
if url is None:
continue
+ publishedDate = None
time = source['publishedDate'] or source['depositedDate']
if time:
publishedDate = datetime.fromtimestamp(time / 1000)
@@ -106,8 +107,8 @@ def response(resp):
# 'pages' : '',
# 'number': '',
'doi': source['doi'],
- 'issn': source['issn'],
- 'isbn': source.get('isbn'), # exists in the rawRecordXml
+ 'issn': [x for x in [source.get('issn')] if x],
+ 'isbn': [x for x in [source.get('isbn')] if x], # exists in the rawRecordXml
'pdf_url': source.get('repositoryDocument', {}).get('pdfOrigin'),
}
)
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index a73ee55ff..7ed0de35c 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -79,7 +79,7 @@ def response(resp):
# * book / performing art / film / television / media franchise / concert tour / playwright
# * prepared food
# * website / software / os / programming language / file format / software engineer
- # * compagny
+ # * company
content = ''
heading = search_res.get('Heading', '')
diff --git a/searx/engines/github.py b/searx/engines/github.py
index 343f3793d..3180418ef 100644
--- a/searx/engines/github.py
+++ b/searx/engines/github.py
@@ -40,7 +40,7 @@ def response(resp):
search_res = loads(resp.text)
- # check if items are recieved
+ # check if items are received
if 'items' not in search_res:
return []
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 9cb936ccf..2f894b21f 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -326,14 +326,14 @@ def response(resp):
# google *sections*
if extract_text(eval_xpath(result, g_section_with_header)):
- logger.debug("ingoring <g-section-with-header>")
+ logger.debug("ignoring <g-section-with-header>")
continue
try:
title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
if title_tag is None:
# this is not one of the common google results *section*
- logger.debug('ingoring item from the result_xpath list: missing title')
+ logger.debug('ignoring item from the result_xpath list: missing title')
continue
title = extract_text(title_tag)
url = eval_xpath_getindex(result, href_xpath, 0, None)
@@ -341,7 +341,7 @@ def response(resp):
continue
content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
if content is None:
- logger.debug('ingoring item from the result_xpath list: missing content of title "%s"', title)
+ logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title)
continue
logger.debug('add link to results: %s', title)
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 87867d65a..1ada2d64d 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -141,7 +141,7 @@ def response(resp):
padding = (4 - (len(jslog) % 4)) * "="
jslog = b64decode(jslog + padding)
except binascii.Error:
- # URL cant be read, skip this result
+ # URL can't be read, skip this result
continue
# now we have : b'[null, ... null,"https://www.cnn.com/.../index.html"]'
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index 26dbcdd3c..fc574bd48 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -150,7 +150,7 @@ def response(resp):
# ignore google *sections*
if extract_text(eval_xpath(result, g_section_with_header)):
- logger.debug("ingoring <g-section-with-header>")
+ logger.debug("ignoring <g-section-with-header>")
continue
# ignore articles without an image id / e.g. news articles
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
index 16ea88194..2ea393679 100644
--- a/searx/engines/photon.py
+++ b/searx/engines/photon.py
@@ -70,7 +70,7 @@ def response(resp):
elif properties.get('osm_type') == 'R':
osm_type = 'relation'
else:
- # continue if invalide osm-type
+ # continue if invalid osm-type
continue
url = result_base_url.format(osm_type=osm_type, osm_id=properties.get('osm_id'))
diff --git a/searx/engines/springer.py b/searx/engines/springer.py
index e5255b794..a4d0832d8 100644
--- a/searx/engines/springer.py
+++ b/searx/engines/springer.py
@@ -41,7 +41,6 @@ def response(resp):
json_data = loads(resp.text)
for record in json_data['records']:
- content = record['abstract']
published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
tags = record.get('genre')
@@ -50,20 +49,24 @@ def response(resp):
results.append(
{
'template': 'paper.html',
- 'title': record['title'],
'url': record['url'][0]['value'].replace('http://', 'https://', 1),
- 'type': record.get('contentType'),
- 'content': content,
+ 'title': record['title'],
+ 'content': record['abstract'],
+ 'comments': record['publicationName'],
+ 'tags': tags,
'publishedDate': published,
+ 'type': record.get('contentType'),
'authors': authors,
- 'doi': record.get('doi'),
+ # 'editor': '',
+ 'publisher': record.get('publisher'),
'journal': record.get('publicationName'),
- 'pages': record.get('start_page') + '-' + record.get('end_page'),
- 'tags': tags,
- 'issn': [record.get('issn')],
- 'isbn': [record.get('isbn')],
'volume': record.get('volume') or None,
+ 'pages': '-'.join([x for x in [record.get('startingPage'), record.get('endingPage')] if x]),
'number': record.get('number') or None,
+ 'doi': record.get('doi'),
+ 'issn': [x for x in [record.get('issn')] if x],
+ 'isbn': [x for x in [record.get('isbn')] if x],
+ # 'pdf_url' : ''
}
)
return results
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 7b87808b9..087267bb7 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -209,7 +209,7 @@ def _fetch_supported_languages(resp):
# native name, the English name of the writing script used by the language,
# or occasionally something else entirely.
- # this cases are so special they need to be hardcoded, a couple of them are mispellings
+ # these cases are so special they need to be hardcoded, a couple of them are misspellings
language_names = {
'english_uk': 'en-GB',
'fantizhengwen': ['zh-TW', 'zh-HK'],
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index d828f4be8..e0ad2e6c9 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -50,7 +50,7 @@ WIKIDATA_PROPERTIES = {
# SERVICE wikibase:label: https://en.wikibooks.org/wiki/SPARQL/SERVICE_-_Label#Manual_Label_SERVICE
# https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates
# https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format#Data_model
-# optmization:
+# optimization:
# * https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/query_optimization
# * https://github.com/blazegraph/database/wiki/QueryHints
QUERY_TEMPLATE = """
@@ -386,7 +386,7 @@ def get_attributes(language):
add_amount('P2046') # area
add_amount('P281') # postal code
add_label('P38') # currency
- add_amount('P2048') # heigth (building)
+ add_amount('P2048') # height (building)
# Media
for p in [
diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py
index 4c99c90b5..6a2423b51 100644
--- a/searx/engines/wolframalpha_api.py
+++ b/searx/engines/wolframalpha_api.py
@@ -50,7 +50,7 @@ def request(query, params):
# replace private user area characters to make text legible
def replace_pua_chars(text):
pua_chars = {
- '\uf522': '\u2192', # rigth arrow
+ '\uf522': '\u2192', # right arrow
'\uf7b1': '\u2115', # set of natural numbers
'\uf7b4': '\u211a', # set of rational numbers
'\uf7b5': '\u211d', # set of real numbers
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index f9528e92d..2dc22028f 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -53,7 +53,7 @@ Replacements are:
0: none, 1: moderate, 2:strict
- If not supported, the URL paramter is an empty string.
+ If not supported, the URL parameter is an empty string.
"""
@@ -114,7 +114,7 @@ time_range_support = False
time_range_url = '&hours={time_range_val}'
'''Time range URL parameter in the :py:obj:`search_url`. If no time range is
-requested by the user, the URL paramter is an empty string. The
+requested by the user, the URL parameter is an empty string. The
``{time_range_val}`` replacement is taken from the :py:obj:`time_range_map`.
.. code:: yaml