Merge branch 'searxng:master' into master

author: Mohamed Elashri <muhammadelashri@gmail.com> 2022-09-30 23:06:54 +0000
committer: GitHub <noreply@github.com> 2022-09-30 23:06:54 +0000
commit: 8d5653e60d5299979c0de5e55b1c5ca0bee8190c (patch)
tree: 8dc02b7663a5c9c91b09483e4499a612d9823698 /searx/engines
parent: 212c98c9f55dc602f57b4f01a73192450e9782b7 (diff)
parent: 62324655ff0d2e6f234b3e31413877b4b4a7a9fa (diff)
download: searxng-8d5653e60d5299979c0de5e55b1c5ca0bee8190c.tar.gz
searxng-8d5653e60d5299979c0de5e55b1c5ca0bee8190c.zip
13 files changed, 31 insertions, 27 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 57b090add..c61f50d4b 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -275,12 +275,12 @@ def is_engine_active(engine: Engine):
 
 def register_engine(engine: Engine):
     if engine.name in engines:
-        logger.error('Engine config error: ambigious name: {0}'.format(engine.name))
+        logger.error('Engine config error: ambiguous name: {0}'.format(engine.name))
         sys.exit(1)
     engines[engine.name] = engine
 
     if engine.shortcut in engine_shortcuts:
-        logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut))
+        logger.error('Engine config error: ambiguous shortcut: {0}'.format(engine.shortcut))
         sys.exit(1)
     engine_shortcuts[engine.shortcut] = engine.name
 
diff --git a/searx/engines/core.py b/searx/engines/core.py
index 2a71a216c..2fa66e226 100644
--- a/searx/engines/core.py
+++ b/searx/engines/core.py
@@ -77,6 +77,7 @@ def response(resp):
         if url is None:
             continue
 
+        publishedDate = None
         time = source['publishedDate'] or source['depositedDate']
         if time:
             publishedDate = datetime.fromtimestamp(time / 1000)
@@ -106,8 +107,8 @@ def response(resp):
                 # 'pages' : '',
                 # 'number': '',
                 'doi': source['doi'],
-                'issn': source['issn'],
-                'isbn': source.get('isbn'),  # exists in the rawRecordXml
+                'issn': [x for x in [source.get('issn')] if x],
+                'isbn': [x for x in [source.get('isbn')] if x],  # exists in the rawRecordXml
                 'pdf_url': source.get('repositoryDocument', {}).get('pdfOrigin'),
             }
         )
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index a73ee55ff..7ed0de35c 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -79,7 +79,7 @@ def response(resp):
     # * book / performing art / film / television  / media franchise / concert tour / playwright
     # * prepared food
     # * website / software / os / programming language / file format / software engineer
-    # * compagny
+    # * company
 
     content = ''
     heading = search_res.get('Heading', '')
diff --git a/searx/engines/github.py b/searx/engines/github.py
index 343f3793d..3180418ef 100644
--- a/searx/engines/github.py
+++ b/searx/engines/github.py
@@ -40,7 +40,7 @@ def response(resp):
 
     search_res = loads(resp.text)
 
-    # check if items are recieved
+    # check if items are received
     if 'items' not in search_res:
         return []
 
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 9cb936ccf..2f894b21f 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -326,14 +326,14 @@ def response(resp):
 
         # google *sections*
         if extract_text(eval_xpath(result, g_section_with_header)):
-            logger.debug("ingoring <g-section-with-header>")
+            logger.debug("ignoring <g-section-with-header>")
             continue
 
         try:
             title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
             if title_tag is None:
                 # this not one of the common google results *section*
-                logger.debug('ingoring item from the result_xpath list: missing title')
+                logger.debug('ignoring item from the result_xpath list: missing title')
                 continue
             title = extract_text(title_tag)
             url = eval_xpath_getindex(result, href_xpath, 0, None)
@@ -341,7 +341,7 @@ def response(resp):
                 continue
             content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
             if content is None:
-                logger.debug('ingoring item from the result_xpath list: missing content of title "%s"', title)
+                logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title)
                 continue
 
             logger.debug('add link to results: %s', title)
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 87867d65a..1ada2d64d 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -141,7 +141,7 @@ def response(resp):
                 padding = (4 - (len(jslog) % 4)) * "="
                 jslog = b64decode(jslog + padding)
             except binascii.Error:
-                # URL cant be read, skip this result
+                # URL can't be read, skip this result
                 continue
 
             # now we have : b'[null, ... null,"https://www.cnn.com/.../index.html"]'
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index 26dbcdd3c..fc574bd48 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -150,7 +150,7 @@ def response(resp):
 
         # ignore google *sections*
         if extract_text(eval_xpath(result, g_section_with_header)):
-            logger.debug("ingoring <g-section-with-header>")
+            logger.debug("ignoring <g-section-with-header>")
             continue
 
         # ingnore articles without an image id / e.g. news articles
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
index 16ea88194..2ea393679 100644
--- a/searx/engines/photon.py
+++ b/searx/engines/photon.py
@@ -70,7 +70,7 @@ def response(resp):
         elif properties.get('osm_type') == 'R':
             osm_type = 'relation'
         else:
-            # continue if invalide osm-type
+            # continue if invalid osm-type
             continue
 
         url = result_base_url.format(osm_type=osm_type, osm_id=properties.get('osm_id'))
diff --git a/searx/engines/springer.py b/searx/engines/springer.py
index e5255b794..a4d0832d8 100644
--- a/searx/engines/springer.py
+++ b/searx/engines/springer.py
@@ -41,7 +41,6 @@ def response(resp):
     json_data = loads(resp.text)
 
     for record in json_data['records']:
-        content = record['abstract']
         published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
         authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
         tags = record.get('genre')
@@ -50,20 +49,24 @@ def response(resp):
         results.append(
             {
                 'template': 'paper.html',
-                'title': record['title'],
                 'url': record['url'][0]['value'].replace('http://', 'https://', 1),
-                'type': record.get('contentType'),
-                'content': content,
+                'title': record['title'],
+                'content': record['abstract'],
+                'comments': record['publicationName'],
+                'tags': tags,
                 'publishedDate': published,
+                'type': record.get('contentType'),
                 'authors': authors,
-                'doi': record.get('doi'),
+                # 'editor': '',
+                'publisher': record.get('publisher'),
                 'journal': record.get('publicationName'),
-                'pages': record.get('start_page') + '-' + record.get('end_page'),
-                'tags': tags,
-                'issn': [record.get('issn')],
-                'isbn': [record.get('isbn')],
                 'volume': record.get('volume') or None,
+                'pages': '-'.join([x for x in [record.get('startingPage'), record.get('endingPage')] if x]),
                 'number': record.get('number') or None,
+                'doi': record.get('doi'),
+                'issn': [x for x in [record.get('issn')] if x],
+                'isbn': [x for x in [record.get('isbn')] if x],
+                # 'pdf_url' : ''
             }
         )
     return results
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 7b87808b9..087267bb7 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -209,7 +209,7 @@ def _fetch_supported_languages(resp):
     # native name, the English name of the writing script used by the language,
     # or occasionally something else entirely.
 
-    # this cases are so special they need to be hardcoded, a couple of them are mispellings
+    # these cases are so special they need to be hardcoded, a couple of them are misspellings
     language_names = {
         'english_uk': 'en-GB',
         'fantizhengwen': ['zh-TW', 'zh-HK'],
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index d828f4be8..e0ad2e6c9 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -50,7 +50,7 @@ WIKIDATA_PROPERTIES = {
 # SERVICE wikibase:label: https://en.wikibooks.org/wiki/SPARQL/SERVICE_-_Label#Manual_Label_SERVICE
 # https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates
 # https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format#Data_model
-# optmization:
+# optimization:
 # * https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/query_optimization
 # * https://github.com/blazegraph/database/wiki/QueryHints
 QUERY_TEMPLATE = """
@@ -386,7 +386,7 @@ def get_attributes(language):
     add_amount('P2046')  # area
     add_amount('P281')  # postal code
     add_label('P38')  # currency
-    add_amount('P2048')  # heigth (building)
+    add_amount('P2048')  # height (building)
 
     # Media
     for p in [
diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py
index 4c99c90b5..6a2423b51 100644
--- a/searx/engines/wolframalpha_api.py
+++ b/searx/engines/wolframalpha_api.py
@@ -50,7 +50,7 @@ def request(query, params):
 # replace private user area characters to make text legible
 def replace_pua_chars(text):
     pua_chars = {
-        '\uf522': '\u2192',  # rigth arrow
+        '\uf522': '\u2192',  # right arrow
         '\uf7b1': '\u2115',  # set of natural numbers
         '\uf7b4': '\u211a',  # set of rational numbers
         '\uf7b5': '\u211d',  # set of real numbers
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index f9528e92d..2dc22028f 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -53,7 +53,7 @@ Replacements are:
 
       0: none, 1: moderate, 2:strict
 
-  If not supported, the URL paramter is an empty string.
+  If not supported, the URL parameter is an empty string.
 
 """
 
@@ -114,7 +114,7 @@ time_range_support = False
 
 time_range_url = '&hours={time_range_val}'
 '''Time range URL parameter in the in :py:obj:`search_url`.  If no time range is
-requested by the user, the URL paramter is an empty string.  The
+requested by the user, the URL parameter is an empty string.  The
 ``{time_range_val}`` replacement is taken from the :py:obj:`time_range_map`.
 
 .. code:: yaml
author	Mohamed Elashri <muhammadelashri@gmail.com>	2022-09-30 23:06:54 +0000
committer	GitHub <noreply@github.com>	2022-09-30 23:06:54 +0000
commit	8d5653e60d5299979c0de5e55b1c5ca0bee8190c (patch)
tree	8dc02b7663a5c9c91b09483e4499a612d9823698 /searx/engines
parent	212c98c9f55dc602f57b4f01a73192450e9782b7 (diff)
parent	62324655ff0d2e6f234b3e31413877b4b4a7a9fa (diff)
download	searxng-8d5653e60d5299979c0de5e55b1c5ca0bee8190c.tar.gz searxng-8d5653e60d5299979c0de5e55b1c5ca0bee8190c.zip