diff options
author | Mohamed Elashri <muhammadelashri@gmail.com> | 2022-09-30 23:06:54 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-30 23:06:54 +0000 |
commit | 8d5653e60d5299979c0de5e55b1c5ca0bee8190c (patch) | |
tree | 8dc02b7663a5c9c91b09483e4499a612d9823698 /searx/engines | |
parent | 212c98c9f55dc602f57b4f01a73192450e9782b7 (diff) | |
parent | 62324655ff0d2e6f234b3e31413877b4b4a7a9fa (diff) | |
download | searxng-8d5653e60d5299979c0de5e55b1c5ca0bee8190c.tar.gz searxng-8d5653e60d5299979c0de5e55b1c5ca0bee8190c.zip |
Merge branch 'searxng:master' into master
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/__init__.py | 4 | ||||
-rw-r--r-- | searx/engines/core.py | 5 | ||||
-rw-r--r-- | searx/engines/duckduckgo_definitions.py | 2 | ||||
-rw-r--r-- | searx/engines/github.py | 2 | ||||
-rw-r--r-- | searx/engines/google.py | 6 | ||||
-rw-r--r-- | searx/engines/google_news.py | 2 | ||||
-rw-r--r-- | searx/engines/google_videos.py | 2 | ||||
-rw-r--r-- | searx/engines/photon.py | 2 | ||||
-rw-r--r-- | searx/engines/springer.py | 21 | ||||
-rw-r--r-- | searx/engines/startpage.py | 2 | ||||
-rw-r--r-- | searx/engines/wikidata.py | 4 | ||||
-rw-r--r-- | searx/engines/wolframalpha_api.py | 2 | ||||
-rw-r--r-- | searx/engines/xpath.py | 4 |
13 files changed, 31 insertions, 27 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 57b090add..c61f50d4b 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -275,12 +275,12 @@ def is_engine_active(engine: Engine): def register_engine(engine: Engine): if engine.name in engines: - logger.error('Engine config error: ambigious name: {0}'.format(engine.name)) + logger.error('Engine config error: ambiguous name: {0}'.format(engine.name)) sys.exit(1) engines[engine.name] = engine if engine.shortcut in engine_shortcuts: - logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut)) + logger.error('Engine config error: ambiguous shortcut: {0}'.format(engine.shortcut)) sys.exit(1) engine_shortcuts[engine.shortcut] = engine.name diff --git a/searx/engines/core.py b/searx/engines/core.py index 2a71a216c..2fa66e226 100644 --- a/searx/engines/core.py +++ b/searx/engines/core.py @@ -77,6 +77,7 @@ def response(resp): if url is None: continue + publishedDate = None time = source['publishedDate'] or source['depositedDate'] if time: publishedDate = datetime.fromtimestamp(time / 1000) @@ -106,8 +107,8 @@ def response(resp): # 'pages' : '', # 'number': '', 'doi': source['doi'], - 'issn': source['issn'], - 'isbn': source.get('isbn'), # exists in the rawRecordXml + 'issn': [x for x in [source.get('issn')] if x], + 'isbn': [x for x in [source.get('isbn')] if x], # exists in the rawRecordXml 'pdf_url': source.get('repositoryDocument', {}).get('pdfOrigin'), } ) diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index a73ee55ff..7ed0de35c 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -79,7 +79,7 @@ def response(resp): # * book / performing art / film / television / media franchise / concert tour / playwright # * prepared food # * website / software / os / programming language / file format / software engineer - # * compagny + # * company content = '' heading = search_res.get('Heading', '') diff --git a/searx/engines/github.py b/searx/engines/github.py index 343f3793d..3180418ef 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -40,7 +40,7 @@ def response(resp): search_res = loads(resp.text) - # check if items are recieved + # check if items are received if 'items' not in search_res: return [] diff --git a/searx/engines/google.py b/searx/engines/google.py index 9cb936ccf..2f894b21f 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -326,14 +326,14 @@ def response(resp): # google *sections* if extract_text(eval_xpath(result, g_section_with_header)): - logger.debug("ingoring <g-section-with-header>") + logger.debug("ignoring <g-section-with-header>") continue try: title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None) if title_tag is None: # this not one of the common google results *section* - logger.debug('ingoring item from the result_xpath list: missing title') + logger.debug('ignoring item from the result_xpath list: missing title') continue title = extract_text(title_tag) url = eval_xpath_getindex(result, href_xpath, 0, None) @@ -341,7 +341,7 @@ def response(resp): continue content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True) if content is None: - logger.debug('ingoring item from the result_xpath list: missing content of title "%s"', title) + logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title) continue logger.debug('add link to results: %s', title) diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 87867d65a..1ada2d64d 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -141,7 +141,7 @@ def response(resp): padding = (4 - (len(jslog) % 4)) * "=" jslog = b64decode(jslog + padding) except binascii.Error: - # URL cant be read, skip this result + # URL can't be read, skip this result continue # now we have : b'[null, ... null,"https://www.cnn.com/.../index.html"]' diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index 26dbcdd3c..fc574bd48 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -150,7 +150,7 @@ def response(resp): # ignore google *sections* if extract_text(eval_xpath(result, g_section_with_header)): - logger.debug("ingoring <g-section-with-header>") + logger.debug("ignoring <g-section-with-header>") continue # ingnore articles without an image id / e.g. news articles diff --git a/searx/engines/photon.py b/searx/engines/photon.py index 16ea88194..2ea393679 100644 --- a/searx/engines/photon.py +++ b/searx/engines/photon.py @@ -70,7 +70,7 @@ def response(resp): elif properties.get('osm_type') == 'R': osm_type = 'relation' else: - # continue if invalide osm-type + # continue if invalid osm-type continue url = result_base_url.format(osm_type=osm_type, osm_id=properties.get('osm_id')) diff --git a/searx/engines/springer.py b/searx/engines/springer.py index e5255b794..a4d0832d8 100644 --- a/searx/engines/springer.py +++ b/searx/engines/springer.py @@ -41,7 +41,6 @@ def response(resp): json_data = loads(resp.text) for record in json_data['records']: - content = record['abstract'] published = datetime.strptime(record['publicationDate'], '%Y-%m-%d') authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']] tags = record.get('genre') @@ -50,20 +49,24 @@ def response(resp): results.append( { 'template': 'paper.html', - 'title': record['title'], 'url': record['url'][0]['value'].replace('http://', 'https://', 1), - 'type': record.get('contentType'), - 'content': content, + 'title': record['title'], + 'content': record['abstract'], + 'comments': record['publicationName'], + 'tags': tags, 'publishedDate': published, + 'type': record.get('contentType'), 'authors': authors, - 'doi': record.get('doi'), + # 'editor': '', + 'publisher': record.get('publisher'), 'journal': record.get('publicationName'), - 'pages': record.get('start_page') + '-' + record.get('end_page'), - 'tags': tags, - 'issn': [record.get('issn')], - 'isbn': [record.get('isbn')], 'volume': record.get('volume') or None, + 'pages': '-'.join([x for x in [record.get('startingPage'), record.get('endingPage')] if x]), 'number': record.get('number') or None, + 'doi': record.get('doi'), + 'issn': [x for x in [record.get('issn')] if x], + 'isbn': [x for x in [record.get('isbn')] if x], + # 'pdf_url' : '' } ) return results diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 7b87808b9..087267bb7 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -209,7 +209,7 @@ def _fetch_supported_languages(resp): # native name, the English name of the writing script used by the language, # or occasionally something else entirely. - # this cases are so special they need to be hardcoded, a couple of them are mispellings + # this cases are so special they need to be hardcoded, a couple of them are misspellings language_names = { 'english_uk': 'en-GB', 'fantizhengwen': ['zh-TW', 'zh-HK'], diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index d828f4be8..e0ad2e6c9 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -50,7 +50,7 @@ WIKIDATA_PROPERTIES = { # SERVICE wikibase:label: https://en.wikibooks.org/wiki/SPARQL/SERVICE_-_Label#Manual_Label_SERVICE # https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates # https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format#Data_model -# optmization: +# optimization: # * https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/query_optimization # * https://github.com/blazegraph/database/wiki/QueryHints QUERY_TEMPLATE = """ @@ -386,7 +386,7 @@ def get_attributes(language): add_amount('P2046') # area add_amount('P281') # postal code add_label('P38') # currency - add_amount('P2048') # heigth (building) + add_amount('P2048') # height (building) # Media for p in [ diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 4c99c90b5..6a2423b51 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -50,7 +50,7 @@ def request(query, params): # replace private user area characters to make text legible def replace_pua_chars(text): pua_chars = { - '\uf522': '\u2192', # rigth arrow + '\uf522': '\u2192', # right arrow '\uf7b1': '\u2115', # set of natural numbers '\uf7b4': '\u211a', # set of rational numbers '\uf7b5': '\u211d', # set of real numbers diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index f9528e92d..2dc22028f 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -53,7 +53,7 @@ Replacements are: 0: none, 1: moderate, 2:strict - If not supported, the URL paramter is an empty string. + If not supported, the URL parameter is an empty string. """ @@ -114,7 +114,7 @@ time_range_support = False time_range_url = '&hours={time_range_val}' '''Time range URL parameter in the in :py:obj:`search_url`. If no time range is -requested by the user, the URL paramter is an empty string. The +requested by the user, the URL parameter is an empty string. The ``{time_range_val}`` replacement is taken from the :py:obj:`time_range_map`. .. code:: yaml |