From 8850036ded3af2ba7455cef53a8134022e1b544d Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 21 Dec 2019 20:25:39 +0100 Subject: [fix] add explicit useragent header to requests - closes #1459 --- searx/engines/qwant.py | 1 + 1 file changed, 1 insertion(+) (limited to 'searx') diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index de12955c6..54e9dafad 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -50,6 +50,7 @@ def request(query, params): language = match_language(params['language'], supported_languages, language_aliases) params['url'] += '&locale=' + language.replace('-', '_').lower() + params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0' return params -- cgit v1.2.3-54-g00ecf From e5305f886c0d7d5fb3f34d1fbd7f9a545c14c284 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 21 Dec 2019 20:51:30 +0100 Subject: [fix] fetch extra search param of gigablast - fixes #1293 --- searx/engines/gigablast.py | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'searx') diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index a84f3f69d..2a5067bc3 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -14,6 +14,7 @@ import random from json import loads from time import time from lxml.html import fromstring +from searx.poolrequests import get from searx.url_utils import urlencode from searx.utils import eval_xpath @@ -31,13 +32,9 @@ search_string = 'search?{query}'\ '&c=main'\ '&s={offset}'\ '&format=json'\ - '&qh=0'\ - '&qlang={lang}'\ + '&langcountry={lang}'\ '&ff={safesearch}'\ - '&rxiec={rxieu}'\ - '&ulse={ulse}'\ - '&rand={rxikd}'\ - '&dbez={dbez}' + '&rand={rxikd}' # specific xpath variables results_xpath = '//response//result' url_xpath = './/url' @@ -46,9 +43,26 @@ content_xpath = './/sum' supported_languages_url = 'https://gigablast.com/search?&rxikd=1' +extra_param = '' # gigablast requires a random extra parameter +# which can be extracted from the source code of the search page + + +def parse_extra_param(text): + global extra_param + param_lines = [x for x in text.splitlines() if x.startswith('var url=') or x.startswith('url=url+')] + extra_param = '' + for l in param_lines: + extra_param += l.split("'")[1] + extra_param = extra_param.split('&')[-1] + + +def init(engine_settings=None): + parse_extra_param(get('http://gigablast.com/search?c=main&qlangcountry=en-us&q=south&s=10').text) + # do search-request def request(query, params): + print("EXTRAPARAM:", extra_param) offset = (params['pageno'] - 1) * number_of_results if params['language'] == 'all': @@ -67,14 +81,11 @@ def request(query, params): search_path = search_string.format(query=urlencode({'q': query}), offset=offset, number_of_results=number_of_results, - rxikd=int(time() * 1000), - rxieu=random.randint(1000000000, 9999999999), - ulse=random.randint(100000000, 999999999), lang=language, - safesearch=safesearch, - dbez=random.randint(100000000, 999999999)) + rxikd=int(time() * 1000), + safesearch=safesearch) - params['url'] = base_url + search_path + params['url'] = base_url + search_path + '&' + extra_param return params @@ -84,7 +95,11 @@ def response(resp): results = [] # parse results - response_json = loads(resp.text) + try: + response_json = loads(resp.text) + except: + parse_extra_param(resp.text) + return results for result in response_json['results']: # append result -- cgit v1.2.3-54-g00ecf From f8713512bedf19d4495e0b9a0fd86679daaf7f79 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 21 Dec 2019 20:56:38 +0100 Subject: [fix] convert byte query to string in osm engine - fixes #1220 --- searx/engines/openstreetmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searx') diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 733ba6203..cec10a3c7 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -24,7 +24,7 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}' # do search-request def request(query, params): - params['url'] = base_url + search_string.format(query=query) + params['url'] = base_url + search_string.format(query=query.decode('utf-8')) return params -- cgit v1.2.3-54-g00ecf From 00512e36c133312eb74a82f6a2dec6d06214c42b Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 21 Dec 2019 21:01:08 +0100 Subject: [fix] handle empty response from wikipedia engine - closes #1114 --- searx/engines/wikipedia.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'searx') diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 4dae735d1..690da72fe 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -79,6 +79,9 @@ def response(resp): # wikipedia article's unique id # first valid id is assumed to be the requested article + if 'pages' not in search_result['query']: + return results + for article_id in search_result['query']['pages']: page = search_result['query']['pages'][article_id] if int(article_id) > 0: -- cgit v1.2.3-54-g00ecf From fc457569f757dd10ff55393f472ea9ed49a42374 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 21 Dec 2019 21:13:43 +0100 Subject: [fix] pep8 --- searx/engines/gigablast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searx') diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 2a5067bc3..5af593e36 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -43,7 +43,7 @@ content_xpath = './/sum' supported_languages_url = 'https://gigablast.com/search?&rxikd=1' -extra_param = '' # gigablast requires a random extra parameter +extra_param = '' # gigablast requires a random extra parameter # which can be extracted from the source code of the search page -- cgit v1.2.3-54-g00ecf From 34ad3d6b34017523a9502f86b92c17fe389918eb Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 21 Dec 2019 21:25:50 +0100 Subject: [enh] display error message if gigablast extra param expired --- searx/engines/gigablast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searx') diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 5af593e36..2bb29a9fe 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -99,7 +99,7 @@ def response(resp): response_json = loads(resp.text) except: parse_extra_param(resp.text) - return results + raise Exception('extra param expired, please reload') for result in response_json['results']: # append result -- cgit v1.2.3-54-g00ecf From c18048e0454f4e3dc75c778940903091fbeae06a Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Sun, 25 Aug 2019 22:23:37 -0700 Subject: exclude disambiguation pages from wikipedia infobox --- searx/engines/wikipedia.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'searx') diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 690da72fe..44dea56fa 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -21,7 +21,8 @@ search_url = base_url + u'w/api.php?'\ 'action=query'\ '&format=json'\ '&{query}'\ - '&prop=extracts|pageimages'\ + '&prop=extracts|pageimages|pageprops'\ + '&ppprop=disambiguation'\ '&exintro'\ '&explaintext'\ '&pithumbsize=300'\ @@ -87,7 +88,7 @@ def response(resp): if int(article_id) > 0: break - if int(article_id) < 0: + if int(article_id) < 0 or 'disambiguation' in page.get('pageprops', {}): return [] title = page.get('title') -- cgit v1.2.3-54-g00ecf From 5706c12fba98e169c7c76a4d3c29aabf48242d63 Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Sun, 25 Aug 2019 22:47:23 -0700 Subject: remove empty parenthesis in wikipedia's summary They're usually IPA pronunciations which are removed by the API. --- searx/engines/wikipedia.py | 1 + 1 file changed, 1 insertion(+) (limited to 'searx') diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 44dea56fa..a216ba886 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -100,6 +100,7 @@ def response(resp): extract = page.get('extract') summary = extract_first_paragraph(extract, title, image) + summary = summary.replace('() ', '') # link to wikipedia article wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \ -- cgit v1.2.3-54-g00ecf From 495ae59b31b6aafae484ecdfb6aece3a84f1ede7 Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Sun, 25 Aug 2019 23:01:30 -0700 Subject: hide suggestions box if empty This bug happens only in python3 because map returns an iterator. --- searx/webapp.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'searx') diff --git a/searx/webapp.py b/searx/webapp.py index 7cf4106d3..212c874c9 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -606,11 +606,11 @@ def index(): # HTML output format # suggestions: use RawTextQuery to get the suggestion URLs with the same bang - suggestion_urls = map(lambda suggestion: { - 'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(), - 'title': suggestion - }, - result_container.suggestions) + suggestion_urls = list(map(lambda suggestion: { + 'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(), + 'title': suggestion + }, + result_container.suggestions)) correction_urls = list(map(lambda correction: { 'url': raw_text_query.changeSearchQuery(correction).getFullQuery(), -- cgit v1.2.3-54-g00ecf From ee6781d777f3a95f6e1c23499ecbc7257d5e35ec Mon Sep 17 00:00:00 2001 From: Vipul Date: Sat, 14 Sep 2019 12:37:39 +0000 Subject: [Fix] Libgen engine Libgen has switched to new domain (i.e https://libgen.is) with TLS support and older domain (i.e. http://libgen.io) is no longer accessible. See, https://en.wikipedia.org/wiki/Library_Genesis, for more information. Resolves: #1693 --- searx/settings.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searx') diff --git a/searx/settings.yml b/searx/settings.yml index cf2b13e08..539049ea0 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -407,7 +407,7 @@ engines: - name : library genesis engine : xpath - search_url : http://libgen.io/search.php?req={query} + search_url : https://libgen.is/search.php?req={query} url_xpath : //a[contains(@href,"bookfi.net")]/@href title_xpath : //a[contains(@href,"book/")]/text()[1] content_xpath : //td/a[1][contains(@href,"=author")]/text() -- cgit v1.2.3-54-g00ecf From f407dd8ef4e3f6c82bef31f678139d6db2a4d810 Mon Sep 17 00:00:00 2001 From: Vipul Date: Sat, 14 Sep 2019 12:45:02 +0000 Subject: Switch to https for some domains --- searx/settings.yml | 8 ++++---- searx/settings_robot.yml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'searx') diff --git a/searx/settings.yml b/searx/settings.yml index 539049ea0..d9a1f45f0 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -463,7 +463,7 @@ engines: - name : openairedatasets engine : json_engine paging : True - search_url : http://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} + search_url : https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} results_query : response/results/result url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ title_query : metadata/oaf:entity/oaf:result/title/$ @@ -475,7 +475,7 @@ engines: - name : openairepublications engine : json_engine paging : True - search_url : http://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} + search_url : https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} results_query : response/results/result url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ title_query : metadata/oaf:entity/oaf:result/title/$ @@ -806,7 +806,7 @@ locales: doi_resolvers : oadoi.org : 'https://oadoi.org/' doi.org : 'https://doi.org/' - doai.io : 'http://doai.io/' - sci-hub.tw : 'http://sci-hub.tw/' + doai.io : 'https://doai.io/' + sci-hub.tw : 'https://sci-hub.tw/' default_doi_resolver : 'oadoi.org' diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml index 635809041..25f229e56 100644 --- a/searx/settings_robot.yml +++ b/searx/settings_robot.yml @@ -43,7 +43,7 @@ locales: doi_resolvers : oadoi.org : 'https://oadoi.org/' doi.org : 'https://doi.org/' - doai.io : 'http://doai.io/' - sci-hub.tw : 'http://sci-hub.tw/' + doai.io : 'https://doai.io/' + sci-hub.tw : 'https://sci-hub.tw/' default_doi_resolver : 'oadoi.org' -- cgit v1.2.3-54-g00ecf From 8bea927bb02e02754834d6f9692942f621bd21c5 Mon Sep 17 00:00:00 2001 From: Vipul Date: Sun, 22 Dec 2019 01:21:22 +0000 Subject: [Fix] oscar: no HTML escaping prior to output When results are fetched from any programming related documentation site (like git-scm.com, docs.python.org etc), content in Info box is shown as raw HTML code. This change addresses the issue by using "safe" filter feature provided by Django. See, - https://docs.djangoproject.com/en/3.0/ref/templates/builtins/#safe - Searx issue tracker (issue #1649), for more information. Resolves: #1649 --- searx/templates/oscar/infobox.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searx') diff --git a/searx/templates/oscar/infobox.html b/searx/templates/oscar/infobox.html index 9f5e58d2b..9802f11e2 100644 --- a/searx/templates/oscar/infobox.html +++ b/searx/templates/oscar/infobox.html @@ -6,7 +6,7 @@
{% if infobox.img_src %}{{ infobox.infobox }}{% endif %} - {% if infobox.content %}

{{ infobox.content }}

{% endif %} + {% if infobox.content %}

{{ infobox.content | safe }}

{% endif %} {% if infobox.attributes -%} -- cgit v1.2.3-54-g00ecf