Merge branch 'master' into ne/fix-infinite_scroll-with-vim_bindings

author: Markus Heiser <markus.heiser@darmarIT.de> 2019-12-24 15:42:05 +0100
committer: GitHub <noreply@github.com> 2019-12-24 15:42:05 +0100
commit: 38dad2e8e3b100711afe3ae942aaed5111841cd6 (patch)
tree: 51f1a35121155010411aa5970ef06aff80adf741 /searx
parent: 0ae86cd1685d244c83a6080a7816365096ab06f8 (diff)
parent: a395fb4a8d030d5b8fde496d2ae722bc034d3e32 (diff)
download: searxng-38dad2e8e3b100711afe3ae942aaed5111841cd6.tar.gz
searxng-38dad2e8e3b100711afe3ae942aaed5111841cd6.zip
8 files changed, 50 insertions, 29 deletions
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index a84f3f69d..2bb29a9fe 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -14,6 +14,7 @@ import random
 from json import loads
 from time import time
 from lxml.html import fromstring
+from searx.poolrequests import get
 from searx.url_utils import urlencode
 from searx.utils import eval_xpath
 
@@ -31,13 +32,9 @@ search_string = 'search?{query}'\
     '&c=main'\
     '&s={offset}'\
     '&format=json'\
-    '&qh=0'\
-    '&qlang={lang}'\
+    '&langcountry={lang}'\
     '&ff={safesearch}'\
-    '&rxiec={rxieu}'\
-    '&ulse={ulse}'\
-    '&rand={rxikd}'\
-    '&dbez={dbez}'
+    '&rand={rxikd}'
 # specific xpath variables
 results_xpath = '//response//result'
 url_xpath = './/url'
@@ -46,9 +43,26 @@ content_xpath = './/sum'
 
 supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
 
+extra_param = ''  # gigablast requires a random extra parameter
+# which can be extracted from the source code of the search page
+
+
+def parse_extra_param(text):
+    global extra_param
+    param_lines = [x for x in text.splitlines() if x.startswith('var url=') or x.startswith('url=url+')]
+    extra_param = ''
+    for l in param_lines:
+        extra_param += l.split("'")[1]
+    extra_param = extra_param.split('&')[-1]
+
+
+def init(engine_settings=None):
+    parse_extra_param(get('http://gigablast.com/search?c=main&qlangcountry=en-us&q=south&s=10').text)
+
 
 # do search-request
 def request(query, params):
+    print("EXTRAPARAM:", extra_param)
     offset = (params['pageno'] - 1) * number_of_results
 
     if params['language'] == 'all':
@@ -67,14 +81,11 @@ def request(query, params):
     search_path = search_string.format(query=urlencode({'q': query}),
                                        offset=offset,
                                        number_of_results=number_of_results,
-                                       rxikd=int(time() * 1000),
-                                       rxieu=random.randint(1000000000, 9999999999),
-                                       ulse=random.randint(100000000, 999999999),
                                        lang=language,
-                                       safesearch=safesearch,
-                                       dbez=random.randint(100000000, 999999999))
+                                       rxikd=int(time() * 1000),
+                                       safesearch=safesearch)
 
-    params['url'] = base_url + search_path
+    params['url'] = base_url + search_path + '&' + extra_param
 
     return params
 
@@ -84,7 +95,11 @@ def response(resp):
     results = []
 
     # parse results
-    response_json = loads(resp.text)
+    try:
+        response_json = loads(resp.text)
+    except:
+        parse_extra_param(resp.text)
+        raise Exception('extra param expired, please reload')
 
     for result in response_json['results']:
         # append result
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 733ba6203..cec10a3c7 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -24,7 +24,7 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
 
 # do search-request
 def request(query, params):
-    params['url'] = base_url + search_string.format(query=query)
+    params['url'] = base_url + search_string.format(query=query.decode('utf-8'))
 
     return params
 
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index de12955c6..54e9dafad 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -50,6 +50,7 @@ def request(query, params):
         language = match_language(params['language'], supported_languages, language_aliases)
         params['url'] += '&locale=' + language.replace('-', '_').lower()
 
+    params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0'
     return params
 
 
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 4dae735d1..a216ba886 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -21,7 +21,8 @@ search_url = base_url + u'w/api.php?'\
     'action=query'\
     '&format=json'\
     '&{query}'\
-    '&prop=extracts|pageimages'\
+    '&prop=extracts|pageimages|pageprops'\
+    '&ppprop=disambiguation'\
     '&exintro'\
     '&explaintext'\
     '&pithumbsize=300'\
@@ -79,12 +80,15 @@ def response(resp):
 
     # wikipedia article's unique id
     # first valid id is assumed to be the requested article
+    if 'pages' not in search_result['query']:
+        return results
+
     for article_id in search_result['query']['pages']:
         page = search_result['query']['pages'][article_id]
         if int(article_id) > 0:
             break
 
-    if int(article_id) < 0:
+    if int(article_id) < 0 or 'disambiguation' in page.get('pageprops', {}):
         return []
 
     title = page.get('title')
@@ -96,6 +100,7 @@ def response(resp):
     extract = page.get('extract')
 
     summary = extract_first_paragraph(extract, title, image)
+    summary = summary.replace('() ', '')
 
     # link to wikipedia article
     wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
diff --git a/searx/settings.yml b/searx/settings.yml
index c6f805331..e41b84c13 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -408,7 +408,7 @@ engines:
 
   - name : library genesis
     engine : xpath
-    search_url : http://libgen.io/search.php?req={query}
+    search_url : https://libgen.is/search.php?req={query}
     url_xpath : //a[contains(@href,"bookfi.net")]/@href
     title_xpath : //a[contains(@href,"book/")]/text()[1]
     content_xpath : //td/a[1][contains(@href,"=author")]/text()
@@ -464,7 +464,7 @@ engines:
   - name : openairedatasets
     engine : json_engine
     paging : True
-    search_url : http://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query}
+    search_url : https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query}
     results_query : response/results/result
     url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
     title_query : metadata/oaf:entity/oaf:result/title/$
@@ -476,7 +476,7 @@ engines:
   - name : openairepublications
     engine : json_engine
     paging : True
-    search_url : http://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query}
+    search_url : https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query}
     results_query : response/results/result
     url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
     title_query : metadata/oaf:entity/oaf:result/title/$
@@ -812,7 +812,7 @@ locales:
 doi_resolvers :
   oadoi.org : 'https://oadoi.org/'
   doi.org : 'https://doi.org/'
-  doai.io  : 'http://doai.io/'
-  sci-hub.tw : 'http://sci-hub.tw/'
+  doai.io  : 'https://doai.io/'
+  sci-hub.tw : 'https://sci-hub.tw/'
 
 default_doi_resolver : 'oadoi.org'
diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml
index 635809041..25f229e56 100644
--- a/searx/settings_robot.yml
+++ b/searx/settings_robot.yml
@@ -43,7 +43,7 @@ locales:
 doi_resolvers :
   oadoi.org : 'https://oadoi.org/'
   doi.org : 'https://doi.org/'
-  doai.io  : 'http://doai.io/'
-  sci-hub.tw : 'http://sci-hub.tw/'
+  doai.io  : 'https://doai.io/'
+  sci-hub.tw : 'https://sci-hub.tw/'
 
 default_doi_resolver : 'oadoi.org'
diff --git a/searx/templates/oscar/infobox.html b/searx/templates/oscar/infobox.html
index 9f5e58d2b..9802f11e2 100644
--- a/searx/templates/oscar/infobox.html
+++ b/searx/templates/oscar/infobox.html
@@ -6,7 +6,7 @@
     <div class="panel-body">
         {% if infobox.img_src %}<img class="img-responsive center-block infobox_part" src="{{ image_proxify(infobox.img_src) }}" alt="{{ infobox.infobox }}" />{% endif %}
 
-        {% if infobox.content %}<bdi><p class="infobox_part">{{ infobox.content }}</p></bdi>{% endif %}
+        {% if infobox.content %}<bdi><p class="infobox_part">{{ infobox.content | safe }}</p></bdi>{% endif %}
 
         {% if infobox.attributes -%}
         <table class="table table-striped infobox_part">
diff --git a/searx/webapp.py b/searx/webapp.py
index 7cf4106d3..212c874c9 100644
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -606,11 +606,11 @@ def index():
     # HTML output format
 
     # suggestions: use RawTextQuery to get the suggestion URLs with the same bang
-    suggestion_urls = map(lambda suggestion: {
-                          'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(),
-                          'title': suggestion
-                          },
-                          result_container.suggestions)
+    suggestion_urls = list(map(lambda suggestion: {
+                               'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(),
+                               'title': suggestion
+                               },
+                               result_container.suggestions))
 
     correction_urls = list(map(lambda correction: {
                                'url': raw_text_query.changeSearchQuery(correction).getFullQuery(),
author	Markus Heiser <markus.heiser@darmarIT.de>	2019-12-24 15:42:05 +0100
committer	GitHub <noreply@github.com>	2019-12-24 15:42:05 +0100
commit	38dad2e8e3b100711afe3ae942aaed5111841cd6 (patch)
tree	51f1a35121155010411aa5970ef06aff80adf741 /searx
parent	0ae86cd1685d244c83a6080a7816365096ab06f8 (diff)
parent	a395fb4a8d030d5b8fde496d2ae722bc034d3e32 (diff)
download	searxng-38dad2e8e3b100711afe3ae942aaed5111841cd6.tar.gz searxng-38dad2e8e3b100711afe3ae942aaed5111841cd6.zip