diff options
-rw-r--r-- | CONTRIBUTING.md | 4 | ||||
-rwxr-xr-x | dockerfiles/docker-entrypoint.sh | 2 | ||||
-rw-r--r-- | docs/admin/buildhosts.rst | 2 | ||||
-rw-r--r-- | docs/admin/installation-nginx.rst | 2 | ||||
-rw-r--r-- | docs/admin/settings.rst | 2 | ||||
-rw-r--r-- | docs/conf.py | 6 | ||||
-rw-r--r-- | docs/dev/contribution_guide.rst | 4 | ||||
-rw-r--r-- | docs/dev/engine_overview.rst | 2 | ||||
-rw-r--r-- | docs/dev/quickstart.rst | 2 | ||||
-rw-r--r-- | docs/dev/reST.rst | 24 | ||||
-rw-r--r-- | searx/autocomplete.py | 5 | ||||
-rw-r--r-- | searx/engines/duckduckgo_definitions.py | 4 | ||||
-rw-r--r-- | searx/engines/filecrop.py | 85 | ||||
-rw-r--r-- | searx/engines/soundcloud.py | 2 | ||||
-rw-r--r-- | searx/engines/wikipedia.py | 2 | ||||
-rw-r--r-- | searx/engines/www1x.py | 24 | ||||
-rw-r--r-- | searx/settings.yml | 1 | ||||
-rw-r--r-- | utils/makefile.python | 2 |
18 files changed, 44 insertions, 131 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6850ab405..300349f3b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,8 +2,8 @@ ## Resources in the documentation -* [Development quickstart](http://searx.github.io/searx/dev/contribution_guide.html) -* [Contribution guide](http://searx.github.io/searx/dev/contribution_guide.html) +* [Development quickstart](https://searx.github.io/searx/dev/contribution_guide.html) +* [Contribution guide](https://searx.github.io/searx/dev/contribution_guide.html) ## Submitting PRs diff --git a/dockerfiles/docker-entrypoint.sh b/dockerfiles/docker-entrypoint.sh index a8f18f05b..accc015f7 100755 --- a/dockerfiles/docker-entrypoint.sh +++ b/dockerfiles/docker-entrypoint.sh @@ -66,7 +66,7 @@ patch_searx_settings() { CONF="$1" # Make sure that there is trailing slash at the end of BASE_URL - # see http://www.gnu.org/savannah-checkouts/gnu/bash/manual/bash.html#Shell-Parameter-Expansion + # see https://www.gnu.org/savannah-checkouts/gnu/bash/manual/bash.html#Shell-Parameter-Expansion export BASE_URL="${BASE_URL%/}/" # update settings.yml diff --git a/docs/admin/buildhosts.rst b/docs/admin/buildhosts.rst index a727d25b9..1f6eb472e 100644 --- a/docs/admin/buildhosts.rst +++ b/docs/admin/buildhosts.rst @@ -67,7 +67,7 @@ to ``imgmath``: If your docs build (``make docs``) shows warnings like this:: WARNING: dot(1) not found, for better output quality install \ - graphviz from http://www.graphviz.org + graphviz from https://www.graphviz.org .. WARNING: LaTeX command 'latex' cannot be run (needed for math \ display), check the imgmath_latex setting diff --git a/docs/admin/installation-nginx.rst b/docs/admin/installation-nginx.rst index 65fd73573..589c40ada 100644 --- a/docs/admin/installation-nginx.rst +++ b/docs/admin/installation-nginx.rst @@ -9,7 +9,7 @@ Install with nginx .. _nginx server configuration: https://docs.nginx.com/nginx/admin-guide/web-server/web-server/#setting-up-virtual-servers .. _nginx beginners guide: - http://nginx.org/en/docs/beginners_guide.html + https://nginx.org/en/docs/beginners_guide.html .. _Getting Started wiki: https://www.nginx.com/resources/wiki/start/ .. _uWSGI support from nginx: diff --git a/docs/admin/settings.rst b/docs/admin/settings.rst index 532b99752..7bd990fbf 100644 --- a/docs/admin/settings.rst +++ b/docs/admin/settings.rst @@ -112,7 +112,7 @@ Global Settings specific instance of searx, a locale can be defined using an ISO language code, like ``fr``, ``en``, ``de``. -.. _requests proxies: http://requests.readthedocs.io/en/latest/user/advanced/#proxies +.. _requests proxies: https://requests.readthedocs.io/en/latest/user/advanced/#proxies .. _PySocks: https://pypi.org/project/PySocks/ ``proxies`` : diff --git a/docs/conf.py b/docs/conf.py index 4b348ae0e..d6fde9bec 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -49,11 +49,11 @@ extlinks['man'] = ('https://manpages.debian.org/jump?q=%s', '') #extlinks['role'] = ( # 'https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-%s', '') extlinks['duref'] = ( - 'http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#%s', '') + 'https://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#%s', '') extlinks['durole'] = ( - 'http://docutils.sourceforge.net/docs/ref/rst/roles.html#%s', '') + 'https://docutils.sourceforge.net/docs/ref/rst/roles.html#%s', '') extlinks['dudir'] = ( - 'http://docutils.sourceforge.net/docs/ref/rst/directives.html#%s', '') + 'https://docutils.sourceforge.net/docs/ref/rst/directives.html#%s', '') extlinks['ctan'] = ( 'https://ctan.org/pkg/%s', 'CTAN: ') diff --git a/docs/dev/contribution_guide.rst b/docs/dev/contribution_guide.rst index 26f8d2bb7..90b22670c 100644 --- a/docs/dev/contribution_guide.rst +++ b/docs/dev/contribution_guide.rst @@ -117,8 +117,8 @@ Translation currently takes place on :ref:`transifex <translation>`. Documentation ============= -.. _Sphinx: http://www.sphinx-doc.org -.. _reST: http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html +.. _Sphinx: https://www.sphinx-doc.org +.. _reST: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html .. sidebar:: The reST sources diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst index 0b5f9857f..99726a456 100644 --- a/docs/dev/engine_overview.rst +++ b/docs/dev/engine_overview.rst @@ -265,7 +265,7 @@ latitude latitude of result (in decimal format) longitude longitude of result (in decimal format) boundingbox boundingbox of result (array of 4. values ``[lat-min, lat-max, lon-min, lon-max]``) -geojson geojson of result (http://geojson.org) +geojson geojson of result (https://geojson.org/) osm.type type of osm-object (if OSM-Result) osm.id id of osm-object (if OSM-Result) address.name name of object diff --git a/docs/dev/quickstart.rst b/docs/dev/quickstart.rst index 3e1a5e344..14af03fa6 100644 --- a/docs/dev/quickstart.rst +++ b/docs/dev/quickstart.rst @@ -60,7 +60,7 @@ read :ref:`make test`. How to compile styles and javascript ==================================== -.. _less: http://lesscss.org/ +.. _less: https://lesscss.org/ .. _NodeJS: https://nodejs.org How to build styles diff --git a/docs/dev/reST.rst b/docs/dev/reST.rst index 906a0e9af..963378748 100644 --- a/docs/dev/reST.rst +++ b/docs/dev/reST.rst @@ -1391,27 +1391,27 @@ The next example shows the difference of ``\tfrac`` (*textstyle*) and ``\dfrac`` .. _readability: https://docs.python-guide.org/writing/style/ .. _Sphinx-Primer: - http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html + https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html .. _reST: https://docutils.sourceforge.io/rst.html .. _Sphinx Roles: https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html -.. _Sphinx: http://www.sphinx-doc.org -.. _`sphinx-doc FAQ`: http://www.sphinx-doc.org/en/stable/faq.html +.. _Sphinx: https://www.sphinx-doc.org +.. _`sphinx-doc FAQ`: https://www.sphinx-doc.org/en/stable/faq.html .. _Sphinx markup constructs: - http://www.sphinx-doc.org/en/stable/markup/index.html + https://www.sphinx-doc.org/en/stable/markup/index.html .. _`sphinx cross references`: - http://www.sphinx-doc.org/en/stable/markup/inline.html#cross-referencing-arbitrary-locations + https://www.sphinx-doc.org/en/stable/markup/inline.html#cross-referencing-arbitrary-locations .. _sphinx.ext.extlinks: https://www.sphinx-doc.org/en/master/usage/extensions/extlinks.html -.. _intersphinx: http://www.sphinx-doc.org/en/stable/ext/intersphinx.html -.. _sphinx config: http://www.sphinx-doc.org/en/stable/config.html -.. _Sphinx's autodoc: http://www.sphinx-doc.org/en/stable/ext/autodoc.html +.. _intersphinx: https://www.sphinx-doc.org/en/stable/ext/intersphinx.html +.. _sphinx config: https://www.sphinx-doc.org/en/stable/config.html +.. _Sphinx's autodoc: https://www.sphinx-doc.org/en/stable/ext/autodoc.html .. _Sphinx's Python domain: - http://www.sphinx-doc.org/en/stable/domains.html#the-python-domain + https://www.sphinx-doc.org/en/stable/domains.html#the-python-domain .. _Sphinx's C domain: - http://www.sphinx-doc.org/en/stable/domains.html#cross-referencing-c-constructs + https://www.sphinx-doc.org/en/stable/domains.html#cross-referencing-c-constructs .. _doctree: - http://www.sphinx-doc.org/en/master/extdev/tutorial.html?highlight=doctree#build-phases + https://www.sphinx-doc.org/en/master/extdev/tutorial.html?highlight=doctree#build-phases .. _docutils: http://docutils.sourceforge.net/docs/index.html .. _docutils FAQ: http://docutils.sourceforge.net/FAQ.html .. _linuxdoc: https://return42.github.io/linuxdoc @@ -1424,5 +1424,5 @@ The next example shows the difference of ``\tfrac`` (*textstyle*) and ``\dfrac`` .. _ImageMagick: https://www.imagemagick.org .. _`Emacs Table Mode`: https://www.emacswiki.org/emacs/TableMode -.. _`Online Tables Generator`: http://www.tablesgenerator.com/text_tables +.. _`Online Tables Generator`: https://www.tablesgenerator.com/text_tables .. _`OASIS XML Exchange Table Model`: https://www.oasis-open.org/specs/tm9901.html diff --git a/searx/autocomplete.py b/searx/autocomplete.py index 420b8a461..fbe634a5b 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -113,7 +113,7 @@ def searx_bang(full_query): def dbpedia(query, lang): # dbpedia autocompleter, no HTTPS - autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' + autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' response = get(autocomplete_url + urlencode(dict(QueryString=query))) @@ -121,8 +121,7 @@ def dbpedia(query, lang): if response.ok: dom = etree.fromstring(response.content) - results = dom.xpath('//a:Result/a:Label//text()', - namespaces={'a': 'http://lookup.dbpedia.org/'}) + results = dom.xpath('//Result/Label//text()') return results diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 5a7649173..1d1c84b4b 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -10,7 +10,7 @@ DuckDuckGo (definitions) """ import json -from urllib.parse import urlencode +from urllib.parse import urlencode, urlparse, urljoin from lxml import html from searx import logger @@ -102,6 +102,8 @@ def response(resp): # image image = search_res.get('Image') image = None if image == '' else image + if image is not None and urlparse(image).netloc == '': + image = urljoin('https://duckduckgo.com', image) # urls # Official website, Wikipedia page diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py deleted file mode 100644 index 0331e7b19..000000000 --- a/searx/engines/filecrop.py +++ /dev/null @@ -1,85 +0,0 @@ -from html.parser import HTMLParser -from urllib.parse import urlencode - - -url = 'http://www.filecrop.com/' -search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa - -paging = True - - -class FilecropResultParser(HTMLParser): # pylint: disable=W0223 # (see https://bugs.python.org/issue31844) - - def __init__(self): - HTMLParser.__init__(self) - self.__start_processing = False - - self.results = [] - self.result = {} - - self.tr_counter = 0 - self.data_counter = 0 - - def handle_starttag(self, tag, attrs): - - if tag == 'tr': - if ('bgcolor', '#edeff5') in attrs or\ - ('bgcolor', '#ffffff') in attrs: - self.__start_processing = True - - if not self.__start_processing: - return - - if tag == 'label': - self.result['title'] = [attr[1] for attr in attrs - if attr[0] == 'title'][0] - elif tag == 'a' and ('rel', 'nofollow') in attrs\ - and ('class', 'sourcelink') in attrs: - if 'content' in self.result: - self.result['content'] += [attr[1] for attr in attrs - if attr[0] == 'title'][0] - else: - self.result['content'] = [attr[1] for attr in attrs - if attr[0] == 'title'][0] - self.result['content'] += ' ' - elif tag == 'a': - self.result['url'] = url + [attr[1] for attr in attrs - if attr[0] == 'href'][0] - - def handle_endtag(self, tag): - if self.__start_processing is False: - return - - if tag == 'tr': - self.tr_counter += 1 - - if self.tr_counter == 2: - self.__start_processing = False - self.tr_counter = 0 - self.data_counter = 0 - self.results.append(self.result) - self.result = {} - - def handle_data(self, data): - if not self.__start_processing: - return - - if 'content' in self.result: - self.result['content'] += data + ' ' - else: - self.result['content'] = data + ' ' - - self.data_counter += 1 - - -def request(query, params): - index = 1 + (params['pageno'] - 1) * 30 - params['url'] = search_url.format(query=urlencode({'w': query}), index=index) - return params - - -def response(resp): - parser = FilecropResultParser() - parser.feed(resp.text) - - return parser.results diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index b1e01759f..84ff21a88 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -91,7 +91,7 @@ def response(resp): for result in search_res.get('collection', []): if result['kind'] in ('track', 'playlist'): title = result['title'] - content = result['description'] + content = result['description'] or '' publishedDate = parser.parse(result['last_modified']) uri = quote_plus(result['uri']) embedded = embedded_url.format(uri=uri) diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 9fce170eb..000e1af76 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -52,7 +52,7 @@ def response(resp): api_result = loads(resp.text) # skip disambiguation pages - if api_result['type'] != 'standard': + if api_result.get('type') != 'standard': return [] title = api_result['title'] diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index 8d691c852..b8f111a50 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -7,12 +7,12 @@ @using-api no @results HTML @stable no (HTML can change) - @parse url, title, thumbnail, img_src, content + @parse url, title, thumbnail """ -from lxml import html +from lxml import html, etree from urllib.parse import urlencode, urljoin -from searx.utils import extract_text +from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex # engine dependent config categories = ['images'] @@ -21,6 +21,7 @@ paging = False # search-url base_url = 'https://1x.com' search_url = base_url + '/backend/search.php?{query}' +gallery_url = 'https://gallery.1x.com/' # do search-request @@ -33,23 +34,18 @@ def request(query, params): # get response from search-request def response(resp): results = [] - - dom = html.fromstring(resp.text) - for res in dom.xpath('//div[@class="List-item MainListing"]'): - # processed start and end of link - link = res.xpath('//a')[0] - + xmldom = etree.fromstring(resp.content) + xmlsearchresult = eval_xpath_getindex(xmldom, '//searchresult', 0) + dom = html.fragment_fromstring(xmlsearchresult.text, create_parent='div') + for link in eval_xpath_list(dom, '/div/table/tr/td/div[2]//a'): url = urljoin(base_url, link.attrib.get('href')) title = extract_text(link) - - thumbnail_src = urljoin(base_url, res.xpath('.//img')[0].attrib['src']) - # TODO: get image with higher resolution - img_src = thumbnail_src + thumbnail_src = urljoin(gallery_url, eval_xpath_getindex(link, './/img', 0).attrib['src']) # append result results.append({'url': url, 'title': title, - 'img_src': img_src, + 'img_src': thumbnail_src, 'content': '', 'thumbnail_src': thumbnail_src, 'template': 'images.html'}) diff --git a/searx/settings.yml b/searx/settings.yml index 04b658e0a..f68770cda 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -289,6 +289,7 @@ engines: - name : 1x engine : www1x shortcut : 1x + timeout : 3.0 disabled : True - name : fdroid diff --git a/utils/makefile.python b/utils/makefile.python index 6c6696964..668b0894b 100644 --- a/utils/makefile.python +++ b/utils/makefile.python @@ -252,7 +252,7 @@ pyenv-python: pyenv-install # PyPi is required and since uploads via setuptools is not recommended, we have # to imstall / use twine ... its really a mess. # -# [1] http://python-packaging.readthedocs.io/en/latest/dependencies.html#packages-not-on-pypi +# [1] https://python-packaging.readthedocs.io/en/latest/dependencies.html#packages-not-on-pypi # [2] https://github.com/pypa/pip/pull/1519 # https://github.com/pypa/twine |