summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.config.sh1
-rw-r--r--CONTRIBUTING.md4
-rwxr-xr-xdockerfiles/docker-entrypoint.sh2
-rw-r--r--docs/admin/buildhosts.rst2
-rw-r--r--docs/admin/installation-nginx.rst2
-rw-r--r--docs/admin/installation-searx.rst29
-rw-r--r--docs/admin/settings.rst187
-rw-r--r--docs/build-templates/searx.rst20
-rw-r--r--docs/conf.py6
-rw-r--r--docs/dev/contribution_guide.rst4
-rw-r--r--docs/dev/engine_overview.rst28
-rw-r--r--docs/dev/quickstart.rst2
-rw-r--r--docs/dev/reST.rst24
-rw-r--r--searx/autocomplete.py5
-rw-r--r--searx/engines/__init__.py40
-rw-r--r--searx/engines/acgsou.py4
-rw-r--r--searx/engines/arxiv.py2
-rw-r--r--searx/engines/command.py2
-rw-r--r--searx/engines/currency_convert.py1
-rw-r--r--searx/engines/dictzone.py1
-rw-r--r--searx/engines/duckduckgo_definitions.py4
-rw-r--r--searx/engines/duden.py43
-rw-r--r--searx/engines/filecrop.py85
-rw-r--r--searx/engines/genius.py13
-rw-r--r--searx/engines/qwant.py27
-rw-r--r--searx/engines/seedpeer.py78
-rw-r--r--searx/engines/soundcloud.py2
-rw-r--r--searx/engines/translated.py1
-rw-r--r--searx/engines/wikidata.py3
-rw-r--r--searx/engines/wikipedia.py6
-rw-r--r--searx/engines/www1x.py24
-rw-r--r--searx/engines/xpath.py3
-rw-r--r--searx/engines/youtube_noapi.py2
-rw-r--r--searx/exceptions.py29
-rw-r--r--searx/metrology/error_recorder.py7
-rw-r--r--searx/poolrequests.py11
-rw-r--r--searx/raise_for_httperror.py66
-rw-r--r--searx/results.py5
-rw-r--r--searx/search.py36
-rw-r--r--searx/settings.yml27
-rw-r--r--searx/templates/oscar/macros.html4
-rw-r--r--searx/templates/oscar/preferences.html4
-rw-r--r--searx/templates/simple/macros.html4
-rw-r--r--searx/templates/simple/preferences.html4
-rw-r--r--utils/makefile.python2
-rwxr-xr-xutils/searx.sh13
-rw-r--r--utils/templates/etc/searx/use_default_settings.yml22
47 files changed, 476 insertions, 415 deletions
diff --git a/.config.sh b/.config.sh
index 4eff5f4c6..f9bac7383 100644
--- a/.config.sh
+++ b/.config.sh
@@ -26,6 +26,7 @@ fi
# ---------
# SEARX_INTERNAL_URL="127.0.0.1:8888"
+# SEARX_SETTINGS_TEMPLATE="${REPO_ROOT}/utils/templates/etc/searx/use_default_settings.yml"
# Only change, if you maintain a searx brand in your searx fork.
# GIT_BRANCH="${GIT_BRANCH:-master}"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6850ab405..300349f3b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,8 +2,8 @@
## Resources in the documentation
-* [Development quickstart](http://searx.github.io/searx/dev/contribution_guide.html)
-* [Contribution guide](http://searx.github.io/searx/dev/contribution_guide.html)
+* [Development quickstart](https://searx.github.io/searx/dev/contribution_guide.html)
+* [Contribution guide](https://searx.github.io/searx/dev/contribution_guide.html)
## Submitting PRs
diff --git a/dockerfiles/docker-entrypoint.sh b/dockerfiles/docker-entrypoint.sh
index a8f18f05b..accc015f7 100755
--- a/dockerfiles/docker-entrypoint.sh
+++ b/dockerfiles/docker-entrypoint.sh
@@ -66,7 +66,7 @@ patch_searx_settings() {
CONF="$1"
# Make sure that there is trailing slash at the end of BASE_URL
- # see http://www.gnu.org/savannah-checkouts/gnu/bash/manual/bash.html#Shell-Parameter-Expansion
+ # see https://www.gnu.org/savannah-checkouts/gnu/bash/manual/bash.html#Shell-Parameter-Expansion
export BASE_URL="${BASE_URL%/}/"
# update settings.yml
diff --git a/docs/admin/buildhosts.rst b/docs/admin/buildhosts.rst
index a727d25b9..1f6eb472e 100644
--- a/docs/admin/buildhosts.rst
+++ b/docs/admin/buildhosts.rst
@@ -67,7 +67,7 @@ to ``imgmath``:
If your docs build (``make docs``) shows warnings like this::
WARNING: dot(1) not found, for better output quality install \
- graphviz from http://www.graphviz.org
+ graphviz from https://www.graphviz.org
..
WARNING: LaTeX command 'latex' cannot be run (needed for math \
display), check the imgmath_latex setting
diff --git a/docs/admin/installation-nginx.rst b/docs/admin/installation-nginx.rst
index 65fd73573..589c40ada 100644
--- a/docs/admin/installation-nginx.rst
+++ b/docs/admin/installation-nginx.rst
@@ -9,7 +9,7 @@ Install with nginx
.. _nginx server configuration:
https://docs.nginx.com/nginx/admin-guide/web-server/web-server/#setting-up-virtual-servers
.. _nginx beginners guide:
- http://nginx.org/en/docs/beginners_guide.html
+ https://nginx.org/en/docs/beginners_guide.html
.. _Getting Started wiki:
https://www.nginx.com/resources/wiki/start/
.. _uWSGI support from nginx:
diff --git a/docs/admin/installation-searx.rst b/docs/admin/installation-searx.rst
index a368bfe8c..3f8904a1d 100644
--- a/docs/admin/installation-searx.rst
+++ b/docs/admin/installation-searx.rst
@@ -64,17 +64,38 @@ from the login (*~/.profile*):
Open a second terminal for the configuration tasks and left the ``(searx)$``
terminal open for the tasks below.
+
+.. _use_default_settings.yml:
+
Configuration
-==============
+=============
+
+To create an initial ``/etc/searx/settings.yml`` you can start with a copy of the
+file :origin:`utils/templates/etc/searx/use_default_settings.yml`. This setup
+:ref:`use default settings <settings use_default_settings>` from
+:origin:`searx/settings.yml` and is recommended since :pull:`2291` is merged.
-Create a copy of the :origin:`searx/settings.yml` configuration file in system's
-*/etc* folder. Configure like shown below -- replace ``searx@\$(uname -n)`` with
-a name of your choice -- *and/or* edit ``/etc/searx/settings.yml`` if necessary.
+For a minimal setup, configure as shown below -- replace ``searx@\$(uname -n)``
+with a name of your choice, set ``ultrasecretkey`` -- *and/or* edit
+``/etc/searx/settings.yml`` to your needs.
.. kernel-include:: $DOCS_BUILD/includes/searx.rst
:start-after: START searx config
:end-before: END searx config
+.. tabs::
+
+ .. group-tab:: Use default settings
+
+ .. literalinclude:: ../../utils/templates/etc/searx/use_default_settings.yml
+ :language: yaml
+
+ .. group-tab:: searx/settings.yml
+
+ .. literalinclude:: ../../searx/settings.yml
+ :language: yaml
+
+
Check
=====
diff --git a/docs/admin/settings.rst b/docs/admin/settings.rst
index 532b99752..985c16f85 100644
--- a/docs/admin/settings.rst
+++ b/docs/admin/settings.rst
@@ -9,6 +9,7 @@ file.
.. sidebar:: Further reading ..
+ - :ref:`use_default_settings.yml`
- :ref:`search API`
.. contents:: Contents
@@ -16,92 +17,61 @@ file.
:local:
:backlinks: entry
-.. _settings global:
+.. _settings location:
-Global Settings
-===============
+settings.yml location
+=====================
-.. code:: yaml
+First, searx will try to load settings.yml from these locations:
- server:
- port : 8888
- secret_key : "ultrasecretkey" # change this!
- debug : False # debug mode, only for development
- request_timeout : 2.0 # seconds
- base_url : False # set custom base_url (or False)
- themes_path : "" # custom ui themes path
- default_theme : oscar # ui theme
- useragent_suffix : "" # suffix of searx_useragent, could contain
- # informations like admins email address
- image_proxy : False # proxying image results through searx
- default_locale : "" # default interface locale
+1. the full path specified in the ``SEARX_SETTINGS_PATH`` environment variable.
+2. ``/etc/searx/settings.yml``
- outgoing: # communication with search engines
- request_timeout : 2.0 # default timeout in seconds, can be override by engine
- # max_request_timeout: 10.0 # the maximum timeout in seconds
- useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator
- pool_connections : 100 # Number of different hosts
- pool_maxsize : 10 # Number of simultaneous requests by host
-
- #proxies:
- # http:
- # - http://proxy1:8080
- # - http://proxy2:8080
- # https:
- # - http://proxy1:8080
- # - http://proxy2:8080
- # - socks5://user:password@proxy3:1080
- # - socks5h://user:password@proxy4:1080
-
- #source_ips:
- # - 1.1.1.1
- # - 1.1.1.2
+If these files don't exist (or are empty or can't be read), searx uses the :origin:`searx/settings.yml` file.
- locales:
- en : English
- de : Deutsch
- he : Hebrew
- hu : Magyar
- fr : Français
- es : Español
- it : Italiano
- nl : Nederlands
- ja : 日本語 (Japanese)
- tr : Türkçe
- ru : Russian
- ro : Romanian
+.. _settings global:
-``port`` :
- Port number of the searx web application if you run it directly using ``python
- searx/webapp.py``. Doesn't apply to searx running on Apache or Nginx.
+Global Settings
+===============
-``secret_key`` :
- Used for cryptography purpose.
+.. code:: yaml
+
+ general:
+ debug : False # Debug mode, only for development
+ instance_name : "searx" # displayed name
``debug`` :
Allow a more detailed log if you run searx directly. Display *detailed* error
messages in the browser too, so this must be deactivated in production.
-``request_timeout`` :
- Global timeout of the requests made to others engines in seconds. A bigger
- timeout will allow to wait for answers from slow engines, but in consequence
- will slow searx reactivity (the result page may take the time specified in the
- timeout to load)
-
-``base_url`` :
- The base URL where searx is deployed. Used to create correct inbound links.
+.. code:: yaml
-``themes_path`` :
- Path to where the themes are located. If you didn't develop anything, leave it
- blank.
+ server:
+ port : 8888
+ bind_address : "127.0.0.1" # address to listen on
+ secret_key : "ultrasecretkey" # change this!
+ base_url : False # set custom base_url (or False)
+ image_proxy : False # proxying image results through searx
+ default_locale : "" # default interface locale
+ default_theme : oscar # ui theme
+ default_http_headers:
+ X-Content-Type-Options : nosniff
+ X-XSS-Protection : 1; mode=block
+ X-Download-Options : noopen
+ X-Robots-Tag : noindex, nofollow
+ Referrer-Policy : no-referrer
+
+``port`` & ``bind_address``:
+ Port number and *bind address* of the searx web application if you run it
+ directly using ``python searx/webapp.py``. Doesn't apply to searx running on
+ Apache or Nginx.
-``default_theme`` :
- Name of the theme you want to use by default on your searx instance.
+``secret_key`` :
+ Used for cryptography purpose.
-``useragent_suffix`` :
- Suffix to the user-agent searx uses to send requests to others engines. If an
- engine wish to block you, a contact info here may be useful to avoid that.
+``base_url`` :
+ The base URL where searx is deployed. Used to create correct inbound links.
``image_proxy`` :
Allow your instance of searx of being able to proxy images. Uses memory space.
@@ -112,7 +82,49 @@ Global Settings
specific instance of searx, a locale can be defined using an ISO language
code, like ``fr``, ``en``, ``de``.
-.. _requests proxies: http://requests.readthedocs.io/en/latest/user/advanced/#proxies
+``default_theme`` :
+ Name of the theme you want to use by default on your searx instance.
+
+.. _HTTP headers: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers
+
+``default_http_headers``:
+  Set additional HTTP headers, see `#715 <https://github.com/searx/searx/issues/715>`__
+
+
+.. code:: yaml
+
+ outgoing: # communication with search engines
+    request_timeout : 2.0       # default timeout in seconds, can be overridden by engine
+ # max_request_timeout: 10.0 # the maximum timeout in seconds
+ useragent_suffix : "" # informations like an email address to the administrator
+ pool_connections : 100 # Number of different hosts
+ pool_maxsize : 10 # Number of simultaneous requests by host
+ # uncomment below section if you want to use a proxy
+ # proxies:
+ # http:
+ # - http://proxy1:8080
+ # - http://proxy2:8080
+ # https:
+ # - http://proxy1:8080
+ # - http://proxy2:8080
+ # uncomment below section only if you have more than one network interface
+ # which can be the source of outgoing search requests
+ # source_ips:
+ # - 1.1.1.1
+ # - 1.1.1.2
+
+
+``request_timeout`` :
+  Global timeout of the requests made to other engines in seconds. A bigger
+  timeout will allow to wait for answers from slow engines, but in consequence
+  will slow searx reactivity (the result page may take the time specified in the
+  timeout to load). Can be overridden by :ref:`settings engine`
+
+``useragent_suffix`` :
+  Suffix to the user-agent searx uses to send requests to other engines. If an
+  engine wishes to block you, a contact info here may be useful to avoid that.
+
+.. _requests proxies: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
.. _PySocks: https://pypi.org/project/PySocks/
``proxies`` :
@@ -120,10 +132,29 @@ Global Settings
If there are more than one proxy for one protocol (http, https),
requests to the engines are distributed in a round-robin fashion.
+ - Proxy: `see <https://2.python-requests.org/en/latest/user/advanced/#proxies>`__.
+ - SOCKS proxies are also supported: `see <https://2.python-requests.org/en/latest/user/advanced/#socks>`__
+
``source_ips`` :
If you use multiple network interfaces, define from which IP the requests must
be made. This parameter is ignored when ``proxies`` is set.
+.. code:: yaml
+
+ locales:
+ en : English
+ de : Deutsch
+ he : Hebrew
+ hu : Magyar
+ fr : Français
+ es : Español
+ it : Italiano
+ nl : Nederlands
+ ja : 日本語 (Japanese)
+ tr : Türkçe
+ ru : Russian
+ ro : Romanian
+
``locales`` :
Locales codes and their names. Available translations of searx interface.
@@ -208,19 +239,7 @@ Engine settings
engines, and so won't be described here.
-.. _settings location:
-
-settings.yml location
-=====================
-
-First, searx will try to load settings.yml from these locations:
-
-1. the full path specified in the ``SEARX_SETTINGS_PATH`` environment variable.
-2. ``/etc/searx/settings.yml``
-
-If these files don't exist (or are empty or can't be read), searx uses the :origin:`searx/settings.yml` file.
-
-.. _ settings use_default_settings:
+.. _settings use_default_settings:
use_default_settings
====================
diff --git a/docs/build-templates/searx.rst b/docs/build-templates/searx.rst
index 080de293f..fe82ec3d0 100644
--- a/docs/build-templates/searx.rst
+++ b/docs/build-templates/searx.rst
@@ -128,12 +128,28 @@ ${fedora_build}
.. tabs::
- .. group-tab:: bash
+ .. group-tab:: Use default settings
+
+ .. code-block:: sh
+
+ $ sudo -H mkdir -p \"$(dirname ${SEARX_SETTINGS_PATH})\"
+ $ sudo -H cp \"$SEARX_SRC/utils/templates/etc/searx/use_default_settings.yml\" \\
+ \"${SEARX_SETTINGS_PATH}\"
+
+ .. group-tab:: searx/settings.yml
.. code-block:: sh
$ sudo -H mkdir -p \"$(dirname ${SEARX_SETTINGS_PATH})\"
- $ sudo -H cp \"$SEARX_SRC/searx/settings.yml\" \"${SEARX_SETTINGS_PATH}\"
+ $ sudo -H cp \"$SEARX_SRC/searx/settings.yml\" \\
+ \"${SEARX_SETTINGS_PATH}\"
+
+.. tabs::
+
+ .. group-tab:: minimal setup
+
+ .. code-block:: sh
+
$ sudo -H sed -i -e \"s/ultrasecretkey/\$(openssl rand -hex 16)/g\" \"$SEARX_SETTINGS_PATH\"
$ sudo -H sed -i -e \"s/{instance_name}/searx@\$(uname -n)/g\" \"$SEARX_SETTINGS_PATH\"
diff --git a/docs/conf.py b/docs/conf.py
index 4b348ae0e..d6fde9bec 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -49,11 +49,11 @@ extlinks['man'] = ('https://manpages.debian.org/jump?q=%s', '')
#extlinks['role'] = (
# 'https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-%s', '')
extlinks['duref'] = (
- 'http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#%s', '')
+ 'https://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#%s', '')
extlinks['durole'] = (
- 'http://docutils.sourceforge.net/docs/ref/rst/roles.html#%s', '')
+ 'https://docutils.sourceforge.net/docs/ref/rst/roles.html#%s', '')
extlinks['dudir'] = (
- 'http://docutils.sourceforge.net/docs/ref/rst/directives.html#%s', '')
+ 'https://docutils.sourceforge.net/docs/ref/rst/directives.html#%s', '')
extlinks['ctan'] = (
'https://ctan.org/pkg/%s', 'CTAN: ')
diff --git a/docs/dev/contribution_guide.rst b/docs/dev/contribution_guide.rst
index 26f8d2bb7..90b22670c 100644
--- a/docs/dev/contribution_guide.rst
+++ b/docs/dev/contribution_guide.rst
@@ -117,8 +117,8 @@ Translation currently takes place on :ref:`transifex <translation>`.
Documentation
=============
-.. _Sphinx: http://www.sphinx-doc.org
-.. _reST: http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html
+.. _Sphinx: https://www.sphinx-doc.org
+.. _reST: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html
.. sidebar:: The reST sources
diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst
index 0b5f9857f..3562ca61a 100644
--- a/docs/dev/engine_overview.rst
+++ b/docs/dev/engine_overview.rst
@@ -134,19 +134,19 @@ The function ``def request(query, params):`` always returns the ``params``
variable. Inside searx, the following paramters can be used to specify a search
request:
-================== =========== ==========================================================================
-argument type information
-================== =========== ==========================================================================
-url string requested url
-method string HTTP request method
-headers set HTTP header information
-data set HTTP data information (parsed if ``method != 'GET'``)
-cookies set HTTP cookies
-verify boolean Performing SSL-Validity check
-max_redirects int maximum redirects, hard limit
-soft_max_redirects int maximum redirects, soft limit. Record an error but don't stop the engine
-raise_for_status bool True by default: raise an exception if the HTTP code of response is >= 300
-================== =========== ==========================================================================
+=================== =========== ==========================================================================
+argument type information
+=================== =========== ==========================================================================
+url string requested url
+method string HTTP request method
+headers set HTTP header information
+data set HTTP data information (parsed if ``method != 'GET'``)
+cookies set HTTP cookies
+verify boolean Performing SSL-Validity check
+max_redirects int maximum redirects, hard limit
+soft_max_redirects int maximum redirects, soft limit. Record an error but don't stop the engine
+raise_for_httperror bool True by default: raise an exception if the HTTP code of response is >= 300
+=================== =========== ==========================================================================
example code
@@ -265,7 +265,7 @@ latitude latitude of result (in decimal format)
longitude longitude of result (in decimal format)
boundingbox boundingbox of result (array of 4. values
``[lat-min, lat-max, lon-min, lon-max]``)
-geojson geojson of result (http://geojson.org)
+geojson geojson of result (https://geojson.org/)
osm.type type of osm-object (if OSM-Result)
osm.id id of osm-object (if OSM-Result)
address.name name of object
diff --git a/docs/dev/quickstart.rst b/docs/dev/quickstart.rst
index 3e1a5e344..14af03fa6 100644
--- a/docs/dev/quickstart.rst
+++ b/docs/dev/quickstart.rst
@@ -60,7 +60,7 @@ read :ref:`make test`.
How to compile styles and javascript
====================================
-.. _less: http://lesscss.org/
+.. _less: https://lesscss.org/
.. _NodeJS: https://nodejs.org
How to build styles
diff --git a/docs/dev/reST.rst b/docs/dev/reST.rst
index 906a0e9af..963378748 100644
--- a/docs/dev/reST.rst
+++ b/docs/dev/reST.rst
@@ -1391,27 +1391,27 @@ The next example shows the difference of ``\tfrac`` (*textstyle*) and ``\dfrac``
.. _readability: https://docs.python-guide.org/writing/style/
.. _Sphinx-Primer:
- http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html
+ https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html
.. _reST: https://docutils.sourceforge.io/rst.html
.. _Sphinx Roles:
https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html
-.. _Sphinx: http://www.sphinx-doc.org
-.. _`sphinx-doc FAQ`: http://www.sphinx-doc.org/en/stable/faq.html
+.. _Sphinx: https://www.sphinx-doc.org
+.. _`sphinx-doc FAQ`: https://www.sphinx-doc.org/en/stable/faq.html
.. _Sphinx markup constructs:
- http://www.sphinx-doc.org/en/stable/markup/index.html
+ https://www.sphinx-doc.org/en/stable/markup/index.html
.. _`sphinx cross references`:
- http://www.sphinx-doc.org/en/stable/markup/inline.html#cross-referencing-arbitrary-locations
+ https://www.sphinx-doc.org/en/stable/markup/inline.html#cross-referencing-arbitrary-locations
.. _sphinx.ext.extlinks:
https://www.sphinx-doc.org/en/master/usage/extensions/extlinks.html
-.. _intersphinx: http://www.sphinx-doc.org/en/stable/ext/intersphinx.html
-.. _sphinx config: http://www.sphinx-doc.org/en/stable/config.html
-.. _Sphinx's autodoc: http://www.sphinx-doc.org/en/stable/ext/autodoc.html
+.. _intersphinx: https://www.sphinx-doc.org/en/stable/ext/intersphinx.html
+.. _sphinx config: https://www.sphinx-doc.org/en/stable/config.html
+.. _Sphinx's autodoc: https://www.sphinx-doc.org/en/stable/ext/autodoc.html
.. _Sphinx's Python domain:
- http://www.sphinx-doc.org/en/stable/domains.html#the-python-domain
+ https://www.sphinx-doc.org/en/stable/domains.html#the-python-domain
.. _Sphinx's C domain:
- http://www.sphinx-doc.org/en/stable/domains.html#cross-referencing-c-constructs
+ https://www.sphinx-doc.org/en/stable/domains.html#cross-referencing-c-constructs
.. _doctree:
- http://www.sphinx-doc.org/en/master/extdev/tutorial.html?highlight=doctree#build-phases
+ https://www.sphinx-doc.org/en/master/extdev/tutorial.html?highlight=doctree#build-phases
.. _docutils: http://docutils.sourceforge.net/docs/index.html
.. _docutils FAQ: http://docutils.sourceforge.net/FAQ.html
.. _linuxdoc: https://return42.github.io/linuxdoc
@@ -1424,5 +1424,5 @@ The next example shows the difference of ``\tfrac`` (*textstyle*) and ``\dfrac``
.. _ImageMagick: https://www.imagemagick.org
.. _`Emacs Table Mode`: https://www.emacswiki.org/emacs/TableMode
-.. _`Online Tables Generator`: http://www.tablesgenerator.com/text_tables
+.. _`Online Tables Generator`: https://www.tablesgenerator.com/text_tables
.. _`OASIS XML Exchange Table Model`: https://www.oasis-open.org/specs/tm9901.html
diff --git a/searx/autocomplete.py b/searx/autocomplete.py
index 420b8a461..fbe634a5b 100644
--- a/searx/autocomplete.py
+++ b/searx/autocomplete.py
@@ -113,7 +113,7 @@ def searx_bang(full_query):
def dbpedia(query, lang):
# dbpedia autocompleter, no HTTPS
- autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
+ autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
response = get(autocomplete_url + urlencode(dict(QueryString=query)))
@@ -121,8 +121,7 @@ def dbpedia(query, lang):
if response.ok:
dom = etree.fromstring(response.content)
- results = dom.xpath('//a:Result/a:Label//text()',
- namespaces={'a': 'http://lookup.dbpedia.org/'})
+ results = dom.xpath('//Result/Label//text()')
return results
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index ddd6a7feb..b2a9b25a4 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -20,6 +20,7 @@ import sys
import threading
from os.path import realpath, dirname
from babel.localedata import locale_identifiers
+from urllib.parse import urlparse
from flask_babel import gettext
from operator import itemgetter
from searx import settings
@@ -280,8 +281,12 @@ def initialize_engines(engine_list):
load_engines(engine_list)
def engine_init(engine_name, init_fn):
- init_fn(get_engine_from_settings(engine_name))
- logger.debug('%s engine: Initialized', engine_name)
+ try:
+ init_fn(get_engine_from_settings(engine_name))
+ except Exception:
+ logger.exception('%s engine: Fail to initialize', engine_name)
+ else:
+ logger.debug('%s engine: Initialized', engine_name)
for engine_name, engine in engines.items():
if hasattr(engine, 'init'):
@@ -289,3 +294,34 @@ def initialize_engines(engine_list):
if init_fn:
logger.debug('%s engine: Starting background initialization', engine_name)
threading.Thread(target=engine_init, args=(engine_name, init_fn)).start()
+
+ _set_https_support_for_engine(engine)
+
+
+def _set_https_support_for_engine(engine):
+ # check HTTPS support if it is not disabled
+ if not engine.offline and not hasattr(engine, 'https_support'):
+ params = engine.request('http_test', {
+ 'method': 'GET',
+ 'headers': {},
+ 'data': {},
+ 'url': '',
+ 'cookies': {},
+ 'verify': True,
+ 'auth': None,
+ 'pageno': 1,
+ 'time_range': None,
+ 'language': '',
+ 'safesearch': False,
+ 'is_test': True,
+ 'category': 'files',
+ 'raise_for_status': True,
+ })
+
+ if 'url' not in params:
+ return
+
+ parsed_url = urlparse(params['url'])
+ https_support = parsed_url.scheme == 'https'
+
+ setattr(engine, 'https_support', https_support)
diff --git a/searx/engines/acgsou.py b/searx/engines/acgsou.py
index b8b367c24..637443edc 100644
--- a/searx/engines/acgsou.py
+++ b/searx/engines/acgsou.py
@@ -18,7 +18,7 @@ categories = ['files', 'images', 'videos', 'music']
paging = True
# search-url
-base_url = 'http://www.acgsou.com/'
+base_url = 'https://www.acgsou.com/'
search_url = base_url + 'search.php?{query}&page={offset}'
# xpath queries
xpath_results = '//table[contains(@class, "list_style table_fixed")]//tr[not(th)]'
@@ -40,7 +40,7 @@ def response(resp):
for result in eval_xpath_list(dom, xpath_results):
# defaults
filesize = 0
- magnet_link = "magnet:?xt=urn:btih:{}&tr=http://tracker.acgsou.com:2710/announce"
+ magnet_link = "magnet:?xt=urn:btih:{}&tr=https://tracker.acgsou.com:2710/announce"
category = extract_text(eval_xpath_getindex(result, xpath_category, 0, default=[]))
page_a = eval_xpath_getindex(result, xpath_title, 0)
diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py
index c702c5987..1190de363 100644
--- a/searx/engines/arxiv.py
+++ b/searx/engines/arxiv.py
@@ -19,7 +19,7 @@ from searx.utils import eval_xpath_list, eval_xpath_getindex
categories = ['science']
paging = True
-base_url = 'http://export.arxiv.org/api/query?search_query=all:'\
+base_url = 'https://export.arxiv.org/api/query?search_query=all:'\
+ '{query}&start={offset}&max_results={number_of_results}'
# engine dependent config
diff --git a/searx/engines/command.py b/searx/engines/command.py
index 08ee5da06..0268d52eb 100644
--- a/searx/engines/command.py
+++ b/searx/engines/command.py
@@ -80,7 +80,7 @@ def search(query, params):
def _get_command_to_run(query):
- params = shlex_split(query.decode('utf-8'))
+ params = shlex_split(query)
__check_query_params(params)
cmd = []
diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py
index f41c135b9..87e21d0af 100644
--- a/searx/engines/currency_convert.py
+++ b/searx/engines/currency_convert.py
@@ -9,6 +9,7 @@ url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
weight = 100
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
+https_support = True
def normalize_name(name):
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
index 5a1fea3cf..727eb6598 100644
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@@ -20,6 +20,7 @@ weight = 100
parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
results_xpath = './/table[@id="r"]/tr'
+https_support = True
def request(query, params):
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 5a7649173..1d1c84b4b 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -10,7 +10,7 @@ DuckDuckGo (definitions)
"""
import json
-from urllib.parse import urlencode
+from urllib.parse import urlencode, urlparse, urljoin
from lxml import html
from searx import logger
@@ -102,6 +102,8 @@ def response(resp):
# image
image = search_res.get('Image')
image = None if image == '' else image
+ if image is not None and urlparse(image).netloc == '':
+ image = urljoin('https://duckduckgo.com', image)
# urls
# Official website, Wikipedia page
diff --git a/searx/engines/duden.py b/searx/engines/duden.py
index 1484a21e5..1475fb846 100644
--- a/searx/engines/duden.py
+++ b/searx/engines/duden.py
@@ -8,11 +8,10 @@
@parse url, title, content
"""
-from lxml import html, etree
import re
from urllib.parse import quote, urljoin
-from searx.utils import extract_text, eval_xpath
-from searx import logger
+from lxml import html
+from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
categories = ['general']
paging = True
@@ -40,6 +39,9 @@ def request(query, params):
params['url'] = search_url_fmt.format(query=quote(query))
else:
params['url'] = search_url.format(offset=offset, query=quote(query))
+ # after the last page of results, spelling corrections are returned after a HTTP redirect
+ # whatever the page number is
+ params['soft_max_redirects'] = 1
return params
@@ -51,28 +53,21 @@ def response(resp):
dom = html.fromstring(resp.text)
- try:
- number_of_results_string =\
- re.sub('[^0-9]', '',
- eval_xpath(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()')[0])
-
+ number_of_results_element =\
+ eval_xpath_getindex(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()',
+ 0, default=None)
+ if number_of_results_element is not None:
+ number_of_results_string = re.sub('[^0-9]', '', number_of_results_element)
results.append({'number_of_results': int(number_of_results_string)})
- except:
- logger.debug("Couldn't read number of results.")
-
- for result in eval_xpath(dom, '//section[not(contains(@class, "essay"))]'):
- try:
- url = eval_xpath(result, './/h2/a')[0].get('href')
- url = urljoin(base_url, url)
- title = eval_xpath(result, 'string(.//h2/a)').strip()
- content = extract_text(eval_xpath(result, './/p'))
- # append result
- results.append({'url': url,
- 'title': title,
- 'content': content})
- except:
- logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))
- continue
+ for result in eval_xpath_list(dom, '//section[not(contains(@class, "essay"))]'):
+ url = eval_xpath_getindex(result, './/h2/a', 0).get('href')
+ url = urljoin(base_url, url)
+ title = eval_xpath(result, 'string(.//h2/a)').strip()
+ content = extract_text(eval_xpath(result, './/p'))
+ # append result
+ results.append({'url': url,
+ 'title': title,
+ 'content': content})
return results
diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py
deleted file mode 100644
index 0331e7b19..000000000
--- a/searx/engines/filecrop.py
+++ /dev/null
@@ -1,85 +0,0 @@
-from html.parser import HTMLParser
-from urllib.parse import urlencode
-
-
-url = 'http://www.filecrop.com/'
-search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
-
-paging = True
-
-
-class FilecropResultParser(HTMLParser): # pylint: disable=W0223 # (see https://bugs.python.org/issue31844)
-
- def __init__(self):
- HTMLParser.__init__(self)
- self.__start_processing = False
-
- self.results = []
- self.result = {}
-
- self.tr_counter = 0
- self.data_counter = 0
-
- def handle_starttag(self, tag, attrs):
-
- if tag == 'tr':
- if ('bgcolor', '#edeff5') in attrs or\
- ('bgcolor', '#ffffff') in attrs:
- self.__start_processing = True
-
- if not self.__start_processing:
- return
-
- if tag == 'label':
- self.result['title'] = [attr[1] for attr in attrs
- if attr[0] == 'title'][0]
- elif tag == 'a' and ('rel', 'nofollow') in attrs\
- and ('class', 'sourcelink') in attrs:
- if 'content' in self.result:
- self.result['content'] += [attr[1] for attr in attrs
- if attr[0] == 'title'][0]
- else:
- self.result['content'] = [attr[1] for attr in attrs
- if attr[0] == 'title'][0]
- self.result['content'] += ' '
- elif tag == 'a':
- self.result['url'] = url + [attr[1] for attr in attrs
- if attr[0] == 'href'][0]
-
- def handle_endtag(self, tag):
- if self.__start_processing is False:
- return
-
- if tag == 'tr':
- self.tr_counter += 1
-
- if self.tr_counter == 2:
- self.__start_processing = False
- self.tr_counter = 0
- self.data_counter = 0
- self.results.append(self.result)
- self.result = {}
-
- def handle_data(self, data):
- if not self.__start_processing:
- return
-
- if 'content' in self.result:
- self.result['content'] += data + ' '
- else:
- self.result['content'] = data + ' '
-
- self.data_counter += 1
-
-
-def request(query, params):
- index = 1 + (params['pageno'] - 1) * 30
- params['url'] = search_url.format(query=urlencode({'w': query}), index=index)
- return params
-
-
-def response(resp):
- parser = FilecropResultParser()
- parser.feed(resp.text)
-
- return parser.results
diff --git a/searx/engines/genius.py b/searx/engines/genius.py
index feb7d79d1..2bfbfddf5 100644
--- a/searx/engines/genius.py
+++ b/searx/engines/genius.py
@@ -36,7 +36,7 @@ def parse_lyric(hit):
try:
content = hit['highlights'][0]['value']
except:
- content = None
+ content = ''
timestamp = hit['result']['lyrics_updated_at']
result = {'url': hit['result']['url'],
'title': hit['result']['full_title'],
@@ -51,7 +51,7 @@ def parse_lyric(hit):
def parse_artist(hit):
result = {'url': hit['result']['url'],
'title': hit['result']['name'],
- 'content': None,
+ 'content': '',
'thumbnail': hit['result']['image_url'],
'template': 'videos.html'}
return result
@@ -61,6 +61,7 @@ def parse_album(hit):
result = {'url': hit['result']['url'],
'title': hit['result']['full_title'],
'thumbnail': hit['result']['cover_art_url'],
+ 'content': '',
# 'thumbnail': hit['result']['cover_art_thumbnail_url'],
'template': 'videos.html'}
try:
@@ -81,9 +82,7 @@ def response(resp):
json = loads(resp.text)
hits = [hit for section in json['response']['sections'] for hit in section['hits']]
for hit in hits:
- try:
- func = parse[hit['type']]
- except KeyError:
- continue
- results.append(func(hit))
+ func = parse.get(hit['type'])
+ if func:
+ results.append(func(hit))
return results
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index c909ce11b..b785719d9 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -14,6 +14,8 @@ from datetime import datetime
from json import loads
from urllib.parse import urlencode
from searx.utils import html_to_text, match_language
+from searx.exceptions import SearxEngineAPIException, SearxEngineCaptchaException
+from searx.raise_for_httperror import raise_for_httperror
# engine dependent config
@@ -24,8 +26,7 @@ supported_languages_url = 'https://qwant.com/region'
category_to_keyword = {'general': 'web',
'images': 'images',
- 'news': 'news',
- 'social media': 'social'}
+ 'news': 'news'}
# search-url
url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{query}&t={keyword}&uiv=4'
@@ -51,6 +52,7 @@ def request(query, params):
params['url'] += '&locale=' + language.replace('-', '_').lower()
params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0'
+ params['raise_for_httperror'] = False
return params
@@ -58,8 +60,20 @@ def request(query, params):
def response(resp):
results = []
+ # According to https://www.qwant.com/js/app.js
+ if resp.status_code == 429:
+ raise SearxEngineCaptchaException()
+
+ # raise for other errors
+ raise_for_httperror(resp)
+
+ # load JSON result
search_results = loads(resp.text)
+ # check for an API error
+ if search_results.get('status') != 'success':
+ raise SearxEngineAPIException('API error ' + str(search_results.get('error', '')))
+
# return empty array if there are no results
if 'data' not in search_results:
return []
@@ -90,15 +104,6 @@ def response(resp):
'thumbnail_src': thumbnail_src,
'img_src': img_src})
- elif category_to_keyword.get(categories[0], '') == 'social':
- published_date = datetime.fromtimestamp(result['date'], None)
- img_src = result.get('img', None)
- results.append({'url': res_url,
- 'title': title,
- 'publishedDate': published_date,
- 'content': content,
- 'img_src': img_src})
-
elif category_to_keyword.get(categories[0], '') == 'news':
published_date = datetime.fromtimestamp(result['date'], None)
media = result.get('media', [])
diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py
deleted file mode 100644
index 39916da6e..000000000
--- a/searx/engines/seedpeer.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# Seedpeer (Videos, Music, Files)
-#
-# @website https://seedpeer.me
-# @provide-api no (nothing found)
-#
-# @using-api no
-# @results HTML (using search portal)
-# @stable yes (HTML can change)
-# @parse url, title, content, seed, leech, magnetlink
-
-from lxml import html
-from json import loads
-from operator import itemgetter
-from urllib.parse import quote, urljoin
-from searx.utils import extract_text
-
-
-url = 'https://seedpeer.me/'
-search_url = url + 'search/{search_term}?page={page_no}'
-torrent_file_url = url + 'torrent/{torrent_hash}'
-
-# specific xpath variables
-script_xpath = '//script[@type="text/javascript"][not(@src)]'
-torrent_xpath = '(//table)[2]/tbody/tr'
-link_xpath = '(./td)[1]/a/@href'
-age_xpath = '(./td)[2]'
-size_xpath = '(./td)[3]'
-
-
-# do search-request
-def request(query, params):
- params['url'] = search_url.format(search_term=quote(query),
- page_no=params['pageno'])
- return params
-
-
-# get response from search-request
-def response(resp):
- results = []
- dom = html.fromstring(resp.text)
- result_rows = dom.xpath(torrent_xpath)
-
- try:
- script_element = dom.xpath(script_xpath)[0]
- json_string = script_element.text[script_element.text.find('{'):]
- torrents_json = loads(json_string)
- except:
- return []
-
- # parse results
- for torrent_row, torrent_json in zip(result_rows, torrents_json['data']['list']):
- title = torrent_json['name']
- seed = int(torrent_json['seeds'])
- leech = int(torrent_json['peers'])
- size = int(torrent_json['size'])
- torrent_hash = torrent_json['hash']
-
- torrentfile = torrent_file_url.format(torrent_hash=torrent_hash)
- magnetlink = 'magnet:?xt=urn:btih:{}'.format(torrent_hash)
-
- age = extract_text(torrent_row.xpath(age_xpath))
- link = torrent_row.xpath(link_xpath)[0]
-
- href = urljoin(url, link)
-
- # append result
- results.append({'url': href,
- 'title': title,
- 'content': age,
- 'seed': seed,
- 'leech': leech,
- 'filesize': size,
- 'torrentfile': torrentfile,
- 'magnetlink': magnetlink,
- 'template': 'torrent.html'})
-
- # return results sorted by seeder
- return sorted(results, key=itemgetter('seed'), reverse=True)
diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
index b1e01759f..84ff21a88 100644
--- a/searx/engines/soundcloud.py
+++ b/searx/engines/soundcloud.py
@@ -91,7 +91,7 @@ def response(resp):
for result in search_res.get('collection', []):
if result['kind'] in ('track', 'playlist'):
title = result['title']
- content = result['description']
+ content = result['description'] or ''
publishedDate = parser.parse(result['last_modified'])
uri = quote_plus(result['uri'])
embedded = embedded_url.format(uri=uri)
diff --git a/searx/engines/translated.py b/searx/engines/translated.py
index a50e7c830..75b8b5f42 100644
--- a/searx/engines/translated.py
+++ b/searx/engines/translated.py
@@ -15,6 +15,7 @@ categories = ['general']
url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
weight = 100
+https_support = True
parser_re = re.compile('.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
api_key = ''
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 60d0dc9a0..8d787caac 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -161,9 +161,6 @@ def request(query, params):
def response(resp):
results = []
- if resp.status_code != 200:
- logger.debug('SPARQL endpoint error %s', resp.content.decode())
- resp.raise_for_status()
jsonresponse = loads(resp.content.decode())
language = resp.search_params['language'].lower()
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 9fce170eb..54d75108e 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -14,6 +14,7 @@ from urllib.parse import quote
from json import loads
from lxml.html import fromstring
from searx.utils import match_language, searx_useragent
+from searx.raise_for_httperror import raise_for_httperror
# search-url
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
@@ -37,7 +38,7 @@ def request(query, params):
language=url_lang(params['language']))
params['headers']['User-Agent'] = searx_useragent()
- params['raise_for_status'] = False
+ params['raise_for_httperror'] = False
params['soft_max_redirects'] = 2
return params
@@ -47,12 +48,13 @@ def request(query, params):
def response(resp):
if resp.status_code == 404:
return []
+ raise_for_httperror(resp)
results = []
api_result = loads(resp.text)
# skip disambiguation pages
- if api_result['type'] != 'standard':
+ if api_result.get('type') != 'standard':
return []
title = api_result['title']
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
index 8d691c852..b8f111a50 100644
--- a/searx/engines/www1x.py
+++ b/searx/engines/www1x.py
@@ -7,12 +7,12 @@
@using-api no
@results HTML
@stable no (HTML can change)
- @parse url, title, thumbnail, img_src, content
+ @parse url, title, thumbnail
"""
-from lxml import html
+from lxml import html, etree
from urllib.parse import urlencode, urljoin
-from searx.utils import extract_text
+from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
# engine dependent config
categories = ['images']
@@ -21,6 +21,7 @@ paging = False
# search-url
base_url = 'https://1x.com'
search_url = base_url + '/backend/search.php?{query}'
+gallery_url = 'https://gallery.1x.com/'
# do search-request
@@ -33,23 +34,18 @@ def request(query, params):
# get response from search-request
def response(resp):
results = []
-
- dom = html.fromstring(resp.text)
- for res in dom.xpath('//div[@class="List-item MainListing"]'):
- # processed start and end of link
- link = res.xpath('//a')[0]
-
+ xmldom = etree.fromstring(resp.content)
+ xmlsearchresult = eval_xpath_getindex(xmldom, '//searchresult', 0)
+ dom = html.fragment_fromstring(xmlsearchresult.text, create_parent='div')
+ for link in eval_xpath_list(dom, '/div/table/tr/td/div[2]//a'):
url = urljoin(base_url, link.attrib.get('href'))
title = extract_text(link)
-
- thumbnail_src = urljoin(base_url, res.xpath('.//img')[0].attrib['src'])
- # TODO: get image with higher resolution
- img_src = thumbnail_src
+ thumbnail_src = urljoin(gallery_url, eval_xpath_getindex(link, './/img', 0).attrib['src'])
# append result
results.append({'url': url,
'title': title,
- 'img_src': img_src,
+ 'img_src': thumbnail_src,
'content': '',
'thumbnail_src': thumbnail_src,
'template': 'images.html'})
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index d420e250a..1507176ec 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -7,7 +7,6 @@ url_xpath = None
content_xpath = None
title_xpath = None
thumbnail_xpath = False
-categories = []
paging = False
suggestion_xpath = ''
results_xpath = ''
@@ -39,7 +38,7 @@ def request(query, params):
def response(resp):
results = []
dom = html.fromstring(resp.text)
- is_onion = True if 'onions' in categories else False
+ is_onion = True if 'onions' in categories else False # pylint: disable=undefined-variable
if results_xpath:
for result in eval_xpath_list(dom, results_xpath):
diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py
index 5f7d2ceab..36fc72e36 100644
--- a/searx/engines/youtube_noapi.py
+++ b/searx/engines/youtube_noapi.py
@@ -49,7 +49,7 @@ def response(resp):
results = []
results_data = resp.text[resp.text.find('ytInitialData'):]
- results_data = results_data[results_data.find('{'):results_data.find(';\n')]
+ results_data = results_data[results_data.find('{'):results_data.find(';</script>')]
results_json = loads(results_data) if results_data else {}
sections = results_json.get('contents', {})\
diff --git a/searx/exceptions.py b/searx/exceptions.py
index 82c1d76dc..67a282da2 100644
--- a/searx/exceptions.py
+++ b/searx/exceptions.py
@@ -64,8 +64,33 @@ class SearxEngineAPIException(SearxEngineResponseException):
"""The website has returned an application error"""
-class SearxEngineCaptchaException(SearxEngineResponseException):
- """The website has returned a CAPTCHA"""
+class SearxEngineAccessDeniedException(SearxEngineResponseException):
+ """The website is blocking the access"""
+
+ def __init__(self, suspended_time=24 * 3600, message='Access denied'):
+ super().__init__(message + ', suspended_time=' + str(suspended_time))
+ self.suspended_time = suspended_time
+ self.message = message
+
+
+class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
+ """The website has returned a CAPTCHA
+
+ By default, searx stops sending requests to this engine for 1 day.
+ """
+
+ def __init__(self, suspended_time=24 * 3600, message='CAPTCHA'):
+ super().__init__(message=message, suspended_time=suspended_time)
+
+
+class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
+    """The website has returned a Too Many Requests status code
+
+ By default, searx stops sending requests to this engine for 1 hour.
+ """
+
+ def __init__(self, suspended_time=3600, message='Too many request'):
+ super().__init__(message=message, suspended_time=suspended_time)
class SearxEngineXPathException(SearxEngineResponseException):
diff --git a/searx/metrology/error_recorder.py b/searx/metrology/error_recorder.py
index 4b67235e1..fee1ef7d6 100644
--- a/searx/metrology/error_recorder.py
+++ b/searx/metrology/error_recorder.py
@@ -4,7 +4,8 @@ import logging
from json import JSONDecodeError
from urllib.parse import urlparse
from requests.exceptions import RequestException
-from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException
+from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException,
+ SearxEngineAccessDeniedException)
from searx import logger
@@ -100,6 +101,10 @@ def get_messages(exc, filename) -> typing.Tuple:
return (exc.xpath_str, exc.message)
if isinstance(exc, SearxEngineXPathException):
return (exc.xpath_str, exc.message)
+ if isinstance(exc, SearxEngineAPIException):
+ return (str(exc.args[0]), )
+ if isinstance(exc, SearxEngineAccessDeniedException):
+ return (exc.message, )
return ()
diff --git a/searx/poolrequests.py b/searx/poolrequests.py
index 1eedc84b8..25a6baed9 100644
--- a/searx/poolrequests.py
+++ b/searx/poolrequests.py
@@ -7,6 +7,7 @@ import requests
from searx import settings
from searx import logger
+from searx.raise_for_httperror import raise_for_httperror
logger = logger.getChild('poolrequests')
@@ -156,6 +157,12 @@ def request(method, url, **kwargs):
if timeout is not None:
kwargs['timeout'] = timeout
+    # raise_for_httperror
+ check_for_httperror = True
+ if 'raise_for_httperror' in kwargs:
+ check_for_httperror = kwargs['raise_for_httperror']
+ del kwargs['raise_for_httperror']
+
# do request
response = session.request(method=method, url=url, **kwargs)
@@ -176,6 +183,10 @@ def request(method, url, **kwargs):
if hasattr(threadLocal, 'total_time'):
threadLocal.total_time += time_after_request - time_before_request
+ # raise an exception
+ if check_for_httperror:
+ raise_for_httperror(response)
+
return response
diff --git a/searx/raise_for_httperror.py b/searx/raise_for_httperror.py
new file mode 100644
index 000000000..bd12df9a9
--- /dev/null
+++ b/searx/raise_for_httperror.py
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Raise an exception if an HTTP response is an error.
+"""
+from searx.exceptions import (SearxEngineCaptchaException, SearxEngineTooManyRequestsException,
+ SearxEngineAccessDeniedException)
+
+
+def is_cloudflare_challenge(resp):
+ if resp.status_code in [429, 503]:
+ if ('__cf_chl_jschl_tk__=' in resp.text)\
+ or ('/cdn-cgi/challenge-platform/' in resp.text
+ and 'orchestrate/jsch/v1' in resp.text
+ and 'window._cf_chl_enter(' in resp.text):
+ return True
+ if resp.status_code == 403 and '__cf_chl_captcha_tk__=' in resp.text:
+ return True
+ return False
+
+
+def is_cloudflare_firewall(resp):
+ return resp.status_code == 403 and '<span class="cf-error-code">1020</span>' in resp.text
+
+
+def raise_for_cloudflare_captcha(resp):
+ if resp.headers.get('Server', '').startswith('cloudflare'):
+ if is_cloudflare_challenge(resp):
+ # https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
+ # suspend for 2 weeks
+ raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15)
+
+ if is_cloudflare_firewall(resp):
+ raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24)
+
+
+def raise_for_recaptcha(resp):
+ if resp.status_code == 503 \
+ and '"https://www.google.com/recaptcha/' in resp.text:
+ raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7)
+
+
+def raise_for_captcha(resp):
+ raise_for_cloudflare_captcha(resp)
+ raise_for_recaptcha(resp)
+
+
+def raise_for_httperror(resp):
+    """Raise an exception if an HTTP response is an error.
+
+ Args:
+ resp (requests.Response): Response to check
+
+ Raises:
+        requests.HTTPError: raised by resp.raise_for_status()
+        searx.exceptions.SearxEngineAccessDeniedException: raised when the HTTP status code is 402 or 403.
+        searx.exceptions.SearxEngineTooManyRequestsException: raised when the HTTP status code is 429.
+        searx.exceptions.SearxEngineCaptchaException: raised when a CAPTCHA challenge is detected.
+ """
+ if resp.status_code and resp.status_code >= 400:
+ raise_for_captcha(resp)
+ if resp.status_code in (402, 403):
+ raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code),
+ suspended_time=3600 * 24)
+ if resp.status_code == 429:
+ raise SearxEngineTooManyRequestsException()
+ resp.raise_for_status()
diff --git a/searx/results.py b/searx/results.py
index 5bf4e6b9e..fb7e816eb 100644
--- a/searx/results.py
+++ b/searx/results.py
@@ -309,10 +309,11 @@ class ResultContainer:
for res in results:
# FIXME : handle more than one category per engine
- res['category'] = engines[res['engine']].categories[0]
+ engine = engines[res['engine']]
+ res['category'] = engine.categories[0] if len(engine.categories) > 0 else ''
# FIXME : handle more than one category per engine
- category = engines[res['engine']].categories[0]\
+ category = res['category']\
+ ':' + res.get('template', '')\
+ ':' + ('img_src' if 'img_src' in res or 'thumbnail' in res else '')
diff --git a/searx/search.py b/searx/search.py
index 8c2ad8d72..220950803 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -32,7 +32,8 @@ from searx.utils import gen_useragent
from searx.results import ResultContainer
from searx import logger
from searx.plugins import plugins
-from searx.exceptions import SearxEngineCaptchaException
+from searx.exceptions import (SearxEngineAccessDeniedException, SearxEngineCaptchaException,
+ SearxEngineTooManyRequestsException,)
from searx.metrology.error_recorder import record_exception, record_error
@@ -131,6 +132,9 @@ def send_http_request(engine, request_params):
# soft_max_redirects
soft_max_redirects = request_params.get('soft_max_redirects', max_redirects or 0)
+    # raise_for_httperror
+ request_args['raise_for_httperror'] = request_params.get('raise_for_httperror', False)
+
# specific type of request (GET or POST)
if request_params['method'] == 'GET':
req = requests_lib.get
@@ -142,10 +146,6 @@ def send_http_request(engine, request_params):
# send the request
response = req(request_params['url'], **request_args)
- # check HTTP status
- if request_params.get('raise_for_status'):
- response.raise_for_status()
-
# check soft limit of the redirect count
if len(response.history) > soft_max_redirects:
# unexpected redirect : record an error
@@ -191,6 +191,7 @@ def search_one_http_request_safe(engine_name, query, request_params, result_cont
# suppose everything will be alright
requests_exception = False
+ suspended_time = None
try:
# send requests and parse the results
@@ -240,6 +241,15 @@ def search_one_http_request_safe(engine_name, query, request_params, result_cont
elif (issubclass(e.__class__, SearxEngineCaptchaException)):
result_container.add_unresponsive_engine(engine_name, 'CAPTCHA required')
logger.exception('engine {0} : CAPTCHA')
+ suspended_time = e.suspended_time # pylint: disable=no-member
+ elif (issubclass(e.__class__, SearxEngineTooManyRequestsException)):
+ result_container.add_unresponsive_engine(engine_name, 'too many requests')
+ logger.exception('engine {0} : Too many requests')
+ suspended_time = e.suspended_time # pylint: disable=no-member
+ elif (issubclass(e.__class__, SearxEngineAccessDeniedException)):
+ result_container.add_unresponsive_engine(engine_name, 'blocked')
+ logger.exception('engine {0} : Searx is blocked')
+ suspended_time = e.suspended_time # pylint: disable=no-member
else:
result_container.add_unresponsive_engine(engine_name, 'unexpected crash')
# others errors
@@ -248,16 +258,18 @@ def search_one_http_request_safe(engine_name, query, request_params, result_cont
if getattr(threading.current_thread(), '_timeout', False):
record_error(engine_name, 'Timeout')
- # suspend or not the engine if there are HTTP errors
+ # suspend the engine if there is an HTTP error
+ # or suspended_time is defined
with threading.RLock():
- if requests_exception:
+ if requests_exception or suspended_time:
# update continuous_errors / suspend_end_time
engine.continuous_errors += 1
- engine.suspend_end_time = time() + min(settings['search']['max_ban_time_on_fail'],
- engine.continuous_errors * settings['search']['ban_time_on_fail'])
+ if suspended_time is None:
+ suspended_time = min(settings['search']['max_ban_time_on_fail'],
+ engine.continuous_errors * settings['search']['ban_time_on_fail'])
+ engine.suspend_end_time = time() + suspended_time
else:
- # no HTTP error (perhaps an engine error)
- # anyway, reset the suspend variables
+ # reset the suspend variables
engine.continuous_errors = 0
engine.suspend_end_time = 0
@@ -342,7 +354,7 @@ def default_request_params():
'cookies': {},
'verify': True,
'auth': None,
- 'raise_for_status': True
+ 'raise_for_httperror': True
}
diff --git a/searx/settings.yml b/searx/settings.yml
index 486521d6d..e263e3ad4 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -289,6 +289,7 @@ engines:
- name : 1x
engine : www1x
shortcut : 1x
+ timeout : 3.0
disabled : True
- name : fdroid
@@ -490,7 +491,7 @@ engines:
- name : library genesis
engine : xpath
- search_url : https://libgen.is/search.php?req={query}
+ search_url : http://libgen.rs/search.php?req={query}
url_xpath : //a[contains(@href,"bookfi.net")]/@href
title_xpath : //a[contains(@href,"book/")]/text()[1]
content_xpath : //td/a[1][contains(@href,"=author")]/text()
@@ -646,11 +647,6 @@ engines:
shortcut : qwn
categories : news
- - name : qwant social
- engine : qwant
- shortcut : qws
- categories : social media
-
# - name: library
# engine: recoll
# shortcut: lib
@@ -808,12 +804,13 @@ engines:
# Or you can use the html non-stable engine, activated by default
engine : youtube_noapi
- - name : yggtorrent
- engine : yggtorrent
- shortcut : ygg
- url: https://www2.yggtorrent.si/
- disabled : True
- timeout : 4.0
+ # tmp suspended: Cloudflare CAPTCHA
+ #- name : yggtorrent
+ # engine : yggtorrent
+ # shortcut : ygg
+ # url: https://www2.yggtorrent.si/
+ # disabled : True
+ # timeout : 4.0
- name : dailymotion
engine : dailymotion
@@ -958,12 +955,6 @@ engines:
page_size : 10
disabled : True
- - name : seedpeer
- shortcut : speu
- engine : seedpeer
- categories: files, music, videos
-
-
- name : naver
shortcut: nvr
engine: xpath
diff --git a/searx/templates/oscar/macros.html b/searx/templates/oscar/macros.html
index 2bc1e7805..f40eebd37 100644
--- a/searx/templates/oscar/macros.html
+++ b/searx/templates/oscar/macros.html
@@ -1,6 +1,6 @@
<!-- Draw glyphicon icon from bootstrap-theme -->
-{% macro icon(action) -%}
- <span class="glyphicon glyphicon-{{ action }}"></span>
+{% macro icon(action, alt) -%}
+ <span title="{{ alt }}" class="glyphicon glyphicon-{{ action }}"></span>
{%- endmacro %}
<!-- Draw favicon -->
diff --git a/searx/templates/oscar/preferences.html b/searx/templates/oscar/preferences.html
index bc688dade..fc20b8ca5 100644
--- a/searx/templates/oscar/preferences.html
+++ b/searx/templates/oscar/preferences.html
@@ -230,8 +230,8 @@
<td class="onoff-checkbox">
{{ checkbox_toggle('engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_'), (search_engine.name, categ) in disabled_engines) }}
</td>
- <th scope="row">{{ search_engine.name }}</th>
- <td class="name">{{ shortcuts[search_engine.name] }}</td>
+ <th scope="row">{% if not search_engine.https_support %}{{ icon('exclamation-sign', 'No HTTPS') }}{% endif %} {{ search_engine.name }}</th>
+ <td class="name">{{ shortcuts[search_engine.name] }}</td>
<td>{{ support_toggle(stats[search_engine.name].supports_selected_language) }}</td>
<td>{{ support_toggle(search_engine.safesearch==True) }}</td>
<td>{{ support_toggle(search_engine.time_range_support==True) }}</td>
diff --git a/searx/templates/simple/macros.html b/searx/templates/simple/macros.html
index cacbbec9f..1eb42667a 100644
--- a/searx/templates/simple/macros.html
+++ b/searx/templates/simple/macros.html
@@ -1,6 +1,6 @@
<!-- Draw glyphicon icon from bootstrap-theme -->
-{% macro icon(action) -%}
- <span class="ion-icon-big ion-{{ action }}"></span>
+{% macro icon(action, alt) -%}
+ <span title="{{ alt }}" class="ion-icon-big ion-{{ action }}"></span>
{%- endmacro %}
{% macro icon_small(action) -%}
diff --git a/searx/templates/simple/preferences.html b/searx/templates/simple/preferences.html
index d68e4be5f..f091a97cf 100644
--- a/searx/templates/simple/preferences.html
+++ b/searx/templates/simple/preferences.html
@@ -1,4 +1,4 @@
-{% from 'simple/macros.html' import tabs_open, tabs_close, tab_header, tab_footer, checkbox_onoff, checkbox %}
+{% from 'simple/macros.html' import icon, tabs_open, tabs_close, tab_header, tab_footer, checkbox_onoff, checkbox %}
{% extends "simple/base.html" %}
@@ -121,7 +121,7 @@
{% set engine_id = 'engine_' + search_engine.name|replace(' ', '_') + '__' + categ|replace(' ', '_') %}
<tr>
<td class="engine_checkbox">{{ checkbox_onoff(engine_id, (search_engine.name, categ) in disabled_engines) }}</td>
- <th class="name">{{ search_engine.name }}</th>
+ <th class="name">{% if not search_engine.https_support %}{{ icon('warning', 'No HTTPS') }}{% endif %} {{ search_engine.name }}</th>
<td class="shortcut">{{ shortcuts[search_engine.name] }}</td>
<td>{{ checkbox(engine_id + '_supported_languages', current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages, true, true) }}</td>
<td>{{ checkbox(engine_id + '_safesearch', search_engine.safesearch==True, true, true) }}</td>
diff --git a/utils/makefile.python b/utils/makefile.python
index 6c6696964..668b0894b 100644
--- a/utils/makefile.python
+++ b/utils/makefile.python
@@ -252,7 +252,7 @@ pyenv-python: pyenv-install
# PyPi is required and since uploads via setuptools is not recommended, we have
# to imstall / use twine ... its really a mess.
#
-# [1] http://python-packaging.readthedocs.io/en/latest/dependencies.html#packages-not-on-pypi
+# [1] https://python-packaging.readthedocs.io/en/latest/dependencies.html#packages-not-on-pypi
# [2] https://github.com/pypa/pip/pull/1519
# https://github.com/pypa/twine
diff --git a/utils/searx.sh b/utils/searx.sh
index 386b2861f..06b3c2dfc 100755
--- a/utils/searx.sh
+++ b/utils/searx.sh
@@ -36,6 +36,7 @@ GIT_BRANCH="${GIT_BRANCH:-master}"
SEARX_PYENV="${SERVICE_HOME}/searx-pyenv"
SEARX_SRC="${SERVICE_HOME}/searx-src"
SEARX_SETTINGS_PATH="/etc/searx/settings.yml"
+SEARX_SETTINGS_TEMPLATE="${REPO_ROOT}/utils/templates/etc/searx/use_default_settings.yml"
SEARX_UWSGI_APP="searx.ini"
# shellcheck disable=SC2034
SEARX_UWSGI_SOCKET="/run/uwsgi/app/searx/socket"
@@ -139,7 +140,7 @@ usage() {
cat <<EOF
usage::
$(basename "$0") shell
- $(basename "$0") install [all|user|searx-src|pyenv|uwsgi|packages|buildhost]
+ $(basename "$0") install [all|user|searx-src|pyenv|uwsgi|packages|settings|buildhost]
$(basename "$0") update [searx]
$(basename "$0") remove [all|user|pyenv|searx-src]
$(basename "$0") activate [service]
@@ -413,14 +414,14 @@ install_settings() {
if [[ ! -f ${SEARX_SETTINGS_PATH} ]]; then
info_msg "install settings ${REPO_ROOT}/searx/settings.yml"
info_msg " --> ${SEARX_SETTINGS_PATH}"
- cp "${REPO_ROOT}/searx/settings.yml" "${SEARX_SETTINGS_PATH}"
+ cp "${SEARX_SETTINGS_TEMPLATE}" "${SEARX_SETTINGS_PATH}"
configure_searx
return
fi
rst_para "Diff between origin's setting file (+) and current (-):"
- echo
- $DIFF_CMD "${SEARX_SETTINGS_PATH}" "${SEARX_SRC}/searx/settings.yml"
+ echo "${SEARX_SETTINGS_PATH}" "${SEARX_SETTINGS_TEMPLATE}"
+ $DIFF_CMD "${SEARX_SETTINGS_PATH}" "${SEARX_SETTINGS_TEMPLATE}"
local action
choose_one action "What should happen to the settings file? " \
@@ -434,7 +435,7 @@ install_settings() {
"use origin settings")
backup_file "${SEARX_SETTINGS_PATH}"
info_msg "install origin settings"
- cp "${SEARX_SRC}/searx/settings.yml" "${SEARX_SETTINGS_PATH}"
+ cp "${SEARX_SETTINGS_TEMPLATE}" "${SEARX_SETTINGS_PATH}"
;;
"start interactiv shell")
backup_file "${SEARX_SETTINGS_PATH}"
@@ -442,7 +443,7 @@ install_settings() {
sudo -H -i
rst_para 'Diff between new setting file (-) and current (+):'
echo
- $DIFF_CMD "${SEARX_SRC}/searx/settings.yml" "${SEARX_SETTINGS_PATH}"
+ $DIFF_CMD "${SEARX_SETTINGS_TEMPLATE}" "${SEARX_SETTINGS_PATH}"
wait_key
;;
esac
diff --git a/utils/templates/etc/searx/use_default_settings.yml b/utils/templates/etc/searx/use_default_settings.yml
new file mode 100644
index 000000000..e019a25bb
--- /dev/null
+++ b/utils/templates/etc/searx/use_default_settings.yml
@@ -0,0 +1,22 @@
+use_default_settings: True
+
+general:
+ debug : False # Debug mode, only for development
+ instance_name : "searx" # displayed name
+
+search:
+ safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict
+ autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "swisscows", "qwant", "wikipedia" - leave blank to turn it off by default
+ default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py'
+
+server:
+ port : 8888
+ bind_address : "127.0.0.1" # address to listen on
+ secret_key : "ultrasecretkey" # change this!
+ base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
+ image_proxy : False # Proxying image results through searx
+
+# uncomment below section if you have running morty proxy
+#result_proxy:
+# url : http://127.0.0.1:3000/
+# key : !!binary "your_morty_proxy_key"