diff options
author | Markus Heiser <markus.heiser@darmarIT.de> | 2021-06-08 10:56:18 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-08 10:56:18 +0000 |
commit | 5c5db719d2039d34aa1426bf7eb3e57055d4a54a (patch) | |
tree | 15a7a5e1a811f1366cdb49b69e35633afaf56114 | |
parent | 5f76238d5cd6f9c850d5237fe639ba2900fd3e6c (diff) | |
parent | 6f7b0d72c084845ea073a82a357c5e99cd41a85f (diff) | |
download | searxng-5c5db719d2039d34aa1426bf7eb3e57055d4a54a.tar.gz searxng-5c5db719d2039d34aa1426bf7eb3e57055d4a54a.zip |
Merge pull request #97 from return42/drop-searx-admin
[docs] reorder blog articles
37 files changed, 1110 insertions, 974 deletions
diff --git a/docs/admin/engines.rst b/docs/admin/engines.rst deleted file mode 100644 index 3ad206303..000000000 --- a/docs/admin/engines.rst +++ /dev/null @@ -1,88 +0,0 @@ -======= -Engines -======= - -Special Engine Settings -======================= - -.. sidebar:: Further reading .. - - - :ref:`settings engine` - - :ref:`engine settings` & :ref:`engine file` - -.. toctree:: - :maxdepth: 1 - - engines/recoll.rst - - -.. _engines generic: - -General Engine Settings -======================= - -Explanation of the :ref:`general engine configuration` shown in the table -:ref:`configured engines`. - -============= =========== ==================== ============ -:ref:`engine settings` :ref:`engine file` -------------------------- --------------------------------- -Name (cfg) Categories -------------------------- --------------------------------- -Engine .. Paging support **P** -------------------------- -------------------- ------------ -Shortcut **S** Language support **L** -Timeout **TO** Time range support **TR** -Disabled **D** Engine type **ET** -------------- ----------- -------------------- ------------ -Safe search **SS** -------------- ----------- --------------------------------- -Weigth **W** -------------- ----------- --------------------------------- -Disabled **D** -------------- ----------- --------------------------------- -Show errors **DE** -============= =========== ================================= - -.. _configured engines: - -.. jinja:: searx - - .. 
flat-table:: Engines configured at built time (defaults) - :header-rows: 1 - :stub-columns: 2 - - * - Name (cfg) - - S - - Engine - - TO - - Categories - - P - - L - - SS - - D - - TR - - ET - - W - - D - - DE - - {% for name, mod in engines.items() %} - - * - {{name}} - - !{{mod.shortcut}} - - {{mod.__name__}} - - {{mod.timeout}} - - {{", ".join(mod.categories)}} - - {{(mod.paging and "y") or ""}} - - {{(mod.language_support and "y") or ""}} - - {{(mod.safesearch and "y") or ""}} - - {{(mod.disabled and "y") or ""}} - - {{(mod.time_range_support and "y") or ""}} - - {{mod.engine_type or ""}} - - {{mod.weight or 1 }} - - {{(mod.disabled and "y") or ""}} - - {{(mod.display_error_messages and "y") or ""}} - - {% endfor %} - diff --git a/docs/admin/engines/command-line-engines.rst b/docs/admin/engines/command-line-engines.rst new file mode 100644 index 000000000..e9535e74f --- /dev/null +++ b/docs/admin/engines/command-line-engines.rst @@ -0,0 +1,79 @@ +.. _engine command: + +==================== +Command Line Engines +==================== + +.. sidebar:: info + + - :origin:`command.py <searx/engines/command.py>` + - :ref:`offline engines` + +With *command engines* administrators can run engines to integrate arbitrary +shell commands. + +When creating and enabling a ``command`` engine on a public instance, you must +be careful to avoid leaking private data. The easiest solution is to limit the +access by setting ``tokens`` as described in section :ref:`private engines`. + +The engine base is flexible. Only your imagination can limit the power of this +engine (and maybe security concerns). The following options are available: + +``command``: + A comma separated list of the elements of the command. A special token + ``{{QUERY}}`` tells where to put the search terms of the user. Example: + + .. code:: yaml + + ['ls', '-l', '-h', '{{QUERY}}'] + +``delimiter``: + A mapping containing a delimiter ``char`` and the *titles* of each element in + ``keys``. 
+ +``parse_regex``: + A dict containing the regular expressions for each result key. + +``query_type``: + + The expected type of user search terms. Possible values: ``path`` and + ``enum``. + + ``path``: + Checks if the user provided path is inside the working directory. If not, + the query is not executed. + + ``enum``: + Is a list of allowed search terms. If the user submits something which is + not included in the list, the query returns an error. + +``query_enum``: + A list containing allowed search terms if ``query_type`` is set to ``enum``. + +``working_dir``: + + The directory where the command has to be executed. Default: ``./`` + +``result_separator``: + The character that separates results. Default: ``\n`` + +The example engine below can be used to find files with a specific name in the +configured working directory: + +.. code:: yaml + + - name: find + engine: command + command: ['find', '.', '-name', '{{QUERY}}'] + query_type: path + shortcut: fnd + delimiter: + chars: ' ' + keys: ['line'] + + +Acknowledgment +============== + +This development was sponsored by `Search and Discovery Fund +<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_. diff --git a/docs/admin/engines/configured_engines.rst b/docs/admin/engines/configured_engines.rst new file mode 100644 index 000000000..af20e5611 --- /dev/null +++ b/docs/admin/engines/configured_engines.rst @@ -0,0 +1,77 @@ +.. _configured engines: + +================== +Configured Engines +================== + +.. sidebar:: Further reading .. + + - :ref:`engines-dev` + - :ref:`settings engine` + +Explanation of the :ref:`general engine configuration` shown in the table +:ref:`configured engines`. + +.. table:: The legend for the following table + :width: 100% + + ============= =========== ==================== ============ + :ref:`engine settings` :ref:`engine file` + ------------------------- --------------------------------- + Name (cfg) .. 
Categories + ------------- ----------- -------------------- ------------ + Engine .. Paging support **P** + ------------- ----------- -------------------- ------------ + Shortcut **S** Language support **L** + Timeout **TO** Time range support **TR** + Disabled **D** Engine type **ET** + ------------- ----------- -------------------- ------------ + Safe search **SS** + ------------- ----------- --------------------------------- + Weigth **W** + ------------- ----------- --------------------------------- + Disabled **D** + ------------- ----------- --------------------------------- + Show errors **DE** + ============= =========== ================================= + +.. jinja:: searx + + .. flat-table:: Engines configured at built time (defaults) + :header-rows: 1 + :stub-columns: 2 + + * - Name (cfg) + - S + - Engine + - TO + - Categories + - P + - L + - SS + - D + - TR + - ET + - W + - D + - DE + + {% for name, mod in engines.items() %} + + * - {{name}} + - !{{mod.shortcut}} + - {{mod.__name__}} + - {{mod.timeout}} + - {{", ".join(mod.categories)}} + - {{(mod.paging and "y") or ""}} + - {{(mod.language_support and "y") or ""}} + - {{(mod.safesearch and "y") or ""}} + - {{(mod.disabled and "y") or ""}} + - {{(mod.time_range_support and "y") or ""}} + - {{mod.engine_type or ""}} + - {{mod.weight or 1 }} + - {{(mod.disabled and "y") or ""}} + - {{(mod.display_error_messages and "y") or ""}} + + {% endfor %} + diff --git a/docs/admin/engines/index.rst b/docs/admin/engines/index.rst new file mode 100644 index 000000000..80f4120a5 --- /dev/null +++ b/docs/admin/engines/index.rst @@ -0,0 +1,22 @@ +.. _engines and settings: + +================== +Engines & Settings +================== + +.. sidebar:: Further reading .. + + - :ref:`settings engine` + - :ref:`engine settings` & :ref:`engine file` + +.. 
toctree:: + :maxdepth: 1 + + settings + configured_engines + private-engines + recoll + sql-engines + search-indexer-engines + command-line-engines + searx.engines.xpath diff --git a/docs/admin/engines/private-engines.rst b/docs/admin/engines/private-engines.rst new file mode 100644 index 000000000..cc6ab2565 --- /dev/null +++ b/docs/admin/engines/private-engines.rst @@ -0,0 +1,49 @@ +.. _private engines: + +============================ +Private Engines (``tokens``) +============================ + +Administrators might find themselves wanting to limit access to some of the +enabled engines on their instances. It might be because they do not want to +expose some private information through :ref:`offline engines`. Or they would +rather share engines only with their trusted friends or colleagues. + +To solve this issue the concept of *private engines* exists. + + +A new option was added to engines named `tokens`. It expects a list of +strings. If the user making a request presents one of the tokens of an engine, +they can access information about the engine and make search requests. + +Example configuration to restrict access to the Arch Linux Wiki engine: + +.. code:: yaml + + - name: arch linux wiki + engine: archlinux + shortcut: al + tokens: [ 'my-secret-token' ] + + +Unless a user has configured the right token, the engine is going +to be hidden from him/her. It is not going to be included in the +list of engines on the Preferences page and in the output of +`/config` REST API call. + +Tokens can be added to one's configuration on the Preferences page +under "Engine tokens". The input expects a comma separated list of +strings. + +The distribution of the tokens from the administrator to the users +is not carved in stone. As providing access to such engines +implies that the admin knows and trusts the user, we do not see +necessary to come up with a strict process. Instead, +we would like to add guidelines to the documentation of the feature. 
+ + +Acknowledgment +============== + +This development was sponsored by `Search and Discovery Fund +<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_. diff --git a/docs/admin/engines/recoll.rst b/docs/admin/engines/recoll.rst index cba2e81f7..9b198db7e 100644 --- a/docs/admin/engines/recoll.rst +++ b/docs/admin/engines/recoll.rst @@ -1,17 +1,17 @@ .. _engine recoll: -====== -Recoll -====== +============= +Recoll Engine +============= .. sidebar:: info - `Recoll <https://www.lesbonscomptes.com/recoll/>`_ - `recoll-webui <https://framagit.org/medoc92/recollwebui.git>`_ + - :origin:`searx/engines/recoll.py` -Recoll_ is a desktop full-text search tool based on Xapian. By itself Recoll_ -does not offer web or API access, this can be achieved using recoll-webui_ - +Recoll_ is a desktop full-text search tool based on Xapian. By itself Recoll_ +does not offer WEB or API access, this can be achieved using recoll-webui_ Configuration diff --git a/docs/admin/engines/search-indexer-engines.rst b/docs/admin/engines/search-indexer-engines.rst new file mode 100644 index 000000000..8d7c10d67 --- /dev/null +++ b/docs/admin/engines/search-indexer-engines.rst @@ -0,0 +1,136 @@ +==================== +Local Search Engines +==================== + +.. sidebar:: further read + + - `Comparison to alternatives + <https://docs.meilisearch.com/learn/what_is_meilisearch/comparison_to_alternatives.html>`_ + +Administrators might find themselves wanting to integrate locally running search +engines. The following ones are supported for now: + +* `Elasticsearch`_ +* `Meilisearch`_ +* `Solr`_ + +Each search engine is powerful, capable of full-text search. All of the engines +above are added to ``settings.yml`` just commented out, as you have to set +``base_url`` for all of them. + +Please note that if you are not using HTTPS to access these engines, you have to enable +HTTP requests by setting ``enable_http`` to ``True``.
+ +Furthermore, if you do not want to expose these engines on a public instance, you +can still add them and limit the access by setting ``tokens`` as described in +section :ref:`private engines`. + +.. _engine meilisearch: + +MeiliSearch +=========== + +.. sidebar:: info + + - :origin:`meilisearch.py <searx/engines/meilisearch.py>` + - `MeiliSearch <https://www.meilisearch.com>`_ + - `MeiliSearch Documentation <https://docs.meilisearch.com/>`_ + - `Install MeiliSearch + <https://docs.meilisearch.com/learn/getting_started/installation.html>`_ + +MeiliSearch_ is aimed at individuals and small companies. It is designed for +small-scale (less than 10 million documents) data collections. E.g. it is great +for storing web pages you have visited and searching in the contents later. + +The engine supports faceted search, so you can search in a subset of documents +of the collection. Furthermore, you can search in MeiliSearch_ instances that +require authentication by setting ``auth_token``. + +Here is a simple example to query a Meilisearch instance: + +.. code:: yaml + + - name: meilisearch + engine: meilisearch + shortcut: mes + base_url: http://localhost:7700 + index: my-index + enable_http: true + + +.. _engine elasticsearch: + +Elasticsearch +============= + +.. sidebar:: info + + - :origin:`elasticsearch.py <searx/engines/elasticsearch.py>` + - `Elasticsearch <https://www.elastic.co/elasticsearch/>`_ + - `Elasticsearch Guide + <https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html>`_ + - `Install Elasticsearch + <https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html>`_ + +Elasticsearch_ supports numerous ways to query the data it is storing. At the +moment the engine supports the most popular search methods (``query_type``): + +- ``match``, +- ``simple_query_string``, +- ``term`` and +- ``terms``.
+ +If none of the methods fit your use case, you can select ``custom`` query type +and provide the JSON payload to submit to Elasticsearch in +``custom_query_json``. + +The following is an example configuration for an Elasticsearch_ instance with +authentication configured to read from ``my-index`` index. + +.. code:: yaml + + - name: elasticsearch + shortcut: es + engine: elasticsearch + base_url: http://localhost:9200 + username: elastic + password: changeme + index: my-index + query_type: match + # custom_query_json: '{ ... }' + enable_http: true + +.. _engine solr: + +Solr +==== + +.. sidebar:: info + + - :origin:`solr.py <searx/engines/solr.py>` + - `Solr <https://solr.apache.org>`_ + - `Solr Resources <https://solr.apache.org/resources.html>`_ + - `Install Solr <https://solr.apache.org/guide/installing-solr.html>`_ + +Solr_ is a popular search engine based on Lucene, just like Elasticsearch_. But +instead of searching in indices, you can search in collections. + +This is an example configuration for searching in the collection +``my-collection`` and get the results in ascending order. + +.. code:: yaml + + - name: solr + engine: solr + shortcut: slr + base_url: http://localhost:8983 + collection: my-collection + sort: asc + enable_http: true + + +Acknowledgment +============== + +This development was sponsored by `Search and Discovery Fund +<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_. diff --git a/docs/admin/engines/searx.engines.xpath.rst b/docs/admin/engines/searx.engines.xpath.rst new file mode 100644 index 000000000..695aa5224 --- /dev/null +++ b/docs/admin/engines/searx.engines.xpath.rst @@ -0,0 +1,9 @@ +.. _xpath engine: + +============ +XPath Engine +============ + +.. automodule:: searx.engines.xpath + :members: + diff --git a/docs/admin/settings.rst b/docs/admin/engines/settings.rst index d0773467d..1fe083a97 100644 --- a/docs/admin/settings.rst +++ b/docs/admin/engines/settings.rst @@ -22,13 +22,14 @@ file. 
settings.yml location ===================== -First, searx will try to load settings.yml from these locations: +The initial ``settings.yml`` will be loaded from these locations: 1. the full path specified in the ``SEARX_SETTINGS_PATH`` environment variable. 2. ``/etc/searx/settings.yml`` If these files don't exist (or are empty or can't be read), searx uses the -:origin:`searx/settings.yml` file. +:origin:`searx/settings.yml` file. Read :ref:`settings use_default_settings` to +see how you can simplify your *user defined* ``settings.yml``. .. _settings global: @@ -42,17 +43,19 @@ Global Settings .. code:: yaml general: - debug : False # Debug mode, only for development - instance_name : "searxng" # displayed name - contact_url: False # mailto:contact@example.com + debug: false # Debug mode, only for development + instance_name: "searxng" # displayed name + contact_url: false # mailto:contact@example.com + +.. code:: yaml brand: - git_url: https://github.com/searxng/searxng - git_branch: master - issue_url: https://github.com/searxng/searxng/issues - docs_url: https://searxng/searxng.github.io/searxng - public_instances: https://searx.space - wiki_url: https://github.com/searxng/searxng/wiki + git_url: https://github.com/searxng/searxng + git_branch: master + issue_url: https://github.com/searxng/searxng/issues + docs_url: https://searxng/searxng.github.io/searxng + public_instances: https://searx.space + wiki_url: https://github.com/searxng/searxng/wiki ``debug`` : Allow a more detailed log if you run searx directly. Display *detailed* error @@ -68,10 +71,10 @@ Global Settings If you host your own documentation, change this URL. ``wiki_url``: - Link to your wiki (or ``False``) + Link to your wiki (or ``false``) ``twitter_url``: - Link to your tweets (or ``False``) + Link to your tweets (or ``false``) ``server:`` @@ -80,19 +83,19 @@ Global Settings ..
code:: yaml server: - port : 8888 - bind_address : "127.0.0.1" # address to listen on - secret_key : "ultrasecretkey" # change this! - base_url : False # set custom base_url (or False) - image_proxy : False # proxying image results through searx - default_locale : "" # default interface locale - default_theme : oscar # ui theme + port: 8888 + bind_address: "127.0.0.1" # address to listen on + secret_key: "ultrasecretkey" # change this! + base_url: false # set custom base_url (or false) + image_proxy: false # proxying image results through searx + default_locale: "" # default interface locale + default_theme: oscar # ui theme default_http_headers: - X-Content-Type-Options : nosniff - X-XSS-Protection : 1; mode=block - X-Download-Options : noopen - X-Robots-Tag : noindex, nofollow - Referrer-Policy : no-referrer + X-Content-Type-Options : nosniff + X-XSS-Protection : 1; mode=block + X-Download-Options : noopen + X-Robots-Tag : noindex, nofollow + Referrer-Policy : no-referrer ``port`` & ``bind_address``: Port number and *bind address* of the searx web application if you run it @@ -125,26 +128,30 @@ Global Settings ``outgoing:`` ------------- +Communication with search engines. + .. code:: yaml - outgoing: # communication with search engines - request_timeout : 2.0 # default timeout in seconds, can be override by engine - # max_request_timeout: 10.0 # the maximum timeout in seconds - useragent_suffix : "" # informations like an email address to the administrator - pool_connections : 100 # Maximum number of allowable connections, or None for no limits. The default is 100. - pool_maxsize : 10 # Number of allowable keep-alive connections, or None to always allow. The default is 10. 
- enable_http2: True # See https://www.python-httpx.org/http2/ - # uncomment below section if you want to use a proxy - # proxies: - # all://: - # - http://proxy1:8080 - # - http://proxy2:8080 - # uncomment below section only if you have more than one network interface - # which can be the source of outgoing search requests - # source_ips: - # - 1.1.1.1 - # - 1.1.1.2 - # - fe80::/126 + outgoing: + request_timeout: 2.0 # default timeout in seconds, can be override by engine + max_request_timeout: 10.0 # the maximum timeout in seconds + useragent_suffix: "" # informations like an email address to the administrator + pool_connections: 100 # Maximum number of allowable connections, or null + # for no limits. The default is 100. + pool_maxsize: 10 # Number of allowable keep-alive connections, or null + # to always allow. The default is 10. + enable_http2: true # See https://www.python-httpx.org/http2/ + # uncomment below section if you want to use a proxy + # proxies: + # all://: + # - http://proxy1:8080 + # - http://proxy2:8080 + # uncomment below section only if you have more than one network interface + # which can be the source of outgoing search requests + # source_ips: + # - 1.1.1.1 + # - 1.1.1.2 + # - fe80::/126 ``request_timeout`` : @@ -180,8 +187,8 @@ Global Settings * ``[ 192.168.0.1, fe80::/126 ]`` ``retries`` : - Number of retry in case of an HTTP error. - On each retry, searx uses an different proxy and source ip. + Number of retry in case of an HTTP error. On each retry, searx uses an + different proxy and source ip. ``retry_on_http_error`` : Retry request on some HTTP status code. @@ -193,7 +200,7 @@ Global Settings * ``[403, 429]``: on HTTP status code 403 and 429. ``enable_http2`` : - Enable by default. Set to ``False`` to disable HTTP/2. + Enable by default. Set to ``false`` to disable HTTP/2. ``max_redirects`` : 30 by default. Maximum redirect before it is an error. @@ -205,18 +212,18 @@ Global Settings .. 
code:: yaml locales: - en : English - de : Deutsch - he : Hebrew - hu : Magyar - fr : Français - es : Español - it : Italiano - nl : Nederlands - ja : 日本語 (Japanese) - tr : Türkçe - ru : Russian - ro : Romanian + en: English + de: Deutsch + he: Hebrew + hu: Magyar + fr: Français + es: Español + it: Italiano + nl: Nederlands + ja: 日本語 (Japanese) + tr: Türkçe + ru: Russian + ro: Romanian ``locales`` : Locales codes and their names. Available translations of searx interface. @@ -229,35 +236,49 @@ Engine settings .. sidebar:: Further reading .. + - :ref:`configured engines` - :ref:`engines-dev` +In the code example below a *full fledged* example of a YAML setup from a dummy +engine is shown. Most of the options have a default value or even are optional. + .. code:: yaml - - name : bing - engine : bing - shortcut : bi - base_url : 'https://{language}.wikipedia.org/' - categories : general - timeout : 3.0 - api_key : 'apikey' - disabled : True - language : en_US - #enable_http: False - #enable_http2: False - #retries: 1 - #retry_on_http_error: True # or 403 or [404, 429] - #max_connections: 100 - #max_keepalive_connections: 10 - #keepalive_expiry: 5.0 - #proxies: - # http: - # - http://proxy1:8080 - # - http://proxy2:8080 - # https: - # - http://proxy1:8080 - # - http://proxy2:8080 - # - socks5://user:password@proxy3:1080 - # - socks5h://user:password@proxy4:1080 + - name: example engine + engine: example + shortcut: demo + base_url: 'https://{language}.example.com/' + categories: general + timeout: 3.0 + api_key: 'apikey' + disabled: false + language: en_US + tokens: [ 'my-secret-token' ] + weigth: 1 + display_error_messages: true + about: + website: https://example.com + wikidata_id: Q306656 + official_api_documentation: https://example.com/api-doc + use_official_api: true + require_api_key: true + results: HTML + enable_http: false + enable_http2: false + retries: 1 + retry_on_http_error: true # or 403 or [404, 429] + max_connections: 100 + max_keepalive_connections: 
10 + keepalive_expiry: 5.0 + proxies: + http: + - http://proxy1:8080 + - http://proxy2:8080 + https: + - http://proxy1:8080 + - http://proxy2:8080 + - socks5://user:password@proxy3:1080 + - socks5h://user:password@proxy4:1080 ``name`` : Name that will be used across searx to define this engine. In settings, on @@ -297,10 +318,14 @@ Engine settings by using the full ISO code of language and country, like ``fr_FR``, ``en_US``, ``de_DE``. +``tokens`` : optional + A list of secret tokens to make this engine *private*, more details see + :ref:`private engines`. + ``weigth`` : default ``1`` Weighting of the results of this engine. -``display_error_messages`` : default ``True`` +``display_error_messages`` : default ``true`` When an engine returns an error, the message is displayed on the user interface. ``network``: optional @@ -320,7 +345,7 @@ Engine settings use_default_settings ==================== -.. sidebar:: ``use_default_settings: True`` +.. sidebar:: ``use_default_settings: true`` - :ref:`settings location` - :ref:`use_default_settings.yml` @@ -329,7 +354,7 @@ use_default_settings The user defined ``settings.yml`` is loaded from the :ref:`settings location` and can relied on the default configuration :origin:`searx/settings.yml` using: - ``use_default_settings: True`` + ``use_default_settings: true`` ``server:`` In the following example, the actual settings are the default settings defined @@ -338,22 +363,22 @@ and can relied on the default configuration :origin:`searx/settings.yml` using: .. code-block:: yaml - use_default_settings: True + use_default_settings: true server: - secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" + secret_key: "ultrasecretkey" # change this! bind_address: "0.0.0.0" ``engines:`` - With ``use_default_settings: True``, each settings can be override in a + With ``use_default_settings: true``, each settings can be override in a similar way, the ``engines`` section is merged according to the engine ``name``. 
In this example, searx will load all the engine and the arch linux - wiki engine has a :ref:`token<private engines>`: + wiki engine has a :ref:`token <private engines>`: .. code-block:: yaml - use_default_settings: True + use_default_settings: true server: - secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" + secret_key: "ultrasecretkey" # change this! engines: - name: arch linux wiki tokens: ['$ecretValue'] @@ -366,11 +391,11 @@ and can relied on the default configuration :origin:`searx/settings.yml` using: .. code-block:: yaml use_default_settings: - engines: - remove: - - google + engines: + remove: + - google server: - secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" + secret_key: "ultrasecretkey" # change this! engines: - name: arch linux wiki tokens: ['$ecretValue'] @@ -382,12 +407,12 @@ and can relied on the default configuration :origin:`searx/settings.yml` using: .. code-block:: yaml use_default_settings: - engines: - keep_only: - - google - - duckduckgo + engines: + keep_only: + - google + - duckduckgo server: - secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA" + secret_key: "ultrasecretkey" # change this! engines: - name: google tokens: ['$ecretValue'] diff --git a/docs/blog/sql-engines.rst b/docs/admin/engines/sql-engines.rst index 413316054..ee7f31c27 100644 --- a/docs/blog/sql-engines.rst +++ b/docs/admin/engines/sql-engines.rst @@ -1,5 +1,7 @@ +.. _sql engines: + =========== -SQL engines +SQL Engines =========== .. sidebar:: further read @@ -36,9 +38,10 @@ place the templates at:: searx/templates/{theme_name}/result_templates/{template_name} -As mentioned in previous blog posts, if you do not wish to expose these engines -on a public instance, you can still add them and limit the access by setting -``tokens`` as described in section :ref:`private engines`. 
+If you do not wish to expose these engines on a public instance, you can still +add them and limit the access by setting ``tokens`` as described in section +:ref:`private engines`. + Configure the engines ===================== @@ -58,6 +61,10 @@ returned results use the option ``limit``. SQLite ------ +.. sidebar:: info + + - :origin:`sqlite.py <searx/engines/sqlite.py>` + .. _MediathekView: https://mediathekview.de/ SQLite is a small, fast and reliable SQL database engine. It does not require @@ -106,9 +113,10 @@ PostgreSQL .. _psycopg2: https://www.psycopg.org/install -.. sidebar:: requirements +.. sidebar:: info - ``pip install`` psycopg2_ + - :origin:`postgresql.py <searx/engines/postgresql.py>` + - ``pip install`` psycopg2_ PostgreSQL is a powerful and robust open source database. Before configuring the PostgreSQL engine, you must install the dependency ``psychopg2``. You can @@ -130,9 +138,10 @@ MySQL .. _mysql-connector-python: https://pypi.org/project/mysql-connector-python -.. sidebar:: requirements +.. sidebar:: info - ``pip install`` mysql-connector-python_ + - :origin:`mysql_server.py <searx/engines/mysql_server.py>` + - ``pip install`` mysql-connector-python_ MySQL is said to be the most popular open source database. Before enabling MySQL engine, you must install the package ``mysql-connector-python``. @@ -152,9 +161,9 @@ example configuration for quering a MySQL server: query_str: 'SELECT * from my_table WHERE my_column=%(query)s' -Acknowledgement -=============== +Acknowledgment +============== This development was sponsored by `Search and Discovery Fund -<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_ . +<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_. 
diff --git a/docs/admin/index.rst b/docs/admin/index.rst index c708c4ffa..3139db99c 100644 --- a/docs/admin/index.rst +++ b/docs/admin/index.rst @@ -13,11 +13,10 @@ Administrator documentation installation-apache installation-docker update-searx - settings + engines/index api architecture filtron morty - engines plugins buildhosts diff --git a/docs/blog/admin.rst b/docs/blog/admin.rst deleted file mode 100644 index e95316192..000000000 --- a/docs/blog/admin.rst +++ /dev/null @@ -1,43 +0,0 @@ -============================================================= -Searx admin interface -============================================================= - -.. _searx-admin: https://github.com/kvch/searx-admin#searx-admin -.. _NLnet Foundation: https://nlnet.nl/ - - manage your instance from your browser - -.. sidebar:: Installation - - Installation guide can be found in the repository of searx-admin_. - -One of the biggest advantages of searx is being extremely customizable. But at -first it can be daunting to newcomers. A barrier of taking advantage of this -feature is our ugly settings file which is sometimes hard to understand and -edit. - -To make self-hosting searx more accessible a new tool is introduced, called -``searx-admin``. It is a web application which is capable of managing your -instance and manipulating its settings via a web UI. It aims to replace editing -of ``settings.yml`` for less experienced administrators or people who prefer -graphical admin interfaces. - -.. figure:: searx-admin-engines.png - :alt: Screenshot of engine list - - Configuration page of engines - -Since ``searx-admin`` acts as a supervisor for searx, we have decided to -implement it as a standalone tool instead of part of searx. Another reason for -making it a standalone tool is that the codebase and dependencies of searx -should not grow because of a fully optional feature, which does not affect -existing instances. 
- - -Acknowledgements -================ - -This development was sponsored by `NLnet Foundation`_. - -| Happy hacking. -| kvch // 2017.08.22 21:25 diff --git a/docs/blog/command-line-engines.rst b/docs/blog/command-line-engines.rst deleted file mode 100644 index 09eb84fb4..000000000 --- a/docs/blog/command-line-engines.rst +++ /dev/null @@ -1,65 +0,0 @@ -======================================== -Running shell commands to fetch results -======================================== - -Previously, with searx you could search over the Internet on other people's -computers. Now it is possible to fetch results from your local machine without -connecting to any networks from the same graphical user interface. - - -Command line engines -==================== - -In :pull-searx:`2128` a new type of engine has been introduced called ``command``. -This engine lets administrators add engines which run arbitrary shell commands -and show its output on the web UI of searx. - -When creating and enabling a ``command`` engine on a public searx instance, -you must be careful to avoid leaking private data. The easiest solution -is to add tokens to the engine. Thus, only those who have the appropriate token -can retrieve results from the it. - -The engine base is flexible. Only your imagination can limit the power of this engine. (And -maybe security concerns.) The following options are available: - -* ``command``: A comma separated list of the elements of the command. A special token {{QUERY}} tells searx where to put the search terms of the user. Example: ``['ls', '-l', '-h', '{{QUERY}}']`` -* ``delimiter``: A dict containing a delimiter char and the "titles" of each element in keys. -* ``parse_regex``: A dict containing the regular expressions for each result key. -* ``query_type``: The expected type of user search terms. Possible values: ``path`` and ``enum``. ``path`` checks if the uesr provided path is inside the working directory. If not the query is not executed. 
``enum`` is a list of allowed search terms. If the user submits something which is not included in the list, the query returns an error. -* ``query_enum``: A list containing allowed search terms if ``query_type`` is set to ``enum``. -* ``working_dir``: The directory where the command has to be executed. Default: ``.`` -* ``result_separator``: The character that separates results. Default: ``\n`` - - -The example engine below can be used to find files with a specific name in the configured -working directory. - -.. code:: yaml - - - name: find - engine: command - command: ['find', '.', '-name', '{{QUERY}}'] - query_type: path - shortcut: fnd - delimiter: - chars: ' ' - keys: ['line'] - - -Next steps -========== - -In the next milestone, support for local search engines and indexers (e.g. Elasticsearch) -are going to be added. This way, you will be able to query your own databases/indexers. - -Acknowledgement -=============== - -This development was sponsored by `Search and Discovery Fund`_ of `NLnet Foundation`_ . - -.. _Search and Discovery Fund: https://nlnet.nl/discovery -.. _NLnet Foundation: https://nlnet.nl/ - - -| Happy hacking. -| kvch // 2020.09.28 21:26 diff --git a/docs/blog/index.rst b/docs/blog/index.rst deleted file mode 100644 index a396ecaf6..000000000 --- a/docs/blog/index.rst +++ /dev/null @@ -1,16 +0,0 @@ -==== -Blog -==== - -.. toctree:: - :maxdepth: 2 - :caption: Contents - - lxcdev-202006 - python3 - admin - intro-offline - private-engines - command-line-engines - search-indexer-engines - sql-engines diff --git a/docs/blog/intro-offline.rst b/docs/blog/intro-offline.rst deleted file mode 100644 index 3a706d038..000000000 --- a/docs/blog/intro-offline.rst +++ /dev/null @@ -1,77 +0,0 @@ -=============================== -Preparation for offline engines -=============================== - -Offline engines -=============== - -To extend the functionality of searx, offline engines are going to be -introduced. 
An offline engine is an engine which does not need Internet -connection to perform a search and does not use HTTP to communicate. - -Offline engines can be configured as online engines, by adding those to the -`engines` list of :origin:`settings.yml <searx/settings.yml>`. Thus, searx -finds the engine file and imports it. - -Example skeleton for the new engines: - -.. code:: python - - from subprocess import PIPE, Popen - - categories = ['general'] - offline = True - - def init(settings): - pass - - def search(query, params): - process = Popen(['ls', query], stdout=PIPE) - return_code = process.wait() - if return_code != 0: - raise RuntimeError('non-zero return code', return_code) - - results = [] - line = process.stdout.readline() - while line: - result = parse_line(line) - results.append(results) - - line = process.stdout.readline() - - return results - - -Development progress -==================== - -First, a proposal has been created as a Github issue. Then it was moved to the -wiki as a design document. You can read it here: :wiki:`Offline-engines`. - -In this development step, searx core was prepared to accept and perform offline -searches. Offline search requests are scheduled together with regular offline -requests. - -As offline searches can return arbitrary results depending on the engine, the -current result templates were insufficient to present such results. Thus, a new -template is introduced which is caplable of presenting arbitrary key value pairs -as a table. You can check out the pull request for more details see -:pull-searx:`1700`. - -Next steps -========== - -Today, it is possible to create/run an offline engine. However, it is going to be publicly available for everyone who knows the searx instance. So the next step is to introduce token based access for engines. This way administrators are able to limit the access to private engines. 
- -Acknowledgement -=============== - -This development was sponsored by `Search and Discovery Fund`_ of `NLnet Foundation`_ . - -.. _Search and Discovery Fund: https://nlnet.nl/discovery -.. _NLnet Foundation: https://nlnet.nl/ - - -| Happy hacking. -| kvch // 2019.10.21 17:03 - diff --git a/docs/blog/private-engines.rst b/docs/blog/private-engines.rst deleted file mode 100644 index 37b1d4cc5..000000000 --- a/docs/blog/private-engines.rst +++ /dev/null @@ -1,65 +0,0 @@ -================================== -Limit access to your searx engines -================================== - -Administrators might find themselves wanting to limit access to some of the -enabled engines on their instances. It might be because they do not want to -expose some private information through an offline engine. Or they -would rather share engines only with their trusted friends or colleagues. - -.. _private engines: - -Private engines -=============== - -To solve this issue private engines were introduced in :pull-searx:`1823`. -A new option was added to engines named `tokens`. It expects a list -of strings. If the user making a request presents one of the tokens -of an engine, they can access information about the engine -and make search requests. - -Example configuration to restrict access to the Arch Linux Wiki engine: - -.. code:: yaml - - - name : arch linux wiki - engine : archlinux - shortcut : al - tokens : [ 'my-secret-token' ] - - -Unless a user has configured the right token, the engine is going -to be hidden from him/her. It is not going to be included in the -list of engines on the Preferences page and in the output of -`/config` REST API call. - -Tokens can be added to one's configuration on the Preferences page -under "Engine tokens". The input expects a comma separated list of -strings. - -The distribution of the tokens from the administrator to the users -is not carved in stone. 
As providing access to such engines -implies that the admin knows and trusts the user, we do not see -necessary to come up with a strict process. Instead, -we would like to add guidelines to the documentation of the feature. - -Next steps -========== - -Now that searx has support for both offline engines and private engines, -it is possible to add concrete engines which benefit from these features. -For example engines which search on the local host running the instance. -Be it searching your file system or querying a private database. Be creative -and come up with new solutions which fit your use case. - -Acknowledgement -=============== - -This development was sponsored by `Search and Discovery Fund`_ of `NLnet Foundation`_ . - -.. _Search and Discovery Fund: https://nlnet.nl/discovery -.. _NLnet Foundation: https://nlnet.nl/ - - -| Happy hacking. -| kvch // 2020.02.28 22:26 diff --git a/docs/blog/python3.rst b/docs/blog/python3.rst deleted file mode 100644 index 62d7052fc..000000000 --- a/docs/blog/python3.rst +++ /dev/null @@ -1,65 +0,0 @@ -============================ -Introducing Python 3 support -============================ - -.. _Python 2.7 clock: https://pythonclock.org/ - -.. sidebar:: Python 2.7 to 3 upgrade - - This chapter exists of historical reasons. Python 2.7 release schedule ends - (`Python 2.7 clock`_) after 11 years Python 3 exists - -As most operation systems are coming with Python3 installed by default. So it is -time for searx to support Python3. But don't worry support of Python2.7 won't be -dropped. - -.. image:: searxpy3.png - :scale: 50 % - :alt: hurray - :align: center - - -How to run searx using Python 3 -=============================== - -Please make sure that you run at least Python 3.5. - -To run searx, first a Python3 virtualenv should be created. After entering the -virtualenv, dependencies and searx must be installed. Then run searx from the -command line. - -.. 
code:: sh - - python3 -m venv venv3 - source venv3/bin/activate - pip install -U pip setuptools wheel pyyaml - pip install -e . - searx-run - -Fun facts -========= - -- 115 files were changed when implementing the support for both Python versions. - -- All of the dependencies was compatible except for the robotframework used for - browser tests. Thus, these tests were migrated to splinter. So from now on - both versions are being tested on Travis and can be tested locally. - -If you found bugs -================= - -Please open an issue on `GitHub`_. Make sure that you mention your Python -version in your issue, so we can investigate it properly. - -.. _GitHub: https://github.com/searxng/searxng/issues - -Acknowledgment -============== - -This development was sponsored by `NLnet Foundation`_. - -.. _NLnet Foundation: https://nlnet.nl/ - - -| Happy hacking. -| kvch // 2017.05.13 22:57 diff --git a/docs/blog/search-indexer-engines.rst b/docs/blog/search-indexer-engines.rst deleted file mode 100644 index ca4dd3c88..000000000 --- a/docs/blog/search-indexer-engines.rst +++ /dev/null @@ -1,114 +0,0 @@ -=============================== -Query your local search engines -=============================== - -From now on, searx lets you to query your locally running search engines. The following -ones are supported now: - -* `Elasticsearch`_ -* `Meilisearch`_ -* `Solr`_ - -All of the engines above are added to ``settings.yml`` just commented out, as you have to -``base_url`` for all them. - -Please note that if you are not using HTTPS to access these engines, you have to enable -HTTP requests by setting ``enable_http`` to ``True``. - -Futhermore, if you do not want to expose these engines on a public instance, you can -still add them and limit the access by setting ``tokens`` as described in the `blog post about -private engines`_. - -Configuring searx for search engines -==================================== - -Each search engine is powerful, capable of full-text search. 
- -Elasticsearch -------------- - -Elasticsearch supports numerous ways to query the data it is storing. At the moment -the engine supports the most popular search methods: ``match``, ``simple_query_string``, ``term`` and ``terms``. - -If none of the methods fit your use case, you can select ``custom`` query type and provide the JSON payload -searx has to submit to Elasticsearch in ``custom_query_json``. - -The following is an example configuration for an Elasticsearch instance with authentication -configured to read from ``my-index`` index. - -.. code:: yaml - - - name : elasticsearch - shortcut : es - engine : elasticsearch - base_url : http://localhost:9200 - username : elastic - password : changeme - index : my-index - query_type : match - enable_http : True - - -Meilisearch ------------ - -This search engine is aimed at individuals and small companies. It is designed for -small-scale (less than 10 million documents) data collections. E.g. it is great for storing -web pages you have visited and searching in the contents later. - -The engine supports faceted search, so you can search in a subset of documents of the collection. -Futhermore, you can search in Meilisearch instances that require authentication by setting ``auth_token``. - -Here is a simple example to query a Meilisearch instance: - -.. code:: yaml - - - name : meilisearch - engine : meilisearch - shortcut: mes - base_url : http://localhost:7700 - index : my-index - enable_http: True - - -Solr ----- - -Solr is a popular search engine based on Lucene, just like Elasticsearch. -But instead of searching in indices, you can search in collections. - -This is an example configuration for searching in the collection ``my-collection`` and get -the results in ascending order. - -.. 
code:: yaml - - - name : solr - engine : solr - shortcut : slr - base_url : http://localhost:8983 - collection : my-collection - sort : asc - enable_http : True - - -Next steps -========== - -The next step is to add support for various SQL databases. - -Acknowledgement -=============== - -This development was sponsored by `Search and Discovery Fund`_ of `NLnet Foundation`_ . - -.. _blog post about private engines: private-engines.html#private-engines -.. _Elasticsearch: https://www.elastic.co/elasticsearch/ -.. _Meilisearch: https://www.meilisearch.com/ -.. _Solr: https://solr.apache.org/ -.. _Search and Discovery Fund: https://nlnet.nl/discovery -.. _NLnet Foundation: https://nlnet.nl/ - - -| Happy hacking. -| kvch // 2021.04.07 23:16 - diff --git a/docs/blog/searx-admin-engines.png b/docs/blog/searx-admin-engines.png Binary files differdeleted file mode 100644 index 610bacdf7..000000000 --- a/docs/blog/searx-admin-engines.png +++ /dev/null diff --git a/docs/blog/searxpy3.png b/docs/blog/searxpy3.png Binary files differdeleted file mode 100644 index 8eeaeec55..000000000 --- a/docs/blog/searxpy3.png +++ /dev/null diff --git a/docs/conf.py b/docs/conf.py index 83367229f..09cc22878 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -58,11 +58,11 @@ extlinks['man'] = ('https://manpages.debian.org/jump?q=%s', '') #extlinks['role'] = ( # 'https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-%s', '') extlinks['duref'] = ( - 'https://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#%s', '') + 'https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#%s', '') extlinks['durole'] = ( - 'https://docutils.sourceforge.net/docs/ref/rst/roles.html#%s', '') + 'https://docutils.sourceforge.io/docs/ref/rst/roles.html#%s', '') extlinks['dudir'] = ( - 'https://docutils.sourceforge.net/docs/ref/rst/directives.html#%s', '') + 'https://docutils.sourceforge.io/docs/ref/rst/directives.html#%s', '') extlinks['ctan'] = ( 'https://ctan.org/pkg/%s', 
'CTAN: ') diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst index 2a661ec40..d79f662b8 100644 --- a/docs/dev/engine_overview.rst +++ b/docs/dev/engine_overview.rst @@ -1,310 +1,317 @@ - .. _engines-dev: =============== -Engine overview +Engine Overview =============== .. _metasearch-engine: https://en.wikipedia.org/wiki/Metasearch_engine -searx is a metasearch-engine_, so it uses different search engines to provide -better results. +.. sidebar:: Further reading .. -Because there is no general search API which could be used for every search -engine, an adapter has to be built between searx and the external search -engines. Adapters are stored under the folder :origin:`searx/engines`. + - :ref:`configured engines` + - :ref:`settings engine` .. contents:: :depth: 3 :backlinks: entry +searx is a metasearch-engine_, so it uses different search engines to provide +better results. + +Because there is no general search API which could be used for every search +engine, an adapter has to be built between searx and the external search +engines. Adapters are stored under the folder :origin:`searx/engines`. .. _general engine configuration: -general engine configuration +General Engine Configuration ============================ It is required to tell searx the type of results the engine provides. The -arguments can be set in the engine file or in the settings file -(normally ``settings.yml``). The arguments in the settings file override -the ones in the engine file. +arguments can be set in the engine file or in the settings file (normally +``settings.yml``). The arguments in the settings file override the ones in the +engine file. -It does not matter if an option is stored in the engine file or in the -settings. However, the standard way is the following: +It does not matter if an option is stored in the engine file or in the settings. +However, the standard way is the following: .. 
_engine file: -engine file +Engine File ----------- -======================= =========== ======================================================== -argument type information -======================= =========== ======================================================== -categories list pages, in which the engine is working -paging boolean support multible pages -time_range_support boolean support search time range -engine_type str ``online`` by default, other possibles values are - ``offline``, ``online_dictionnary``, ``online_currency`` -======================= =========== ======================================================== +.. table:: Common options in the engine module + :width: 100% + + ======================= =========== ======================================================== + argument type information + ======================= =========== ======================================================== + categories list pages, in which the engine is working + paging boolean support multiple pages + time_range_support boolean support search time range + engine_type str - ``online`` :ref:`[ref] <demo online engine>` by + default, other possible values are: + - ``offline`` :ref:`[ref] <offline engines>` + - ``online_dictionary`` + - ``online_currency`` + ======================= =========== ======================================================== .. _engine settings: -settings.yml ------------- - -======================= =========== ============================================= -argument type information -======================= =========== ============================================= -name string name of search-engine -engine string name of searx-engine - (filename without ``.py``) -enable_http bool enable HTTP - (by default only HTTPS is enabled). -shortcut string shortcut of search-engine -timeout string specific timeout for search-engine -display_error_messages boolean display error messages on the web UI -proxies dict set proxies for a specific engine - (e.g.
``proxies : {http: socks5://proxy:port, - https: socks5://proxy:port}``) -======================= =========== ============================================= - - -overrides +Engine ``settings.yml`` +----------------------- + +For a more detailed description, see :ref:`settings engine` in the :ref:`settings.yml`. + +.. table:: Common options in the engine setup (``settings.yml``) + :width: 100% + + ======================= =========== =============================================== + argument type information + ======================= =========== =============================================== + name string name of search-engine + engine string name of searx-engine (filename without ``.py``) + enable_http bool enable HTTP (by default only HTTPS is enabled). + shortcut string shortcut of search-engine + timeout string specific timeout for search-engine + display_error_messages boolean display error messages on the web UI + proxies dict set proxies for a specific engine + (e.g. ``proxies : {http: socks5://proxy:port, + https: socks5://proxy:port}``) + ======================= =========== =============================================== + +.. _engine overrides: + +Overrides --------- -A few of the options have default values in the engine, but are often -overwritten by the settings. If ``None`` is assigned to an option in the engine -file, it has to be redefined in the settings, otherwise searx will not start -with that engine. +.. sidebar:: engine's global names -The naming of overrides is arbitrary. But the recommended overrides are the -following: + Global names with a leading underline are *private to the engine* and will + not be overwritten. 
-======================= =========== =========================================== -argument type information -======================= =========== =========================================== -base_url string base-url, can be overwritten to use same - engine on other URL -number_of_results int maximum number of results per request -language string ISO code of language and country like en_US -api_key string api-key if required by engine -======================= =========== =========================================== +A few of the options have default values in the namespace of engine's Python +module, but are often overwritten by the settings. If ``None`` is assigned to an +option in the engine file, it has to be redefined in the settings, otherwise +searx will not start with that engine. -example code ------------- +Here is a very simple example of the global names in the namespace of engine's +module: .. code:: python # engine dependent config categories = ['general'] paging = True + _non_overwritten_global = 'foo' + +.. table:: The naming of overrides is arbitrary / recommended overrides are: + :width: 100% + + ======================= =========== =========================================== + argument type information + ======================= =========== =========================================== + base_url string base-url, can be overwritten to use same + engine on other URL + number_of_results int maximum number of results per request + language string ISO code of language and country like en_US + api_key string api-key if required by engine + ======================= =========== =========================================== .. _engine request: -making a request +Making a Request ================ To perform a search, a URL has to be specified. In addition to specifying a URL, arguments can be passed to the query. -passed arguments ----------------- +..
_engine request arguments: + +Passed Arguments (request) +-------------------------- These arguments can be used to construct the search query. Furthermore, parameters with default value can be redefined for special purposes. -If the ``engine_type`` is ``online```: - -====================== ============== ======================================================================== -argument type default-value, information -====================== ============== ======================================================================== -url str ``''`` -method str ``'GET'`` -headers set ``{}`` -data set ``{}`` -cookies set ``{}`` -verify bool ``True`` -headers.User-Agent str a random User-Agent -category str current category, like ``'general'`` -safesearch int ``0``, between ``0`` and ``2`` (normal, moderate, strict) -time_range Optional[str] ``None``, can be ``day``, ``week``, ``month``, ``year`` -pageno int current pagenumber -language str specific language code like ``'en_US'``, or ``'all'`` if unspecified -====================== ============== ======================================================================== - - -If the ``engine_type`` is ``online_dictionnary```, in addition to the ``online`` arguments: - -====================== ============ ======================================================================== -argument type default-value, information -====================== ============ ======================================================================== -from_lang str specific language code like ``'en_US'`` -to_lang str specific language code like ``'en_US'`` -query str the text query without the languages -====================== ============ ======================================================================== - -If the ``engine_type`` is ``online_currency```, in addition to the ``online`` arguments: - -====================== ============ ======================================================================== -argument type default-value, 
information -====================== ============ ======================================================================== -amount float the amount to convert -from str ISO 4217 code -to str ISO 4217 code -from_name str currency name -to_name str currency name -====================== ============ ======================================================================== - - -parsed arguments ----------------- - -The function ``def request(query, params):`` always returns the ``params`` -variable. Inside searx, the following paramters can be used to specify a search -request: - -=================== =========== ========================================================================== -argument type information -=================== =========== ========================================================================== -url str requested url -method str HTTP request method -headers set HTTP header information -data set HTTP data information -cookies set HTTP cookies -verify bool Performing SSL-Validity check -allow_redirects bool Follow redirects -max_redirects int maximum redirects, hard limit -soft_max_redirects int maximum redirects, soft limit. Record an error but don't stop the engine -raise_for_httperror bool True by default: raise an exception if the HTTP code of response is >= 300 -=================== =========== ========================================================================== - - -example code ------------- - -.. code:: python - - # search-url - base_url = 'https://example.com/' - search_string = 'search?{query}&page={page}' - - # do search-request - def request(query, params): - search_path = search_string.format( - query=urlencode({'q': query}), - page=params['pageno']) - - params['url'] = base_url + search_path - return params +.. 
table:: If the ``engine_type`` is ``online`` + :width: 100% + + ====================== ============== ======================================================================== + argument type default-value, information + ====================== ============== ======================================================================== + url str ``''`` + method str ``'GET'`` + headers set ``{}`` + data set ``{}`` + cookies set ``{}`` + verify bool ``True`` + headers.User-Agent str a random User-Agent + category str current category, like ``'general'`` + safesearch int ``0``, between ``0`` and ``2`` (normal, moderate, strict) + time_range Optional[str] ``None``, can be ``day``, ``week``, ``month``, ``year`` + pageno int current pagenumber + language str specific language code like ``'en_US'``, or ``'all'`` if unspecified + ====================== ============== ======================================================================== + + +.. table:: If the ``engine_type`` is ``online_dictionary``, in addition to the + ``online`` arguments: + :width: 100% + + ====================== ============== ======================================================================== + argument type default-value, information + ====================== ============== ======================================================================== + from_lang str specific language code like ``'en_US'`` + to_lang str specific language code like ``'en_US'`` + query str the text query without the languages + ====================== ============== ======================================================================== + +.. 
table:: If the ``engine_type`` is ``online_currency``, in addition to the + ``online`` arguments: + :width: 100% + + ====================== ============== ======================================================================== + argument type default-value, information + ====================== ============== ======================================================================== + amount float the amount to convert + from str ISO 4217 code + to str ISO 4217 code + from_name str currency name + to_name str currency name + ====================== ============== ======================================================================== + + +Specify Request +--------------- + +The function :py:func:`def request(query, params): +<searx.engines.demo_online.request>` always returns the ``params`` variable. The +following parameters can be used to specify a search request: + +.. table:: + :width: 100% + + =================== =========== ========================================================================== + argument type information + =================== =========== ========================================================================== + url str requested url + method str HTTP request method + headers set HTTP header information + data set HTTP data information + cookies set HTTP cookies + verify bool Performing SSL-Validity check + allow_redirects bool Follow redirects + max_redirects int maximum redirects, hard limit + soft_max_redirects int maximum redirects, soft limit. Record an error but don't stop the engine + raise_for_httperror bool True by default: raise an exception if the HTTP code of response is >= 300 + =================== =========== ========================================================================== .. _engine results: - -returned results -================ - -Searx is able to return results of different media-types.
Currently the -following media-types are supported: - -- default_ -- images_ -- videos_ -- torrent_ -- map_ - -To set another media-type as default, the parameter ``template`` must be set to -the desired type. - -default -------- - -========================= ===================================================== -result-parameter information -========================= ===================================================== -url string, url of the result -title string, title of the result -content string, general result-text -publishedDate :py:class:`datetime.datetime`, time of publish -========================= ===================================================== - -images ------- - -To use this template, the parameter: - -========================= ===================================================== -result-parameter information -========================= ===================================================== -template is set to ``images.html`` -url string, url to the result site -title string, title of the result *(partly implemented)* -content *(partly implemented)* -publishedDate :py:class:`datetime.datetime`, - time of publish *(partly implemented)* -img\_src string, url to the result image -thumbnail\_src string, url to a small-preview image -========================= ===================================================== - -videos ------- - -========================= ===================================================== -result-parameter information -========================= ===================================================== -template is set to ``videos.html`` -url string, url of the result -title string, title of the result -content *(not implemented yet)* -publishedDate :py:class:`datetime.datetime`, time of publish -thumbnail string, url to a small-preview image -========================= ===================================================== - -torrent -------- +.. 
_engine media types: + +Media Types +=========== + +Each result item of an engine can be of different media-types. Currently the +following media-types are supported. To set another media-type as ``default``, +the parameter ``template`` must be set to the desired type. + +.. table:: Parameter of the **default** media type: + :width: 100% + + ========================= ===================================================== + result-parameter information + ========================= ===================================================== + url string, url of the result + title string, title of the result + content string, general result-text + publishedDate :py:class:`datetime.datetime`, time of publish + ========================= ===================================================== + + +.. table:: Parameter of the **images** media type: + :width: 100% + + ========================= ===================================================== + result-parameter information + ------------------------- ----------------------------------------------------- + template is set to ``images.html`` + ========================= ===================================================== + url string, url to the result site + title string, title of the result *(partly implemented)* + content *(partly implemented)* + publishedDate :py:class:`datetime.datetime`, + time of publish *(partly implemented)* + img\_src string, url to the result image + thumbnail\_src string, url to a small-preview image + ========================= ===================================================== + + +.. 
table:: Parameter of the **videos** media type: + :width: 100% + + ========================= ===================================================== + result-parameter information + ------------------------- ----------------------------------------------------- + template is set to ``videos.html`` + ========================= ===================================================== + url string, url of the result + title string, title of the result + content *(not implemented yet)* + publishedDate :py:class:`datetime.datetime`, time of publish + thumbnail string, url to a small-preview image + ========================= ===================================================== .. _magnetlink: https://en.wikipedia.org/wiki/Magnet_URI_scheme -========================= ===================================================== -result-parameter information -========================= ===================================================== -template is set to ``torrent.html`` -url string, url of the result -title string, title of the result -content string, general result-text -publishedDate :py:class:`datetime.datetime`, - time of publish *(not implemented yet)* -seed int, number of seeder -leech int, number of leecher -filesize int, size of file in bytes -files int, number of files -magnetlink string, magnetlink_ of the result -torrentfile string, torrentfile of the result -========================= ===================================================== - - -map ---- - -========================= ===================================================== -result-parameter information -========================= ===================================================== -url string, url of the result -title string, title of the result -content string, general result-text -publishedDate :py:class:`datetime.datetime`, time of publish -latitude latitude of result (in decimal format) -longitude longitude of result (in decimal format) -boundingbox boundingbox of result (array of 4. 
values - ``[lat-min, lat-max, lon-min, lon-max]``) -geojson geojson of result (https://geojson.org/) -osm.type type of osm-object (if OSM-Result) -osm.id id of osm-object (if OSM-Result) -address.name name of object -address.road street name of object -address.house_number house number of object -address.locality city, place of object -address.postcode postcode of object -address.country country of object -========================= ===================================================== +.. table:: Parameter of the **torrent** media type: + :width: 100% + + ========================= ===================================================== + result-parameter information + ------------------------- ----------------------------------------------------- + template is set to ``torrent.html`` + ========================= ===================================================== + url string, url of the result + title string, title of the result + content string, general result-text + publishedDate :py:class:`datetime.datetime`, + time of publish *(not implemented yet)* + seed int, number of seeder + leech int, number of leecher + filesize int, size of file in bytes + files int, number of files + magnetlink string, magnetlink_ of the result + torrentfile string, torrentfile of the result + ========================= ===================================================== + +.. 
table:: Parameter of the **map** media type: + :width: 100% + + ========================= ===================================================== + result-parameter information + ------------------------- ----------------------------------------------------- + template is set to ``map.html`` + ========================= ===================================================== + url string, url of the result + title string, title of the result + content string, general result-text + publishedDate :py:class:`datetime.datetime`, time of publish + latitude latitude of result (in decimal format) + longitude longitude of result (in decimal format) + boundingbox boundingbox of result (array of 4. values + ``[lat-min, lat-max, lon-min, lon-max]``) + geojson geojson of result (https://geojson.org/) + osm.type type of osm-object (if OSM-Result) + osm.id id of osm-object (if OSM-Result) + address.name name of object + address.road street name of object + address.house_number house number of object + address.locality city, place of object + address.postcode postcode of object + address.country country of object + ========================= ===================================================== diff --git a/docs/dev/index.rst b/docs/dev/index.rst index ba0a25a9c..93c914bbb 100644 --- a/docs/dev/index.rst +++ b/docs/dev/index.rst @@ -9,8 +9,10 @@ Developer documentation quickstart contribution_guide engine_overview + offline_engines search_api plugins translation + lxcdev makefile reST diff --git a/docs/blog/lxcdev-202006.rst b/docs/dev/lxcdev.rst index 24d9028c2..ef26e3734 100644 --- a/docs/blog/lxcdev-202006.rst +++ b/docs/dev/lxcdev.rst @@ -1,48 +1,48 @@ -.. _blog-lxcdev-202006: +.. _lxcdev: -======================================= -Developing in Linux containers [202006] -======================================= +============================== +Developing in Linux Containers +============================== .. 
_LXC: https://linuxcontainers.org/lxc/introduction/ +In this article we will show, how you can make use of Linux Containers (LXC_) in +*distributed and heterogeneous development cycles* (TL;DR; jump to the +:ref:`lxcdev summary`). + .. sidebar:: Audience - This blog post is written for experienced admins and developers / readers - should have a serious meaning about: *distributed*, *merge* and *linux - container*. + This blog post is written for experienced admins and developers. Readers + should have a serious meaning about the terms: *distributed*, *merge* and + *linux container*. .. contents:: Contents :depth: 2 :local: :backlinks: entry -In PR :PR:`1803` we added a lot of scripts to Searx's boilerplate. In this blog -post I will show you, how you can make use of them in *distributed and -heterogeneous development cycles* (TL;DR; jump to the :ref:`blog-lxcdev-202006 -abstract`). Motivation ========== -Normally in our development cycle, we edit the sources and run some test and/or -builds by using ``make`` before we commit. This cycle is simple and perfect but -might fail in some aspects we should not overlook. +Usually in our development cycle, we edit the sources and run some test and/or +builds by using ``make`` :ref:`[ref] <makefile>` before we commit. This cycle +is simple and perfect but might fail in some aspects we should not overlook. - The environment in which we run all our development processes matters! + **The environment in which we run all our development processes matters!** -The :ref:`makefile` and the :ref:`make install` encapsulate a lot for us, but they -do not have access to all prerequisites. For example, there may have +The :ref:`makefile` and the :ref:`make install` encapsulate a lot for us, but +they do not have access to all prerequisites. For example, there may have dependencies on packages that are installed on the developer's desktop, but -usually are not preinstalled on a server or client system. 
Another examples -are; settings have been made to the software on the developer's host that would -never be set on a *production* system. +usually are not preinstalled on a server or client system. Another example is +settings made to the software on the developer's desktop that would never +be set on a *production* system. -*Linux Containers* (LXC_) are isolate environments and not to mix up on -developer's all the prerequisites of all the projects he contribute to, is -always a good choice. + **Linux Containers are isolated environments, and not mixing up all the + prerequisites from various projects on the developer's desktop is always a good + choice.** -The scripts from PR :PR:`1803` can divide in those to install and maintain +The scripts from :ref:`searx_utils` can be divided into those that install and maintain software: - :ref:`searx.sh` @@ -50,8 +50,10 @@ software: - :ref:`morty.sh` and the script :ref:`lxc.sh`, with we can scale our installation, maintenance or -even development tasks over a stack of containers, what we call: *Searx's lxc -suite*. +even development tasks over a stack of isolated containers / what we call the: + + **searxNG LXC suite** + Gentlemen, start your engines! ============================== @@ -141,18 +143,18 @@ and once for the content sanitizer (content proxy morty): ... INFO: got 200 from http://10.174.184.156/morty/ -.. sidebar:: Fully functional searx suite +.. sidebar:: Fully functional searXNG suite - From here on you have a fully functional searx suite running with bot blocker - (filtron) and Web content sanitizer (content proxy morty) needed for a - *privacy protecting* search engine. + From here on you have a fully functional searXNG suite running with bot + blocker (filtron) and WEB content sanitizer (content proxy morty), both of which are + needed for a *privacy protecting* search engine.
On your system, the IP of your ``searx-archlinux`` container differs from http://10.174.184.156/searx, just open the URL reported in your installation protocol in your WEB browser from the desktop to test the instance from outside of the container. -In such a searx suite admins can maintain and access the debug log of the +In such a searXNG suite admins can maintain and access the debug log of the different services quite easy. .. _working in containers: @@ -176,7 +178,7 @@ searx-archlinux``: /share/searx The prompt ``[root@searx-archlinux ...]`` signals, that you are the root user in -the searx-container. To debug the running searx instance use: +the searx-container. To debug the running searXNG instance use: .. tabs:: @@ -192,8 +194,8 @@ the searx-container. To debug the running searx instance use: Back in the browser on your desktop open the service http://10.174.184.156/searx and run your application tests while the debug log is shown in the terminal from above. You can stop monitoring using ``CTRL-C``, this also disables the *"debug -option"* in searx's settings file and restarts the searx uwsgi application. To -debug services from filtron and morty analogous use: +option"* in searXNG's settings file and restarts the searXNG uwsgi application. +To debug services from filtron and morty analogous use: .. tabs:: @@ -250,18 +252,18 @@ user ``searx`` in the ``searx-archlinux`` container and the python *virtualenv* Wrap production into developer suite ==================================== -In this section we will see how to change the *"Fully functional searx suite"* +In this section we will see how to change the *"Fully functional searXNG suite"* from a LXC container (which is quite ready for production) into a developer suite. 
For this, we have to keep an eye on the :ref:`installation basic`: -- searx setup in: ``/etc/searx/settings.yml`` -- searx user's home: ``/usr/local/searx`` +- searXNG setup in: ``/etc/searx/settings.yml`` +- searXNG user's home: ``/usr/local/searx`` - virtualenv in: ``/usr/local/searx/searx-pyenv`` -- searx software in: ``/usr/local/searx/searx-src`` +- searXNG software in: ``/usr/local/searx/searx-src`` -The searx software is a clone of the ``git_url`` (see :ref:`settings global`) and -the working tree is checked out from the ``git_branch``. With the use of the -:ref:`searx.sh` the searx service was installed as :ref:`uWSGI application +The searXNG software is a clone of the ``git_url`` (see :ref:`settings global`) +and the working tree is checked out from the ``git_branch``. With the use of +the :ref:`searx.sh` the searx service was installed as :ref:`uWSGI application <searx uwsgi>`. To maintain this service, we can use ``systemctl`` (compare :ref:`service architectures on distributions <uwsgi configuration>`). @@ -292,7 +294,7 @@ least you should attend the settings of ``uid``, ``chdir``, ``env`` and If you have read the :ref:`"Good to know section" <lxc.sh>` you remember, that each container shares the root folder of the repository and the command ``utils/lxc.sh cmd`` handles relative path names **transparent**. To wrap the -searx installation into a developer one, we simple have to create a smylink to +searXNG installation into a developer one, we simple have to create a smylink to the **transparent** reposetory from the desktop. Now lets replace the repository at ``searx-src`` in the container with the working tree from outside of the container: @@ -330,7 +332,7 @@ daily usage: .. group-tab:: desktop - To *inspect* the searx instance (already described above): + To *inspect* the searXNG instance (already described above): .. code:: sh @@ -358,12 +360,12 @@ daily usage: $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ make docs.html -.. 
_blog-lxcdev-202006 abstract: +.. _lxcdev summary: -Abstract -======== +Summary +======= -We build up a fully functional searx suite in a archlinux container: +We build up a fully functional searXNG suite in an archlinux container: .. code:: sh @@ -395,7 +397,8 @@ the container : $ ln -s /share/searx/ /usr/local/searx/searx-src $ systemctl restart uwsgi@searx -To get remarks from the suite of the archlinux container we can use: +To get information about the searxNG suite in the archlinux container we can +use: .. tabs:: diff --git a/docs/dev/makefile.rst b/docs/dev/makefile.rst index 870b5d49c..b7472dad7 100644 --- a/docs/dev/makefile.rst +++ b/docs/dev/makefile.rst @@ -29,8 +29,8 @@ Calling the ``help`` target gives a first overview (``make help``): .. _make install: -Python environment -================== +Python Environment (``make install``) +===================================== .. sidebar:: activate environment diff --git a/docs/dev/offline_engines.rst b/docs/dev/offline_engines.rst new file mode 100644 index 000000000..ce6924542 --- /dev/null +++ b/docs/dev/offline_engines.rst @@ -0,0 +1,78 @@ +.. _offline engines: + +=============== +Offline Engines +=============== + +.. sidebar:: offline engines + + - :ref:`demo offline engine` + - :ref:`sql engines` + - :ref:`engine command` + - :origin:`Redis <searx/engines/redis_server.py>` + +To extend the functionality of SearxNG, offline engines are going to be +introduced. An offline engine is an engine which does not need an Internet +connection to perform a search and does not use HTTP to communicate. + +Offline engines can be configured by adding them to the ``engines`` list of +:origin:`settings.yml <searx/settings.yml>`. An example skeleton for offline +engines can be found in :ref:`demo offline engine` (:origin:`demo_offline.py +<searx/engines/demo_offline.py>`).
+ + +Programming Interface +===================== + +:py:func:`init(engine_settings=None) <searx.engines.demo_offline.init>` + All offline engines can have their own init function to set up the engine before + accepting requests. The function gets the settings from settings.yml as a + parameter. This function can be omitted, if there is no need to set up anything + in advance. + +:py:func:`search(query, params) <searx.engines.demo_offline.search>` + + Each offline engine has a function named ``search``. This function is + responsible to perform a search and return the results in a presentable + format. (Where *presentable* means presentable by the selected result + template.) + + The return value is a list of results retrieved by the engine. + +Engine representation in ``/config`` + If an engine is offline, the attribute ``offline`` is set to ``True``. + +.. _offline requirements: + +Extra Dependencies +================== + +If an offline engine depends on an external tool, SearxNG does not install it by +default. When an administrator configures such an engine and starts the instance, +the process returns an error with the list of missing dependencies. Also, +required dependencies will be added to the comment/description of the engine, so +admins can install packages in advance. + +If there is a need to install additional packages in *Python's Virtual +Environment* of your SearxNG instance you need to switch into the environment +(:ref:`searx-src`) first, for this you can use :ref:`searx.sh`:: + + $ sudo utils/searx.sh shell + (searx-pyenv)$ pip install ... + + +Private engines (Security) +========================== + +To limit the access to offline engines, if an instance is available publicly, +administrators can set token(s) for each of the :ref:`private engines`. If a +query contains a valid token, then SearxNG performs the requested private +search. If not, requests from offline engines return errors.
+ + +Acknowledgement +=============== + +This development was sponsored by `Search and Discovery Fund +<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_ . + diff --git a/docs/dev/reST.rst b/docs/dev/reST.rst index 181d9829d..230c92a78 100644 --- a/docs/dev/reST.rst +++ b/docs/dev/reST.rst @@ -1281,10 +1281,10 @@ Templating Templating is suitable for documentation which is created generic at the build time. The sphinx-jinja_ extension evaluates jinja_ templates in the :ref:`make install` (with searx modules installed). We use this e.g. to build chapter: -:ref:`engines generic`. Below the jinja directive from the +:ref:`configured engines`. Below the jinja directive from the :origin:`docs/admin/engines.rst` is shown: -.. literalinclude:: ../admin/engines.rst +.. literalinclude:: ../admin/engines/configured_engines.rst :language: reST :start-after: .. _configured engines: diff --git a/docs/dev/search_api.rst b/docs/dev/search_api.rst index 68fee94bf..5fcdc4560 100644 --- a/docs/dev/search_api.rst +++ b/docs/dev/search_api.rst @@ -20,7 +20,7 @@ Parameters - :ref:`engines-dev` - :ref:`settings.yml` - - :ref:`engines generic` + - :ref:`configured engines` ``q`` : required The search query. This string is passed to external search services. Thus, diff --git a/docs/index.rst b/docs/index.rst index 71f0d8855..2b7bdeb37 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -34,7 +34,6 @@ anyone, you can set up your own, see :ref:`installation`. dev/index searx_extra/index utils/index - blog/index src/index .. _Searx-instances: https://searx.space diff --git a/docs/src/searx.engines.demo_offline.rst b/docs/src/searx.engines.demo_offline.rst new file mode 100644 index 000000000..9424244fd --- /dev/null +++ b/docs/src/searx.engines.demo_offline.rst @@ -0,0 +1,9 @@ +.. _demo offline engine: + +=================== +Demo Offline Engine +=================== + +.. 
automodule:: searx.engines.demo_offline + :members: + diff --git a/docs/src/searx.engines.demo_online.rst b/docs/src/searx.engines.demo_online.rst new file mode 100644 index 000000000..0a8c8e985 --- /dev/null +++ b/docs/src/searx.engines.demo_online.rst @@ -0,0 +1,9 @@ +.. _demo online engine: + +================== +Demo Online Engine +================== + +.. automodule:: searx.engines.demo_online + :members: + diff --git a/docs/src/searx.engines.xpath.rst b/docs/src/searx.engines.xpath.rst deleted file mode 100644 index 4c73763d1..000000000 --- a/docs/src/searx.engines.xpath.rst +++ /dev/null @@ -1,9 +0,0 @@ -.. _xpath_engine: - -================ -The XPath engine -================ - -.. automodule:: searx.engines.xpath - :members: - diff --git a/searx/engines/demo_offline.py b/searx/engines/demo_offline.py new file mode 100644 index 000000000..06609d2c3 --- /dev/null +++ b/searx/engines/demo_offline.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Within this module we implement a *demo offline engine*. Do not look to +close to the implementation, its just a simple example. To get in use of this +*demo* engine add the following entry to your engines list in ``settings.yml``: + +.. code:: yaml + + - name: my offline engine + engine: demo_offline + shortcut: demo + disabled: false + +""" + +import json + +engine_type = 'offline' +categories = ['general'] +disabled = True +timeout = 2.0 + +about = { + "wikidata_id": None, + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": 'JSON', +} + +# if there is a need for globals, use a leading underline +_my_offline_engine = None + +def init(engine_settings=None): + """Initialization of the (offline) engine. The origin of this demo engine is a + simple json string which is loaded in this example while the engine is + initialized. 
+ + """ + global _my_offline_engine # pylint: disable=global-statement + + _my_offline_engine = ( + '[ {"value": "%s"}' + ', {"value":"first item"}' + ', {"value":"second item"}' + ', {"value":"third item"}' + ']' + + % engine_settings.get('name') + ) + +def search(query, request_params): + """Query (offline) engine and return results. Assemble the list of results from + your local engine. In this demo engine we ignore the 'query' term, usual + you would pass the 'query' term to your local engine to filter out the + results. + + """ + global _my_offline_engine # pylint: disable=global-statement + ret_val = [] + + result_list = json.loads(_my_offline_engine) + + for row in result_list: + entry = { + 'query' : query, + 'language' : request_params['language'], + 'value' : row.get("value"), + # choose a result template or comment out to use the *default* + 'template' : 'key-value.html', + } + ret_val.append(entry) + + return ret_val diff --git a/searx/engines/demo_online.py b/searx/engines/demo_online.py new file mode 100644 index 000000000..a0f736e42 --- /dev/null +++ b/searx/engines/demo_online.py @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Within this module we implement a *demo online engine*. Do not look to +close to the implementation, its just a simple example which queries `The Art +Institute of Chicago <https://www.artic.edu>`_ + +To get in use of this *demo* engine add the following entry to your engines +list in ``settings.yml``: + +.. code:: yaml + + - name: my online engine + engine: demo_online + shortcut: demo + disabled: false + +""" + +from json import loads +from urllib.parse import urlencode + +engine_type = 'offline' +categories = ['general'] +disabled = True +timeout = 2.0 +categories = ['images'] +paging = True +page_size = 20 + +search_api = 'https://api.artic.edu/api/v1/artworks/search?' 
+image_api = 'https://www.artic.edu/iiif/2/' + +about = { + "website": 'https://www.artic.edu', + "wikidata_id": 'Q239303', + "official_api_documentation": 'http://api.artic.edu/docs/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} + + +# if there is a need for globals, use a leading underline +_my_online_engine = None + +def init(engine_settings): + """Initialization of the (online) engine. If no initialization is needed, drop + this init function. + + """ + global _my_online_engine # pylint: disable=global-statement + _my_online_engine = engine_settings.get('name') + +def request(query, params): + """Build up the ``params`` for the online request. In this example we build a + URL to fetch images from `artic.edu <https://artic.edu>`__ + + """ + args = urlencode({ + 'q' : query, + 'page' : params['pageno'], + 'fields' : 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', + 'limit' : page_size, + }) + params['url'] = search_api + args + return params + +def response(resp): + """Parse out the result items from the response. In this example we parse the + response from `api.artic.edu <https://artic.edu>`__ and filter out all + images. 
+ + """ + results = [] + json_data = loads(resp.text) + + for result in json_data['data']: + + if not result['image_id']: + continue + + results.append({ + 'url': 'https://artic.edu/artworks/%(id)s' % result, + 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, + 'content': result['medium_display'], + 'author': ', '.join(result['artist_titles']), + 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, + 'img_format': result['dimensions'], + 'template': 'images.html' + }) + + return results diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index eaa8b6ab4..4a92a22c3 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -17,7 +17,7 @@ about = { "results": 'HTML', } -engine_type = 'online_dictionnary' +engine_type = 'online_dictionary' categories = ['general'] url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' weight = 100 diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 9c53d70ad..8d67ca0bb 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -13,7 +13,7 @@ about = { "results": 'JSON', } -engine_type = 'online_dictionnary' +engine_type = 'online_dictionary' categories = ['general'] url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' diff --git a/searx/search/processors/online_dictionary.py b/searx/search/processors/online_dictionary.py index 11ca0335d..72941d57a 100644 --- a/searx/search/processors/online_dictionary.py +++ b/searx/search/processors/online_dictionary.py @@ -12,9 +12,9 @@ from .online import OnlineProcessor parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) class OnlineDictionaryProcessor(OnlineProcessor): - """Processor class used by ``online_dictionnary`` engines.""" + """Processor class used by ``online_dictionary`` engines.""" - engine_type = 'online_dictionnary' + 
engine_type = 'online_dictionary' def get_params(self, search_query, engine_category): params = super().get_params(search_query, engine_category) |