summaryrefslogtreecommitdiff
path: root/searx/engines
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2023-06-30 18:07:02 +0200
committerMarkus Heiser <markus.heiser@darmarIT.de>2023-07-01 22:45:19 +0200
commit5720844fcdc8601798e10544e2fd25ce4f2ad099 (patch)
treeaf611e4aef436253f4fda9504d06e05e2621114d /searx/engines
parent8e8d8dabe9b17c9db8db7432c6bc063d9ae980d1 (diff)
downloadsearxng-5720844fcdc8601798e10544e2fd25ce4f2ad099.tar.gz
searxng-5720844fcdc8601798e10544e2fd25ce4f2ad099.zip
[doc] rearranges Settings & Engines docs for better readability
We have built up detailed documentation of the *settings* and the *engines* over the past few years. However, this documentation was still spread over various chapters and was difficult to navigate in its entirety. This patch rearranges the Settings & Engines documentation for better readability. To review new ordered docs:: make docs.clean docs.live Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines')
-rw-r--r--searx/engines/__init__.py4
-rw-r--r--searx/engines/annas_archive.py19
-rw-r--r--searx/engines/command.py75
-rw-r--r--searx/engines/elasticsearch.py42
-rw-r--r--searx/engines/meilisearch.py32
-rw-r--r--searx/engines/mongodb.py46
-rw-r--r--searx/engines/mysql_server.py33
-rw-r--r--searx/engines/postgresql.py29
-rw-r--r--searx/engines/recoll.py51
-rw-r--r--searx/engines/redis_server.py33
-rw-r--r--searx/engines/solr.py28
-rw-r--r--searx/engines/sqlite.py44
-rw-r--r--searx/engines/torznab.py14
-rw-r--r--searx/engines/xpath.py75
14 files changed, 455 insertions, 70 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index e9e9f87c9..da2b2037e 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""This module implements the engine loader.
-
-Load and initialize the ``engines``, see :py:func:`load_engines` and register
+"""Load and initialize the ``engines``, see :py:func:`load_engines` and register
:py:obj:`engine_shortcuts`.
usage::
diff --git a/searx/engines/annas_archive.py b/searx/engines/annas_archive.py
index db9bd1719..1bcdeeec6 100644
--- a/searx/engines/annas_archive.py
+++ b/searx/engines/annas_archive.py
@@ -1,24 +1,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-""".. _annas_archive engine:
-
-==============
-Anna's Archive
-==============
+"""`Anna's Archive`_ is a free non-profit online shadow library metasearch
+engine providing access to a variety of book resources (also via IPFS), created
+by a team of anonymous archivists (AnnaArchivist_).
.. _Anna's Archive: https://annas-archive.org/
.. _AnnaArchivist: https://annas-software.org/AnnaArchivist/annas-archive
-`Anna's Archive`_ is a free non-profit online shadow library metasearch engine
-providing access to a variety of book resources (also via IPFS), created by a
-team of anonymous archivists (AnnaArchivist_).
-
-.. contents:: Contents
- :depth: 2
- :local:
- :backlinks: entry
-
-
Configuration
=============
@@ -41,7 +29,6 @@ for *newest* articles and journals (PDF) / by shortcut ``!aaa <search-term>``.
aa_ext: 'pdf'
aa_sort: 'newest'
-
Implementations
===============
diff --git a/searx/engines/command.py b/searx/engines/command.py
index abd29e2a5..ffb87509a 100644
--- a/searx/engines/command.py
+++ b/searx/engines/command.py
@@ -1,6 +1,77 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Command (offline)
+"""With *command engines* administrators can run engines to integrate arbitrary
+shell commands.
+
+.. attention::
+
+ When creating and enabling a ``command`` engine on a public instance, you
+ must be careful to avoid leaking private data.
+
+The easiest solution is to limit the access by setting ``tokens`` as described
+in section :ref:`private engines`. The engine base is flexible. Only your
+imagination can limit the power of this engine (and maybe security concerns).
+
+Configuration
+=============
+
+The following options are available:
+
+``command``:
+ A comma separated list of the elements of the command. A special token
+ ``{{QUERY}}`` tells where to put the search terms of the user. Example:
+
+ .. code:: yaml
+
+ ['ls', '-l', '-h', '{{QUERY}}']
+
+``delimiter``:
+ A mapping containing a delimiter ``char`` and the *titles* of each element in
+ ``keys``.
+
+``parse_regex``:
+ A dict containing the regular expressions for each result key.
+
+``query_type``:
+
+ The expected type of user search terms. Possible values: ``path`` and
+ ``enum``.
+
+ ``path``:
+ Checks if the user provided path is inside the working directory. If not,
+ the query is not executed.
+
+ ``enum``:
+ Is a list of allowed search terms. If the user submits something which is
+ not included in the list, the query returns an error.
+
+``query_enum``:
+ A list containing allowed search terms if ``query_type`` is set to ``enum``.
+
+``working_dir``:
+ The directory where the command has to be executed. Default: ``./``.
+
+``result_separator``:
+ The character that separates results. Default: ``\\n``.
+
+Example
+=======
+
+The example engine below can be used to find files with a specific name in the
+configured working directory:
+
+.. code:: yaml
+
+ - name: find
+ engine: command
+ command: ['find', '.', '-name', '{{QUERY}}']
+ query_type: path
+ shortcut: fnd
+ delimiter:
+ chars: ' '
+ keys: ['line']
+
+Implementations
+===============
"""
import re
diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py
index f6e207b4d..7bddab1cb 100644
--- a/searx/engines/elasticsearch.py
+++ b/searx/engines/elasticsearch.py
@@ -1,6 +1,44 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Elasticsearch
+""".. sidebar:: info
+
+ - :origin:`elasticsearch.py <searx/engines/elasticsearch.py>`
+ - `Elasticsearch <https://www.elastic.co/elasticsearch/>`_
+ - `Elasticsearch Guide
+ <https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html>`_
+ - `Install Elasticsearch
+ <https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html>`_
+
+Elasticsearch_ supports numerous ways to query the data it is storing. At the
+moment the engine supports the most popular search methods (``query_type``):
+
+- ``match``,
+- ``simple_query_string``,
+- ``term`` and
+- ``terms``.
+
+If none of the methods fit your use case, you can select ``custom`` query type
+and provide the JSON payload to submit to Elasticsearch in
+``custom_query_json``.
+
+Example
+=======
+
+The following is an example configuration for an Elasticsearch_ instance with
+authentication configured to read from ``my-index`` index.
+
+.. code:: yaml
+
+ - name: elasticsearch
+ shortcut: es
+ engine: elasticsearch
+ base_url: http://localhost:9200
+ username: elastic
+ password: changeme
+ index: my-index
+ query_type: match
+ # custom_query_json: '{ ... }'
+ enable_http: true
+
"""
from json import loads, dumps
diff --git a/searx/engines/meilisearch.py b/searx/engines/meilisearch.py
index c41d23eb4..0c2370216 100644
--- a/searx/engines/meilisearch.py
+++ b/searx/engines/meilisearch.py
@@ -1,7 +1,35 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""
- Meilisearch
+""".. sidebar:: info
+
+ - :origin:`meilisearch.py <searx/engines/meilisearch.py>`
+ - `MeiliSearch <https://www.meilisearch.com>`_
+ - `MeiliSearch Documentation <https://docs.meilisearch.com/>`_
+ - `Install MeiliSearch
+ <https://docs.meilisearch.com/learn/getting_started/installation.html>`_
+
+MeiliSearch_ is aimed at individuals and small companies. It is designed for
+small-scale (less than 10 million documents) data collections. E.g. it is great
+for storing web pages you have visited and searching in the contents later.
+
+The engine supports faceted search, so you can search in a subset of documents
+of the collection. Furthermore, you can search in MeiliSearch_ instances that
+require authentication by setting ``auth_token``.
+
+Example
+=======
+
+Here is a simple example to query a Meilisearch instance:
+
+.. code:: yaml
+
+ - name: meilisearch
+ engine: meilisearch
+ shortcut: mes
+ base_url: http://localhost:7700
+ index: my-index
+ enable_http: true
+
"""
# pylint: disable=global-statement
diff --git a/searx/engines/mongodb.py b/searx/engines/mongodb.py
index 63452bb68..260d6da97 100644
--- a/searx/engines/mongodb.py
+++ b/searx/engines/mongodb.py
@@ -1,11 +1,53 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""MongoDB engine (Offline)
+"""MongoDB_ is a document based database program that handles JSON like data.
+Before configuring the ``mongodb`` engine, you must install the dependency
+pymongo_.
+
+Configuration
+=============
+
+In order to query MongoDB_, you have to select a ``database`` and a
+``collection``. Furthermore, you have to select a ``key`` that is going to be
+searched. MongoDB_ also supports the option ``exact_match_only``, so configure
+it as you wish.
+
+Example
+=======
+
+Below is an example configuration for using a MongoDB collection:
+
+.. code:: yaml
+
+ # MongoDB engine
+ # Required dependency: pymongo
+
+ - name: mymongo
+ engine: mongodb
+ shortcut: md
+ exact_match_only: false
+ host: '127.0.0.1'
+ port: 27017
+ enable_http: true
+ results_per_page: 20
+ database: 'business'
+ collection: 'reviews' # name of the db collection
+ key: 'name' # key in the collection to search for
+
+Implementations
+===============
"""
import re
-from pymongo import MongoClient # pyright: ignore # pylint: disable=import-error
+
+try:
+ from pymongo import MongoClient # type: ignore
+except ImportError:
+ # import error is ignored because the admin has to install pymongo manually
+ # to use the engine
+ pass
+
engine_type = 'offline'
diff --git a/searx/engines/mysql_server.py b/searx/engines/mysql_server.py
index 8d0a49565..82bb37f51 100644
--- a/searx/engines/mysql_server.py
+++ b/searx/engines/mysql_server.py
@@ -1,12 +1,37 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""MySQL database (offline)
+"""MySQL is said to be the most popular open source database. Before enabling
+MySQL engine, you must install the package ``mysql-connector-python``.
+
+The authentication plugin is configurable by setting ``auth_plugin`` in the
+attributes. By default it is set to ``caching_sha2_password``.
+
+Example
+=======
+
+This is an example configuration for querying a MySQL server:
+
+.. code:: yaml
+
+ - name: my_database
+ engine: mysql_server
+ database: my_database
+ username: searxng
+ password: password
+ limit: 5
+ query_str: 'SELECT * from my_table WHERE my_column=%(query)s'
+
+Implementations
+===============
"""
-# import error is ignored because the admin has to install mysql manually to use
-# the engine
-import mysql.connector # pyright: ignore # pylint: disable=import-error
+try:
+ import mysql.connector # type: ignore
+except ImportError:
+ # import error is ignored because the admin has to install mysql manually to use
+ # the engine
+ pass
engine_type = 'offline'
auth_plugin = 'caching_sha2_password'
diff --git a/searx/engines/postgresql.py b/searx/engines/postgresql.py
index d7ff6a11b..c0277207c 100644
--- a/searx/engines/postgresql.py
+++ b/searx/engines/postgresql.py
@@ -1,12 +1,33 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""PostgreSQL database (offline)
+"""PostgreSQL is a powerful and robust open source database. Before configuring
+the PostgreSQL engine, you must install the dependency ``psycopg2``.
+
+Example
+=======
+
+Below is an example configuration:
+
+.. code:: yaml
+
+ - name: my_database
+ engine: postgresql
+ database: my_database
+ username: searxng
+ password: password
+ query_str: 'SELECT * from my_table WHERE my_column = %(query)s'
+
+Implementations
+===============
"""
-# import error is ignored because the admin has to install mysql manually to use
-# the engine
-import psycopg2 # pyright: ignore # pylint: disable=import-error
+try:
+ import psycopg2 # type: ignore
+except ImportError:
+ # import error is ignored because the admin has to install postgresql
+ # manually to use the engine.
+ pass
engine_type = 'offline'
host = "127.0.0.1"
diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py
index ebcd83b8d..c11e197ed 100644
--- a/searx/engines/recoll.py
+++ b/searx/engines/recoll.py
@@ -1,6 +1,51 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Recoll (local search engine)
+# lint: pylint
+""".. sidebar:: info
+
+ - `Recoll <https://www.lesbonscomptes.com/recoll/>`_
+ - `recoll-webui <https://framagit.org/medoc92/recollwebui.git>`_
+ - :origin:`searx/engines/recoll.py`
+
+Recoll_ is a desktop full-text search tool based on Xapian. By itself Recoll_
+does not offer WEB or API access, this can be achieved using recoll-webui_
+
+Configuration
+=============
+
+You must configure the following settings:
+
+``base_url``:
+ Location where recoll-webui can be reached.
+
+``mount_prefix``:
+ Location where the file hierarchy is mounted on your *local* filesystem.
+
+``dl_prefix``:
+ Location where the file hierarchy as indexed by recoll can be reached.
+
+``search_dir``:
+  Part of the indexed file hierarchy to be searched; if empty, the full domain
+  is searched.
+
+Example
+=======
+
+Scenario:
+
+#. Recoll indexes a local filesystem mounted in ``/export/documents/reference``,
+#. the Recoll search interface can be reached at https://recoll.example.org/ and
+#. the contents of this filesystem can be reached through https://download.example.org/reference
+
+.. code:: yaml
+
+ base_url: https://recoll.example.org/
+ mount_prefix: /export/documents
+ dl_prefix: https://download.example.org
+ search_dir: ''
+
+Implementations
+===============
+
"""
from datetime import date, timedelta
@@ -33,7 +78,7 @@ embedded_url = '<{ttype} controls height="166px" ' + 'src="{url}" type="{mtype}"
# helper functions
def get_time_range(time_range):
- sw = {'day': 1, 'week': 7, 'month': 30, 'year': 365}
+ sw = {'day': 1, 'week': 7, 'month': 30, 'year': 365} # pylint: disable=invalid-name
offset = sw.get(time_range, 0)
if not offset:
diff --git a/searx/engines/redis_server.py b/searx/engines/redis_server.py
index 03786f81d..980812509 100644
--- a/searx/engines/redis_server.py
+++ b/searx/engines/redis_server.py
@@ -1,6 +1,37 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Redis engine (offline)
+"""Redis is an open source (BSD licensed), in-memory data structure (key value
+based) store. Before configuring the ``redis_server`` engine, you must install
+the dependency redis_.
+
+Configuration
+=============
+
+Select a database to search in and set its index in the option ``db``. You can
+either look for exact matches or use partial keywords to find what you are
+looking for by configuring ``exact_match_only``.
+
+Example
+=======
+
+Below is an example configuration:
+
+.. code:: yaml
+
+ # Required dependency: redis
+
+ - name: myredis
+ shortcut : rds
+ engine: redis_server
+ exact_match_only: false
+ host: '127.0.0.1'
+ port: 6379
+ enable_http: true
+ password: ''
+ db: 0
+
+Implementations
+===============
"""
diff --git a/searx/engines/solr.py b/searx/engines/solr.py
index 3e7846f8e..85ed42cf9 100644
--- a/searx/engines/solr.py
+++ b/searx/engines/solr.py
@@ -1,7 +1,31 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""
- Solr
+""".. sidebar:: info
+
+ - :origin:`solr.py <searx/engines/solr.py>`
+ - `Solr <https://solr.apache.org>`_
+ - `Solr Resources <https://solr.apache.org/resources.html>`_
+ - `Install Solr <https://solr.apache.org/guide/installing-solr.html>`_
+
+Solr_ is a popular search engine based on Lucene, just like Elasticsearch_. But
+instead of searching in indices, you can search in collections.
+
+Example
+=======
+
+This is an example configuration for searching in the collection
+``my-collection`` and get the results in ascending order.
+
+.. code:: yaml
+
+ - name: solr
+ engine: solr
+ shortcut: slr
+ base_url: http://localhost:8983
+ collection: my-collection
+ sort: asc
+ enable_http: true
+
"""
# pylint: disable=global-statement
diff --git a/searx/engines/sqlite.py b/searx/engines/sqlite.py
index 6de12f5fe..c86df5867 100644
--- a/searx/engines/sqlite.py
+++ b/searx/engines/sqlite.py
@@ -1,7 +1,40 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
+"""SQLite is a small, fast and reliable SQL database engine. It does not require
+any extra dependency.
-"""SQLite database (Offline)
+Example
+=======
+
+.. _MediathekView: https://mediathekview.de/
+
+To demonstrate the power of database engines, here is a more complex example
+which reads from a MediathekView_ (DE) movie database. For this example of the
+SQlite engine download the database:
+
+- https://liste.mediathekview.de/filmliste-v2.db.bz2
+
+and unpack into ``searx/data/filmliste-v2.db``.  To test the engine, search the
+database with a query such as: ``!mediathekview concert``
+
+.. code:: yaml
+
+ - name: mediathekview
+ engine: sqlite
+ disabled: False
+ categories: general
+ result_template: default.html
+ database: searx/data/filmliste-v2.db
+ query_str: >-
+ SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title,
+ COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url,
+ description AS content
+ FROM film
+ WHERE title LIKE :wildcard OR description LIKE :wildcard
+ ORDER BY duration DESC
+
+Implementations
+===============
"""
@@ -26,14 +59,15 @@ def init(engine_settings):
@contextlib.contextmanager
def sqlite_cursor():
- """Implements a `Context Manager`_ for a :py:obj:`sqlite3.Cursor`.
+ """Implements a :py:obj:`Context Manager <contextlib.contextmanager>` for a
+ :py:obj:`sqlite3.Cursor`.
- Open database in read only mode: if the database doesn't exist.
- The default mode creates an empty file on the file system.
+ Open database in read only mode: if the database doesn't exist. The default
+ mode creates an empty file on the file system. See:
- see:
* https://docs.python.org/3/library/sqlite3.html#sqlite3.connect
* https://www.sqlite.org/uri.html
+
"""
uri = 'file:' + database + '?mode=ro'
with contextlib.closing(sqlite3.connect(uri, uri=True)) as connect:
diff --git a/searx/engines/torznab.py b/searx/engines/torznab.py
index dc24919b5..0692d4a7a 100644
--- a/searx/engines/torznab.py
+++ b/searx/engines/torznab.py
@@ -1,17 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-""".. _torznab engine:
-
-==============
-Torznab WebAPI
-==============
-
-.. contents:: Contents
- :depth: 2
- :local:
- :backlinks: entry
-
-Torznab_ is an API specification that provides a standardized way to query
+"""Torznab_ is an API specification that provides a standardized way to query
torrent site for content. It is used by a number of torrent applications,
including Prowlarr_ and Jackett_.
@@ -55,7 +44,6 @@ The engine has the following settings:
.. _Jackett-categories:
https://github.com/Jackett/Jackett/wiki/Jackett-Categories
-
Implementations
===============
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 2dc22028f..51ddcda78 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -3,8 +3,55 @@
"""The XPath engine is a *generic* engine with which it is possible to configure
engines in the settings.
-Here is a simple example of a XPath engine configured in the
-:ref:`settings engine` section, further read :ref:`engines-dev`.
+.. _XPath selector: https://quickref.me/xpath.html#xpath-selectors
+
+Configuration
+=============
+
+Request:
+
+- :py:obj:`search_url`
+- :py:obj:`lang_all`
+- :py:obj:`soft_max_redirects`
+- :py:obj:`cookies`
+- :py:obj:`headers`
+
+Paging:
+
+- :py:obj:`paging`
+- :py:obj:`page_size`
+- :py:obj:`first_page_num`
+
+Time Range:
+
+- :py:obj:`time_range_support`
+- :py:obj:`time_range_url`
+- :py:obj:`time_range_map`
+
+Safe-Search:
+
+- :py:obj:`safe_search_support`
+- :py:obj:`safe_search_map`
+
+Response:
+
+- :py:obj:`no_result_for_http_status`
+
+`XPath selector`_:
+
+- :py:obj:`results_xpath`
+- :py:obj:`url_xpath`
+- :py:obj:`title_xpath`
+- :py:obj:`content_xpath`
+- :py:obj:`thumbnail_xpath`
+- :py:obj:`suggestion_xpath`
+
+
+Example
+=======
+
+Here is a simple example of a XPath engine configured in the :ref:`settings
+engine` section, further read :ref:`engines-dev`.
.. code:: yaml
@@ -16,6 +63,9 @@ Here is a simple example of a XPath engine configured in the
title_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]
content_xpath : //article[@class="repo-summary"]/p
+Implementations
+===============
+
"""
from urllib.parse import urlencode
@@ -26,7 +76,7 @@ from searx.network import raise_for_httperror
search_url = None
"""
-Search URL of the engine. Example::
+Search URL of the engine. Example::
https://example.org/?search={query}&page={pageno}{time_range}{safe_search}
@@ -74,30 +124,33 @@ soft_max_redirects = 0
'''Maximum redirects, soft limit. Record an error but don't stop the engine'''
results_xpath = ''
-'''XPath selector for the list of result items'''
+'''`XPath selector`_ for the list of result items'''
url_xpath = None
-'''XPath selector of result's ``url``.'''
+'''`XPath selector`_ of result's ``url``.'''
content_xpath = None
-'''XPath selector of result's ``content``.'''
+'''`XPath selector`_ of result's ``content``.'''
title_xpath = None
-'''XPath selector of result's ``title``.'''
+'''`XPath selector`_ of result's ``title``.'''
thumbnail_xpath = False
-'''XPath selector of result's ``img_src``.'''
+'''`XPath selector`_ of result's ``img_src``.'''
suggestion_xpath = ''
-'''XPath selector of result's ``suggestion``.'''
+'''`XPath selector`_ of result's ``suggestion``.'''
cached_xpath = ''
cached_url = ''
cookies = {}
+'''Some engines might offer different result based on cookies.
+Possible use-case: To set safesearch cookie.'''
+
headers = {}
-'''Some engines might offer different result based on cookies or headers.
-Possible use-case: To set safesearch cookie or header to moderate.'''
+'''Some engines might offer different results based on headers. Possible
+use-case: To set a header to moderate.'''
paging = False
'''Engine supports paging [True or False].'''