diff options
Diffstat (limited to 'searx')
-rw-r--r-- | searx/enginelib/__init__.py | 17 | ||||
-rw-r--r-- | searx/engines/__init__.py | 4 | ||||
-rw-r--r-- | searx/engines/annas_archive.py | 19 | ||||
-rw-r--r-- | searx/engines/command.py | 75 | ||||
-rw-r--r-- | searx/engines/elasticsearch.py | 42 | ||||
-rw-r--r-- | searx/engines/meilisearch.py | 32 | ||||
-rw-r--r-- | searx/engines/mongodb.py | 46 | ||||
-rw-r--r-- | searx/engines/mysql_server.py | 33 | ||||
-rw-r--r-- | searx/engines/postgresql.py | 29 | ||||
-rw-r--r-- | searx/engines/recoll.py | 51 | ||||
-rw-r--r-- | searx/engines/redis_server.py | 33 | ||||
-rw-r--r-- | searx/engines/solr.py | 28 | ||||
-rw-r--r-- | searx/engines/sqlite.py | 44 | ||||
-rw-r--r-- | searx/engines/torznab.py | 14 | ||||
-rw-r--r-- | searx/engines/xpath.py | 75 |
15 files changed, 462 insertions, 80 deletions
diff --git a/searx/enginelib/__init__.py b/searx/enginelib/__init__.py index fd3019e6c..6a0bb67c5 100644 --- a/searx/enginelib/__init__.py +++ b/searx/enginelib/__init__.py @@ -1,18 +1,15 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Engine related implementations +"""Implementations of the framework for the SearXNG engines. -.. note:: +.. hint:: - The long term goal is to modularize all relevant implementations to the - engines here in this Python package. In addition to improved modularization, - this will also be necessary in part because the probability of circular - imports will increase due to the increased typification of implementations in - the future. + The long term goal is to modularize all implementations of the engine + framework here in this Python package. ToDo: - ToDo: + - move implementations of the :ref:`searx.engines loader` to a new module in + the :py:obj:`searx.enginelib` namespace. - - move :py:obj:`searx.engines.load_engine` to a new module `searx.enginelib`. """ @@ -36,7 +33,7 @@ class Engine: # pylint: disable=too-few-public-methods # Common options in the engine module engine_type: str - """Type of the engine (:origin:`searx/search/processors`)""" + """Type of the engine (:ref:`searx.search.processors`)""" paging: bool """Engine supports multiple pages.""" diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index e9e9f87c9..da2b2037e 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This module implements the engine loader. - -Load and initialize the ``engines``, see :py:func:`load_engines` and register +"""Load and initialize the ``engines``, see :py:func:`load_engines` and register :py:obj:`engine_shortcuts`. 
usage:: diff --git a/searx/engines/annas_archive.py b/searx/engines/annas_archive.py index db9bd1719..1bcdeeec6 100644 --- a/searx/engines/annas_archive.py +++ b/searx/engines/annas_archive.py @@ -1,24 +1,12 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -""".. _annas_archive engine: - -============== -Anna's Archive -============== +"""`Anna's Archive`_ is a free non-profit online shadow library metasearch +engine providing access to a variety of book resources (also via IPFS), created +by a team of anonymous archivists (AnnaArchivist_). .. _Anna's Archive: https://annas-archive.org/ .. _AnnaArchivist: https://annas-software.org/AnnaArchivist/annas-archive -`Anna's Archive`_ is a free non-profit online shadow library metasearch engine -providing access to a variety of book resources (also via IPFS), created by a -team of anonymous archivists (AnnaArchivist_). - -.. contents:: Contents - :depth: 2 - :local: - :backlinks: entry - - Configuration ============= @@ -41,7 +29,6 @@ for *newest* articles and journals (PDF) / by shortcut ``!aaa <search-term>``. aa_ext: 'pdf' aa_sort: 'newest' - Implementations =============== diff --git a/searx/engines/command.py b/searx/engines/command.py index abd29e2a5..ffb87509a 100644 --- a/searx/engines/command.py +++ b/searx/engines/command.py @@ -1,6 +1,77 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - Command (offline) +"""With *command engines* administrators can run engines to integrate arbitrary +shell commands. + +.. attention:: + + When creating and enabling a ``command`` engine on a public instance, you + must be careful to avoid leaking private data. + +The easiest solution is to limit the access by setting ``tokens`` as described +in section :ref:`private engines`. The engine base is flexible. Only your +imagination can limit the power of this engine (and maybe security concerns). 
+ +Configuration +============= + +The following options are available: + +``command``: + A comma separated list of the elements of the command. A special token + ``{{QUERY}}`` tells where to put the search terms of the user. Example: + + .. code:: yaml + + ['ls', '-l', '-h', '{{QUERY}}'] + +``delimiter``: + A mapping containing the delimiter ``chars`` and the *titles* of each element in + ``keys``. + +``parse_regex``: + A dict containing the regular expressions for each result key. + +``query_type``: + + The expected type of user search terms. Possible values: ``path`` and + ``enum``. + + ``path``: + Checks if the user provided path is inside the working directory. If not, + the query is not executed. + + ``enum``: + Is a list of allowed search terms. If the user submits something which is + not included in the list, the query returns an error. + +``query_enum``: + A list containing allowed search terms if ``query_type`` is set to ``enum``. + +``working_dir``: + The directory where the command has to be executed. Default: ``./``. + +``result_separator``: + The character that separates results. Default: ``\\n``. + +Example +======= + +The example engine below can be used to find files with a specific name in the +configured working directory: + +.. code:: yaml + + - name: find + engine: command + command: ['find', '.', '-name', '{{QUERY}}'] + query_type: path + shortcut: fnd + delimiter: + chars: ' ' + keys: ['line'] + +Implementations +=============== """ import re diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py index f6e207b4d..7bddab1cb 100644 --- a/searx/engines/elasticsearch.py +++ b/searx/engines/elasticsearch.py @@ -1,6 +1,44 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - Elasticsearch +""".. 
sidebar:: info + + - :origin:`elasticsearch.py <searx/engines/elasticsearch.py>` + - `Elasticsearch <https://www.elastic.co/elasticsearch/>`_ + - `Elasticsearch Guide + <https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html>`_ + - `Install Elasticsearch + <https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html>`_ + +Elasticsearch_ supports numerous ways to query the data it is storing. At the +moment the engine supports the most popular search methods (``query_type``): + +- ``match``, +- ``simple_query_string``, +- ``term`` and +- ``terms``. + +If none of the methods fit your use case, you can select ``custom`` query type +and provide the JSON payload to submit to Elasticsearch in +``custom_query_json``. + +Example +======= + +The following is an example configuration for an Elasticsearch_ instance with +authentication configured to read from ``my-index`` index. + +.. code:: yaml + + - name: elasticsearch + shortcut: es + engine: elasticsearch + base_url: http://localhost:9200 + username: elastic + password: changeme + index: my-index + query_type: match + # custom_query_json: '{ ... }' + enable_http: true + """ from json import loads, dumps diff --git a/searx/engines/meilisearch.py b/searx/engines/meilisearch.py index c41d23eb4..0c2370216 100644 --- a/searx/engines/meilisearch.py +++ b/searx/engines/meilisearch.py @@ -1,7 +1,35 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -""" - Meilisearch +""".. sidebar:: info + + - :origin:`meilisearch.py <searx/engines/meilisearch.py>` + - `MeiliSearch <https://www.meilisearch.com>`_ + - `MeiliSearch Documentation <https://docs.meilisearch.com/>`_ + - `Install MeiliSearch + <https://docs.meilisearch.com/learn/getting_started/installation.html>`_ + +MeiliSearch_ is aimed at individuals and small companies. It is designed for +small-scale (less than 10 million documents) data collections. E.g. 
it is great +for storing web pages you have visited and searching in the contents later. + +The engine supports faceted search, so you can search in a subset of documents +of the collection. Furthermore, you can search in MeiliSearch_ instances that +require authentication by setting ``auth_token``. + +Example +======= + +Here is a simple example to query a Meilisearch instance: + +.. code:: yaml + + - name: meilisearch + engine: meilisearch + shortcut: mes + base_url: http://localhost:7700 + index: my-index + enable_http: true + """ # pylint: disable=global-statement diff --git a/searx/engines/mongodb.py b/searx/engines/mongodb.py index 63452bb68..260d6da97 100644 --- a/searx/engines/mongodb.py +++ b/searx/engines/mongodb.py @@ -1,11 +1,53 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""MongoDB engine (Offline) +"""MongoDB_ is a document based database program that handles JSON like data. +Before configuring the ``mongodb`` engine, you must install the dependency +pymongo_. + +Configuration +============= + +In order to query MongoDB_, you have to select a ``database`` and a +``collection``. Furthermore, you have to select a ``key`` that is going to be +searched. MongoDB_ also supports the option ``exact_match_only``, so configure +it as you wish. + +Example +======= + +Below is an example configuration for using a MongoDB collection: + +.. 
code:: yaml + + # MongoDB engine + # Required dependency: pymongo + + - name: mymongo + engine: mongodb + shortcut: md + exact_match_only: false + host: '127.0.0.1' + port: 27017 + enable_http: true + results_per_page: 20 + database: 'business' + collection: 'reviews' # name of the db collection + key: 'name' # key in the collection to search for + +Implementations +=============== """ import re -from pymongo import MongoClient # pyright: ignore # pylint: disable=import-error + +try: + from pymongo import MongoClient # type: ignore +except ImportError: + # import error is ignored because the admin has to install pymongo manually + # to use the engine + pass + engine_type = 'offline' diff --git a/searx/engines/mysql_server.py b/searx/engines/mysql_server.py index 8d0a49565..82bb37f51 100644 --- a/searx/engines/mysql_server.py +++ b/searx/engines/mysql_server.py @@ -1,12 +1,37 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""MySQL database (offline) +"""MySQL is said to be the most popular open source database. Before enabling +MySQL engine, you must install the package ``mysql-connector-python``. + +The authentication plugin is configurable by setting ``auth_plugin`` in the +attributes. By default it is set to ``caching_sha2_password``. + +Example +======= + +This is an example configuration for querying a MySQL server: + +.. 
code:: yaml + + - name: my_database + engine: mysql_server + database: my_database + username: searxng + password: password + limit: 5 + query_str: 'SELECT * from my_table WHERE my_column=%(query)s' + +Implementations +=============== """ -# import error is ignored because the admin has to install mysql manually to use -# the engine -import mysql.connector # pyright: ignore # pylint: disable=import-error +try: + import mysql.connector # type: ignore +except ImportError: + # import error is ignored because the admin has to install mysql manually to use + # the engine + pass engine_type = 'offline' auth_plugin = 'caching_sha2_password' diff --git a/searx/engines/postgresql.py b/searx/engines/postgresql.py index d7ff6a11b..c0277207c 100644 --- a/searx/engines/postgresql.py +++ b/searx/engines/postgresql.py @@ -1,12 +1,33 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""PostgreSQL database (offline) +"""PostgreSQL is a powerful and robust open source database. Before configuring +the PostgreSQL engine, you must install the dependency ``psycopg2``. + +Example +======= + +Below is an example configuration: + +.. code:: yaml + + - name: my_database + engine: postgresql + database: my_database + username: searxng + password: password + query_str: 'SELECT * from my_table WHERE my_column = %(query)s' + +Implementations +=============== """ -# import error is ignored because the admin has to install mysql manually to use -# the engine -import psycopg2 # pyright: ignore # pylint: disable=import-error +try: + import psycopg2 # type: ignore +except ImportError: + # import error is ignored because the admin has to install postgresql + # manually to use the engine. 
+ pass engine_type = 'offline' host = "127.0.0.1" diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py index ebcd83b8d..c11e197ed 100644 --- a/searx/engines/recoll.py +++ b/searx/engines/recoll.py @@ -1,6 +1,51 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - Recoll (local search engine) +# lint: pylint +""".. sidebar:: info + + - `Recoll <https://www.lesbonscomptes.com/recoll/>`_ + - `recoll-webui <https://framagit.org/medoc92/recollwebui.git>`_ + - :origin:`searx/engines/recoll.py` + +Recoll_ is a desktop full-text search tool based on Xapian. By itself Recoll_ +does not offer WEB or API access; this can be achieved using recoll-webui_. + +Configuration +============= + +You must configure the following settings: + +``base_url``: + Location where recoll-webui can be reached. + +``mount_prefix``: + Location where the file hierarchy is mounted on your *local* filesystem. + +``dl_prefix``: + Location where the file hierarchy as indexed by recoll can be reached. + +``search_dir``: + Part of the indexed file hierarchy to be searched, if empty the full domain is + searched. + +Example +======= + +Scenario: + +#. Recoll indexes a local filesystem mounted in ``/export/documents/reference``, +#. the Recoll search interface can be reached at https://recoll.example.org/ and +#. the contents of this filesystem can be reached through https://download.example.org/reference + +.. 
code:: yaml + + base_url: https://recoll.example.org/ + mount_prefix: /export/documents + dl_prefix: https://download.example.org + search_dir: '' + +Implementations +=============== + """ from datetime import date, timedelta @@ -33,7 +78,7 @@ embedded_url = '<{ttype} controls height="166px" ' + 'src="{url}" type="{mtype}" # helper functions def get_time_range(time_range): - sw = {'day': 1, 'week': 7, 'month': 30, 'year': 365} + sw = {'day': 1, 'week': 7, 'month': 30, 'year': 365} # pylint: disable=invalid-name offset = sw.get(time_range, 0) if not offset: diff --git a/searx/engines/redis_server.py b/searx/engines/redis_server.py index 03786f81d..980812509 100644 --- a/searx/engines/redis_server.py +++ b/searx/engines/redis_server.py @@ -1,6 +1,37 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Redis engine (offline) +"""Redis is an open source (BSD licensed), in-memory data structure (key value +based) store. Before configuring the ``redis_server`` engine, you must install +the dependency redis_. + +Configuration +============= + +Select a database to search in and set its index in the option ``db``. You can +either look for exact matches or use partial keywords to find what you are +looking for by configuring ``exact_match_only``. + +Example +======= + +Below is an example configuration: + +.. code:: yaml + + # Required dependency: redis + + - name: myredis + shortcut : rds + engine: redis_server + exact_match_only: false + host: '127.0.0.1' + port: 6379 + enable_http: true + password: '' + db: 0 + +Implementations +=============== """ diff --git a/searx/engines/solr.py b/searx/engines/solr.py index 3e7846f8e..85ed42cf9 100644 --- a/searx/engines/solr.py +++ b/searx/engines/solr.py @@ -1,7 +1,31 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -""" - Solr +""".. 
sidebar:: info + + - :origin:`solr.py <searx/engines/solr.py>` + - `Solr <https://solr.apache.org>`_ + - `Solr Resources <https://solr.apache.org/resources.html>`_ + - `Install Solr <https://solr.apache.org/guide/installing-solr.html>`_ + +Solr_ is a popular search engine based on Lucene, just like Elasticsearch_. But +instead of searching in indices, you can search in collections. + +Example +======= + +This is an example configuration for searching in the collection +``my-collection`` and get the results in ascending order. + +.. code:: yaml + + - name: solr + engine: solr + shortcut: slr + base_url: http://localhost:8983 + collection: my-collection + sort: asc + enable_http: true + """ # pylint: disable=global-statement diff --git a/searx/engines/sqlite.py b/searx/engines/sqlite.py index 6de12f5fe..c86df5867 100644 --- a/searx/engines/sqlite.py +++ b/searx/engines/sqlite.py @@ -1,7 +1,40 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint +"""SQLite is a small, fast and reliable SQL database engine. It does not require +any extra dependency. -"""SQLite database (Offline) +Example +======= + +.. _MediathekView: https://mediathekview.de/ + +To demonstrate the power of database engines, here is a more complex example +which reads from a MediathekView_ (DE) movie database. For this example of the +SQLite engine download the database: + +- https://liste.mediathekview.de/filmliste-v2.db.bz2 + +and unpack into ``searx/data/filmliste-v2.db``. To search the database, use e.g. +the test query ``!mediathekview concert`` + +.. 
code:: yaml + + - name: mediathekview + engine: sqlite + disabled: False + categories: general + result_template: default.html + database: searx/data/filmliste-v2.db + query_str: >- + SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, + COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, + description AS content + FROM film + WHERE title LIKE :wildcard OR description LIKE :wildcard + ORDER BY duration DESC + +Implementations +=============== """ @@ -26,14 +59,15 @@ def init(engine_settings): @contextlib.contextmanager def sqlite_cursor(): - """Implements a `Context Manager`_ for a :py:obj:`sqlite3.Cursor`. + """Implements a :py:obj:`Context Manager <contextlib.contextmanager>` for a + :py:obj:`sqlite3.Cursor`. - Open database in read only mode: if the database doesn't exist. - The default mode creates an empty file on the file system. + Open database in read only mode: if the database doesn't exist. The default + mode creates an empty file on the file system. See: - see: * https://docs.python.org/3/library/sqlite3.html#sqlite3.connect * https://www.sqlite.org/uri.html + """ uri = 'file:' + database + '?mode=ro' with contextlib.closing(sqlite3.connect(uri, uri=True)) as connect: diff --git a/searx/engines/torznab.py b/searx/engines/torznab.py index dc24919b5..0692d4a7a 100644 --- a/searx/engines/torznab.py +++ b/searx/engines/torznab.py @@ -1,17 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -""".. _torznab engine: - -============== -Torznab WebAPI -============== - -.. contents:: Contents - :depth: 2 - :local: - :backlinks: entry - -Torznab_ is an API specification that provides a standardized way to query +"""Torznab_ is an API specification that provides a standardized way to query torrent site for content. It is used by a number of torrent applications, including Prowlarr_ and Jackett_. @@ -55,7 +44,6 @@ The engine has the following settings: .. 
_Jackett-categories: https://github.com/Jackett/Jackett/wiki/Jackett-Categories - Implementations =============== diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 2dc22028f..51ddcda78 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -3,8 +3,55 @@ """The XPath engine is a *generic* engine with which it is possible to configure engines in the settings. -Here is a simple example of a XPath engine configured in the -:ref:`settings engine` section, further read :ref:`engines-dev`. +.. _XPath selector: https://quickref.me/xpath.html#xpath-selectors + +Configuration +============= + +Request: + +- :py:obj:`search_url` +- :py:obj:`lang_all` +- :py:obj:`soft_max_redirects` +- :py:obj:`cookies` +- :py:obj:`headers` + +Paging: + +- :py:obj:`paging` +- :py:obj:`page_size` +- :py:obj:`first_page_num` + +Time Range: + +- :py:obj:`time_range_support` +- :py:obj:`time_range_url` +- :py:obj:`time_range_map` + +Safe-Search: + +- :py:obj:`safe_search_support` +- :py:obj:`safe_search_map` + +Response: + +- :py:obj:`no_result_for_http_status` + +`XPath selector`_: + +- :py:obj:`results_xpath` +- :py:obj:`url_xpath` +- :py:obj:`title_xpath` +- :py:obj:`content_xpath` +- :py:obj:`thumbnail_xpath` +- :py:obj:`suggestion_xpath` + + +Example +======= + +Here is a simple example of a XPath engine configured in the :ref:`settings +engine` section, further read :ref:`engines-dev`. .. code:: yaml @@ -16,6 +63,9 @@ Here is a simple example of a XPath engine configured in the title_xpath : //article[@class="repo-summary"]//a[@class="repo-link"] content_xpath : //article[@class="repo-summary"]/p +Implementations +=============== + """ from urllib.parse import urlencode @@ -26,7 +76,7 @@ from searx.network import raise_for_httperror search_url = None """ -Search URL of the engine. Example:: +Search URL of the engine. 
Example:: https://example.org/?search={query}&page={pageno}{time_range}{safe_search} @@ -74,30 +124,33 @@ soft_max_redirects = 0 '''Maximum redirects, soft limit. Record an error but don't stop the engine''' results_xpath = '' -'''XPath selector for the list of result items''' +'''`XPath selector`_ for the list of result items''' url_xpath = None -'''XPath selector of result's ``url``.''' +'''`XPath selector`_ of result's ``url``.''' content_xpath = None -'''XPath selector of result's ``content``.''' +'''`XPath selector`_ of result's ``content``.''' title_xpath = None -'''XPath selector of result's ``title``.''' +'''`XPath selector`_ of result's ``title``.''' thumbnail_xpath = False -'''XPath selector of result's ``img_src``.''' +'''`XPath selector`_ of result's ``img_src``.''' suggestion_xpath = '' -'''XPath selector of result's ``suggestion``.''' +'''`XPath selector`_ of result's ``suggestion``.''' cached_xpath = '' cached_url = '' cookies = {} +'''Some engines might offer different result based on cookies. +Possible use-case: To set safesearch cookie.''' + headers = {} -'''Some engines might offer different result based on cookies or headers. -Possible use-case: To set safesearch cookie or header to moderate.''' +'''Some engines might offer different result based on headers. Possible use-case: +To set header to moderate.''' paging = False '''Engine supports paging [True or False].''' |