From 2a29e16d25ae19d9216b959f177cdcd71c43511e Mon Sep 17 00:00:00 2001 From: Grant Lanham Date: Sun, 29 Sep 2024 20:56:59 -0400 Subject: [feat] implement mariadb engine --- searx/engines/mariadb_server.py | 79 +++++++++++++++++++++++++++ searx/settings.yml | 10 ++++ tests/unit/test_engine_mariadb_server.py | 44 +++++++++++++++ tests/unit/test_engine_tineye.py | 94 ++++++++++++++++++++++++++++++++ tests/unit/test_tineye.py | 94 -------------------------------- 5 files changed, 227 insertions(+), 94 deletions(-) create mode 100644 searx/engines/mariadb_server.py create mode 100644 tests/unit/test_engine_mariadb_server.py create mode 100644 tests/unit/test_engine_tineye.py delete mode 100644 tests/unit/test_tineye.py diff --git a/searx/engines/mariadb_server.py b/searx/engines/mariadb_server.py new file mode 100644 index 000000000..7cf7eec33 --- /dev/null +++ b/searx/engines/mariadb_server.py @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""MariaDB is a community driven fork of MySQL. Before enabling MariaDB engine, +you must install the pip package ``mariadb`` along with the necessary +prerequisites. + +`See the following documentation for more details +`_ + +Example +======= + +This is an example configuration for querying a MariaDB server: + +..
code:: yaml + + - name: my_database + engine: mariadb_server + database: my_database + username: searxng + password: password + limit: 5 + query_str: 'SELECT * from my_table WHERE my_column=%(query)s' + +""" + +from typing import TYPE_CHECKING + +try: + import mariadb +except ImportError: + # import error is ignored because the admin has to install mariadb manually to use + # the engine + pass + +if TYPE_CHECKING: + import logging + + logger = logging.getLogger() + + +engine_type = 'offline' +host = "127.0.0.1" +port = 3306 +database = "" +username = "" +password = "" +query_str = "" +limit = 10 +paging = True +result_template = 'key-value.html' +_connection = None + + +def init(engine_settings): + global _connection # pylint: disable=global-statement + + if 'query_str' not in engine_settings: + raise ValueError('query_str cannot be empty') + + if not engine_settings['query_str'].lower().startswith('select '): + raise ValueError('only SELECT query is supported') + + _connection = mariadb.connect(database=database, user=username, password=password, host=host, port=port) + + +def search(query, params): + query_params = {'query': query} + query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit) + logger.debug("SQL Query: %s", query_to_run) + + with _connection.cursor() as cur: + cur.execute(query_to_run, query_params) + results = [] + col_names = [i[0] for i in cur.description] + for res in cur: + result = dict(zip(col_names, map(str, res))) + result['template'] = result_template + results.append(result) + return results diff --git a/searx/settings.yml b/searx/settings.yml index 44245ab7c..8b264eaf6 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -2031,6 +2031,16 @@ engines: # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' # shortcut: mysql + # Required dependency: mariadb + # - name: mariadb + # engine: mariadb_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + #
query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mdb + - name: 1337x engine: 1337x shortcut: 1337x diff --git a/tests/unit/test_engine_mariadb_server.py b/tests/unit/test_engine_mariadb_server.py new file mode 100644 index 000000000..423132e34 --- /dev/null +++ b/tests/unit/test_engine_mariadb_server.py @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=missing-module-docstring + +from unittest.mock import MagicMock, Mock +from searx.engines import load_engines, mariadb_server +from tests import SearxTestCase + + +class MariadbServerTests(SearxTestCase): # pylint: disable=missing-class-docstring + def setUp(self): + load_engines( + [ + { + 'name': 'mariadb server', + 'engine': 'mariadb_server', + 'shortcut': 'mdb', + 'timeout': 9.0, + 'disabled': True, + } + ] + ) + + def tearDown(self): + load_engines([]) + + def test_init_no_query_str_raises(self): + self.assertRaises(ValueError, lambda: mariadb_server.init({})) + + def test_init_non_select_raises(self): + self.assertRaises(ValueError, lambda: mariadb_server.init({'query_str': 'foobar'})) + + def test_search_returns_results(self): + test_string = 'FOOBAR' + cursor_mock = MagicMock() + with cursor_mock as setup: # pylint: disable=not-context-manager + setup.__iter__ = Mock(return_value=iter([{test_string, 1}])) + setup.description = [[test_string]] + conn_mock = Mock() + conn_mock.cursor.return_value = cursor_mock + mariadb_server._connection = conn_mock # pylint: disable=protected-access + results = mariadb_server.search(test_string, {'pageno': 1}) + self.assertEqual(1, len(results)) + self.assertIn(test_string, results[0]) + self.assertEqual(mariadb_server.result_template, results[0]['template']) diff --git a/tests/unit/test_engine_tineye.py b/tests/unit/test_engine_tineye.py new file mode 100644 index 000000000..5855a7313 --- /dev/null +++ b/tests/unit/test_engine_tineye.py @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: 
disable=missing-module-docstring + + +from datetime import datetime +from unittest.mock import Mock +from requests import HTTPError +from parameterized import parameterized +from searx.engines import load_engines, tineye +from tests import SearxTestCase + + +class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring + + def setUp(self): + load_engines([{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}]) + + def tearDown(self): + load_engines([]) + + def test_status_code_raises(self): + response = Mock() + response.status_code = 401 + response.raise_for_status.side_effect = HTTPError() + self.assertRaises(HTTPError, lambda: tineye.response(response)) + + @parameterized.expand([(400), (422)]) + def test_returns_empty_list(self, status_code): + response = Mock() + response.json.return_value = {} + response.status_code = status_code + response.raise_for_status.side_effect = HTTPError() + results = tineye.response(response) + self.assertEqual(0, len(results)) + + def test_logs_format_for_422(self): + response = Mock() + response.json.return_value = {"suggestions": {"key": "Invalid image URL"}} + response.status_code = 422 + response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(tineye.FORMAT_NOT_SUPPORTED, ','.join(assert_logs_context.output)) + + def test_logs_signature_for_422(self): + response = Mock() + response.json.return_value = {"suggestions": {"key": "NO_SIGNATURE_ERROR"}} + response.status_code = 422 + response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(tineye.NO_SIGNATURE_ERROR, ','.join(assert_logs_context.output)) + + def test_logs_download_for_422(self): + response = Mock() + response.json.return_value = {"suggestions": {"key": "Download Error"}} + response.status_code = 422 + 
response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(tineye.DOWNLOAD_ERROR, ','.join(assert_logs_context.output)) + + def test_logs_description_for_400(self): + description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645' + response = Mock() + response.json.return_value = {"suggestions": {"description": [description], "title": "Oops! We're sorry!"}} + response.status_code = 400 + response.raise_for_status.side_effect = HTTPError() + + with self.assertLogs(tineye.logger) as assert_logs_context: + tineye.response(response) + self.assertIn(description, ','.join(assert_logs_context.output)) + + def test_crawl_date_parses(self): + date_str = '2020-05-25' + date = datetime.strptime(date_str, '%Y-%m-%d') + response = Mock() + response.json.return_value = { + 'matches': [ + { + 'backlinks': [ + { + 'crawl_date': date_str, + } + ] + } + ] + } + response.status_code = 200 + results = tineye.response(response) + self.assertEqual(date, results[0]['publishedDate']) diff --git a/tests/unit/test_tineye.py b/tests/unit/test_tineye.py deleted file mode 100644 index 5855a7313..000000000 --- a/tests/unit/test_tineye.py +++ /dev/null @@ -1,94 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -# pylint: disable=missing-module-docstring - - -from datetime import datetime -from unittest.mock import Mock -from requests import HTTPError -from parameterized import parameterized -from searx.engines import load_engines, tineye -from tests import SearxTestCase - - -class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring - - def setUp(self): - load_engines([{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}]) - - def tearDown(self): - load_engines([]) - - def test_status_code_raises(self): - response = Mock() - response.status_code = 401 - response.raise_for_status.side_effect = 
HTTPError() - self.assertRaises(HTTPError, lambda: tineye.response(response)) - - @parameterized.expand([(400), (422)]) - def test_returns_empty_list(self, status_code): - response = Mock() - response.json.return_value = {} - response.status_code = status_code - response.raise_for_status.side_effect = HTTPError() - results = tineye.response(response) - self.assertEqual(0, len(results)) - - def test_logs_format_for_422(self): - response = Mock() - response.json.return_value = {"suggestions": {"key": "Invalid image URL"}} - response.status_code = 422 - response.raise_for_status.side_effect = HTTPError() - - with self.assertLogs(tineye.logger) as assert_logs_context: - tineye.response(response) - self.assertIn(tineye.FORMAT_NOT_SUPPORTED, ','.join(assert_logs_context.output)) - - def test_logs_signature_for_422(self): - response = Mock() - response.json.return_value = {"suggestions": {"key": "NO_SIGNATURE_ERROR"}} - response.status_code = 422 - response.raise_for_status.side_effect = HTTPError() - - with self.assertLogs(tineye.logger) as assert_logs_context: - tineye.response(response) - self.assertIn(tineye.NO_SIGNATURE_ERROR, ','.join(assert_logs_context.output)) - - def test_logs_download_for_422(self): - response = Mock() - response.json.return_value = {"suggestions": {"key": "Download Error"}} - response.status_code = 422 - response.raise_for_status.side_effect = HTTPError() - - with self.assertLogs(tineye.logger) as assert_logs_context: - tineye.response(response) - self.assertIn(tineye.DOWNLOAD_ERROR, ','.join(assert_logs_context.output)) - - def test_logs_description_for_400(self): - description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645' - response = Mock() - response.json.return_value = {"suggestions": {"description": [description], "title": "Oops! 
We're sorry!"}} - response.status_code = 400 - response.raise_for_status.side_effect = HTTPError() - - with self.assertLogs(tineye.logger) as assert_logs_context: - tineye.response(response) - self.assertIn(description, ','.join(assert_logs_context.output)) - - def test_crawl_date_parses(self): - date_str = '2020-05-25' - date = datetime.strptime(date_str, '%Y-%m-%d') - response = Mock() - response.json.return_value = { - 'matches': [ - { - 'backlinks': [ - { - 'crawl_date': date_str, - } - ] - } - ] - } - response.status_code = 200 - results = tineye.response(response) - self.assertEqual(date, results[0]['publishedDate']) -- cgit v1.2.3-54-g00ecf