summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Allen <64094914+allendema@users.noreply.github.com> 2024-10-17 04:57:21 +0000
committer Markus Heiser <markus.heiser@darmarIT.de> 2024-11-29 15:26:03 +0100
commit 6948689d2a2a29f7ffc5ca9d212e76a3e8e43956 (patch)
tree 4bc0800651b2f416cdea85fbba25e20e14c649c4
parent 94aafc83a6beca1ddfb9c4dd889c4302eb79d43f (diff)
download searxng-6948689d2a2a29f7ffc5ca9d212e76a3e8e43956.tar.gz
searxng-6948689d2a2a29f7ffc5ca9d212e76a3e8e43956.zip
[enh] use longest title and test get_ordered_results()
-rw-r--r-- searx/results.py 11
-rw-r--r-- tests/unit/test_results.py 61
2 files changed, 55 insertions, 17 deletions
diff --git a/searx/results.py b/searx/results.py
index 7c973ca8f..2b677b105 100644
--- a/searx/results.py
+++ b/searx/results.py
@@ -12,7 +12,6 @@ from searx import logger
from searx.engines import engines
from searx.metrics import histogram_observe, counter_add, count_error
-
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@@ -133,7 +132,7 @@ def result_score(result, priority):
weight = 1.0
for result_engine in result['engines']:
- if hasattr(engines[result_engine], 'weight'):
+ if hasattr(engines.get(result_engine), 'weight'):
weight *= float(engines[result_engine].weight)
weight *= len(result['positions'])
@@ -332,10 +331,14 @@ class ResultContainer:
return None
def __merge_duplicated_http_result(self, duplicated, result, position):
- # using content with more text
+ # use content with more text
if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
duplicated['content'] = result['content']
+ # use title with more text
+ if result_content_len(result.get('title', '')) > len(duplicated.get('title', '')):
+ duplicated['title'] = result['title']
+
# merge all result's parameters not found in duplicate
for key in result.keys():
if not duplicated.get(key):
@@ -347,7 +350,7 @@ class ResultContainer:
# add engine to list of result-engines
duplicated['engines'].add(result['engine'])
- # using https if possible
+ # use https if possible
if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
duplicated['url'] = result['parsed_url'].geturl()
duplicated['parsed_url'] = result['parsed_url']
diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py
index 72486bbc7..608d3c8c3 100644
--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@@ -2,9 +2,26 @@
# pylint: disable=missing-module-docstring
from searx.results import ResultContainer
+from searx.engines import load_engines
from tests import SearxTestCase
+def make_test_engine_dict(**kwargs) -> dict:
+ test_engine = {
+ # fmt: off
+ 'name': None,
+ 'engine': None,
+ 'categories': 'general',
+ 'shortcut': 'dummy',
+ 'timeout': 3.0,
+ 'tokens': [],
+ # fmt: on
+ }
+
+ test_engine.update(**kwargs)
+ return test_engine
+
+
def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs):
result = {
# fmt: off
@@ -19,23 +36,41 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', eng
class ResultContainerTestCase(SearxTestCase): # pylint: disable=missing-class-docstring
+ def setUp(self) -> None:
+ stract_engine = make_test_engine_dict(name="stract", engine="stract", shortcut="stra")
+ duckduckgo_engine = make_test_engine_dict(name="duckduckgo", engine="duckduckgo", shortcut="ddg")
+ mojeek_engine = make_test_engine_dict(name="mojeek", engine="mojeek", shortcut="mjk")
+
+ load_engines([stract_engine, duckduckgo_engine, mojeek_engine])
+
+ self.container = ResultContainer()
+
+ def tearDown(self):
+ load_engines([])
+
def test_empty(self):
- c = ResultContainer()
- self.assertEqual(c.get_ordered_results(), [])
+ self.assertEqual(self.container.get_ordered_results(), [])
def test_one_result(self):
- c = ResultContainer()
- c.extend('wikipedia', [fake_result()])
- self.assertEqual(c.results_length(), 1)
+ self.container.extend('wikipedia', [fake_result()])
+
+ self.assertEqual(self.container.results_length(), 1)
def test_one_suggestion(self):
- c = ResultContainer()
- c.extend('wikipedia', [fake_result(suggestion=True)])
- self.assertEqual(len(c.suggestions), 1)
- self.assertEqual(c.results_length(), 0)
+ self.container.extend('wikipedia', [fake_result(suggestion=True)])
+
+ self.assertEqual(len(self.container.suggestions), 1)
+ self.assertEqual(self.container.results_length(), 0)
def test_result_merge(self):
- c = ResultContainer()
- c.extend('wikipedia', [fake_result()])
- c.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
- self.assertEqual(c.results_length(), 2)
+ self.container.extend('wikipedia', [fake_result()])
+ self.container.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
+
+ self.assertEqual(self.container.results_length(), 2)
+
+ def test_result_merge_by_title(self):
+ self.container.extend('stract', [fake_result(engine='stract', title='short title')])
+ self.container.extend('duckduckgo', [fake_result(engine='duckduckgo', title='normal title')])
+ self.container.extend('mojeek', [fake_result(engine='mojeek', title='this long long title')])
+
+ self.assertEqual(self.container.get_ordered_results()[0].get('title', ''), 'this long long title')