diff options
author | Allen <64094914+allendema@users.noreply.github.com> | 2024-10-17 04:57:21 +0000 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2024-11-29 15:26:03 +0100 |
commit | 6948689d2a2a29f7ffc5ca9d212e76a3e8e43956 (patch) | |
tree | 4bc0800651b2f416cdea85fbba25e20e14c649c4 /searx/results.py | |
parent | 94aafc83a6beca1ddfb9c4dd889c4302eb79d43f (diff) | |
download | searxng-6948689d2a2a29f7ffc5ca9d212e76a3e8e43956.tar.gz searxng-6948689d2a2a29f7ffc5ca9d212e76a3e8e43956.zip |
[enh] use longest title and test get_ordered_results()
Diffstat (limited to 'searx/results.py')
-rw-r--r-- | searx/results.py | 11 |
1 files changed, 7 insertions, 4 deletions
```diff
diff --git a/searx/results.py b/searx/results.py
index 7c973ca8f..2b677b105 100644
--- a/searx/results.py
+++ b/searx/results.py
@@ -12,7 +12,6 @@ from searx import logger
 from searx.engines import engines
 from searx.metrics import histogram_observe, counter_add, count_error
 
-
 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 
@@ -133,7 +132,7 @@ def result_score(result, priority):
     weight = 1.0
 
     for result_engine in result['engines']:
-        if hasattr(engines[result_engine], 'weight'):
+        if hasattr(engines.get(result_engine), 'weight'):
             weight *= float(engines[result_engine].weight)
 
     weight *= len(result['positions'])
@@ -332,10 +331,14 @@ class ResultContainer:
         return None
 
     def __merge_duplicated_http_result(self, duplicated, result, position):
-        # using content with more text
+        # use content with more text
         if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
             duplicated['content'] = result['content']
 
+        # use title with more text
+        if result_content_len(result.get('title', '')) > len(duplicated.get('title', '')):
+            duplicated['title'] = result['title']
+
         # merge all result's parameters not found in duplicate
         for key in result.keys():
             if not duplicated.get(key):
@@ -347,7 +350,7 @@ class ResultContainer:
         # add engine to list of result-engines
         duplicated['engines'].add(result['engine'])
 
-        # using https if possible
+        # use https if possible
         if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
             duplicated['url'] = result['parsed_url'].geturl()
             duplicated['parsed_url'] = result['parsed_url']
```