diff options
author | allixx <1695323+allixx@users.noreply.github.com> | 2023-12-19 11:21:54 +0300 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2024-01-29 13:15:37 +0100 |
commit | e4cf0a7d4f0416c9b7c45d45db26ccb3eb09af42 (patch) | |
tree | d43b076e913c8cd03d401fb0927cfb82d2ed83ac | |
parent | 8c73aa772b7d4446f77be82d8f9d9eef1e348deb (diff) | |
download | searxng-e4cf0a7d4f0416c9b7c45d45db26ccb3eb09af42.tar.gz searxng-e4cf0a7d4f0416c9b7c45d45db26ccb3eb09af42.zip |
[fix] do highlight replacement at once
Highlights all search query terms in a search result in one go.
Fixes the case where the search query contains a word from the highlight HTML code
(e.g. `class`), which caused broken HTML to appear in search results.
Closes #3057
-rw-r--r-- | searx/webutils.py | 6 | ||||
-rw-r--r-- | tests/unit/test_webutils.py | 5 |
2 files changed, 7 insertions, 4 deletions
diff --git a/searx/webutils.py b/searx/webutils.py index bfc6b22f7..8cdcab84b 100644 --- a/searx/webutils.py +++ b/searx/webutils.py @@ -290,10 +290,8 @@ def highlight_content(content, query): if len(qs) > 0: queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U)) if len(queries) > 0: - for q in set(queries): - content = re.sub( - regex_highlight_cjk(q), f'<span class="highlight">{q}</span>'.replace('\\', r'\\'), content - ) + regex = re.compile("|".join(map(regex_highlight_cjk, queries))) + return regex.sub(lambda match: f'<span class="highlight">{match.group(0)}</span>'.replace('\\', r'\\'), content) return content diff --git a/tests/unit/test_webutils.py b/tests/unit/test_webutils.py index 244d2b180..b4395539b 100644 --- a/tests/unit/test_webutils.py +++ b/tests/unit/test_webutils.py @@ -57,6 +57,11 @@ class TestWebUtils(SearxTestCase): ] ), ), + ( + 'a class', + 'a string with class.', + '<span class="highlight">a</span> string with <span class="highlight">class</span>.', + ), ) for query, content, expected in data: self.assertEqual(webutils.highlight_content(content, query), expected) |