diff options
-rw-r--r-- | searx/webutils.py | 19 | ||||
-rw-r--r-- | tests/unit/test_webutils.py | 19 |
2 files changed, 21 insertions, 17 deletions
diff --git a/searx/webutils.py b/searx/webutils.py index 150b376fa..7b9a8045c 100644 --- a/searx/webutils.py +++ b/searx/webutils.py @@ -124,13 +124,14 @@ def contains_cjko(s: str) -> bool: Returns: bool: True if the input s contains the characters and False otherwise. """ - unicode_ranges = ('\u4e00-\u9fff' # Chinese characters - '\u3040-\u309f' # Japanese hiragana - '\u30a0-\u30ff' # Japanese katakana - '\u4e00-\u9faf' # Japanese kanji - '\uac00-\ud7af' # Korean hangul syllables - '\u1100-\u11ff' # Korean hangul jamo - ) + unicode_ranges = ( + '\u4e00-\u9fff' # Chinese characters + '\u3040-\u309f' # Japanese hiragana + '\u30a0-\u30ff' # Japanese katakana + '\u4e00-\u9faf' # Japanese kanji + '\uac00-\ud7af' # Korean hangul syllables + '\u1100-\u11ff' # Korean hangul jamo + ) return bool(re.search(fr'[{unicode_ranges}]', s)) @@ -168,7 +169,9 @@ def highlight_content(content, query): querysplit = query.split() queries = [] for qs in querysplit: - queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U)) + qs = qs.replace("'", "").replace('"', '').replace(" ", "") + if len(qs) > 0: + queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U)) if len(queries) > 0: for q in set(queries): content = re.sub(regex_highlight_cjk(q), f'<span class="highlight">{q}</span>', content) diff --git a/tests/unit/test_webutils.py b/tests/unit/test_webutils.py index 31a0f86ce..acf1aeeb7 100644 --- a/tests/unit/test_webutils.py +++ b/tests/unit/test_webutils.py @@ -28,32 +28,33 @@ class TestWebUtils(SearxTestCase): content = 'a' query = 'test' - self.assertEqual(webutils.highlight_content(content, query), content) + self.assertEqual(webutils.highlight_content(content, query), 'a') query = 'a test' - self.assertEqual(webutils.highlight_content(content, query), content) + self.assertEqual(webutils.highlight_content(content, query), '<span class="highlight">a</span>') data = ( ('" test "', 'a test string', 'a <span class="highlight">test</span> string'), - ('"a"', 'this is a test string', 'this is<span class="highlight"> a </span>test string'), + ('"a"', 'this is a test string', 'this is <span class="highlight">a</span> test string'), ( 'a test', 'this is a test string that matches entire query', - 'this is <span class="highlight">a test</span> string that matches entire query', + 'this is <span class="highlight">a</span> <span class="highlight">test</span> string that matches entire query', ), ( 'this a test', 'this is a string to test.', ( - '<span class="highlight">this</span> is<span class="highlight"> a </span>' - 'string to <span class="highlight">test</span>.' + '<span class="highlight">this</span> is <span class="highlight">a</span> string to <span class="highlight">test</span>.' ), ), ( 'match this "exact phrase"', 'this string contains the exact phrase we want to match', - ( - '<span class="highlight">this</span> string contains the <span class="highlight">exact</span>' - ' <span class="highlight">phrase</span> we want to <span class="highlight">match</span>' + ''.join( + [ + '<span class="highlight">this</span> string contains the <span class="highlight">exact</span> ', + '<span class="highlight">phrase</span> we want to <span class="highlight">match</span>', + ] ), ), ) |