diff options
author | ahmad-alkadri <ahmad.alkadri@outlook.com> | 2023-01-15 15:08:11 +0000 |
---|---|---|
committer | Ahmad Alkadri <ahmad.alkadri@outlook.com> | 2023-01-15 16:51:31 +0100 |
commit | 99b5272d9a17ffd813fc8c0b2f3cae3201d2398e (patch) | |
tree | 1742000ca4b3ac8118bb227139dacb704f9e7800 /searx | |
parent | 6c421110b57c695e9c0a0d9212bc271d701d17da (diff) | |
download | searxng-99b5272d9a17ffd813fc8c0b2f3cae3201d2398e.tar.gz searxng-99b5272d9a17ffd813fc8c0b2f3cae3201d2398e.zip |
A small fix, plus modified testing for content highlighting
Diffstat (limited to 'searx')
-rw-r--r-- | searx/webutils.py | 19 |
1 files changed, 11 insertions, 8 deletions
diff --git a/searx/webutils.py b/searx/webutils.py
index 150b376fa..7b9a8045c 100644
--- a/searx/webutils.py
+++ b/searx/webutils.py
@@ -124,13 +124,14 @@ def contains_cjko(s: str) -> bool:
     Returns:
         bool: True if the input s contains the characters and False otherwise.
     """
-    unicode_ranges = ('\u4e00-\u9fff' # Chinese characters
-                      '\u3040-\u309f' # Japanese hiragana
-                      '\u30a0-\u30ff' # Japanese katakana
-                      '\u4e00-\u9faf' # Japanese kanji
-                      '\uac00-\ud7af' # Korean hangul syllables
-                      '\u1100-\u11ff' # Korean hangul jamo
-                      )
+    unicode_ranges = (
+        '\u4e00-\u9fff' # Chinese characters
+        '\u3040-\u309f' # Japanese hiragana
+        '\u30a0-\u30ff' # Japanese katakana
+        '\u4e00-\u9faf' # Japanese kanji
+        '\uac00-\ud7af' # Korean hangul syllables
+        '\u1100-\u11ff' # Korean hangul jamo
+    )
     return bool(re.search(fr'[{unicode_ranges}]', s))
 
 
@@ -168,7 +169,9 @@ def highlight_content(content, query):
     querysplit = query.split()
     queries = []
     for qs in querysplit:
-        queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U))
+        qs = qs.replace("'", "").replace('"', '').replace(" ", "")
+        if len(qs) > 0:
+            queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U))
     if len(queries) > 0:
         for q in set(queries):
             content = re.sub(regex_highlight_cjk(q), f'<span class="highlight">{q}</span>', content)