diff options
author | asciimoo <asciimoo@gmail.com> | 2013-11-10 21:41:01 +0100 |
---|---|---|
committer | asciimoo <asciimoo@gmail.com> | 2013-11-10 21:41:01 +0100 |
commit | cbb397939db355fb53da6bcf6bd8a2627f7a9584 (patch) | |
tree | 6e876fa1a4abb2d5b564033f32d07191b05651a4 | |
parent | 14cd1d6faf16484a42fc040a81d381b20c20e282 (diff) | |
download | searxng-cbb397939db355fb53da6bcf6bd8a2627f7a9584.tar.gz searxng-cbb397939db355fb53da6bcf6bd8a2627f7a9584.zip |
[enh] case-insensitive query highlighting
-rw-r--r-- | searx/engines/__init__.py | 14 |
1 files changed, 12 insertions, 2 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 2abf1610c..c17a53f46 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -25,6 +25,7 @@ from urlparse import urlparse from searx import settings import ConfigParser import sys +import re from datetime import datetime engine_dir = dirname(realpath(__file__)) @@ -106,8 +107,17 @@ def highlight_content(content, query): # TODO better html content detection if content.find('<') != -1: return content - for chunk in query.split(): - content = content.replace(chunk, '<b>{0}</b>'.format(chunk)) + + if content.lower().find(query.lower()) > -1: + query_regex = '({0})'.format(re.escape(query)) + content = re.sub(query_regex, '<b>\\1</b>', content, flags=re.I) + else: + for chunk in query.split(): + if len(chunk) == 1: + query_regex = '(\W+{0}\W+)'.format(re.escape(chunk)) + else: + query_regex = '({0})'.format(re.escape(chunk)) + content = re.sub(query_regex, '<b>\\1</b>', content, flags=re.I) return content |