[fix] highlighting only html

author: asciimoo <asciimoo@gmail.com> 2014-01-10 23:38:08 +0100
committer: asciimoo <asciimoo@gmail.com> 2014-01-10 23:38:08 +0100
commit: 7b4ec5c5e9a89fc1bc3b3fc8dfad26450530a2da (patch)
tree: d7d83df0a8910bea8aae6100749f8009b2c7c740 /searx/utils.py
parent: 04c408389d3d1a97a6a4b59502490372d67357cf (diff)
download: searxng-7b4ec5c5e9a89fc1bc3b3fc8dfad26450530a2da.tar.gz
searxng-7b4ec5c5e9a89fc1bc3b3fc8dfad26450530a2da.zip
1 files changed, 26 insertions, 0 deletions
diff --git a/searx/utils.py b/searx/utils.py
index 670499805..53300181f 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -3,6 +3,32 @@ from HTMLParser import HTMLParser
 import csv
 import codecs
 import cStringIO
+import re
+
+def highlight_content(content, query):
+    """Wrap occurrences of ``query`` inside ``content`` in ``<b>...</b>`` tags.
+
+    Matching is case-insensitive (``re.I | re.U``).  If the whole query
+    occurs in the content it is highlighted as one phrase; otherwise each
+    whitespace-separated chunk of the query is highlighted on its own.
+
+    :param content: text to highlight; assumed to be a unicode string
+                    -- TODO confirm against callers.
+    :param query: search query; ``.decode('utf-8')`` is called on it, so it
+                  is presumably a UTF-8 encoded byte string -- verify
+                  against callers.
+    :returns: ``None`` when ``content`` is falsy, ``content`` unchanged when
+              it contains a ``<`` character, otherwise ``content`` with the
+              matches wrapped in ``<b>`` tags.
+
+    NOTE(review): an empty query, or a whitespace-only query that does not
+    occur in the content, yields the pattern ``()``.  That looks like it
+    matches the empty string at every position, so ``<b></b>`` would be
+    inserted throughout the text -- confirm and guard if so.
+    """
+
+    # Falsy content (None, empty string) yields None rather than the input.
+    if not content:
+        return None
+    # Content containing '<' is treated as HTML and returned untouched, so
+    # that <b> tags are never injected into existing markup.
+    # TODO better html content detection
+    if content.find('<') != -1:
+        return content
+
+    query = query.decode('utf-8')
+    if content.lower().find(query.lower()) > -1:
+        # The full query appears in the content: highlight it as a phrase.
+        # re.escape makes the query match literally.
+        query_regex = u'({0})'.format(re.escape(query))
+        content = re.sub(query_regex, '<b>\\1</b>', content, flags=re.I | re.U)
+    else:
+        # No phrase match: build an alternation of the individual chunks.
+        regex_parts = []
+        for chunk in query.split():
+            if len(chunk) == 1:
+                # A single character only matches when it has non-word
+                # characters on both sides; those neighbours are part of the
+                # match and therefore end up inside the <b> tag too.
+                # '\W' is not a recognised string escape, so the backslash
+                # reaches the regex engine; a raw string would be clearer.
+                regex_parts.append(u'\W+{0}\W+'.format(re.escape(chunk)))
+            else:
+                regex_parts.append(u'{0}'.format(re.escape(chunk)))
+        # One capture group around the whole alternation so that \1 is
+        # whichever alternative matched.
+        query_regex = u'({0})'.format('|'.join(regex_parts))
+        content = re.sub(query_regex, '<b>\\1</b>', content, flags=re.I | re.U)
+
+    return content
 
 class HTMLTextExtractor(HTMLParser):
     def __init__(self):
author	asciimoo <asciimoo@gmail.com>	2014-01-10 23:38:08 +0100
committer	asciimoo <asciimoo@gmail.com>	2014-01-10 23:38:08 +0100
commit	7b4ec5c5e9a89fc1bc3b3fc8dfad26450530a2da (patch)
tree	d7d83df0a8910bea8aae6100749f8009b2c7c740 /searx/utils.py
parent	04c408389d3d1a97a6a4b59502490372d67357cf (diff)
download	searxng-7b4ec5c5e9a89fc1bc3b3fc8dfad26450530a2da.tar.gz searxng-7b4ec5c5e9a89fc1bc3b3fc8dfad26450530a2da.zip