summary | refs | log | tree | commit | diff
path: root/searx/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- searx/utils.py | 6
1 files changed, 5 insertions, 1 deletions
diff --git a/searx/utils.py b/searx/utils.py
index 416055dfa..4b8cb615c 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -5,10 +5,12 @@ import codecs
import cStringIO
import re
+
def gen_useragent():
    """Return a browser User-Agent string.

    The value is a fixed Firefox 26 (Linux x86_64) identifier; every call
    returns the same string.
    """
    # TODO (carried over from the original; presumably the value should be
    # generated rather than hard-coded -- confirm with the author)
    user_agent = (
        "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) "
        "Gecko/20100101 Firefox/26.0"
    )
    return user_agent
+
def highlight_content(content, query):
if not content:
@@ -34,10 +36,11 @@ def highlight_content(content, query):
return content
+
class HTMLTextExtractor(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
- self.result = [ ]
+ self.result = []
def handle_data(self, d):
self.result.append(d)
@@ -54,6 +57,7 @@ class HTMLTextExtractor(HTMLParser):
def get_text(self):
return u''.join(self.result)
+
def html_to_text(html):
s = HTMLTextExtractor()
s.feed(html)