diff options
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- | searx/utils.py | 6 |
1 files changed, 5 insertions, 1 deletions
diff --git a/searx/utils.py b/searx/utils.py index 416055dfa..4b8cb615c 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -5,10 +5,12 @@ import codecs import cStringIO import re + def gen_useragent(): # TODO return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0" + def highlight_content(content, query): if not content: @@ -34,10 +36,11 @@ def highlight_content(content, query): return content + class HTMLTextExtractor(HTMLParser): def __init__(self): HTMLParser.__init__(self) - self.result = [ ] + self.result = [] def handle_data(self, d): self.result.append(d) @@ -54,6 +57,7 @@ class HTMLTextExtractor(HTMLParser): def get_text(self): return u''.join(self.result) + def html_to_text(html): s = HTMLTextExtractor() s.feed(html) |