diff options
author | Matej Cotman <cotman.matej@gmail.com> | 2014-01-19 22:59:01 +0100 |
---|---|---|
committer | Matej Cotman <cotman.matej@gmail.com> | 2014-01-20 01:06:29 +0100 |
commit | dd4662978dd74c0dce089790689fe0a8a4f9bb16 (patch) | |
tree | f10d4c2cff38a66c01fe763ee666361ec6975581 /searx/utils.py | |
parent | b7fa79081f3c7c9ce2974c406e07b1e48cb9534a (diff) | |
download | searxng-dd4662978dd74c0dce089790689fe0a8a4f9bb16.tar.gz searxng-dd4662978dd74c0dce089790689fe0a8a4f9bb16.zip |
fix: robot fw, entry points, some flake8, package searx egg
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- | searx/utils.py | 6 |
1 files changed, 5 insertions, 1 deletions
diff --git a/searx/utils.py b/searx/utils.py index 416055dfa..4b8cb615c 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -5,10 +5,12 @@ import codecs import cStringIO import re + def gen_useragent(): # TODO return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0" + def highlight_content(content, query): if not content: @@ -34,10 +36,11 @@ def highlight_content(content, query): return content + class HTMLTextExtractor(HTMLParser): def __init__(self): HTMLParser.__init__(self) - self.result = [ ] + self.result = [] def handle_data(self, d): self.result.append(d) @@ -54,6 +57,7 @@ class HTMLTextExtractor(HTMLParser): def get_text(self): return u''.join(self.result) + def html_to_text(html): s = HTMLTextExtractor() s.feed(html) |