summary | refs | log | tree | commit | diff
path: root/searx/utils.py
diff options
context:
space:
mode:
author: Matej Cotman <cotman.matej@gmail.com> 2014-01-19 22:59:01 +0100
committer: Matej Cotman <cotman.matej@gmail.com> 2014-01-20 01:06:29 +0100
commit: dd4662978dd74c0dce089790689fe0a8a4f9bb16 (patch)
tree: f10d4c2cff38a66c01fe763ee666361ec6975581 /searx/utils.py
parent: b7fa79081f3c7c9ce2974c406e07b1e48cb9534a (diff)
download: searxng-dd4662978dd74c0dce089790689fe0a8a4f9bb16.tar.gz
download: searxng-dd4662978dd74c0dce089790689fe0a8a4f9bb16.zip
fix: robot fw, entry points, some flake8, package searx egg
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- searx/utils.py 6
1 files changed, 5 insertions, 1 deletions
diff --git a/searx/utils.py b/searx/utils.py
index 416055dfa..4b8cb615c 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -5,10 +5,12 @@ import codecs
import cStringIO
import re
+
def gen_useragent():
# TODO
return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
+
def highlight_content(content, query):
if not content:
@@ -34,10 +36,11 @@ def highlight_content(content, query):
return content
+
class HTMLTextExtractor(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
- self.result = [ ]
+ self.result = []
def handle_data(self, d):
self.result.append(d)
@@ -54,6 +57,7 @@ class HTMLTextExtractor(HTMLParser):
def get_text(self):
return u''.join(self.result)
+
def html_to_text(html):
s = HTMLTextExtractor()
s.feed(html)