summary | refs | log | tree | commit | diff
path: root/searx/utils.py
diff options
context:
space:
mode:
author Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2015-01-30 21:00:49 +0100
committer Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2015-01-30 21:00:49 +0100
commit 52a57ee045e02844a8f650a9d3ae30e0092d86cd (patch)
tree 9662062955faff6ac069039bf614a4c2e427cc8e /searx/utils.py
parent a3d444ab85dbb85dc3200c686ec3323dbb7008cb (diff)
download searxng-52a57ee045e02844a8f650a9d3ae30e0092d86cd.tar.gz
searxng-52a57ee045e02844a8f650a9d3ae30e0092d86cd.zip
Replace every bunch of whitespaces with only one space in HTML text
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- searx/utils.py | 2
1 files changed, 2 insertions, 0 deletions
diff --git a/searx/utils.py b/searx/utils.py
index 59d4b85be..ef221ef8e 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -119,6 +119,8 @@ class HTMLTextExtractor(HTMLParser):
def html_to_text(html):
+ html = html.replace('\n', ' ')
+ html = ' '.join(html.split())
s = HTMLTextExtractor()
s.feed(html)
return s.get_text()