diff options
author | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-01-30 21:00:49 +0100 |
---|---|---|
committer | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-01-30 21:00:49 +0100 |
commit | 52a57ee045e02844a8f650a9d3ae30e0092d86cd (patch) | |
tree | 9662062955faff6ac069039bf614a4c2e427cc8e /searx/utils.py | |
parent | a3d444ab85dbb85dc3200c686ec3323dbb7008cb (diff) | |
download | searxng-52a57ee045e02844a8f650a9d3ae30e0092d86cd.tar.gz searxng-52a57ee045e02844a8f650a9d3ae30e0092d86cd.zip |
Replace every bunch of whitespaces with only one space in HTML text
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- | searx/utils.py | 2 |
1 files changed, 2 insertions, 0 deletions
```diff
diff --git a/searx/utils.py b/searx/utils.py
index 59d4b85be..ef221ef8e 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -119,6 +119,8 @@ class HTMLTextExtractor(HTMLParser):
 
 
 def html_to_text(html):
+    html = html.replace('\n', ' ')
+    html = ' '.join(html.split())
     s = HTMLTextExtractor()
     s.feed(html)
     return s.get_text()
```