summary refs log tree commit diff
path: root/searx/utils.py
diff options
context:
space:
mode:
author Adam Tauber <adam.tauber@balabit.com> 2015-02-02 09:36:43 +0100
committer Adam Tauber <adam.tauber@balabit.com> 2015-02-02 09:36:43 +0100
commit 7f865356f9a6c1b40d0c668c59b3d081de618bac (patch)
tree 60e9acb27577968a41136c04f248c24871e83860 /searx/utils.py
parent 03137eebd9fdfaa57452cb364c1bc9f31b243f67 (diff)
parent 5a16077455ef9e821a2b5f5f7e975be8a37ce83d (diff)
download searxng-7f865356f9a6c1b40d0c668c59b3d081de618bac.tar.gz
searxng-7f865356f9a6c1b40d0c668c59b3d081de618bac.zip
Merge branch 'unit-tests' of https://github.com/Cqoicebordel/searx into Cqoicebordel-unit-tests
Conflicts: searx/tests/test_engines.py
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- searx/utils.py 4
1 files changed, 3 insertions, 1 deletions
diff --git a/searx/utils.py b/searx/utils.py
index c47d3be17..c0afc94cb 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -115,10 +115,12 @@ class HTMLTextExtractor(HTMLParser):
self.result.append(name)
def get_text(self):
- return u''.join(self.result)
+ return u''.join(self.result).strip()
def html_to_text(html):
+ html = html.replace('\n', ' ')
+ html = ' '.join(html.split())
s = HTMLTextExtractor()
s.feed(html)
return s.get_text()