summary | refs | log | tree | commit | diff
path: root/searx
diff options
context:
space:
mode:
author Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2015-01-27 20:03:33 +0100
committer Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2015-01-27 20:03:33 +0100
commit cfe81d741cdd2517c4587071e4afbdd0adb923bd (patch)
tree a13448cf953d7d7e3641349adedc80ccbfbe1d77 /searx
parent 4dba3739fb3b98572cbd51adab226376b5844105 (diff)
download searxng-cfe81d741cdd2517c4587071e4afbdd0adb923bd.tar.gz
searxng-cfe81d741cdd2517c4587071e4afbdd0adb923bd.zip
A bit of utils unit tests
Diffstat (limited to 'searx')
-rw-r--r-- searx/tests/test_utils.py 22
-rw-r--r-- searx/utils.py 2
2 files changed, 23 insertions, 1 deletion
diff --git a/searx/tests/test_utils.py b/searx/tests/test_utils.py
index 817fd4372..abe411c2b 100644
--- a/searx/tests/test_utils.py
+++ b/searx/tests/test_utils.py
@@ -10,6 +10,11 @@ class TestUtils(SearxTestCase):
self.assertIsNotNone(utils.gen_useragent())
self.assertTrue(utils.gen_useragent().startswith('Mozilla'))
+ def test_searx_useragent(self):
+ self.assertIsInstance(utils.searx_useragent(), str)
+ self.assertIsNotNone(utils.searx_useragent())
+ self.assertTrue(utils.searx_useragent().startswith('searx'))
+
def test_highlight_content(self):
self.assertEqual(utils.highlight_content(0, None), None)
self.assertEqual(utils.highlight_content(None, None), None)
@@ -29,6 +34,23 @@ class TestUtils(SearxTestCase):
query = 'a test'
self.assertEqual(utils.highlight_content(content, query), content)
+ def test_html_to_text(self):
+ html = """
+ <a href="/testlink" class="link_access_account">
+ <span class="toto">
+ <span>
+ <img src="test.jpg" />
+ </span>
+ </span>
+ <span class="titi">
+ Test text
+ </span>
+ </a>
+ """
+ self.assertIsInstance(utils.html_to_text(html), unicode)
+ self.assertIsNotNone(utils.html_to_text(html))
+ self.assertEqual(utils.html_to_text(html), "Test text")
+
class TestHTMLTextExtractor(SearxTestCase):
diff --git a/searx/utils.py b/searx/utils.py
index f15f8a4bc..59d4b85be 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -115,7 +115,7 @@ class HTMLTextExtractor(HTMLParser):
self.result.append(name)
def get_text(self):
- return u''.join(self.result)
+ return u''.join(self.result).strip()
def html_to_text(html):