summary | refs | log | tree | commit | diff
path: root/tests/unit/test_utils.py
diff options
context:
space:
mode:
author Alexandre Flament <alex@al-f.net> 2020-09-11 10:23:56 +0200
committer Alexandre Flament <alex@al-f.net> 2020-09-13 10:28:11 +0200
commit 6deb85072ad00b85d2b3c1981c37aeb75ef68cc7 (patch)
tree a167d983a0006b1dddea5b7e6025d18b772dfc10 /tests/unit/test_utils.py
parent ae07f4a211ecba0331bcab5903e3263c646f8bdb (diff)
download searxng-6deb85072ad00b85d2b3c1981c37aeb75ef68cc7.tar.gz
searxng-6deb85072ad00b85d2b3c1981c37aeb75ef68cc7.zip
[fix] searx.utils.HTMLTextExtractor: invalid HTML doesn't raise an Exception
Close #2188
Diffstat (limited to 'tests/unit/test_utils.py')
-rw-r--r-- tests/unit/test_utils.py 9
1 files changed, 9 insertions, 0 deletions
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 5f98511c3..08b759542 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -52,6 +52,10 @@ class TestUtils(SearxTestCase):
self.assertIsNotNone(utils.html_to_text(html))
self.assertEqual(utils.html_to_text(html), "Test text")
+ def test_html_to_text_invalid(self):
+ html = '<p><b>Lorem ipsum</i>dolor sit amet</p>'
+ self.assertEqual(utils.html_to_text(html), "Lorem ipsum")
+
def test_prettify_url(self):
data = (('https://searx.me/', 'https://searx.me/'),
('https://searx.me/ű', 'https://searx.me/ű'),
@@ -116,6 +120,11 @@ class TestHTMLTextExtractor(SearxTestCase):
self.html_text_extractor.handle_entityref(entity)
self.assertIn(entity, self.html_text_extractor.result)
+ def test_invalid_html(self):
+ text = '<p><b>Lorem ipsum</i>dolor sit amet</p>'
+ with self.assertRaises(utils.HTMLTextExtractorException):
+ self.html_text_extractor.feed(text)
+
class TestUnicodeWriter(SearxTestCase):