From 1d0c368746e0ae28ea042edaf4c75ee3a2b738c2 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Thu, 26 Nov 2020 15:12:11 +0100 Subject: [enh] record details exception per engine add an new API /stats/errors --- tests/unit/test_utils.py | 90 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'tests') diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index f3a98ad71..2c244966b 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -3,6 +3,7 @@ import lxml.etree from lxml import html from searx.testing import SearxTestCase +from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException from searx import utils @@ -57,8 +58,16 @@ class TestUtils(SearxTestCase): dom = html.fromstring(html_str) self.assertEqual(utils.extract_text(dom), 'Test text') self.assertEqual(utils.extract_text(dom.xpath('//span')), 'Test text') + self.assertEqual(utils.extract_text(dom.xpath('//span/text()')), 'Test text') + self.assertEqual(utils.extract_text(dom.xpath('count(//span)')), '3.0') + self.assertEqual(utils.extract_text(dom.xpath('boolean(//span)')), 'True') self.assertEqual(utils.extract_text(dom.xpath('//img/@src')), 'test.jpg') self.assertEqual(utils.extract_text(dom.xpath('//unexistingtag')), '') + self.assertEqual(utils.extract_text(None, allow_none=True), None) + with self.assertRaises(ValueError): + utils.extract_text(None) + with self.assertRaises(ValueError): + utils.extract_text({}) def test_extract_url(self): def f(html_str, search_url): @@ -136,3 +145,84 @@ class TestHTMLTextExtractor(SearxTestCase): text = '

<p><b>Lorem ipsum</i>dolor sit amet</p>

' with self.assertRaises(utils.HTMLTextExtractorException): self.html_text_extractor.feed(text) + + +class TestXPathUtils(SearxTestCase): + + TEST_DOC = """""" + + def test_get_xpath_cache(self): + xp1 = utils.get_xpath('//a') + xp2 = utils.get_xpath('//div') + xp3 = utils.get_xpath('//a') + + self.assertEqual(id(xp1), id(xp3)) + self.assertNotEqual(id(xp1), id(xp2)) + + def test_get_xpath_type(self): + utils.get_xpath(lxml.etree.XPath('//a')) + + with self.assertRaises(TypeError): + utils.get_xpath([]) + + def test_get_xpath_invalid(self): + invalid_xpath = '//a[0].text' + with self.assertRaises(SearxXPathSyntaxException) as context: + utils.get_xpath(invalid_xpath) + + self.assertEqual(context.exception.message, 'Invalid expression') + self.assertEqual(context.exception.xpath_str, invalid_xpath) + + def test_eval_xpath_unregistered_function(self): + doc = html.fromstring(TestXPathUtils.TEST_DOC) + + invalid_function_xpath = 'int(//a)' + with self.assertRaises(SearxEngineXPathException) as context: + utils.eval_xpath(doc, invalid_function_xpath) + + self.assertEqual(context.exception.message, 'Unregistered function') + self.assertEqual(context.exception.xpath_str, invalid_function_xpath) + + def test_eval_xpath(self): + doc = html.fromstring(TestXPathUtils.TEST_DOC) + + self.assertEqual(utils.eval_xpath(doc, '//p'), []) + self.assertEqual(utils.eval_xpath(doc, '//i/text()'), ['italic']) + self.assertEqual(utils.eval_xpath(doc, 'count(//i)'), 1.0) + + def test_eval_xpath_list(self): + doc = html.fromstring(TestXPathUtils.TEST_DOC) + + # check a not empty list + self.assertEqual(utils.eval_xpath_list(doc, '//i/text()'), ['italic']) + + # check min_len parameter + with self.assertRaises(SearxEngineXPathException) as context: + utils.eval_xpath_list(doc, '//p', min_len=1) + self.assertEqual(context.exception.message, 'len(xpath_str) < 1') + self.assertEqual(context.exception.xpath_str, '//p') + + def test_eval_xpath_getindex(self): + doc = 
html.fromstring(TestXPathUtils.TEST_DOC) + + # check index 0 + self.assertEqual(utils.eval_xpath_getindex(doc, '//i/text()', 0), 'italic') + + # default is 'something' + self.assertEqual(utils.eval_xpath_getindex(doc, '//i/text()', 1, default='something'), 'something') + + # default is None + self.assertEqual(utils.eval_xpath_getindex(doc, '//i/text()', 1, default=None), None) + + # index not found + with self.assertRaises(SearxEngineXPathException) as context: + utils.eval_xpath_getindex(doc, '//i/text()', 1) + self.assertEqual(context.exception.message, 'index 1 not found') + + # not a list + with self.assertRaises(SearxEngineXPathException) as context: + utils.eval_xpath_getindex(doc, 'count(//i)', 1) + self.assertEqual(context.exception.message, 'the result is not a list') -- cgit v1.2.3-54-g00ecf