summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author Alexandre Flament <alex@al-f.net> 2020-11-26 15:12:11 +0100
committer Alexandre Flament <alex@al-f.net> 2020-12-03 10:22:48 +0100
commit 1d0c368746e0ae28ea042edaf4c75ee3a2b738c2 (patch)
tree 8a277759920f97677510e0e72cc0f16d84817f11 /tests
parent 6b5a57882242f24f867b6aa14b79b514720c6d83 (diff)
download searxng-1d0c368746e0ae28ea042edaf4c75ee3a2b738c2.tar.gz
searxng-1d0c368746e0ae28ea042edaf4c75ee3a2b738c2.zip
[enh] record exception details per engine
add a new API /stats/errors
Diffstat (limited to 'tests')
-rw-r--r-- tests/unit/test_utils.py 90
1 files changed, 90 insertions, 0 deletions
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index f3a98ad71..2c244966b 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -3,6 +3,7 @@ import lxml.etree
from lxml import html
from searx.testing import SearxTestCase
+from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException
from searx import utils
@@ -57,8 +58,16 @@ class TestUtils(SearxTestCase):
dom = html.fromstring(html_str)
self.assertEqual(utils.extract_text(dom), 'Test text')
self.assertEqual(utils.extract_text(dom.xpath('//span')), 'Test text')
+ self.assertEqual(utils.extract_text(dom.xpath('//span/text()')), 'Test text')
+ self.assertEqual(utils.extract_text(dom.xpath('count(//span)')), '3.0')
+ self.assertEqual(utils.extract_text(dom.xpath('boolean(//span)')), 'True')
self.assertEqual(utils.extract_text(dom.xpath('//img/@src')), 'test.jpg')
self.assertEqual(utils.extract_text(dom.xpath('//unexistingtag')), '')
+ self.assertEqual(utils.extract_text(None, allow_none=True), None)
+ with self.assertRaises(ValueError):
+ utils.extract_text(None)
+ with self.assertRaises(ValueError):
+ utils.extract_text({})
def test_extract_url(self):
def f(html_str, search_url):
@@ -136,3 +145,84 @@ class TestHTMLTextExtractor(SearxTestCase):
text = '<p><b>Lorem ipsum</i>dolor sit amet</p>'
with self.assertRaises(utils.HTMLTextExtractorException):
self.html_text_extractor.feed(text)
+
+
+class TestXPathUtils(SearxTestCase):
+
+ TEST_DOC = """<ul>
+ <li>Text in <b>bold</b> and <i>italic</i> </li>
+ <li>Another <b>text</b> <img src="data:image/gif;base64,R0lGODlhAQABAIAAAAUEBAAAACwAAAAAAQABAAACAkQBADs="></li>
+ </ul>"""
+
+ def test_get_xpath_cache(self):
+ xp1 = utils.get_xpath('//a')
+ xp2 = utils.get_xpath('//div')
+ xp3 = utils.get_xpath('//a')
+
+ self.assertEqual(id(xp1), id(xp3))
+ self.assertNotEqual(id(xp1), id(xp2))
+
+ def test_get_xpath_type(self):
+ utils.get_xpath(lxml.etree.XPath('//a'))
+
+ with self.assertRaises(TypeError):
+ utils.get_xpath([])
+
+ def test_get_xpath_invalid(self):
+ invalid_xpath = '//a[0].text'
+ with self.assertRaises(SearxXPathSyntaxException) as context:
+ utils.get_xpath(invalid_xpath)
+
+ self.assertEqual(context.exception.message, 'Invalid expression')
+ self.assertEqual(context.exception.xpath_str, invalid_xpath)
+
+ def test_eval_xpath_unregistered_function(self):
+ doc = html.fromstring(TestXPathUtils.TEST_DOC)
+
+ invalid_function_xpath = 'int(//a)'
+ with self.assertRaises(SearxEngineXPathException) as context:
+ utils.eval_xpath(doc, invalid_function_xpath)
+
+ self.assertEqual(context.exception.message, 'Unregistered function')
+ self.assertEqual(context.exception.xpath_str, invalid_function_xpath)
+
+ def test_eval_xpath(self):
+ doc = html.fromstring(TestXPathUtils.TEST_DOC)
+
+ self.assertEqual(utils.eval_xpath(doc, '//p'), [])
+ self.assertEqual(utils.eval_xpath(doc, '//i/text()'), ['italic'])
+ self.assertEqual(utils.eval_xpath(doc, 'count(//i)'), 1.0)
+
+ def test_eval_xpath_list(self):
+ doc = html.fromstring(TestXPathUtils.TEST_DOC)
+
+ # check a non-empty list
+ self.assertEqual(utils.eval_xpath_list(doc, '//i/text()'), ['italic'])
+
+ # check min_len parameter
+ with self.assertRaises(SearxEngineXPathException) as context:
+ utils.eval_xpath_list(doc, '//p', min_len=1)
+ self.assertEqual(context.exception.message, 'len(xpath_str) < 1')
+ self.assertEqual(context.exception.xpath_str, '//p')
+
+ def test_eval_xpath_getindex(self):
+ doc = html.fromstring(TestXPathUtils.TEST_DOC)
+
+ # check index 0
+ self.assertEqual(utils.eval_xpath_getindex(doc, '//i/text()', 0), 'italic')
+
+ # default is 'something'
+ self.assertEqual(utils.eval_xpath_getindex(doc, '//i/text()', 1, default='something'), 'something')
+
+ # default is None
+ self.assertEqual(utils.eval_xpath_getindex(doc, '//i/text()', 1, default=None), None)
+
+ # index not found
+ with self.assertRaises(SearxEngineXPathException) as context:
+ utils.eval_xpath_getindex(doc, '//i/text()', 1)
+ self.assertEqual(context.exception.message, 'index 1 not found')
+
+ # not a list
+ with self.assertRaises(SearxEngineXPathException) as context:
+ utils.eval_xpath_getindex(doc, 'count(//i)', 1)
+ self.assertEqual(context.exception.message, 'the result is not a list')