diff options
Diffstat (limited to 'tests/unit/engines/test_wikidata.py')
-rw-r--r-- | tests/unit/engines/test_wikidata.py | 514 |
1 files changed, 0 insertions, 514 deletions
diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py deleted file mode 100644 index 48be17bb4..000000000 --- a/tests/unit/engines/test_wikidata.py +++ /dev/null @@ -1,514 +0,0 @@ -# -*- coding: utf-8 -*- -from lxml.html import fromstring -from lxml import etree -from collections import defaultdict -import mock -from searx.engines import wikidata -from searx.testing import SearxTestCase - - -class TestWikidataEngine(SearxTestCase): - - def test_request(self): - query = 'test_query' - dicto = defaultdict(dict) - dicto['language'] = 'all' - params = wikidata.request(query, dicto) - self.assertIn('url', params) - self.assertIn(query, params['url']) - self.assertIn('wikidata.org', params['url']) - - dicto['language'] = 'es_ES' - params = wikidata.request(query, dicto) - self.assertIn(query, params['url']) - - # successful cases are not tested here to avoid sending additional requests - def test_response(self): - self.assertRaises(AttributeError, wikidata.response, None) - self.assertRaises(AttributeError, wikidata.response, []) - self.assertRaises(AttributeError, wikidata.response, '') - self.assertRaises(AttributeError, wikidata.response, '[]') - - wikidata.supported_languages = ['en', 'es'] - wikidata.language_aliases = {} - response = mock.Mock(content='<html></html>'.encode("utf-8"), search_params={"language": "en"}) - self.assertEqual(wikidata.response(response), []) - - def test_getDetail(self): - response = {} - results = wikidata.getDetail(response, "Q123", "en", "en-US", etree.HTMLParser()) - self.assertEqual(results, []) - - title_html = '<div><div class="wikibase-title-label">Test</div></div>' - html = """ - <div> - <div class="wikibase-entitytermsview-heading-description"> - </div> - <div> - <ul class="wikibase-sitelinklistview-listview"> - <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li> - </ul> - </div> - </div> - """ - response = {"parse": {"displaytitle": title_html, "text": html}} - - results = wikidata.getDetail(response, "Q123", "en", "en-US", etree.HTMLParser()) - self.assertEqual(len(results), 1) - self.assertEqual(results[0]['url'], 'https://en.wikipedia.org/wiki/Test') - - title_html = """ - <div> - <div class="wikibase-title-label"> - <span lang="en">Test</span> - <sup class="wb-language-fallback-indicator">English</sup> - </div> - </div> - """ - html = """ - <div> - <div class="wikibase-entitytermsview-heading-description"> - <span lang="en">Description</span> - <sup class="wb-language-fallback-indicator">English</sup> - </div> - <div id="P856"> - <div class="wikibase-statementgroupview-property-label"> - <a href="/wiki/Property:P856"> - <span lang="en">official website</span> - <sup class="wb-language-fallback-indicator">English</sup> - </a> - </div> - <div class="wikibase-statementview-mainsnak"> - <a class="external free" href="https://officialsite.com"> - https://officialsite.com - </a> - </div> - </div> - <div> - <ul class="wikibase-sitelinklistview-listview"> - <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li> - </ul> - </div> - </div> - """ - response = {"parse": {"displaytitle": title_html, "text": html}} - - results = wikidata.getDetail(response, "Q123", "yua", "yua_MX", etree.HTMLParser()) - self.assertEqual(len(results), 2) - self.assertEqual(results[0]['title'], 'Official website') - self.assertEqual(results[0]['url'], 'https://officialsite.com') - - self.assertEqual(results[1]['infobox'], 'Test') - self.assertEqual(results[1]['id'], None) - self.assertEqual(results[1]['content'], 'Description') - self.assertEqual(results[1]['attributes'], []) - self.assertEqual(results[1]['urls'][0]['title'], 'Official website') - self.assertEqual(results[1]['urls'][0]['url'], 'https://officialsite.com') - self.assertEqual(results[1]['urls'][1]['title'], 'Wikipedia (en)') - self.assertEqual(results[1]['urls'][1]['url'], 'https://en.wikipedia.org/wiki/Test') - - def test_add_image(self): - image_src = wikidata.add_image(fromstring("<div></div>")) - self.assertEqual(image_src, None) - - html = u""" - <div> - <div id="P18"> - <div class="wikibase-statementgroupview-property-label"> - <a href="/wiki/Property:P18"> - image - </a> - </div> - <div class="wikibase-statementlistview"> - <div class="wikibase-statementview listview-item"> - <div class="wikibase-statementview-rankselector"> - <span class="wikibase-rankselector-normal"></span> - </div> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - <div class="commons-media-caption"> - <a href="https://commons.wikimedia.org/wiki/File:image.png">image.png</a> - <br/>2,687 × 3,356; 1.22 MB - </div> - </div> - </div> - </div> - </div> - </div> - </div> - </div> - """ - html_etree = fromstring(html) - id_cache = wikidata.get_id_cache(html_etree) - image_src = wikidata.add_image(id_cache) - self.assertEqual(image_src, - "https://commons.wikimedia.org/wiki/Special:FilePath/image.png?width=500&height=400") - - html = u""" - <div> - <div id="P2910"> - <div class="wikibase-statementgroupview-property-label"> - <a href="/wiki/Property:P2910"> - icon - </a> - </div> - <div class="wikibase-statementlistview"> - <div class="wikibase-statementview listview-item"> - <div class="wikibase-statementview-rankselector"> - <span class="wikibase-rankselector-normal"></span> - </div> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - <div class="commons-media-caption"> - <a href="https://commons.wikimedia.org/wiki/File:icon.png">icon.png</a> - <br/>671 × 671; 18 KB</div> - </div> - </div> - </div> - </div> - </div> - </div> - </div> - <div id="P154"> - <div class="wikibase-statementgroupview-property-label"> - <a href="/wiki/Property:P154"> - logo - </a> - </div> - <div class="wikibase-statementlistview"> - <div class="wikibase-statementview listview-item"> - <div class="wikibase-statementview-rankselector"> - <span class="wikibase-rankselector-normal"></span> - </div> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - <div class="commons-media-caption"> - <a href="https://commons.wikimedia.org/wiki/File:logo.png">logo.png</a> - <br/>170 × 170; 1 KB - </div> - </div> - </div> - </div> - </div> - </div> - </div> - </div> - """ - html_etree = fromstring(html) - id_cache = wikidata.get_id_cache(html_etree) - - image_src = wikidata.add_image(id_cache) - self.assertEqual(image_src, - "https://commons.wikimedia.org/wiki/Special:FilePath/logo.png?width=500&height=400") - - def test_add_attribute(self): - html = u""" - <div> - <div id="P27"> - <div class="wikibase-statementgroupview-property-label"> - <a href="/wiki/Property:P27"> - country of citizenship - </a> - </div> - <div class="wikibase-statementlistview"> - <div class="wikibase-statementview listview-item"> - <div class="wikibase-statementview-rankselector"> - <span class="wikibase-rankselector-normal"></span> - </div> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - <a href="/wiki/Q145"> - United Kingdom - </a> - </div> - </div> - </div> - </div> - </div> - </div> - </div> - """ - attributes = [] - html_etree = fromstring(html) - id_cache = wikidata.get_id_cache(html_etree) - - wikidata.add_attribute(attributes, id_cache, "Fail") - self.assertEqual(attributes, []) - - wikidata.add_attribute(attributes, id_cache, "P27") - self.assertEqual(len(attributes), 1) - self.assertEqual(attributes[0]["label"], "Country of citizenship") - self.assertEqual(attributes[0]["value"], "United Kingdom") - - html = u""" - <div> - <div id="P569"> - <div class="wikibase-statementgroupview-property-label"> - <a href="/wiki/Property:P569"> - date of birth - </a> - </div> - <div class="wikibase-statementlistview"> - <div class="wikibase-statementview listview-item"> - <div class="wikibase-statementview-rankselector"> - <span class="wikibase-rankselector-normal"></span> - </div> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - 27 January 1832 - <sup class="wb-calendar-name"> - Gregorian - </sup> - </div> - </div> - </div> - </div> - </div> - </div> - </div> - """ - attributes = [] - html_etree = fromstring(html) - id_cache = wikidata.get_id_cache(html_etree) - wikidata.add_attribute(attributes, id_cache, "P569", date=True) - self.assertEqual(len(attributes), 1) - self.assertEqual(attributes[0]["label"], "Date of birth") - self.assertEqual(attributes[0]["value"], "27 January 1832") - - html = u""" - <div> - <div id="P6"> - <div class="wikibase-statementgroupview-property-label"> - <a href="/wiki/Property:P27"> - head of government - </a> - </div> - <div class="wikibase-statementlistview"> - <div class="wikibase-statementview listview-item"> - <div class="wikibase-statementview-rankselector"> - <span class="wikibase-rankselector-normal"></span> - </div> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - <a href="/wiki/Q206"> - Old Prime Minister - </a> - </div> - </div> - </div> - </div> - <div class="wikibase-statementview listview-item"> - <div class="wikibase-statementview-rankselector"> - <span class="wikibase-rankselector-preferred"></span> - </div> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - <a href="/wiki/Q3099714"> - Actual Prime Minister - </a> - </div> - </div> - </div> - </div> - </div> - </div> - </div> - """ - attributes = [] - html_etree = fromstring(html) - id_cache = wikidata.get_id_cache(html_etree) - wikidata.add_attribute(attributes, id_cache, "P6") - self.assertEqual(len(attributes), 1) - self.assertEqual(attributes[0]["label"], "Head of government") - self.assertEqual(attributes[0]["value"], "Old Prime Minister, Actual Prime Minister") - - attributes = [] - html_etree = fromstring(html) - id_cache = wikidata.get_id_cache(html_etree) - wikidata.add_attribute(attributes, id_cache, "P6", trim=True) - self.assertEqual(len(attributes), 1) - self.assertEqual(attributes[0]["value"], "Actual Prime Minister") - - def test_add_url(self): - html = u""" - <div> - <div id="P856"> - <div class="wikibase-statementgroupview-property-label"> - <a href="/wiki/Property:P856"> - official website - </a> - </div> - <div class="wikibase-statementlistview"> - <div class="wikibase-statementview listview-item"> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - <a class="external free" href="https://searx.me"> - https://searx.me/ - </a> - </div> - </div> - </div> - </div> - </div> - </div> - </div> - """ - urls = [] - html_etree = fromstring(html) - id_cache = wikidata.get_id_cache(html_etree) - wikidata.add_url(urls, html_etree, id_cache, 'P856') - self.assertEquals(len(urls), 1) - self.assertIn({'title': 'Official website', 'url': 'https://searx.me/'}, urls) - urls = [] - results = [] - wikidata.add_url(urls, html_etree, id_cache, 'P856', 'custom label', results=results) - self.assertEquals(len(urls), 1) - self.assertEquals(len(results), 1) - self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, urls) - self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, results) - - html = u""" - <div> - <div id="P856"> - <div class="wikibase-statementgroupview-property-label"> - <a href="/wiki/Property:P856"> - official website - </a> - </div> - <div class="wikibase-statementlistview"> - <div class="wikibase-statementview listview-item"> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - <a class="external free" href="http://www.worldofwarcraft.com"> - http://www.worldofwarcraft.com - </a> - </div> - </div> - </div> - </div> - <div class="wikibase-statementview listview-item"> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - <a class="external free" href="http://eu.battle.net/wow/en/"> - http://eu.battle.net/wow/en/ - </a> - </div> - </div> - </div> - </div> - </div> - </div> - </div> - """ - urls = [] - html_etree = fromstring(html) - id_cache = wikidata.get_id_cache(html_etree) - wikidata.add_url(urls, html_etree, id_cache, 'P856') - self.assertEquals(len(urls), 2) - self.assertIn({'title': 'Official website', 'url': 'http://www.worldofwarcraft.com'}, urls) - self.assertIn({'title': 'Official website', 'url': 'http://eu.battle.net/wow/en/'}, urls) - - def test_get_imdblink(self): - html = u""" - <div> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - <a class="wb-external-id" href="http://www.imdb.com/tt0433664"> - tt0433664 - </a> - </div> - </div> - </div> - </div> - """ - html_etree = fromstring(html) - imdblink = wikidata.get_imdblink(html_etree, 'https://www.imdb.com/') - - html = u""" - <div> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - <a class="wb-external-id" - href="href="http://tools.wmflabs.org/...http://www.imdb.com/&id=nm4915994""> - nm4915994 - </a> - </div> - </div> - </div> - </div> - """ - html_etree = fromstring(html) - imdblink = wikidata.get_imdblink(html_etree, 'https://www.imdb.com/') - self.assertIn('https://www.imdb.com/name/nm4915994', imdblink) - - def test_get_geolink(self): - html = u""" - <div> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - 60°N, 40°E - </div> - </div> - </div> - </div> - """ - html_etree = fromstring(html) - geolink = wikidata.get_geolink(html_etree) - self.assertIn('https://www.openstreetmap.org/', geolink) - self.assertIn('lat=60&lon=40', geolink) - - html = u""" - <div> - <div class="wikibase-statementview-mainsnak"> - <div> - <div class="wikibase-snakview-value"> - 34°35'59"S, 58°22'55"W - </div> - </div> - </div> - </div> - """ - html_etree = fromstring(html) - geolink = wikidata.get_geolink(html_etree) - self.assertIn('https://www.openstreetmap.org/', geolink) - self.assertIn('lat=-34.59', geolink) - self.assertIn('lon=-58.38', geolink) - - def test_get_wikilink(self): - html = """ - <div> - <div> - <ul class="wikibase-sitelinklistview-listview"> - <li data-wb-siteid="arwiki"><a href="http://ar.wikipedia.org/wiki/Test">Test</a></li> - <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li> - </ul> - </div> - <div> - <ul class="wikibase-sitelinklistview-listview"> - <li data-wb-siteid="enwikiquote"><a href="https://en.wikiquote.org/wiki/Test">Test</a></li> - </ul> - </div> - </div> - """ - html_etree = fromstring(html) - wikilink = wikidata.get_wikilink(html_etree, 'nowiki') - self.assertEqual(wikilink, None) - wikilink = wikidata.get_wikilink(html_etree, 'enwiki') - self.assertEqual(wikilink, 'https://en.wikipedia.org/wiki/Test') - wikilink = wikidata.get_wikilink(html_etree, 'arwiki') - self.assertEqual(wikilink, 'https://ar.wikipedia.org/wiki/Test') - wikilink = wikidata.get_wikilink(html_etree, 'enwikiquote') - self.assertEqual(wikilink, 'https://en.wikiquote.org/wiki/Test') |