diff options
author | Marc Abonce Seguin <marc-abonce@mailbox.org> | 2018-04-08 21:17:00 -0500 |
---|---|---|
committer | Marc Abonce Seguin <marc-abonce@mailbox.org> | 2018-04-08 21:17:00 -0500 |
commit | b12857a70dd947a804e667d864ba56055b528ee0 (patch) | |
tree | 2f52c16c1cc3f40d8b1c1d747a6126956625f204 /searx/engines/wikidata.py | |
parent | 835d1edd5834c3c8117dc4614cb0b0b4316d3153 (diff) | |
download | searxng-b12857a70dd947a804e667d864ba56055b528ee0.tar.gz searxng-b12857a70dd947a804e667d864ba56055b528ee0.zip |
[fix] make search requests on wikidata more accurate
Diffstat (limited to 'searx/engines/wikidata.py')
-rw-r--r-- | searx/engines/wikidata.py | 13 |
1 file changed, 6 insertions, 7 deletions
```diff
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 1fdbc9869..fe53609c1 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -27,7 +27,7 @@ result_count = 1
 # urls
 wikidata_host = 'https://www.wikidata.org'
 url_search = wikidata_host \
-    + '/wiki/Special:ItemDisambiguation?{query}'
+    + '/w/index.php?{query}'
 
 wikidata_api = wikidata_host + '/w/api.php'
 url_detail = wikidata_api\
@@ -40,7 +40,7 @@ url_map = 'https://www.openstreetmap.org/'\
 url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
 
 # xpaths
-wikidata_ids_xpath = '//div/ul[@class="wikibase-disambiguation"]/li/a/@title'
+wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'
 title_xpath = '//*[contains(@class,"wikibase-title-label")]'
 description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
 property_xpath = '//div[@id="{propertyid}"]'
@@ -57,22 +57,21 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
 
 
 def request(query, params):
-    language = match_language(params['language'], supported_languages).split('-')[0]
-
     params['url'] = url_search.format(
-        query=urlencode({'label': query, 'language': language}))
+        query=urlencode({'search': query}))
     return params
 
 
 def response(resp):
     results = []
     html = fromstring(resp.text)
-    wikidata_ids = html.xpath(wikidata_ids_xpath)
+    search_results = html.xpath(wikidata_ids_xpath)
 
     language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
 
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
-    for wikidata_id in wikidata_ids[:result_count]:
+    for search_result in search_results[:result_count]:
+        wikidata_id = search_result.split('/')[-1]
         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
         jsonresponse = loads(htmlresponse.text)
```