summaryrefslogtreecommitdiff
path: root/utils/fetch_wikidata_units.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils/fetch_wikidata_units.py')
-rw-r--r--utils/fetch_wikidata_units.py43
1 files changed, 26 insertions, 17 deletions
diff --git a/utils/fetch_wikidata_units.py b/utils/fetch_wikidata_units.py
index 69505968e..69ae8ab27 100644
--- a/utils/fetch_wikidata_units.py
+++ b/utils/fetch_wikidata_units.py
@@ -12,31 +12,40 @@ from searx import searx_dir
from searx.engines.wikidata import send_wikidata_query
+# the response contains duplicate ?item with the different ?symbol
+# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
+# even if a ?item has different ?symbol of the same rank.
+# A deterministic result
+# see:
+# * https://www.wikidata.org/wiki/Help:Ranking
+# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
+# * https://w.wiki/32BT
+# see the result for https://www.wikidata.org/wiki/Q11582
+# there are multiple symbols the same rank
SARQL_REQUEST = """
-SELECT DISTINCT ?item ?symbol ?P2370 ?P2370Unit ?P2442 ?P2442Unit
+SELECT DISTINCT ?item ?symbol
WHERE
{
-?item wdt:P31/wdt:P279 wd:Q47574.
-?item wdt:P5061 ?symbol.
-FILTER(LANG(?symbol) = "en").
+ ?item wdt:P31/wdt:P279 wd:Q47574 .
+ ?item p:P5061 ?symbolP .
+ ?symbolP ps:P5061 ?symbol ;
+ wikibase:rank ?rank .
+ FILTER(LANG(?symbol) = "en").
}
-ORDER BY ?item
+ORDER BY ?item DESC(?rank) ?symbol
"""
def get_data():
- def get_key(unit):
- return unit['item']['value'].replace('http://www.wikidata.org/entity/', '')
-
- def get_value(unit):
- return unit['symbol']['value']
-
- result = send_wikidata_query(SARQL_REQUEST)
- if result is not None:
- # sort the unit by entity name
- # so different fetchs keep the file unchanged.
- list(result['results']['bindings']).sort(key=get_key)
- return collections.OrderedDict([(get_key(unit), get_value(unit)) for unit in result['results']['bindings']])
+ results = collections.OrderedDict()
+ response = send_wikidata_query(SARQL_REQUEST)
+ for unit in response['results']['bindings']:
+ name = unit['item']['value'].replace('http://www.wikidata.org/entity/', '')
+ unit = unit['symbol']['value']
+ if name not in results:
+ # ignore duplicate: always use the first one
+ results[name] = unit
+ return results
def get_wikidata_units_filename():