diff options
Diffstat (limited to 'searx/engines/wikidata.py')
-rw-r--r-- | searx/engines/wikidata.py | 243 |
1 files changed, 126 insertions, 117 deletions
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 59413499c..c8881d299 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -14,7 +14,10 @@ from searx.data import WIKIDATA_UNITS from searx.network import post, get from searx.utils import match_language, searx_useragent, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom -from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.wikipedia import ( + _fetch_supported_languages, + supported_languages_url, +) # NOQA # pylint: disable=unused-import # about about = { @@ -112,10 +115,7 @@ replace_http_by_https = get_string_replaces_function({'http:': 'https:'}) def get_headers(): # user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits - return { - 'Accept': 'application/sparql-results+json', - 'User-Agent': searx_useragent() - } + return {'Accept': 'application/sparql-results+json', 'User-Agent': searx_useragent()} def get_label_for_entity(entity_id, language): @@ -211,9 +211,9 @@ def get_results(attribute_result, attributes, language): results.append({'title': infobox_title, 'url': url}) # update the infobox_id with the wikipedia URL # first the local wikipedia URL, and as fallback the english wikipedia URL - if attribute_type == WDArticle\ - and ((attribute.language == 'en' and infobox_id_lang is None) - or attribute.language != 'en'): + if attribute_type == WDArticle and ( + (attribute.language == 'en' and infobox_id_lang is None) or attribute.language != 'en' + ): infobox_id_lang = attribute.language infobox_id = url elif attribute_type == WDImageAttribute: @@ -232,13 +232,11 @@ def get_results(attribute_result, attributes, language): osm_zoom = area_to_osm_zoom(area) if area else 19 url = attribute.get_geo_url(attribute_result, osm_zoom=osm_zoom) if url: - infobox_urls.append({'title': attribute.get_label(language), - 'url': url, - 'entity': attribute.name}) + infobox_urls.append({'title': attribute.get_label(language), 'url': url, 'entity': attribute.name}) else: - infobox_attributes.append({'label': attribute.get_label(language), - 'value': value, - 'entity': attribute.name}) + infobox_attributes.append( + {'label': attribute.get_label(language), 'value': value, 'entity': attribute.name} + ) if infobox_id: infobox_id = replace_http_by_https(infobox_id) @@ -246,22 +244,19 @@ def get_results(attribute_result, attributes, language): # add the wikidata URL at the end infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']}) - if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and\ - len(infobox_content) == 0: - results.append({ - 'url': infobox_urls[0]['url'], - 'title': infobox_title, - 'content': infobox_content - }) + if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and len(infobox_content) == 0: + results.append({'url': infobox_urls[0]['url'], 'title': infobox_title, 'content': infobox_content}) else: - results.append({ - 'infobox': infobox_title, - 'id': infobox_id, - 'content': infobox_content, - 'img_src': img_src, - 'urls': infobox_urls, - 'attributes': infobox_attributes - }) + results.append( + { + 'infobox': infobox_title, + 'id': infobox_id, + 'content': infobox_content, + 'img_src': img_src, + 'urls': infobox_urls, + 'attributes': infobox_attributes, + } + ) return results @@ -271,13 +266,14 @@ def get_query(query, language): where = list(filter(lambda s: len(s) > 0, [a.get_where() for a in attributes])) wikibase_label = list(filter(lambda s: len(s) > 0, [a.get_wikibase_label() for a in attributes])) group_by = list(filter(lambda s: len(s) > 0, [a.get_group_by() for a in attributes])) - query = QUERY_TEMPLATE\ - .replace('%QUERY%', sparql_string_escape(query))\ - .replace('%SELECT%', ' '.join(select))\ - .replace('%WHERE%', '\n '.join(where))\ - .replace('%WIKIBASE_LABELS%', '\n '.join(wikibase_label))\ - .replace('%GROUP_BY%', ' '.join(group_by))\ + query = ( + QUERY_TEMPLATE.replace('%QUERY%', sparql_string_escape(query)) + .replace('%SELECT%', ' '.join(select)) + .replace('%WHERE%', '\n '.join(where)) + .replace('%WIKIBASE_LABELS%', '\n '.join(wikibase_label)) + .replace('%GROUP_BY%', ' '.join(group_by)) .replace('%LANGUAGE%', language) + ) return query, attributes @@ -303,90 +299,98 @@ def get_attributes(language): attributes.append(WDDateAttribute(name)) # Dates - for p in ['P571', # inception date - 'P576', # dissolution date - 'P580', # start date - 'P582', # end date - 'P569', # date of birth - 'P570', # date of death - 'P619', # date of spacecraft launch - 'P620']: # date of spacecraft landing + for p in [ + 'P571', # inception date + 'P576', # dissolution date + 'P580', # start date + 'P582', # end date + 'P569', # date of birth + 'P570', # date of death + 'P619', # date of spacecraft launch + 'P620', + ]: # date of spacecraft landing add_date(p) - for p in ['P27', # country of citizenship - 'P495', # country of origin - 'P17', # country - 'P159']: # headquarters location + for p in [ + 'P27', # country of citizenship + 'P495', # country of origin + 'P17', # country + 'P159', + ]: # headquarters location add_label(p) # Places - for p in ['P36', # capital - 'P35', # head of state - 'P6', # head of government - 'P122', # basic form of government - 'P37']: # official language + for p in [ + 'P36', # capital + 'P35', # head of state + 'P6', # head of government + 'P122', # basic form of government + 'P37', + ]: # official language add_label(p) - add_value('P1082') # population + add_value('P1082') # population add_amount('P2046') # area - add_amount('P281') # postal code - add_label('P38') # currency + add_amount('P281') # postal code + add_label('P38') # currency add_amount('P2048') # heigth (building) # Media - for p in ['P400', # platform (videogames, computing) - 'P50', # author - 'P170', # creator - 'P57', # director - 'P175', # performer - 'P178', # developer - 'P162', # producer - 'P176', # manufacturer - 'P58', # screenwriter - 'P272', # production company - 'P264', # record label - 'P123', # publisher - 'P449', # original network - 'P750', # distributed by - 'P86']: # composer + for p in [ + 'P400', # platform (videogames, computing) + 'P50', # author + 'P170', # creator + 'P57', # director + 'P175', # performer + 'P178', # developer + 'P162', # producer + 'P176', # manufacturer + 'P58', # screenwriter + 'P272', # production company + 'P264', # record label + 'P123', # publisher + 'P449', # original network + 'P750', # distributed by + 'P86', + ]: # composer add_label(p) - add_date('P577') # publication date - add_label('P136') # genre (music, film, artistic...) - add_label('P364') # original language - add_value('P212') # ISBN-13 - add_value('P957') # ISBN-10 - add_label('P275') # copyright license - add_label('P277') # programming language - add_value('P348') # version - add_label('P840') # narrative location + add_date('P577') # publication date + add_label('P136') # genre (music, film, artistic...) + add_label('P364') # original language + add_value('P212') # ISBN-13 + add_value('P957') # ISBN-10 + add_label('P275') # copyright license + add_label('P277') # programming language + add_value('P348') # version + add_label('P840') # narrative location # Languages - add_value('P1098') # number of speakers - add_label('P282') # writing system - add_label('P1018') # language regulatory body - add_value('P218') # language code (ISO 639-1) + add_value('P1098') # number of speakers + add_label('P282') # writing system + add_label('P1018') # language regulatory body + add_value('P218') # language code (ISO 639-1) # Other - add_label('P169') # ceo - add_label('P112') # founded by - add_label('P1454') # legal form (company, organization) - add_label('P137') # operator (service, facility, ...) - add_label('P1029') # crew members (tripulation) - add_label('P225') # taxon name - add_value('P274') # chemical formula - add_label('P1346') # winner (sports, contests, ...) - add_value('P1120') # number of deaths - add_value('P498') # currency code (ISO 4217) + add_label('P169') # ceo + add_label('P112') # founded by + add_label('P1454') # legal form (company, organization) + add_label('P137') # operator (service, facility, ...) + add_label('P1029') # crew members (tripulation) + add_label('P225') # taxon name + add_value('P274') # chemical formula + add_label('P1346') # winner (sports, contests, ...) + add_value('P1120') # number of deaths + add_value('P498') # currency code (ISO 4217) # URL - add_url('P856', official=True) # official website + add_url('P856', official=True) # official website attributes.append(WDArticle(language)) # wikipedia (user language) if not language.startswith('en'): attributes.append(WDArticle('en')) # wikipedia (english) - add_url('P1324') # source code repository - add_url('P1581') # blog + add_url('P1324') # source code repository + add_url('P1581') # blog add_url('P434', url_id='musicbrainz_artist') add_url('P435', url_id='musicbrainz_work') add_url('P436', url_id='musicbrainz_release_group') @@ -402,11 +406,11 @@ def get_attributes(language): attributes.append(WDGeoAttribute('P625')) # Image - add_image('P15', priority=1, url_id='wikimedia_image') # route map - add_image('P242', priority=2, url_id='wikimedia_image') # locator map - add_image('P154', priority=3, url_id='wikimedia_image') # logo - add_image('P18', priority=4, url_id='wikimedia_image') # image - add_image('P41', priority=5, url_id='wikimedia_image') # flag + add_image('P15', priority=1, url_id='wikimedia_image') # route map + add_image('P242', priority=2, url_id='wikimedia_image') # locator map + add_image('P154', priority=3, url_id='wikimedia_image') # logo + add_image('P18', priority=4, url_id='wikimedia_image') # image + add_image('P41', priority=5, url_id='wikimedia_image') # flag add_image('P2716', priority=6, url_id='wikimedia_image') # collage add_image('P2910', priority=7, url_id='wikimedia_image') # icon @@ -415,7 +419,7 @@ def get_attributes(language): class WDAttribute: - __slots__ = 'name', + __slots__ = ('name',) def __init__(self, name): self.name = name @@ -443,14 +447,15 @@ class WDAttribute: class WDAmountAttribute(WDAttribute): - def get_select(self): return '?{name} ?{name}Unit'.replace('{name}', self.name) def get_where(self): return """ OPTIONAL { ?item p:{name} ?{name}Node . ?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} . - OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace('{name}', self.name) + OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -484,7 +489,9 @@ class WDArticle(WDAttribute): return """OPTIONAL { ?article{language} schema:about ?item ; schema:inLanguage "{language}" ; schema:isPartOf <https://{language}.wikipedia.org/> ; - schema:name ?articleName{language} . }""".replace('{language}', self.language) + schema:name ?articleName{language} . }""".replace( + '{language}', self.language + ) def get_group_by(self): return self.get_select() @@ -495,7 +502,6 @@ class WDArticle(WDAttribute): class WDLabelAttribute(WDAttribute): - def get_select(self): return '(group_concat(distinct ?{name}Label;separator=", ") as ?{name}Labels)'.replace('{name}', self.name) @@ -526,14 +532,13 @@ class WDURLAttribute(WDAttribute): value = value.split(',')[0] url_id = self.url_id if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE): - value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):] + value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE) :] url_id = 'wikimedia_image' return get_external_url(url_id, value) return value class WDGeoAttribute(WDAttribute): - def get_label(self, language): return "OpenStreetMap" @@ -543,7 +548,9 @@ class WDGeoAttribute(WDAttribute): def get_where(self): return """OPTIONAL { ?item p:{name}/psv:{name} [ wikibase:geoLatitude ?{name}Lat ; - wikibase:geoLongitude ?{name}Long ] }""".replace('{name}', self.name) + wikibase:geoLongitude ?{name}Long ] }""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -565,7 +572,7 @@ class WDGeoAttribute(WDAttribute): class WDImageAttribute(WDURLAttribute): - __slots__ = 'priority', + __slots__ = ('priority',) def __init__(self, name, url_id=None, priority=100): super().__init__(name, url_id) @@ -573,7 +580,6 @@ class WDImageAttribute(WDURLAttribute): class WDDateAttribute(WDAttribute): - def get_select(self): return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name) @@ -587,7 +593,9 @@ class WDDateAttribute(WDAttribute): wikibase:timePrecision ?{name}timePrecision ; wikibase:timeTimezone ?{name}timeZone ; wikibase:timeCalendarModel ?{name}timeCalendar ] . } - hint:Prior hint:rangeSafe true;""".replace('{name}', self.name) + hint:Prior hint:rangeSafe true;""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -619,11 +627,12 @@ class WDDateAttribute(WDAttribute): def format_13(self, value, locale): timestamp = isoparse(value) # precision: minute - return get_datetime_format(format, locale=locale) \ - .replace("'", "") \ - .replace('{0}', format_time(timestamp, 'full', tzinfo=None, - locale=locale)) \ + return ( + get_datetime_format(format, locale=locale) + .replace("'", "") + .replace('{0}', format_time(timestamp, 'full', tzinfo=None, locale=locale)) .replace('{1}', format_date(timestamp, 'short', locale=locale)) + ) def format_14(self, value, locale): # precision: second. @@ -644,7 +653,7 @@ class WDDateAttribute(WDAttribute): '11': ('format_11', 0), # day '12': ('format_13', 0), # hour (not supported by babel, display minute) '13': ('format_13', 0), # minute - '14': ('format_14', 0) # second + '14': ('format_14', 0), # second } def get_str(self, result, language): |