summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
authorLéon Tiekötter <leon@tiekoetter.com>2022-02-06 17:50:00 +0100
committerLéon Tiekötter <leon@tiekoetter.com>2022-02-06 22:16:06 +0100
commit560a14e77bcf804ab55ae1cc8c93ce0b2289cf2a (patch)
tree00e707854b8372247cf226691512d75af7010cf4 /searx
parent10e6881ca861cfce70e99ededf2c864a9274b5dd (diff)
downloadsearxng-560a14e77bcf804ab55ae1cc8c93ce0b2289cf2a.tar.gz
searxng-560a14e77bcf804ab55ae1cc8c93ce0b2289cf2a.zip
[fix] wikidata info box images
Wikidata info box images are now loaded from upload.wikimedia.org instead of commons.wikimedia.org to prevent redirects.

Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx')
-rw-r--r--searx/engines/wikidata.py33
1 files changed, 32 insertions, 1 deletions
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index e5d3f55c0..ce500cda6 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -18,6 +18,7 @@ from searx.engines.wikipedia import ( # pylint: disable=unused-import
_fetch_supported_languages,
supported_languages_url,
)
+from hashlib import md5
# about
about = {
@@ -185,6 +186,36 @@ def response(resp):
return results
+_IMG_SRC_DEFAULT_URL_PREFIX = "https://commons.wikimedia.org/wiki/Special:FilePath/"
+_IMG_SRC_NEW_URL_PREFIX = "https://upload.wikimedia.org/wikipedia/commons/"
+
+
+def get_thumbnail(img_src):
+ """Get Thumbnail image from wikimedia commons
+
+ Images from commons.wikimedia.org are (HTTP) redirected to
+ upload.wikimedia.org. The redirected URL can be calculated by this
+ function.
+
+ - https://stackoverflow.com/a/33691240
+
+ """
+ logger.debug('get_thumbnail(): %s', img_src)
+ if not img_src is None and _IMG_SRC_DEFAULT_URL_PREFIX in img_src.split()[0]:
+ img_src_name = (
+ img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "")
+ .split("?", 1)[0]
+ .replace("%20", "_")
+ .replace("%28", "(")
+ .replace("%29", ")")
+ )
+ img_src_name_md5 = md5(img_src_name.encode("utf-8")).hexdigest()
+ img_src = _IMG_SRC_NEW_URL_PREFIX + img_src_name_md5[0] + "/" + img_src_name_md5[0:2] + "/" + img_src_name
+ logger.debug('get_thumbnail() redirected: %s', img_src)
+
+ return img_src
+
+
def get_results(attribute_result, attributes, language):
results = []
infobox_title = attribute_result.get('itemLabel')
@@ -221,7 +252,7 @@ def get_results(attribute_result, attributes, language):
# replace the current image only the priority is lower
# (the infobox contain only one image).
if attribute.priority < img_src_priority:
- img_src = value
+ img_src = get_thumbnail(value)
img_src_priority = attribute.priority
elif attribute_type == WDGeoAttribute:
# geocoordinate link