From 270ad188978fbc7c16e61ea0b997687ccfa0e3ee Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Thu, 30 Mar 2023 21:04:53 +0200 Subject: [fix] engine flickr: adapt to the new data model from flicker's response Closes: https://github.com/searxng/searxng/issues/1879 Signed-off-by: Markus Heiser --- searx/engines/flickr_noapi.py | 75 ++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 4ff59fc52..5299c604f 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -1,14 +1,22 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Flickr (Images) + """ - Flickr (Images) -""" -from json import loads +from typing import TYPE_CHECKING + +import json from time import time import re from urllib.parse import urlencode from searx.utils import ecma_unescape, html_to_text +if TYPE_CHECKING: + import logging + + logger: logging.Logger + # about about = { "website": 'https://www.flickr.com', @@ -19,23 +27,24 @@ about = { "results": 'HTML', } +# engine dependent config categories = ['images'] - -url = 'https://www.flickr.com/' -search_url = url + 'search?{query}&page={page}' -time_range_url = '&min_upload_date={start}&max_upload_date={end}' -photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' -modelexport_re = re.compile(r"^\s*modelExport:\s*({.*}),$", re.M) -image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's') - paging = True time_range_support = True +safesearch = False + time_range_dict = { 'day': 60 * 60 * 24, 'week': 60 * 60 * 24 * 7, 'month': 60 * 60 * 24 * 7 * 4, 'year': 60 * 60 * 24 * 7 * 52, } +image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'm', 'n', 't', 'q', 's') + +search_url = 'https://www.flickr.com/search?{query}&page={page}' +time_range_url = '&min_upload_date={start}&max_upload_date={end}' +photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' +modelexport_re = re.compile(r"^\s*modelExport:\s*({.*}),$", re.M) def build_flickr_url(user_id, photo_id): @@ -55,51 +64,59 @@ def request(query, params): return params -def response(resp): +def response(resp): # pylint: disable=too-many-branches results = [] matches = modelexport_re.search(resp.text) - if matches is None: return results match = matches.group(1) - model_export = loads(match) + model_export = json.loads(match) if 'legend' not in model_export: return results - legend = model_export['legend'] # handle empty page if not legend or not legend[0]: return results - for index in legend: - photo = model_export['main'][index[0]][int(index[1])][index[2]][index[3]][int(index[4])] + for x, index in enumerate(legend): + if len(index) != 8: + logger.debug("skip legend enty %s : %s", x, index) + continue + + photo = model_export['main'][index[0]][int(index[1])][index[2]][index[3]][index[4]][index[5]][int(index[6])][ + index[7] + ] author = ecma_unescape(photo.get('realname', '')) - source = ecma_unescape(photo.get('username', '')) + ' @ Flickr' + source = ecma_unescape(photo.get('username', '')) + if source: + source += ' @ Flickr' title = ecma_unescape(photo.get('title', '')) content = html_to_text(ecma_unescape(photo.get('description', ''))) img_src = None + # From the biggest to the lowest format + size_data = None for image_size in image_sizes: - if image_size in photo['sizes']: - img_src = photo['sizes'][image_size]['url'] - img_format = ( - 'jpg ' + str(photo['sizes'][image_size]['width']) + 'x' + str(photo['sizes'][image_size]['height']) - ) + if image_size in photo['sizes']['data']: + size_data = photo['sizes']['data'][image_size]['data'] break - if not img_src: - logger.debug('cannot find valid image size: {0}'.format(repr(photo))) + if not size_data: + logger.debug('cannot find valid image size: {0}'.format(repr(photo['sizes']['data']))) continue + img_src = size_data['url'] + img_format = f"{size_data['width']} x {size_data['height']}" + # For a bigger thumbnail, keep only the url_z, not the url_n - if 'n' in photo['sizes']: - thumbnail_src = photo['sizes']['n']['url'] - elif 'z' in photo['sizes']: - thumbnail_src = photo['sizes']['z']['url'] + if 'n' in photo['sizes']['data']: + thumbnail_src = photo['sizes']['data']['n']['data']['url'] + elif 'z' in photo['sizes']['data']: + thumbnail_src = photo['sizes']['data']['z']['data']['url'] else: thumbnail_src = img_src -- cgit v1.2.3-54-g00ecf