+ +

Source code for searx.engines.tineye

+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""This engine implements *Tineye - reverse image search*
+
+Using TinEye, you can search by image or perform what we call a reverse image
+search.  You can do that by uploading an image or searching by URL. You can also
+simply drag and drop your images to start your search.  TinEye constantly crawls
+the web and adds images to its index.  Today, the TinEye index is over 50.2
+billion images `[tineye.com] <https://tineye.com/how>`_.
+
+.. hint::
+
+   This SearXNG engine only supports *'searching by URL'* and it does not use
+   the official API `[api.tineye.com] <https://api.tineye.com/python/docs/>`_.
+
+"""
+
+from typing import TYPE_CHECKING
+from urllib.parse import urlencode
+from datetime import datetime
+from flask_babel import gettext
+
+if TYPE_CHECKING:
+    import logging
+
+    logger = logging.getLogger()
+
+about = {
+    "website": 'https://tineye.com',
+    "wikidata_id": 'Q2382535',
+    "official_api_documentation": 'https://api.tineye.com/python/docs/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+engine_type = 'online_url_search'
+""":py:obj:`searx.search.processors.online_url_search`"""
+
+categories = ['general']
+paging = True
+safesearch = False
+base_url = 'https://tineye.com'
+search_string = '/api/v1/result_json/?page={page}&{query}'
+
+FORMAT_NOT_SUPPORTED = gettext(
+    "Could not read that image url. This may be due to an unsupported file"
+    " format. TinEye only supports images that are JPEG, PNG, GIF, BMP, TIFF or WebP."
+)
+"""TinEye error message"""
+
+NO_SIGNATURE_ERROR = gettext(
+    "The image is too simple to find matches. TinEye requires a basic level of"
+    " visual detail to successfully identify matches."
+)
+"""TinEye error message"""
+
+DOWNLOAD_ERROR = gettext("The image could not be downloaded.")
+"""TinEye error message"""
+
+
+
+[docs] +def request(query, params): + """Build TinEye HTTP request using ``search_urls`` of a :py:obj:`engine_type`.""" + + params['raise_for_httperror'] = False + + if params['search_urls']['data:image']: + query = params['search_urls']['data:image'] + elif params['search_urls']['http']: + query = params['search_urls']['http'] + + logger.debug("query URL: %s", query) + query = urlencode({'url': query}) + + # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py + params['url'] = base_url + search_string.format(query=query, page=params['pageno']) + + params['headers'].update( + { + 'Connection': 'keep-alive', + 'Accept-Encoding': 'gzip, defalte, br', + 'Host': 'tineye.com', + 'DNT': '1', + 'TE': 'trailers', + } + ) + return params
+ + + +
+[docs] +def parse_tineye_match(match_json): + """Takes parsed JSON from the API server and turns it into a :py:obj:`dict` + object. + + Attributes `(class Match) <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__ + + - `image_url`, link to the result image. + - `domain`, domain this result was found on. + - `score`, a number (0 to 100) that indicates how closely the images match. + - `width`, image width in pixels. + - `height`, image height in pixels. + - `size`, image area in pixels. + - `format`, image format. + - `filesize`, image size in bytes. + - `overlay`, overlay URL. + - `tags`, whether this match belongs to a collection or stock domain. + + - `backlinks`, a list of Backlink objects pointing to the original websites + and image URLs. List items are instances of :py:obj:`dict`, (`Backlink + <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__): + + - `url`, the image URL to the image. + - `backlink`, the original website URL. + - `crawl_date`, the date the image was crawled. + + """ + + # HINT: there exists an alternative backlink dict in the domains list / e.g.:: + # + # match_json['domains'][0]['backlinks'] + + backlinks = [] + if "backlinks" in match_json: + + for backlink_json in match_json["backlinks"]: + if not isinstance(backlink_json, dict): + continue + + crawl_date = backlink_json.get("crawl_date") + if crawl_date: + crawl_date = datetime.strptime(crawl_date, '%Y-%m-%d') + else: + crawl_date = datetime.min + + backlinks.append( + { + 'url': backlink_json.get("url"), + 'backlink': backlink_json.get("backlink"), + 'crawl_date': crawl_date, + 'image_name': backlink_json.get("image_name"), + } + ) + + return { + 'image_url': match_json.get("image_url"), + 'domain': match_json.get("domain"), + 'score': match_json.get("score"), + 'width': match_json.get("width"), + 'height': match_json.get("height"), + 'size': match_json.get("size"), + 'image_format': match_json.get("format"), + 'filesize': match_json.get("filesize"), + 'overlay': match_json.get("overlay"), + 'tags': match_json.get("tags"), + 'backlinks': backlinks, + }
+ + + +
+[docs] +def response(resp): + """Parse HTTP response from TinEye.""" + + # handle the 422 client side errors, and the possible 400 status code error + if resp.status_code in (400, 422): + json_data = resp.json() + suggestions = json_data.get('suggestions', {}) + message = f'HTTP Status Code: {resp.status_code}' + + if resp.status_code == 422: + s_key = suggestions.get('key', '') + if s_key == "Invalid image URL": + # test https://docs.searxng.org/_static/searxng-wordmark.svg + message = FORMAT_NOT_SUPPORTED + elif s_key == 'NO_SIGNATURE_ERROR': + # test https://pngimg.com/uploads/dot/dot_PNG4.png + message = NO_SIGNATURE_ERROR + elif s_key == 'Download Error': + # test https://notexists + message = DOWNLOAD_ERROR + else: + logger.warning("Unknown suggestion key encountered: %s", s_key) + else: # 400 + description = suggestions.get('description') + if isinstance(description, list): + message = ','.join(description) + + # see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023 + # results.append({'answer': message}) + logger.error(message) + return [] + + # Raise for all other responses + resp.raise_for_status() + + results = [] + json_data = resp.json() + + for match_json in json_data['matches']: + + tineye_match = parse_tineye_match(match_json) + if not tineye_match['backlinks']: + continue + + backlink = tineye_match['backlinks'][0] + results.append( + { + 'template': 'images.html', + 'url': backlink['backlink'], + 'thumbnail_src': tineye_match['image_url'], + 'source': backlink['url'], + 'title': backlink['image_name'], + 'img_src': backlink['url'], + 'format': tineye_match['image_format'], + 'width': tineye_match['width'], + 'height': tineye_match['height'], + 'publishedDate': backlink['crawl_date'], + } + ) + + # append number of results + + number_of_results = json_data.get('num_matches') + if number_of_results: + results.append({'number_of_results': number_of_results}) + + return results
+ +
+ +
+