diff options
author | Alexandre Flament <alex@al-f.net> | 2021-08-25 11:24:27 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-25 11:24:27 +0200 |
commit | 38e0b9360b6a04908c6b8cdbdbd7a15cbdaf19b1 (patch) | |
tree | 7c156a017b837e49ecb77ca9886d89e20ef7c069 /searx | |
parent | dec9df28f3e37c5793e53898dd63419660230b4e (diff) | |
parent | 697ebeddcc6066e5c5f316c9ef2baf8d3769a868 (diff) | |
download | searxng-38e0b9360b6a04908c6b8cdbdbd7a15cbdaf19b1.tar.gz searxng-38e0b9360b6a04908c6b8cdbdbd7a15cbdaf19b1.zip |
Merge pull request #257 from dalf/mod_image_proxy
Mod image proxy
Diffstat (limited to 'searx')
-rw-r--r-- | searx/network/__init__.py | 27 | ||||
-rw-r--r-- | searx/network/network.py | 8 | ||||
-rw-r--r-- | searx/utils.py | 6 | ||||
-rwxr-xr-x | searx/webapp.py | 55 |
4 files changed, 66 insertions, 30 deletions
diff --git a/searx/network/__init__.py b/searx/network/__init__.py index c921bdecb..9e80a30a1 100644 --- a/searx/network/__init__.py +++ b/searx/network/__init__.py @@ -5,6 +5,7 @@ import asyncio import threading import concurrent.futures +from types import MethodType from timeit import default_timer import httpx @@ -161,19 +162,32 @@ def patch(url, data=None, **kwargs): def delete(url, **kwargs): return request('delete', url, **kwargs) + async def stream_chunk_to_queue(network, queue, method, url, **kwargs): try: async with network.stream(method, url, **kwargs) as response: queue.put(response) - async for chunk in response.aiter_bytes(65536): + # aiter_raw: access the raw bytes on the response without applying any HTTP content decoding + # https://www.python-httpx.org/quickstart/#streaming-responses + async for chunk in response.aiter_raw(65536): if len(chunk) > 0: queue.put(chunk) + except httpx.ResponseClosed as e: + # the response was closed + pass except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e: queue.put(e) finally: queue.put(None) +def _close_response_method(self): + asyncio.run_coroutine_threadsafe( + self.aclose(), + get_loop() + ) + + def stream(method, url, **kwargs): """Replace httpx.stream. @@ -191,10 +205,19 @@ def stream(method, url, **kwargs): stream_chunk_to_queue(get_network(), queue, method, url, **kwargs), get_loop() ) + + # yield response + response = queue.get() + if isinstance(response, Exception): + raise response + response.close = MethodType(_close_response_method, response) + yield response + + # yield chunks chunk_or_exception = queue.get() while chunk_or_exception is not None: if isinstance(chunk_or_exception, Exception): raise chunk_or_exception yield chunk_or_exception chunk_or_exception = queue.get() - return future.result() + future.result() diff --git a/searx/network/network.py b/searx/network/network.py index e7dc5b56e..94e91593d 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -289,6 +289,14 @@ def initialize(settings_engines=None, settings_outgoing=None): if isinstance(network, str): NETWORKS[engine_name] = NETWORKS[network] + # the /image_proxy endpoint has a dedicated network. + # same parameters than the default network, but HTTP/2 is disabled. + # It decreases the CPU load average, and the total time is more or less the same + if 'image_proxy' not in NETWORKS: + image_proxy_params = default_params.copy() + image_proxy_params['enable_http2'] = False + NETWORKS['image_proxy'] = new_network(image_proxy_params) + @atexit.register def done(): diff --git a/searx/utils.py b/searx/utils.py index 5e2ab18c4..22824d829 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -262,11 +262,7 @@ def dict_subset(d, properties): >>> >> dict_subset({'A': 'a', 'B': 'b', 'C': 'c'}, ['A', 'D']) {'A': 'a'} """ - result = {} - for k in properties: - if k in d: - result[k] = d[k] - return result + return {k: d[k] for k in properties if k in d} def get_torrent_size(filesize, filesize_multiplier): diff --git a/searx/webapp.py b/searx/webapp.py index a1abea887..3b7e0f972 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -108,7 +108,7 @@ from searx.autocomplete import search_autocomplete, backends as autocomplete_bac from searx.languages import language_codes as languages from searx.locales import LOCALE_NAMES, UI_LOCALE_CODES, RTL_LOCALES from searx.search import SearchWithPlugins, initialize as search_initialize -from searx.network import stream as http_stream +from searx.network import stream as http_stream, set_context_network_name from searx.search.checker import get_result as checker_get_result from searx.settings_loader import get_default_settings_path @@ -1065,7 +1065,7 @@ def _is_selected_language_supported(engine, preferences): # pylint: disable=red @app.route('/image_proxy', methods=['GET']) def image_proxy(): - # pylint: disable=too-many-return-statements + # pylint: disable=too-many-return-statements, too-many-branches url = request.args.get('url') if not url: @@ -1076,17 +1076,21 @@ def image_proxy(): return '', 400 maximum_size = 5 * 1024 * 1024 - + forward_resp = False + resp = None try: - headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) - headers['User-Agent'] = gen_useragent() + request_headers = { + 'User-Agent': gen_useragent(), + 'Accept': 'image/webp,*/*', + 'Accept-Encoding': 'gzip, deflate', + 'Sec-GPC': '1', + 'DNT': '1', + } + set_context_network_name('image_proxy') stream = http_stream( method = 'GET', url = url, - headers = headers, - timeout = settings['outgoing']['request_timeout'], - allow_redirects = True, - max_redirects = 20 + headers = request_headers ) resp = next(stream) content_length = resp.headers.get('Content-Length') @@ -1095,32 +1099,37 @@ def image_proxy(): and int(content_length) > maximum_size ): return 'Max size', 400 - if resp.status_code == 304: - return '', resp.status_code - if resp.status_code != 200: - logger.debug( - 'image-proxy: wrong response code: {0}'.format( - resp.status_code)) + logger.debug('image-proxy: wrong response code: %i', resp.status_code) if resp.status_code >= 400: return '', resp.status_code return '', 400 - if not resp.headers.get('content-type', '').startswith('image/'): - logger.debug( - 'image-proxy: wrong content-type: {0}'.format( - resp.headers.get('content-type'))) + if not resp.headers.get('Content-Type', '').startswith('image/'): + logger.debug('image-proxy: wrong content-type: %s', resp.headers.get('Content-Type', '')) return '', 400 + forward_resp = True + except httpx.HTTPError: + logger.exception('HTTP error') + return '', 400 + finally: + if resp and not forward_resp: + # the code is about to return an HTTP 400 error to the browser + # we make sure to close the response between searxng and the HTTP server + try: + resp.close() + except httpx.HTTPError: + logger.exception('HTTP error on closing') + + try: headers = dict_subset( resp.headers, - {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'} + {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'} ) - total_length = 0 - def forward_chunk(): - nonlocal total_length + total_length = 0 for chunk in stream: total_length += len(chunk) if total_length > maximum_size: |