diff options
author | Alexandre Flament <alex@al-f.net> | 2022-05-21 18:24:47 +0200 |
---|---|---|
committer | Alexandre Flament <alex@al-f.net> | 2022-07-08 22:02:21 +0200 |
commit | a1e8af0796d532d529eb9d90f315f79dfbd86b0d (patch) | |
tree | 3bf551b24162b05fe50193dd0c3079ced8698116 /searx/network | |
parent | 2864a67ce922ecaf3f224859ee1e4a92f0b9f8c7 (diff) | |
download | searxng-a1e8af0796d532d529eb9d90f315f79dfbd86b0d.tar.gz searxng-a1e8af0796d532d529eb9d90f315f79dfbd86b0d.zip |
bing.py: resolve bing.com/ck/a redirections
add a new function searx.network.multi_requests to send multiple HTTP requests at once
Diffstat (limited to 'searx/network')
-rw-r--r-- | searx/network/__init__.py | 128 | ||||
-rw-r--r-- | searx/network/network.py | 25 |
2 files changed, 115 insertions, 38 deletions
diff --git a/searx/network/__init__.py b/searx/network/__init__.py index 06c9f75a4..8622e9731 100644 --- a/searx/network/__init__.py +++ b/searx/network/__init__.py @@ -8,7 +8,8 @@ import concurrent.futures from queue import SimpleQueue from types import MethodType from timeit import default_timer -from typing import Iterable, Tuple +from typing import Iterable, NamedTuple, Tuple, List, Dict, Union +from contextlib import contextmanager import httpx import anyio @@ -48,9 +49,23 @@ def get_context_network(): return THREADLOCAL.__dict__.get('network') or get_network() -def request(method, url, **kwargs): - """same as requests/requests/api.py request(...)""" +@contextmanager +def _record_http_time(): + # pylint: disable=too-many-branches time_before_request = default_timer() + start_time = getattr(THREADLOCAL, 'start_time', time_before_request) + try: + yield start_time + finally: + # update total_time. + # See get_time_for_thread() and reset_time_for_thread() + if hasattr(THREADLOCAL, 'total_time'): + time_after_request = default_timer() + THREADLOCAL.total_time += time_after_request - time_before_request + + +def _get_timeout(start_time, kwargs): + # pylint: disable=too-many-branches # timeout (httpx) if 'timeout' in kwargs: @@ -65,45 +80,84 @@ def request(method, url, **kwargs): # ajdust actual timeout timeout += 0.2 # overhead - start_time = getattr(THREADLOCAL, 'start_time', time_before_request) if start_time: timeout -= default_timer() - start_time - # raise_for_error - check_for_httperror = True - if 'raise_for_httperror' in kwargs: - check_for_httperror = kwargs['raise_for_httperror'] - del kwargs['raise_for_httperror'] + return timeout - # requests compatibility - if isinstance(url, bytes): - url = url.decode() - # network - network = get_context_network() - - # do request - future = asyncio.run_coroutine_threadsafe(network.request(method, url, **kwargs), get_loop()) - try: - response = future.result(timeout) - except concurrent.futures.TimeoutError as e: - raise httpx.TimeoutException('Timeout', request=None) from e - - # requests compatibility - # see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses - response.ok = not response.is_error - - # update total_time. - # See get_time_for_thread() and reset_time_for_thread() - if hasattr(THREADLOCAL, 'total_time'): - time_after_request = default_timer() - THREADLOCAL.total_time += time_after_request - time_before_request - - # raise an exception - if check_for_httperror: - raise_for_httperror(response) - - return response +def request(method, url, **kwargs): + """same as requests/requests/api.py request(...)""" + with _record_http_time() as start_time: + network = get_context_network() + timeout = _get_timeout(start_time, kwargs) + future = asyncio.run_coroutine_threadsafe(network.request(method, url, **kwargs), get_loop()) + try: + return future.result(timeout) + except concurrent.futures.TimeoutError as e: + raise httpx.TimeoutException('Timeout', request=None) from e + + +def multi_requests(request_list: List["Request"]) -> List[Union[httpx.Response, Exception]]: + """send multiple HTTP requests in parallel. Wait for all requests to finish.""" + with _record_http_time() as start_time: + # send the requests + network = get_context_network() + loop = get_loop() + future_list = [] + for request_desc in request_list: + timeout = _get_timeout(start_time, request_desc.kwargs) + future = asyncio.run_coroutine_threadsafe( + network.request(request_desc.method, request_desc.url, **request_desc.kwargs), loop + ) + future_list.append((future, timeout)) + + # read the responses + responses = [] + for future, timeout in future_list: + try: + responses.append(future.result(timeout)) + except concurrent.futures.TimeoutError: + responses.append(httpx.TimeoutException('Timeout', request=None)) + except Exception as e: # pylint: disable=broad-except + responses.append(e) + return responses + + +class Request(NamedTuple): + """Request description for the multi_requests function""" + + method: str + url: str + kwargs: Dict[str, str] = {} + + @staticmethod + def get(url, **kwargs): + return Request('GET', url, kwargs) + + @staticmethod + def options(url, **kwargs): + return Request('OPTIONS', url, kwargs) + + @staticmethod + def head(url, **kwargs): + return Request('HEAD', url, kwargs) + + @staticmethod + def post(url, **kwargs): + return Request('POST', url, kwargs) + + @staticmethod + def put(url, **kwargs): + return Request('PUT', url, kwargs) + + @staticmethod + def patch(url, **kwargs): + return Request('PATCH', url, kwargs) + + @staticmethod + def delete(url, **kwargs): + return Request('DELETE', url, kwargs) def get(url, **kwargs): diff --git a/searx/network/network.py b/searx/network/network.py index 69af3b7c4..677a908bf 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -13,6 +13,7 @@ import httpx from searx import logger, searx_debug from .client import new_client, get_loop, AsyncHTTPTransportNoHttp +from .raise_for_httperror import raise_for_httperror logger = logger.getChild('network') @@ -226,6 +227,27 @@ class Network: kwargs['follow_redirects'] = kwargs.pop('allow_redirects') return kwargs_clients + @staticmethod + def extract_do_raise_for_httperror(kwargs): + do_raise_for_httperror = True + if 'raise_for_httperror' in kwargs: + do_raise_for_httperror = kwargs['raise_for_httperror'] + del kwargs['raise_for_httperror'] + return do_raise_for_httperror + + @staticmethod + def patch_response(response, do_raise_for_httperror): + if isinstance(response, httpx.Response): + # requests compatibility (response is not streamed) + # see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses + response.ok = not response.is_error + + # raise an exception + if do_raise_for_httperror: + raise_for_httperror(response) + + return response + def is_valid_response(self, response): # pylint: disable=too-many-boolean-expressions if ( @@ -239,6 +261,7 @@ class Network: async def call_client(self, stream, method, url, **kwargs): retries = self.retries was_disconnected = False + do_raise_for_httperror = Network.extract_do_raise_for_httperror(kwargs) kwargs_clients = Network.extract_kwargs_clients(kwargs) while retries >= 0: # pragma: no cover client = await self.get_client(**kwargs_clients) @@ -248,7 +271,7 @@ class Network: else: response = await client.request(method, url, **kwargs) if self.is_valid_response(response) or retries <= 0: - return response + return Network.patch_response(response, do_raise_for_httperror) except httpx.RemoteProtocolError as e: if not was_disconnected: # the server has closed the connection: |