author    Alexandre Flament <alex@al-f.net>  2022-05-21 18:24:47 +0200
committer Alexandre Flament <alex@al-f.net>  2022-07-08 22:02:21 +0200
commit    a1e8af0796d532d529eb9d90f315f79dfbd86b0d (patch)
tree      3bf551b24162b05fe50193dd0c3079ced8698116 /searx/network
parent    2864a67ce922ecaf3f224859ee1e4a92f0b9f8c7 (diff)
bing.py: resolve bing.com/ck/a redirections
add a new function searx.network.multi_requests to send multiple HTTP requests at once
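
A minimal usage sketch of the new multi_requests API (placeholder URLs; SearXNG's network loop is assumed to be running, and each list entry comes back as either an httpx.Response or the Exception the request raised):

from searx.network import multi_requests, Request

responses = multi_requests([
    Request.get('https://example.org/a'),
    Request.get('https://example.org/b'),
])
for response in responses:
    if isinstance(response, Exception):
        continue  # e.g. an httpx.TimeoutException for a request that timed out
    print(response.status_code, response.url)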
Diffstat (limited to 'searx/network')
-rw-r--r--  searx/network/__init__.py  128
-rw-r--r--  searx/network/network.py    25
2 files changed, 115 insertions, 38 deletions
diff --git a/searx/network/__init__.py b/searx/network/__init__.py
index 06c9f75a4..8622e9731 100644
--- a/searx/network/__init__.py
+++ b/searx/network/__init__.py
@@ -8,7 +8,8 @@ import concurrent.futures
from queue import SimpleQueue
from types import MethodType
from timeit import default_timer
-from typing import Iterable, Tuple
+from typing import Iterable, NamedTuple, Tuple, List, Dict, Union
+from contextlib import contextmanager
import httpx
import anyio
@@ -48,9 +49,23 @@ def get_context_network():
return THREADLOCAL.__dict__.get('network') or get_network()
-def request(method, url, **kwargs):
- """same as requests/requests/api.py request(...)"""
+@contextmanager
+def _record_http_time():
+ # pylint: disable=too-many-branches
time_before_request = default_timer()
+ start_time = getattr(THREADLOCAL, 'start_time', time_before_request)
+ try:
+ yield start_time
+ finally:
+ # update total_time.
+ # See get_time_for_thread() and reset_time_for_thread()
+ if hasattr(THREADLOCAL, 'total_time'):
+ time_after_request = default_timer()
+ THREADLOCAL.total_time += time_after_request - time_before_request
+
+
+def _get_timeout(start_time, kwargs):
+ # pylint: disable=too-many-branches
# timeout (httpx)
if 'timeout' in kwargs:
@@ -65,45 +80,84 @@ def request(method, url, **kwargs):
# adjust actual timeout
timeout += 0.2 # overhead
- start_time = getattr(THREADLOCAL, 'start_time', time_before_request)
if start_time:
timeout -= default_timer() - start_time
- # raise_for_error
- check_for_httperror = True
- if 'raise_for_httperror' in kwargs:
- check_for_httperror = kwargs['raise_for_httperror']
- del kwargs['raise_for_httperror']
+ return timeout
- # requests compatibility
- if isinstance(url, bytes):
- url = url.decode()
- # network
- network = get_context_network()
-
- # do request
- future = asyncio.run_coroutine_threadsafe(network.request(method, url, **kwargs), get_loop())
- try:
- response = future.result(timeout)
- except concurrent.futures.TimeoutError as e:
- raise httpx.TimeoutException('Timeout', request=None) from e
-
- # requests compatibility
- # see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
- response.ok = not response.is_error
-
- # update total_time.
- # See get_time_for_thread() and reset_time_for_thread()
- if hasattr(THREADLOCAL, 'total_time'):
- time_after_request = default_timer()
- THREADLOCAL.total_time += time_after_request - time_before_request
-
- # raise an exception
- if check_for_httperror:
- raise_for_httperror(response)
-
- return response
+def request(method, url, **kwargs):
+ """same as requests/requests/api.py request(...)"""
+ with _record_http_time() as start_time:
+ network = get_context_network()
+ timeout = _get_timeout(start_time, kwargs)
+ future = asyncio.run_coroutine_threadsafe(network.request(method, url, **kwargs), get_loop())
+ try:
+ return future.result(timeout)
+ except concurrent.futures.TimeoutError as e:
+ raise httpx.TimeoutException('Timeout', request=None) from e
+
+
+def multi_requests(request_list: List["Request"]) -> List[Union[httpx.Response, Exception]]:
+ """send multiple HTTP requests in parallel. Wait for all requests to finish."""
+ with _record_http_time() as start_time:
+ # send the requests
+ network = get_context_network()
+ loop = get_loop()
+ future_list = []
+ for request_desc in request_list:
+ timeout = _get_timeout(start_time, request_desc.kwargs)
+ future = asyncio.run_coroutine_threadsafe(
+ network.request(request_desc.method, request_desc.url, **request_desc.kwargs), loop
+ )
+ future_list.append((future, timeout))
+
+ # read the responses
+ responses = []
+ for future, timeout in future_list:
+ try:
+ responses.append(future.result(timeout))
+ except concurrent.futures.TimeoutError:
+ responses.append(httpx.TimeoutException('Timeout', request=None))
+ except Exception as e: # pylint: disable=broad-except
+ responses.append(e)
+ return responses
+
+
+class Request(NamedTuple):
+ """Request description for the multi_requests function"""
+
+ method: str
+ url: str
+ kwargs: Dict[str, str] = {}
+
+ @staticmethod
+ def get(url, **kwargs):
+ return Request('GET', url, kwargs)
+
+ @staticmethod
+ def options(url, **kwargs):
+ return Request('OPTIONS', url, kwargs)
+
+ @staticmethod
+ def head(url, **kwargs):
+ return Request('HEAD', url, kwargs)
+
+ @staticmethod
+ def post(url, **kwargs):
+ return Request('POST', url, kwargs)
+
+ @staticmethod
+ def put(url, **kwargs):
+ return Request('PUT', url, kwargs)
+
+ @staticmethod
+ def patch(url, **kwargs):
+ return Request('PATCH', url, kwargs)
+
+ @staticmethod
+ def delete(url, **kwargs):
+ return Request('DELETE', url, kwargs)
def get(url, **kwargs):
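
The refactor above splits the old request() body into reusable helpers: _record_http_time() adds the elapsed wall time to THREADLOCAL.total_time, and _get_timeout() shrinks each request's timeout by the time already spent since THREADLOCAL.start_time, so requests issued later in a search run against a smaller remaining budget. A standalone sketch of that deadline pattern (illustrative names, not code from this commit):

import threading
from timeit import default_timer

THREADLOCAL = threading.local()

def remaining_timeout(default_timeout: float) -> float:
    # the configured timeout minus the time this thread has already spent,
    # so a batch of requests shares one overall deadline
    start_time = getattr(THREADLOCAL, 'start_time', None)
    if start_time is None:
        return default_timeout
    return default_timeout - (default_timer() - start_time)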
diff --git a/searx/network/network.py b/searx/network/network.py
index 69af3b7c4..677a908bf 100644
--- a/searx/network/network.py
+++ b/searx/network/network.py
@@ -13,6 +13,7 @@ import httpx
from searx import logger, searx_debug
from .client import new_client, get_loop, AsyncHTTPTransportNoHttp
+from .raise_for_httperror import raise_for_httperror
logger = logger.getChild('network')
@@ -226,6 +227,27 @@ class Network:
kwargs['follow_redirects'] = kwargs.pop('allow_redirects')
return kwargs_clients
+ @staticmethod
+ def extract_do_raise_for_httperror(kwargs):
+ do_raise_for_httperror = True
+ if 'raise_for_httperror' in kwargs:
+ do_raise_for_httperror = kwargs['raise_for_httperror']
+ del kwargs['raise_for_httperror']
+ return do_raise_for_httperror
+
+ @staticmethod
+ def patch_response(response, do_raise_for_httperror):
+ if isinstance(response, httpx.Response):
+ # requests compatibility (response is not streamed)
+ # see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
+ response.ok = not response.is_error
+
+ # raise an exception
+ if do_raise_for_httperror:
+ raise_for_httperror(response)
+
+ return response
+
def is_valid_response(self, response):
# pylint: disable=too-many-boolean-expressions
if (
@@ -239,6 +261,7 @@ class Network:
async def call_client(self, stream, method, url, **kwargs):
retries = self.retries
was_disconnected = False
+ do_raise_for_httperror = Network.extract_do_raise_for_httperror(kwargs)
kwargs_clients = Network.extract_kwargs_clients(kwargs)
while retries >= 0: # pragma: no cover
client = await self.get_client(**kwargs_clients)
@@ -248,7 +271,7 @@ class Network:
else:
response = await client.request(method, url, **kwargs)
if self.is_valid_response(response) or retries <= 0:
- return response
+ return Network.patch_response(response, do_raise_for_httperror)
except httpx.RemoteProtocolError as e:
if not was_disconnected:
# the server has closed the connection:
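
With raise_for_httperror moved from searx.network.request() into Network.patch_response(), the requests-style response.ok attribute and the optional HTTP error exception are now applied inside call_client(), so they cover multi_requests() as well as request(). The opt-out keyword is unchanged; a sketch with a placeholder URL:

from searx.network import get

response = get('https://example.org/maybe-404', raise_for_httperror=False)
if not response.ok:  # requests-style attribute set by Network.patch_response
    pass  # handle the HTTP error without an exception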