diff options
author | Alexandre Flament <alex@al-f.net> | 2021-03-18 19:59:01 +0100 |
---|---|---|
committer | Alexandre Flament <alex@al-f.net> | 2021-04-10 15:38:33 +0200 |
commit | eaa694fb7d0e47b943bc6d6edb6cb6a40ab2d85e (patch) | |
tree | 024786c8a7003be24bbc566cb8c8e734a143f99d /searx/webapp.py | |
parent | 111180705b6f3b142732eb6325de1346f6372828 (diff) | |
download | searxng-eaa694fb7d0e47b943bc6d6edb6cb6a40ab2d85e.tar.gz searxng-eaa694fb7d0e47b943bc6d6edb6cb6a40ab2d85e.zip |
[enh] replace requests by httpx
Diffstat (limited to 'searx/webapp.py')
-rwxr-xr-x | searx/webapp.py | 93 |
1 files changed, 62 insertions, 31 deletions
```diff
diff --git a/searx/webapp.py b/searx/webapp.py
index 072f140ca..1571df8f1 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -26,12 +26,26 @@ if __name__ == '__main__':
     from os.path import realpath, dirname
     sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
 
+# set Unix thread name
+try:
+    import setproctitle
+except ImportError:
+    pass
+else:
+    import threading
+    old_thread_init = threading.Thread.__init__
+
+    def new_thread_init(self, *args, **kwargs):
+        old_thread_init(self, *args, **kwargs)
+        setproctitle.setthreadtitle(self._name)
+    threading.Thread.__init__ = new_thread_init
+
 import hashlib
 import hmac
 import json
 import os
 
-import requests
+import httpx
 
 from searx import logger
 logger = logger.getChild('webapp')
@@ -79,7 +93,7 @@ from searx.plugins import plugins
 from searx.plugins.oa_doi_rewrite import get_doi_resolver
 from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
 from searx.answerers import answerers
-from searx.poolrequests import get_global_proxies
+from searx import poolrequests
 from searx.answerers import ask
 from searx.metrology.error_recorder import errors_per_engines
 
@@ -890,50 +904,62 @@ def _is_selected_language_supported(engine, preferences):
 
 @app.route('/image_proxy', methods=['GET'])
 def image_proxy():
-    url = request.args.get('url').encode()
+    url = request.args.get('url')
 
     if not url:
         return '', 400
 
-    h = new_hmac(settings['server']['secret_key'], url)
+    h = new_hmac(settings['server']['secret_key'], url.encode())
 
     if h != request.args.get('h'):
         return '', 400
 
-    headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
-    headers['User-Agent'] = gen_useragent()
-
-    resp = requests.get(url,
-                        stream=True,
-                        timeout=settings['outgoing']['request_timeout'],
-                        headers=headers,
-                        proxies=get_global_proxies())
+    maximum_size = 5 * 1024 * 1024
 
-    if resp.status_code == 304:
-        return '', resp.status_code
-
-    if resp.status_code != 200:
-        logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
-        if resp.status_code >= 400:
+    try:
+        headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
+        headers['User-Agent'] = gen_useragent()
+        stream = poolrequests.stream(
+            method='GET',
+            url=url,
+            headers=headers,
+            timeout=settings['outgoing']['request_timeout'],
+            allow_redirects=True,
+            max_redirects=20)
+
+        resp = next(stream)
+        content_length = resp.headers.get('Content-Length')
+        if content_length and content_length.isdigit() and int(content_length) > maximum_size:
+            return 'Max size', 400
+
+        if resp.status_code == 304:
             return '', resp.status_code
-        return '', 400
 
-    if not resp.headers.get('content-type', '').startswith('image/'):
-        logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
-        return '', 400
+        if resp.status_code != 200:
+            logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
+            if resp.status_code >= 400:
+                return '', resp.status_code
+            return '', 400
+
+        if not resp.headers.get('content-type', '').startswith('image/'):
+            logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
+            return '', 400
 
-    img = b''
-    chunk_counter = 0
+        headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})
 
-    for chunk in resp.iter_content(1024 * 1024):
-        chunk_counter += 1
-        if chunk_counter > 5:
-            return '', 502  # Bad gateway - file is too big (>5M)
-        img += chunk
+        total_length = 0
 
-    headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})
+        def forward_chunk():
+            nonlocal total_length
+            for chunk in stream:
+                total_length += len(chunk)
+                if total_length > maximum_size:
+                    break
+                yield chunk
 
-    return Response(img, mimetype=resp.headers['content-type'], headers=headers)
+        return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers)
+    except httpx.HTTPError:
+        return '', 400
 
 
 @app.route('/stats', methods=['GET'])
@@ -1083,6 +1109,11 @@ def config():
     })
 
 
+@app.route('/config/http')
+def config_http():
+    return jsonify(poolrequests.debug_asyncclients())
+
+
 @app.errorhandler(404)
 def page_not_found(e):
     return render('404.html'), 404
```