summary refs log tree commit diff
path: root/searx/webapp.py
diff options
context:
space:
mode:
author Alexandre Flament <alex@al-f.net> 2021-03-18 19:59:01 +0100
committer Alexandre Flament <alex@al-f.net> 2021-04-10 15:38:33 +0200
commit eaa694fb7d0e47b943bc6d6edb6cb6a40ab2d85e (patch)
tree 024786c8a7003be24bbc566cb8c8e734a143f99d /searx/webapp.py
parent 111180705b6f3b142732eb6325de1346f6372828 (diff)
download searxng-eaa694fb7d0e47b943bc6d6edb6cb6a40ab2d85e.tar.gz
searxng-eaa694fb7d0e47b943bc6d6edb6cb6a40ab2d85e.zip
[enh] replace requests by httpx
Diffstat (limited to 'searx/webapp.py')
-rwxr-xr-x searx/webapp.py 93
1 files changed, 62 insertions, 31 deletions
diff --git a/searx/webapp.py b/searx/webapp.py
index 072f140ca..1571df8f1 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -26,12 +26,26 @@ if __name__ == '__main__':
from os.path import realpath, dirname
sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
+# set Unix thread name
+try:
+ import setproctitle
+except ImportError:
+ pass
+else:
+ import threading
+ old_thread_init = threading.Thread.__init__
+
+ def new_thread_init(self, *args, **kwargs):
+ old_thread_init(self, *args, **kwargs)
+ setproctitle.setthreadtitle(self._name)
+ threading.Thread.__init__ = new_thread_init
+
import hashlib
import hmac
import json
import os
-import requests
+import httpx
from searx import logger
logger = logger.getChild('webapp')
@@ -79,7 +93,7 @@ from searx.plugins import plugins
from searx.plugins.oa_doi_rewrite import get_doi_resolver
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
from searx.answerers import answerers
-from searx.poolrequests import get_global_proxies
+from searx import poolrequests
from searx.answerers import ask
from searx.metrology.error_recorder import errors_per_engines
@@ -890,50 +904,62 @@ def _is_selected_language_supported(engine, preferences):
@app.route('/image_proxy', methods=['GET'])
def image_proxy():
- url = request.args.get('url').encode()
+ url = request.args.get('url')
if not url:
return '', 400
- h = new_hmac(settings['server']['secret_key'], url)
+ h = new_hmac(settings['server']['secret_key'], url.encode())
if h != request.args.get('h'):
return '', 400
- headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
- headers['User-Agent'] = gen_useragent()
-
- resp = requests.get(url,
- stream=True,
- timeout=settings['outgoing']['request_timeout'],
- headers=headers,
- proxies=get_global_proxies())
+ maximum_size = 5 * 1024 * 1024
- if resp.status_code == 304:
- return '', resp.status_code
-
- if resp.status_code != 200:
- logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
- if resp.status_code >= 400:
+ try:
+ headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
+ headers['User-Agent'] = gen_useragent()
+ stream = poolrequests.stream(
+ method='GET',
+ url=url,
+ headers=headers,
+ timeout=settings['outgoing']['request_timeout'],
+ allow_redirects=True,
+ max_redirects=20)
+
+ resp = next(stream)
+ content_length = resp.headers.get('Content-Length')
+ if content_length and content_length.isdigit() and int(content_length) > maximum_size:
+ return 'Max size', 400
+
+ if resp.status_code == 304:
return '', resp.status_code
- return '', 400
- if not resp.headers.get('content-type', '').startswith('image/'):
- logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
- return '', 400
+ if resp.status_code != 200:
+ logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
+ if resp.status_code >= 400:
+ return '', resp.status_code
+ return '', 400
+
+ if not resp.headers.get('content-type', '').startswith('image/'):
+ logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
+ return '', 400
- img = b''
- chunk_counter = 0
+ headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})
- for chunk in resp.iter_content(1024 * 1024):
- chunk_counter += 1
- if chunk_counter > 5:
- return '', 502 # Bad gateway - file is too big (>5M)
- img += chunk
+ total_length = 0
- headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})
+ def forward_chunk():
+ nonlocal total_length
+ for chunk in stream:
+ total_length += len(chunk)
+ if total_length > maximum_size:
+ break
+ yield chunk
- return Response(img, mimetype=resp.headers['content-type'], headers=headers)
+ return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers)
+ except httpx.HTTPError:
+ return '', 400
@app.route('/stats', methods=['GET'])
@@ -1083,6 +1109,11 @@ def config():
})
+@app.route('/config/http')
+def config_http():
+ return jsonify(poolrequests.debug_asyncclients())
+
+
@app.errorhandler(404)
def page_not_found(e):
return render('404.html'), 404