diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2023-06-18 16:43:48 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2023-06-19 19:49:44 +0200 |
commit | fa1ef9a07b79ab740c127bac0d11b8315a5130ff (patch) | |
tree | 80306333c2e5a13a0b3a286e7dad7b5df633689e /searx/webapp.py | |
parent | 71b6ff07ca137a39576c3084abec348ded40564e (diff) | |
download | searxng-fa1ef9a07b79ab740c127bac0d11b8315a5130ff.tar.gz searxng-fa1ef9a07b79ab740c127bac0d11b8315a5130ff.zip |
[mod] move some code from webapp module to webutils module (no functional change)
Over the years the webapp module became more and more a mess. To improve the
modulaization a little this patch moves some implementations from the webapp
module to webutils module.
HINT: this patch brings non functional change
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/webapp.py')
-rwxr-xr-x | searx/webapp.py | 151 |
1 files changed, 35 insertions, 116 deletions
diff --git a/searx/webapp.py b/searx/webapp.py index d6322447a..59c1dd1a1 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -58,7 +58,7 @@ from searx import ( from searx import infopage from searx.data import ENGINE_DESCRIPTIONS -from searx.results import Timing, UnresponsiveEngine +from searx.results import Timing from searx.settings_defaults import OUTPUT_FORMATS from searx.settings_loader import get_default_settings_path from searx.exceptions import SearxParameterException @@ -68,18 +68,18 @@ from searx.engines import ( engines, engine_shortcuts, ) + +from searx import webutils from searx.webutils import ( - UnicodeWriter, highlight_content, get_static_files, get_result_templates, get_themes, - prettify_url, + exception_classname_to_text, new_hmac, is_hmac_of, is_flask_run_cmdline, group_engines_in_tab, - searxng_l10n_timespan, ) from searx.webadapter import ( get_search_query_from_webapp, @@ -87,7 +87,6 @@ from searx.webadapter import ( parse_lang, ) from searx.utils import ( - html_to_text, gen_useragent, dict_subset, ) @@ -165,39 +164,6 @@ app.jinja_env.add_extension('jinja2.ext.loopcontrols') # pylint: disable=no-mem app.jinja_env.filters['group_engines_in_tab'] = group_engines_in_tab # pylint: disable=no-member app.secret_key = settings['server']['secret_key'] -timeout_text = gettext('timeout') -parsing_error_text = gettext('parsing error') -http_protocol_error_text = gettext('HTTP protocol error') -network_error_text = gettext('network error') -ssl_cert_error_text = gettext("SSL error: certificate validation has failed") -exception_classname_to_text = { - None: gettext('unexpected crash'), - 'timeout': timeout_text, - 'asyncio.TimeoutError': timeout_text, - 'httpx.TimeoutException': timeout_text, - 'httpx.ConnectTimeout': timeout_text, - 'httpx.ReadTimeout': timeout_text, - 'httpx.WriteTimeout': timeout_text, - 'httpx.HTTPStatusError': gettext('HTTP error'), - 'httpx.ConnectError': gettext("HTTP connection error"), - 'httpx.RemoteProtocolError': http_protocol_error_text, - 'httpx.LocalProtocolError': http_protocol_error_text, - 'httpx.ProtocolError': http_protocol_error_text, - 'httpx.ReadError': network_error_text, - 'httpx.WriteError': network_error_text, - 'httpx.ProxyError': gettext("proxy error"), - 'searx.exceptions.SearxEngineCaptchaException': gettext("CAPTCHA"), - 'searx.exceptions.SearxEngineTooManyRequestsException': gettext("too many requests"), - 'searx.exceptions.SearxEngineAccessDeniedException': gettext("access denied"), - 'searx.exceptions.SearxEngineAPIException': gettext("server API error"), - 'searx.exceptions.SearxEngineXPathException': parsing_error_text, - 'KeyError': parsing_error_text, - 'json.decoder.JSONDecodeError': parsing_error_text, - 'lxml.etree.ParserError': parsing_error_text, - 'ssl.SSLCertVerificationError': ssl_cert_error_text, # for Python > 3.7 - 'ssl.CertificateError': ssl_cert_error_text, # for Python 3.7 -} - class ExtendedRequest(flask.Request): """This class is never initialized and only used for type checking.""" @@ -686,9 +652,7 @@ def search(): search_query, raw_text_query, _, _, selected_locale = get_search_query_from_webapp( request.preferences, request.form ) - # search = Search(search_query) # without plugins search = SearchWithPlugins(search_query, request.user_plugins, request) # pylint: disable=redefined-outer-name - result_container = search.search() except SearxParameterException as e: @@ -698,45 +662,54 @@ def search(): logger.exception(e, exc_info=True) return index_error(output_format, gettext('search error')), 500 - # results - results = result_container.get_ordered_results() - number_of_results = result_container.results_number() - if number_of_results < result_container.results_length(): - number_of_results = 0 - - # checkin for a external bang + # 1. check if the result is a redirect for an external bang if result_container.redirect_url: return redirect(result_container.redirect_url) - # Server-Timing header + # 2. add Server-Timing header for measuring performance characteristics of + # web applications request.timings = result_container.get_timings() # pylint: disable=assigning-non-slot + # 3. formats without a template + + if output_format == 'json': + + response = webutils.get_json_response(search_query, result_container) + return Response(response, mimetype='application/json') + + if output_format == 'csv': + + csv = webutils.CSVWriter(StringIO()) + webutils.write_csv_response(csv, result_container) + csv.stream.seek(0) + + response = Response(csv.stream.read(), mimetype='application/csv') + cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query) + response.headers.add('Content-Disposition', cont_disp) + return response + + # 4. formats rendered by a template / RSS & HTML + current_template = None previous_result = None - # output + results = result_container.get_ordered_results() for result in results: if output_format == 'html': if 'content' in result and result['content']: result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query) if 'title' in result and result['title']: result['title'] = highlight_content(escape(result['title'] or ''), search_query.query) - else: - if result.get('content'): - result['content'] = html_to_text(result['content']).strip() - # removing html content and whitespace duplications - result['title'] = ' '.join(html_to_text(result['title']).strip().split()) if 'url' in result: - result['pretty_url'] = prettify_url(result['url']) - + result['pretty_url'] = webutils.prettify_url(result['url']) if result.get('publishedDate'): # do not try to get a date from an empty string or a None type try: # test if publishedDate >= 1900 (datetime module bug) result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z') except ValueError: result['publishedDate'] = None else: - result['publishedDate'] = searxng_l10n_timespan(result['publishedDate']) + result['publishedDate'] = webutils.searxng_l10n_timespan(result['publishedDate']) # set result['open_group'] = True when the template changes from the previous result # set result['close_group'] = True when the template changes on the next result @@ -750,42 +723,7 @@ def search(): if previous_result: previous_result['close_group'] = True - if output_format == 'json': - x = { - 'query': search_query.query, - 'number_of_results': number_of_results, - 'results': results, - 'answers': list(result_container.answers), - 'corrections': list(result_container.corrections), - 'infoboxes': result_container.infoboxes, - 'suggestions': list(result_container.suggestions), - 'unresponsive_engines': __get_translated_errors(result_container.unresponsive_engines), - } - response = json.dumps(x, default=lambda item: list(item) if isinstance(item, set) else item) - return Response(response, mimetype='application/json') - - if output_format == 'csv': - csv = UnicodeWriter(StringIO()) - keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type') - csv.writerow(keys) - for row in results: - row['host'] = row['parsed_url'].netloc - row['type'] = 'result' - csv.writerow([row.get(key, '') for key in keys]) - for a in result_container.answers: - row = {'title': a, 'type': 'answer'} - csv.writerow([row.get(key, '') for key in keys]) - for a in result_container.suggestions: - row = {'title': a, 'type': 'suggestion'} - csv.writerow([row.get(key, '') for key in keys]) - for a in result_container.corrections: - row = {'title': a, 'type': 'correction'} - csv.writerow([row.get(key, '') for key in keys]) - csv.stream.seek(0) - response = Response(csv.stream.read(), mimetype='application/csv') - cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query) - response.headers.add('Content-Disposition', cont_disp) - return response + # 4.a RSS if output_format == 'rss': response_rss = render( @@ -795,11 +733,11 @@ def search(): corrections=result_container.corrections, suggestions=result_container.suggestions, q=request.form['q'], - number_of_results=number_of_results, + number_of_results=result_container.number_of_results, ) return Response(response_rss, mimetype='text/xml') - # HTML output format + # 4.b HTML # suggestions: use RawTextQuery to get the suggestion URLs with the same bang suggestion_urls = list( @@ -827,14 +765,14 @@ def search(): selected_categories = search_query.categories, pageno = search_query.pageno, time_range = search_query.time_range or '', - number_of_results = format_decimal(number_of_results), + number_of_results = format_decimal(result_container.number_of_results), suggestions = suggestion_urls, answers = result_container.answers, corrections = correction_urls, infoboxes = result_container.infoboxes, engine_data = result_container.engine_data, paging = result_container.paging, - unresponsive_engines = __get_translated_errors( + unresponsive_engines = webutils.get_translated_errors( result_container.unresponsive_engines ), current_locale = request.preferences.get_value("locale"), @@ -849,25 +787,6 @@ def search(): ) -def __get_translated_errors(unresponsive_engines: Iterable[UnresponsiveEngine]): - translated_errors = [] - - # make a copy unresponsive_engines to avoid "RuntimeError: Set changed size - # during iteration" it happens when an engine modifies the ResultContainer - # after the search_multiple_requests method has stopped waiting - - for unresponsive_engine in unresponsive_engines: - error_user_text = exception_classname_to_text.get(unresponsive_engine.error_type) - if not error_user_text: - error_user_text = exception_classname_to_text[None] - error_msg = gettext(error_user_text) - if unresponsive_engine.suspended: - error_msg = gettext('Suspended') + ': ' + error_msg - translated_errors.append((unresponsive_engine.engine, error_msg)) - - return sorted(translated_errors, key=lambda e: e[0]) - - @app.route('/about', methods=['GET']) def about(): """Redirect to about page""" |