summary refs log tree commit diff
path: root/searx/webutils.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/webutils.py')
-rw-r--r-- searx/webutils.py 117
1 files changed, 109 insertions, 8 deletions
diff --git a/searx/webutils.py b/searx/webutils.py
index 470833291..ddd9891bf 100644
--- a/searx/webutils.py
+++ b/searx/webutils.py
@@ -9,31 +9,80 @@ import hmac
import re
import inspect
import itertools
+import json
from datetime import datetime, timedelta
from typing import Iterable, List, Tuple, Dict, TYPE_CHECKING
from io import StringIO
from codecs import getincrementalencoder
-from flask_babel import gettext, format_date
+from flask_babel import gettext, format_date # type: ignore
from searx import logger, settings
from searx.engines import DEFAULT_CATEGORY
if TYPE_CHECKING:
from searx.enginelib import Engine
-
+ from searx.results import ResultContainer
+ from searx.search import SearchQuery
+ from searx.results import UnresponsiveEngine
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
logger = logger.getChild('webutils')
-
-class UnicodeWriter:
- """
- A CSV writer which will write rows to CSV file "f",
- which is encoded in the given encoding.
- """
+# User-facing message fragments shared by several entries of the mapping below.
+timeout_text = gettext('timeout')
+parsing_error_text = gettext('parsing error')
+http_protocol_error_text = gettext('HTTP protocol error')
+network_error_text = gettext('network error')
+ssl_cert_error_text = gettext("SSL error: certificate validation has failed")
+# Maps an error type (an exception class name string, 'timeout', or None) to
+# the message shown to the user. The None entry is the fallback that
+# get_translated_errors() uses when an error type has no entry of its own.
+exception_classname_to_text = {
+ None: gettext('unexpected crash'),
+ 'timeout': timeout_text,
+ 'asyncio.TimeoutError': timeout_text,
+ 'httpx.TimeoutException': timeout_text,
+ 'httpx.ConnectTimeout': timeout_text,
+ 'httpx.ReadTimeout': timeout_text,
+ 'httpx.WriteTimeout': timeout_text,
+ 'httpx.HTTPStatusError': gettext('HTTP error'),
+ 'httpx.ConnectError': gettext("HTTP connection error"),
+ 'httpx.RemoteProtocolError': http_protocol_error_text,
+ 'httpx.LocalProtocolError': http_protocol_error_text,
+ 'httpx.ProtocolError': http_protocol_error_text,
+ 'httpx.ReadError': network_error_text,
+ 'httpx.WriteError': network_error_text,
+ 'httpx.ProxyError': gettext("proxy error"),
+ 'searx.exceptions.SearxEngineCaptchaException': gettext("CAPTCHA"),
+ 'searx.exceptions.SearxEngineTooManyRequestsException': gettext("too many requests"),
+ 'searx.exceptions.SearxEngineAccessDeniedException': gettext("access denied"),
+ 'searx.exceptions.SearxEngineAPIException': gettext("server API error"),
+ 'searx.exceptions.SearxEngineXPathException': parsing_error_text,
+ 'KeyError': parsing_error_text,
+ 'json.decoder.JSONDecodeError': parsing_error_text,
+ 'lxml.etree.ParserError': parsing_error_text,
+ 'ssl.SSLCertVerificationError': ssl_cert_error_text, # for Python > 3.7
+ 'ssl.CertificateError': ssl_cert_error_text, # for Python 3.7
+}
+
+
+def get_translated_errors(unresponsive_engines: Iterable[UnresponsiveEngine]):
+ """Return ``(engine, error message)`` tuples for *unresponsive_engines*,
+ sorted by their first element (the engine).
+
+ The message for each entry is looked up in ``exception_classname_to_text``
+ by its ``error_type``; when that yields nothing, the ``None`` entry is used.
+ Entries whose ``suspended`` attribute is truthy get the message prefixed
+ with the translation of ``'Suspended'``.
+ """
+ # NOTE(review): UnresponsiveEngine is imported only under TYPE_CHECKING, so
+ # the annotation above must not be evaluated at runtime -- presumably the
+ # module enables ``from __future__ import annotations``; confirm.
+ translated_errors = []
+
+ for unresponsive_engine in unresponsive_engines:
+ error_user_text = exception_classname_to_text.get(unresponsive_engine.error_type)
+ if not error_user_text:
+ error_user_text = exception_classname_to_text[None]
+ # The table values already went through gettext() at import time; gettext()
+ # is applied again here -- presumably to translate per request; confirm.
+ error_msg = gettext(error_user_text)
+ if unresponsive_engine.suspended:
+ error_msg = gettext('Suspended') + ': ' + error_msg
+ translated_errors.append((unresponsive_engine.engine, error_msg))
+
+ return sorted(translated_errors, key=lambda e: e[0])
+
+
+class CSVWriter:
+ """A CSV writer which will write rows to CSV file "f", which is encoded in
+ the given encoding."""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
@@ -59,6 +108,58 @@ class UnicodeWriter:
self.writerow(row)
+def write_csv_response(csv: CSVWriter, rc: ResultContainer) -> None:
+ """Write rows of the results to a query (``application/csv``) into a CSV
+ table (:py:obj:`CSVWriter`). First line in the table contains the column
+ names. The column "type" specifies the type, the following types are
+ included in the table:
+
+ - result
+ - answer
+ - suggestion
+ - correction
+
+ Rows for answers, suggestions and corrections carry their text in the
+ "title" column; columns without a value are written as empty strings.
+
+ """
+
+ # NOTE: inside this function the parameter ``csv`` shadows the module-level
+ # name ``csv`` that CSVWriter uses.
+ results = rc.get_ordered_results()
+ keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type')
+ csv.writerow(keys)
+
+ for row in results:
+ # The result dicts returned by get_ordered_results() are modified in
+ # place here: 'host' and 'type' are set on each of them.
+ row['host'] = row['parsed_url'].netloc
+ row['type'] = 'result'
+ csv.writerow([row.get(key, '') for key in keys])
+
+ for a in rc.answers:
+ row = {'title': a, 'type': 'answer'}
+ csv.writerow([row.get(key, '') for key in keys])
+
+ for a in rc.suggestions:
+ row = {'title': a, 'type': 'suggestion'}
+ csv.writerow([row.get(key, '') for key in keys])
+
+ for a in rc.corrections:
+ row = {'title': a, 'type': 'correction'}
+ csv.writerow([row.get(key, '') for key in keys])
+
+
+def get_json_response(sq: SearchQuery, rc: ResultContainer) -> str:
+ """Returns the JSON string of the results to a query (``application/json``)"""
+ # Holds rc.number_of_results, not the result list (that one is fetched
+ # with rc.get_ordered_results() for the 'results' key below).
+ results = rc.number_of_results
+ x = {
+ 'query': sq.query,
+ 'number_of_results': results,
+ 'results': rc.get_ordered_results(),
+ 'answers': list(rc.answers),
+ 'corrections': list(rc.corrections),
+ 'infoboxes': rc.infoboxes,
+ 'suggestions': list(rc.suggestions),
+ 'unresponsive_engines': get_translated_errors(rc.unresponsive_engines),
+ }
+ # The ``default`` hook turns sets into lists. NOTE(review): any other
+ # object json cannot serialize is returned unchanged instead of raising
+ # TypeError -- confirm json.dumps() still fails with a useful error then.
+ response = json.dumps(x, default=lambda item: list(item) if isinstance(item, set) else item)
+ return response
+
+
def get_themes(templates_path):
"""Returns available themes list."""
return os.listdir(templates_path)