diff options
author | Bnyro <bnyro@tutanota.com> | 2024-11-27 14:13:23 +0100 |
---|---|---|
committer | Bnyro <bnyro@tutanota.com> | 2024-11-28 09:53:21 +0100 |
commit | 0ca2520115ecbdab40c746ce03d3331b5c21886d (patch) | |
tree | 930ac3a07b4f238b2b68a1417bd2c2625491a5b1 | |
parent | 5a9c1c6b5bc1963482e9c8142dffe8aebaed82b8 (diff) | |
download | searxng-0ca2520115ecbdab40c746ce03d3331b5c21886d.tar.gz searxng-0ca2520115ecbdab40c746ce03d3331b5c21886d.zip |
[feat] json/xpath engine: config option for method and body
-rw-r--r-- | searx/engines/json_engine.py | 34 | ||||
-rw-r--r-- | searx/engines/xpath.py | 20 |
2 files changed, 40 insertions, 14 deletions
diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py
index 9d919a63d..942f6ae8a 100644
--- a/searx/engines/json_engine.py
+++ b/searx/engines/json_engine.py
@@ -16,23 +16,17 @@ from json import loads
 from urllib.parse import urlencode
 from searx.utils import to_string, html_to_text
 
-
+# parameters for generating a request
 search_url = None
-url_query = None
-url_prefix = ""
-content_query = None
-title_query = None
-content_html_to_text = False
-title_html_to_text = False
-paging = False
-suggestion_query = ''
-results_query = ''
+method = 'GET'
+request_body = ''
 
 cookies = {}
 headers = {}
 '''Some engines might offer different result based on cookies or headers.
 Possible use-case: To set safesearch cookie or header to moderate.'''
 
+paging = False
 # parameters for engines with paging support
 #
 # number of results on each page
@@ -41,6 +35,16 @@ page_size = 1
 # number of the first page (usually 0 or 1)
 first_page_num = 1
 
+# parameters for parsing the response
+results_query = ''
+url_query = None
+url_prefix = ""
+title_query = None
+content_query = None
+suggestion_query = ''
+title_html_to_text = False
+content_html_to_text = False
+
 
 def iterate(iterable):
     if isinstance(iterable, dict):
@@ -98,9 +102,8 @@ def query(data, query_string):
 
 
 def request(query, params):  # pylint: disable=redefined-outer-name
-    query = urlencode({'q': query})[2:]
+    fp = {'query': urlencode({'q': query})[2:]}  # pylint: disable=invalid-name
 
-    fp = {'query': query}  # pylint: disable=invalid-name
     if paging and search_url.find('{pageno}') >= 0:
         fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num
 
@@ -108,7 +111,12 @@ def request(query, params):  # pylint: disable=redefined-outer-name
     params['headers'].update(headers)
 
     params['url'] = search_url.format(**fp)
-    params['query'] = query
+    params['method'] = method
+
+    if request_body:
+        # don't url-encode the query if it's in the request body
+        fp['query'] = query
+        params['data'] = request_body.format(**fp)
 
     return params
 
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 558531880..97c84bd09 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -12,6 +12,8 @@ Request:
 - :py:obj:`search_url`
 - :py:obj:`lang_all`
 - :py:obj:`soft_max_redirects`
+- :py:obj:`method`
+- :py:obj:`request_body`
 - :py:obj:`cookies`
 - :py:obj:`headers`
 
@@ -151,6 +153,16 @@ headers = {}
 '''Some engines might offer different result based headers. Possible use-case:
 To set header to moderate.'''
 
+method = 'GET'
+'''Some engines might require to do POST requests for search.'''
+
+request_body = ''
+'''The body of the request. This can only be used if different :py:obj:`method`
+is set, e.g. ``POST``. For formatting see the documentation of :py:obj:`search_url`::
+
+    search={query}&page={pageno}{time_range}{safe_search}
+'''
+
 paging = False
 '''Engine supports paging [True or False].'''
 
@@ -236,8 +248,14 @@ def request(query, params):
     params['headers'].update(headers)
 
     params['url'] = search_url.format(**fargs)
-    params['soft_max_redirects'] = soft_max_redirects
+    params['method'] = method
+
+    if request_body:
+        # don't url-encode the query if it's in the request body
+        fargs['query'] = query
+        params['data'] = request_body.format(**fargs)
 
+    params['soft_max_redirects'] = soft_max_redirects
     params['raise_for_httperror'] = False
 
     return params