diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2022-01-05 13:00:52 +0100 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2022-01-10 11:22:12 +0100 |
commit | f1f5e69c425389a5cb7e7a437b3a39c0d7513022 (patch) | |
tree | 1f74e0d0ff5e595d0eea8c143f16a75c5c74a6c0 /searx/engines/startpage.py | |
parent | 79e0aa26456ed316bf8a12c57a8faa46bf5ac2cb (diff) | |
download | searxng-f1f5e69c425389a5cb7e7a437b3a39c0d7513022.tar.gz searxng-f1f5e69c425389a5cb7e7a437b3a39c0d7513022.zip |
[fix] startpage engine - avoid captcha
Startpage has introduced new anti-scraping measures that make SearXNG instances
run into captchas:
1. some arguments have been removed and a new `sc` argument has been added.
2. search path changed from `do/search` to `sp/search`
3. POST request is no longer needed
Closes: https://github.com/searxng/searxng/issues/692
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/startpage.py')
-rw-r--r-- | searx/engines/startpage.py | 17 |
1 file changed, 9 insertions, 8 deletions
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 97891921c..1fd259dad 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -3,6 +3,8 @@ Startpage (Web) """ +from urllib.parse import urlencode + from lxml import html from dateutil import parser from datetime import datetime, timedelta @@ -33,7 +35,7 @@ supported_languages_url = 'https://www.startpage.com/do/settings' # search-url base_url = 'https://startpage.com/' -search_url = base_url + 'do/search' +search_url = base_url + 'sp/search?' # specific xpath variables # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] @@ -46,14 +48,12 @@ content_xpath = './/p[@class="w-gl__description"]' # do search-request def request(query, params): - params['url'] = search_url - params['method'] = 'POST' - params['data'] = { + args = { 'query': query, 'page': params['pageno'], 'cat': 'web', - 'cmd': 'process_search', - 'engine0': 'v1all', + # 'abp': "-1", + 'sc': 'Mj4jZy61QETj20', } # set language if specified @@ -61,9 +61,10 @@ def request(query, params): lang_code = match_language(params['language'], supported_languages, fallback=None) if lang_code: language_name = supported_languages[lang_code]['alias'] - params['data']['language'] = language_name - params['data']['lui'] = language_name + args['language'] = language_name + args['lui'] = language_name + params['url'] = search_url + urlencode(args) return params |