summaryrefslogtreecommitdiff
path: root/searx/engines/startpage.py
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2022-01-05 13:00:52 +0100
committerMarkus Heiser <markus.heiser@darmarit.de>2022-01-10 11:22:12 +0100
commitf1f5e69c425389a5cb7e7a437b3a39c0d7513022 (patch)
tree1f74e0d0ff5e595d0eea8c143f16a75c5c74a6c0 /searx/engines/startpage.py
parent79e0aa26456ed316bf8a12c57a8faa46bf5ac2cb (diff)
downloadsearxng-f1f5e69c425389a5cb7e7a437b3a39c0d7513022.tar.gz
searxng-f1f5e69c425389a5cb7e7a437b3a39c0d7513022.zip
[fix] startpage engine - avoid captcha
Startpage has introduced new anti-scraping measures that make SearXNG instances run into captchas: 1. some arguments have been removed and a new `sc` argument has been added. 2. the search path changed from `do/search` to `sp/search`. 3. a POST request is no longer needed. Closes: https://github.com/searxng/searxng/issues/692 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/startpage.py')
-rw-r--r--searx/engines/startpage.py17
1 files changed, 9 insertions, 8 deletions
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 97891921c..1fd259dad 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -3,6 +3,8 @@
Startpage (Web)
"""
+from urllib.parse import urlencode
+
from lxml import html
from dateutil import parser
from datetime import datetime, timedelta
@@ -33,7 +35,7 @@ supported_languages_url = 'https://www.startpage.com/do/settings'
# search-url
base_url = 'https://startpage.com/'
-search_url = base_url + 'do/search'
+search_url = base_url + 'sp/search?'
# specific xpath variables
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
@@ -46,14 +48,12 @@ content_xpath = './/p[@class="w-gl__description"]'
# do search-request
def request(query, params):
- params['url'] = search_url
- params['method'] = 'POST'
- params['data'] = {
+ args = {
'query': query,
'page': params['pageno'],
'cat': 'web',
- 'cmd': 'process_search',
- 'engine0': 'v1all',
+ # 'abp': "-1",
+ 'sc': 'Mj4jZy61QETj20',
}
# set language if specified
@@ -61,9 +61,10 @@ def request(query, params):
lang_code = match_language(params['language'], supported_languages, fallback=None)
if lang_code:
language_name = supported_languages[lang_code]['alias']
- params['data']['language'] = language_name
- params['data']['lui'] = language_name
+ args['language'] = language_name
+ args['lui'] = language_name
+ params['url'] = search_url + urlencode(args)
return params