diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2022-01-09 16:05:25 +0100 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2022-01-10 11:22:38 +0100 |
commit | 21e884f36903e67a2d786498c25ea428bf8349b5 (patch) | |
tree | 9d8c3a8e45c4a9778d9a0f3c6018c3ebeb2392ab /searx/engines/startpage.py | |
parent | 2f4e567e904278f19c4c392fb9a222fcf0afec1c (diff) | |
download | searxng-21e884f36903e67a2d786498c25ea428bf8349b5.tar.gz searxng-21e884f36903e67a2d786498c25ea428bf8349b5.zip |
[fix] startpage engine: fetch CAPTCHA & issues related to PR-695
In case of a CAPTCHA, raise a SearxEngineCaptchaException and suspend the engine for 7 days.
When get_sc_code() fails, raise a SearxEngineResponseException and suspend the engine for
7 days.
[1] https://github.com/searxng/searxng/pull/695
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/startpage.py')
-rw-r--r-- | searx/engines/startpage.py | 23 |
1 file changed, 21 insertions, 2 deletions
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index f5448dd47..ae7916fc3 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -18,6 +18,11 @@ from babel.localedata import locale_identifiers from searx import network from searx.utils import extract_text, eval_xpath, match_language +from searx.exceptions import ( + SearxEngineResponseException, + SearxEngineCaptchaException, +) + # about about = { @@ -54,6 +59,13 @@ sc_code_ts = 0 sc_code = '' +def raise_captcha(resp): + + if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): + # suspend CAPTCHA for 7 days + raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600) + + def get_sc_code(headers): """Get an actual `sc` argument from startpage's home page. @@ -73,10 +85,17 @@ def get_sc_code(headers): logger.debug("query new sc time-stamp ...") resp = network.get(base_url, headers=headers) + raise_captcha(resp) dom = html.fromstring(resp.text) - # href --> '/?sc=adrKJMgF8xwp20' - href = eval_xpath(dom, '//a[@class="footer-home__logo"]')[0].get('href') + try: + # href --> '/?sc=adrKJMgF8xwp20' + href = eval_xpath(dom, '//a[@class="footer-home__logo"]')[0].get('href') + except IndexError as exc: + # suspend startpage API --> https://github.com/searxng/searxng/pull/695 + raise SearxEngineResponseException( + suspended_time=7 * 24 * 3600, message="PR-695: query new sc time-stamp failed!" + ) sc_code = href[5:] sc_code_ts = time() |