diff options
author | Alexandre Flament <alex@al-f.net> | 2021-02-12 10:56:53 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-12 10:56:53 +0100 |
commit | 2b60d0d243f5f5848c44ffc4d8503ee974fc614b (patch) | |
tree | 0ac05aa41f6d2cc60294c60713ea2f788135e865 /searx/engines | |
parent | 7e83818879a48fef84a518092d833e3785c64ff2 (diff) | |
parent | 35dd0694027baef2c2eb18d27bd0f5dcbcc999ad (diff) | |
download | searxng-2b60d0d243f5f5848c44ffc4d8503ee974fc614b.tar.gz searxng-2b60d0d243f5f5848c44ffc4d8503ee974fc614b.zip |
Merge pull request #2564 from dalf/fix-seznam
[fix] fix seznam engine
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/seznam.py | 64 |
1 files changed, 64 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Seznam

 Scrapes the HTML result page served from https://search.seznam.cz/
 (the official API is not used, see ``about`` below).
"""

from urllib.parse import urlencode, urlparse
from lxml import html
from searx.poolrequests import get
from searx.exceptions import SearxEngineAccessDeniedException
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex

# about
about = {
    "website": "https://www.seznam.cz/",
    "wikidata_id": "Q3490485",
    "official_api_documentation": "https://api.sklik.cz/",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

base_url = 'https://search.seznam.cz/'


def request(query, params):
    """Build the search request for ``query``.

    The index page at ``base_url`` is fetched first (using the headers
    already present in ``params``).  Every named hidden ``<input>`` found on
    that page is copied into the query string of the real search request,
    and the cookies returned with the index page are forwarded as well.

    :param query: search terms, sent as the ``q`` URL parameter.
    :param params: request parameters; ``params['headers']`` is read,
        ``params['url']`` and ``params['cookies']`` are set.
    :returns: the same ``params`` dict, updated in place.
    """
    response_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
    dom = html.fromstring(response_index.text)

    url_params = {'q': query}
    for hidden_input in eval_xpath_list(dom, '//input[@type="hidden"]'):
        name = hidden_input.get('name')
        if not name:
            # A control without a name is never submitted by a browser;
            # passing it on would make urlencode() emit a bogus "None=..."
            # parameter.
            continue
        value = hidden_input.get('value')
        # A missing value attribute is submitted as an empty string, not as
        # the literal text "None".
        url_params[name] = '' if value is None else value

    params['url'] = base_url + '?' + urlencode(url_params)
    params['cookies'] = response_index.cookies
    return params


def response(resp):
    """Parse the HTML result page into a list of result dicts.

    :param resp: HTTP response; ``resp.url`` and ``resp.content`` are read.
    :returns: a list whose items are either
        ``{'url': ..., 'title': ..., 'content': ...}`` for a regular result
        or ``{'suggestion': ...}`` for a related-search hint.
    :raises SearxEngineAccessDeniedException: when the final URL path starts
        with ``/verify`` (presumably a verification / captcha page -- confirm).
    """
    resp_url = urlparse(resp.url)
    if resp_url.path.startswith('/verify'):
        raise SearxEngineAccessDeniedException()

    results = []

    dom = html.fromstring(resp.content.decode())
    for result_element in eval_xpath_list(dom, '//div[@id="searchpage-root"]//div[@data-dot="results"]/div'):
        dot_data = eval_xpath_getindex(result_element, './div/div[@data-dot-data]/@data-dot-data', 0, default=None)
        if dot_data is None:
            # No data-dot-data marker: handled as a regular result entry.
            # NOTE(review): both lookups below are made without a default,
            # so a result box lacking the expected markup presumably raises
            # instead of being skipped -- confirm against searx.utils.
            title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
            results.append({
                'url': title_element.get('href'),
                'title': extract_text(title_element),
                'content': extract_text(eval_xpath_getindex(title_element, '../../div[2]', 0)),
            })
        elif dot_data == '{"reporter_name":"hint/related/relates"}':
            # Related-searches box: each list item becomes a suggestion.
            suggestions_element = eval_xpath_getindex(result_element,
                                                      './div/div[@data-dot="main-box"]', 0, default=None)
            if suggestions_element is not None:
                for suggestion in eval_xpath_list(suggestions_element, './/ul/li'):
                    results.append({'suggestion': extract_text(suggestion)})
        # Boxes carrying any other data-dot-data value are ignored.

    return results