diff options
author | Alexandre Flament <alex@al-f.net> | 2021-02-12 17:12:07 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-12 17:12:07 +0100 |
commit | 45027765e35ad8704b2ed034c3d7460a2a28b827 (patch) | |
tree | edca8ccdd01154885fe9827fc5c8f43d1d26ae8f /searx/engines | |
parent | 50bde93dbbd2acf36d916c97cf6fe6de6bbe18a1 (diff) | |
parent | 5d9db6c2f76b8ec1d13596be0d96f53035914977 (diff) | |
download | searxng-45027765e35ad8704b2ed034c3d7460a2a28b827.tar.gz searxng-45027765e35ad8704b2ed034c3d7460a2a28b827.zip |
Merge pull request #2566 from dalf/remove-yandex
[remove] yandex engine
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/yandex.py | 72 |
1 files changed, 0 insertions, 72 deletions
```diff
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
deleted file mode 100644
index ff946cc46..000000000
--- a/searx/engines/yandex.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Yahoo (Web)
-"""
-
-from urllib.parse import urlencode, urlparse
-from lxml import html
-from searx import logger
-from searx.exceptions import SearxEngineCaptchaException
-
-logger = logger.getChild('yandex engine')
-
-# about
-about = {
-    "website": 'https://yandex.ru/',
-    "wikidata_id": 'Q5281',
-    "official_api_documentation": "?",
-    "use_official_api": False,
-    "require_api_key": False,
-    "results": 'HTML',
-}
-
-# engine dependent config
-categories = ['general']
-paging = True
-
-default_tld = 'com'
-language_map = {'ru': 'ru',
-                'ua': 'ua',
-                'be': 'by',
-                'kk': 'kz',
-                'tr': 'com.tr'}
-
-# search-url
-base_url = 'https://yandex.{tld}/'
-search_url = 'search/?{query}&p={page}'
-
-results_xpath = '//li[@class="serp-item"]'
-url_xpath = './/h2/a/@href'
-title_xpath = './/h2/a//text()'
-content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m organic__text"]//text()'
-
-
-def request(query, params):
-    lang = params['language'].split('-')[0]
-    host = base_url.format(tld=language_map.get(lang) or default_tld)
-    params['url'] = host + search_url.format(page=params['pageno'] - 1,
-                                             query=urlencode({'text': query}))
-    return params
-
-
-# get response from search-request
-def response(resp):
-    resp_url = urlparse(resp.url)
-    if resp_url.path.startswith('/showcaptcha'):
-        raise SearxEngineCaptchaException()
-
-    dom = html.fromstring(resp.text)
-    results = []
-
-    for result in dom.xpath(results_xpath):
-        try:
-            res = {'url': result.xpath(url_xpath)[0],
-                   'title': ''.join(result.xpath(title_xpath)),
-                   'content': ''.join(result.xpath(content_xpath))}
-        except:
-            logger.exception('yandex parse crash')
-            continue
-
-        results.append(res)
-
-    return results
```