diff options
author | Bnyro <bnyro@tutanota.com> | 2023-11-04 19:01:24 +0100 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2023-12-11 18:59:36 +0100 |
commit | 8a4104b9922cbe4c33864b5c0de02e88d3c9665d (patch) | |
tree | fe90a458044cfff9b00d00043870d620aeadcb24 /searx/engines/rottentomatoes.py | |
parent | dda74fc9c96d9bf9bcb0b5dac90219e3b31f8390 (diff) | |
download | searxng-8a4104b9922cbe4c33864b5c0de02e88d3c9665d.tar.gz searxng-8a4104b9922cbe4c33864b5c0de02e88d3c9665d.zip |
[feat] engine: implementation of rotten tomatoes
Diffstat (limited to 'searx/engines/rottentomatoes.py')
-rw-r--r-- | searx/engines/rottentomatoes.py | 60 |
1 file changed, 60 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""RottenTomatoes (movies)
"""

from urllib.parse import quote_plus
from lxml import html
from searx.utils import eval_xpath, eval_xpath_list, extract_text

# engine metadata shown on the searxng about pages
about = {
    "website": 'https://www.rottentomatoes.com/',
    "wikidata_id": 'Q105584',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}
categories = ['movies']

base_url = "https://www.rottentomatoes.com"

# Each search hit is rendered as a <search-page-media-row> custom element;
# the metadata lives in attributes of that element and of its inner <a>/<img>.
results_xpath = "//search-page-media-row"
url_xpath = "./a[1]/@href"
title_xpath = "./a/img/@alt"
img_src_xpath = "./a/img/@src"
release_year_xpath = "concat('From ', string(./@releaseyear))"
score_xpath = "concat('Score: ', string(./@tomatometerscore))"
cast_xpath = "concat('Starring ', string(./@cast))"


def request(query, params):
    """Build the GET URL for the RottenTomatoes search page.

    The query is URL-encoded with ``quote_plus`` and appended to the
    ``/search`` endpoint; the mutated ``params`` dict is returned.
    """
    search_url = f"{base_url}/search?search={quote_plus(query)}"
    params["url"] = search_url
    return params


def _collect_content(row):
    """Join the optional metadata fields of one result row into one string.

    Each XPath expression concatenates a fixed label with an attribute
    value.  When the attribute is absent, XPath ``string()`` yields the
    empty string, leaving only the label with its trailing space — so a
    trailing space marks missing data and the field is skipped.
    """
    texts = (
        extract_text(eval_xpath(row, expr))
        for expr in (release_year_xpath, score_xpath, cast_xpath)
    )
    return ', '.join(text for text in texts if text and not text.endswith(" "))


def response(resp):
    """Parse the search-result HTML page into searxng result dicts."""
    dom = html.fromstring(resp.text)

    return [
        {
            'url': extract_text(eval_xpath(row, url_xpath)),
            'title': extract_text(eval_xpath(row, title_xpath)),
            'content': _collect_content(row),
            'img_src': extract_text(eval_xpath(row, img_src_xpath)),
        }
        for row in eval_xpath_list(dom, results_xpath)
    ]