diff options
author | Bnyro <bnyro@tutanota.com> | 2024-01-24 19:06:13 +0100 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2024-01-29 16:07:24 +0100 |
commit | 559b8606189ff44eee6e8bcde2763daf3c2cbd7c (patch) | |
tree | e09979d3e9260e221b25075815b11c4947abbe01 /searx/engines | |
parent | c2f1e760a16e79fddb1e7d15131f7b714cc05342 (diff) | |
download | searxng-559b8606189ff44eee6e8bcde2763daf3c2cbd7c.tar.gz searxng-559b8606189ff44eee6e8bcde2763daf3c2cbd7c.zip |
[feat] engine: implementation of goodreads
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/goodreads.py | 58 |
1 files changed, 58 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Goodreads (books)

Scrapes the HTML search page of https://www.goodreads.com and turns every
table row that carries a book title link into one result.
"""

from urllib.parse import urlencode

from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list

about = {
    'website': 'https://www.goodreads.com',
    'wikidata_id': 'Q2359213',
    'official_api_documentation': None,
    'use_official_api': False,
    'require_api_key': False,
    'results': 'HTML',
}
categories = []
paging = True

base_url = "https://www.goodreads.com"

# XPath selecting one node per candidate result row.
results_xpath = "//table//tr"
# The XPaths below are evaluated relative to a single result row.
thumbnail_xpath = ".//img[contains(@class, 'bookCover')]/@src"
url_xpath = ".//a[contains(@class, 'bookTitle')]/@href"
title_xpath = ".//a[contains(@class, 'bookTitle')]"
author_xpath = ".//a[contains(@class, 'authorName')]"
info_text_xpath = ".//span[contains(@class, 'uitext')]"


def request(query, params):
    """Store the Goodreads search URL for ``query`` in ``params['url']``.

    The requested page is read from ``params['pageno']`` and sent as the
    ``page`` URL argument.  The (mutated) ``params`` dict is returned.
    """
    args = {
        'q': query,
        'page': params['pageno'],
    }

    params['url'] = f"{base_url}/search?{urlencode(args)}"
    return params


def response(resp):
    """Parse the HTML in ``resp.text`` and return a list of result dicts.

    Each result has the keys ``url``, ``title``, ``img_src``, ``content`` and
    ``metadata``.  Rows that contain no book title link are ignored.
    """
    results = []

    dom = html.fromstring(resp.text)

    for result in eval_xpath_list(dom, results_xpath):
        href = extract_text(eval_xpath(result, url_xpath))
        if not href:
            # Skip rows without a book title link: they have no usable
            # target URL and would only add an empty entry to the results.
            continue

        results.append(
            {
                # the href is appended to base_url as-is
                'url': base_url + href,
                'title': extract_text(eval_xpath(result, title_xpath)),
                'img_src': extract_text(eval_xpath(result, thumbnail_xpath)),
                'content': extract_text(eval_xpath(result, info_text_xpath)),
                'metadata': extract_text(eval_xpath(result, author_xpath)),
            }
        )

    return results