diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2020-12-29 15:49:41 +0100 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2021-04-15 08:52:11 +0200 |
commit | 062d589f865cf736620f4ff5d6a8476dfe980ba7 (patch) | |
tree | 26a14e226e94bee4c82672fbcdb5f87e7a395a7c /searx/engines/bandcamp.py | |
parent | 4d3c399ee985385e888ba068d973e4653d9f50b9 (diff) | |
download | searxng-062d589f865cf736620f4ff5d6a8476dfe980ba7.tar.gz searxng-062d589f865cf736620f4ff5d6a8476dfe980ba7.zip |
[fix] xpath expressions to grab all items from bandcamp's response
I also found that some items are missing a thumbnail, and I used extract_text
for content and title to remove unneeded whitespace.
BTW: added bandcamp's favicon
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/bandcamp.py')
-rw-r--r-- | searx/engines/bandcamp.py | 15 |
1 files changed, 8 insertions, 7 deletions
diff --git a/searx/engines/bandcamp.py b/searx/engines/bandcamp.py index b1b5214fe..dafb3ee16 100644 --- a/searx/engines/bandcamp.py +++ b/searx/engines/bandcamp.py @@ -51,19 +51,20 @@ def response(resp): tree = html.fromstring(resp.text) search_results = tree.xpath('//li[contains(@class, "searchresult")]') for result in search_results: - link = result.xpath('//div[@class="itemurl"]/a')[0] + link = result.xpath('.//div[@class="itemurl"]/a')[0] result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0] - title = result.xpath('//div[@class="heading"]/a/text()')[0] + title = result.xpath('.//div[@class="heading"]/a/text()') date = dateparse(result.xpath('//div[@class="released"]/text()')[0].replace("released ", "")) - content = result.xpath('//div[@class="subhead"]/text()')[0] - thumbnail = result.xpath('//div[@class="art"]/img/@src')[0] + content = result.xpath('.//div[@class="subhead"]/text()') new_result = { "url": extract_text(link), - "title": title, - "content": content, + "title": extract_text(title), + "content": extract_text(content), "publishedDate": date, - "thumbnail": thumbnail, } + thumbnail = result.xpath('.//div[@class="art"]/img/@src') + if thumbnail: + new_result['thumbnail'] = thumbnail[0] if "album" in result.classes: new_result["embedded"] = embedded_url.format(type='album', result_id=result_id) elif "track" in result.classes: |