summaryrefslogtreecommitdiff
path: root/searx/engines/bandcamp.py
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2020-12-29 15:49:41 +0100
committerMarkus Heiser <markus.heiser@darmarit.de>2021-04-15 08:52:11 +0200
commit062d589f865cf736620f4ff5d6a8476dfe980ba7 (patch)
tree26a14e226e94bee4c82672fbcdb5f87e7a395a7c /searx/engines/bandcamp.py
parent4d3c399ee985385e888ba068d973e4653d9f50b9 (diff)
downloadsearxng-062d589f865cf736620f4ff5d6a8476dfe980ba7.tar.gz
searxng-062d589f865cf736620f4ff5d6a8476dfe980ba7.zip
[fix] xpath expressions to grab all items from bandcamp's response
I also found some items missing a thumbnail, and I used extract_text for content and title to remove unneeded whitespace. BTW: added bandcamp's favicon. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/bandcamp.py')
-rw-r--r--searx/engines/bandcamp.py15
1 files changed, 8 insertions, 7 deletions
diff --git a/searx/engines/bandcamp.py b/searx/engines/bandcamp.py
index b1b5214fe..dafb3ee16 100644
--- a/searx/engines/bandcamp.py
+++ b/searx/engines/bandcamp.py
@@ -51,19 +51,20 @@ def response(resp):
tree = html.fromstring(resp.text)
search_results = tree.xpath('//li[contains(@class, "searchresult")]')
for result in search_results:
- link = result.xpath('//div[@class="itemurl"]/a')[0]
+ link = result.xpath('.//div[@class="itemurl"]/a')[0]
result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
- title = result.xpath('//div[@class="heading"]/a/text()')[0]
+ title = result.xpath('.//div[@class="heading"]/a/text()')
date = dateparse(result.xpath('//div[@class="released"]/text()')[0].replace("released ", ""))
- content = result.xpath('//div[@class="subhead"]/text()')[0]
- thumbnail = result.xpath('//div[@class="art"]/img/@src')[0]
+ content = result.xpath('.//div[@class="subhead"]/text()')
new_result = {
"url": extract_text(link),
- "title": title,
- "content": content,
+ "title": extract_text(title),
+ "content": extract_text(content),
"publishedDate": date,
- "thumbnail": thumbnail,
}
+ thumbnail = result.xpath('.//div[@class="art"]/img/@src')
+ if thumbnail:
+ new_result['thumbnail'] = thumbnail[0]
if "album" in result.classes:
new_result["embedded"] = embedded_url.format(type='album', result_id=result_id)
elif "track" in result.classes: