diff options
author | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-02-01 13:43:10 +0100 |
---|---|---|
committer | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-02-01 13:43:10 +0100 |
commit | 8cf2ee57216b4dffc419e1762ff1fe4dfd30e227 (patch) | |
tree | ed8dec8eb00be521ba1968d77815ec472c2e5047 /searx/engines | |
parent | f18807955beceb86a99963feedee8355f31c481c (diff) | |
download | searxng-8cf2ee57216b4dffc419e1762ff1fe4dfd30e227.tar.gz searxng-8cf2ee57216b4dffc419e1762ff1fe4dfd30e227.zip |
500px unit test
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/www500px.py | 11 |
1 file changed, 6 insertions, 5 deletions
```diff
diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py
index f25678c24..99dba4abf 100644
--- a/searx/engines/www500px.py
+++ b/searx/engines/www500px.py
@@ -15,6 +15,7 @@ from urllib import urlencode
 from urlparse import urljoin
 from lxml import html
 import re
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['images']
@@ -22,7 +23,7 @@ paging = True
 
 # search-url
 base_url = 'https://500px.com'
-search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
+search_url = base_url + '/search?search?page={pageno}&type=photos&{query}'
 
 
 # do search-request
@@ -44,11 +45,11 @@ def response(resp):
     for result in dom.xpath('//div[@class="photo"]'):
         link = result.xpath('.//a')[0]
         url = urljoin(base_url, link.attrib.get('href'))
-        title = result.xpath('.//div[@class="title"]//text()')[0]
-        thumbnail_src = link.xpath('.//img')[0].attrib['src']
+        title = extract_text(result.xpath('.//div[@class="title"]'))
+        thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
         # To have a bigger thumbnail, uncomment the next line
-        #thumbnail_src = regex.sub('4.jpg', thumbnail_src)
-        content = result.xpath('.//div[@class="info"]//text()')[0]
+        # thumbnail_src = regex.sub('4.jpg', thumbnail_src)
+        content = extract_text(result.xpath('.//div[@class="info"]'))
         img_src = regex.sub('2048.jpg', thumbnail_src)
 
         # append result
```