diff options
author | Thomas Pointhuber <thomas.pointhuber@gmx.at> | 2014-03-18 15:56:22 +0100 |
---|---|---|
committer | Thomas Pointhuber <thomas.pointhuber@gmx.at> | 2014-03-18 15:56:22 +0100 |
commit | 993271bed30e24c7ae1e0f63b64e030829206f27 (patch) | |
tree | 4d07f76efbc6f8d4a0db39970bcb1d56452d7f25 | |
parent | 337bd6d907503176eb94290c3f386ce88167dea8 (diff) | |
download | searxng-993271bed30e24c7ae1e0f63b64e030829206f27.tar.gz searxng-993271bed30e24c7ae1e0f63b64e030829206f27.zip |
extract publishDate from vimeo
-rw-r--r-- | searx/engines/vimeo.py | 6 | ||||
-rw-r--r-- | searx/engines/yahoo_news.py | 2 |
2 files changed, 7 insertions, 1 deletion
diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py
index a95c75b49..d2d2a4dd0 100644
--- a/searx/engines/vimeo.py
+++ b/searx/engines/vimeo.py
@@ -2,6 +2,8 @@ from urllib import urlencode
 from HTMLParser import HTMLParser
 from lxml import html
 from xpath import extract_text
+from datetime import datetime
+from dateutil import parser
 
 base_url = 'http://vimeo.com'
 search_url = base_url + '/search?{query}'
@@ -10,6 +12,7 @@ content_xpath = None
 title_xpath = None
 results_xpath = ''
 content_tpl = '<a href="{0}"> <img src="{2}"/> </a>'
+publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'
 
 # the cookie set by vimeo contains all the following values,
 # but only __utma seems to be requiered
@@ -40,9 +43,12 @@ def response(resp):
         url = base_url + result.xpath(url_xpath)[0]
         title = p.unescape(extract_text(result.xpath(title_xpath)))
         thumbnail = extract_text(result.xpath(content_xpath)[0])
+        publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0]))
+
         results.append({'url': url,
                         'title': title,
                         'content': content_tpl.format(url, title, thumbnail),
                         'template': 'videos.html',
+                        'publishedDate': publishedDate,
                         'thumbnail': thumbnail})
     return results
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index 53c8b07a7..43da93ede 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -53,7 +53,7 @@ def response(resp):
                 - timedelta(hours=int(timeNumbers[0]))\
                 - timedelta(minutes=int(timeNumbers[1]))
         else:
-            publishedDate =parser.parse(publishedDate)
+            publishedDate = parser.parse(publishedDate)
 
         if publishedDate.year == 1900:
             publishedDate = publishedDate.replace(year=datetime.now().year)