summary refs log tree commit diff
path: root/searx/engines
diff options
context:
space:
mode:
author Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2014-12-28 22:57:59 +0100
committer Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2014-12-28 22:57:59 +0100
commit e7e298153678fc0e77e24a3ae3b333b1230136b2 (patch)
tree c338bb441cf5572fe72cbb4e3efc6be2a0f5babe /searx/engines
parent 011c43b485ed66d399aede2ca1366805496ab8b8 (diff)
download searxng-e7e298153678fc0e77e24a3ae3b333b1230136b2.tar.gz
searxng-e7e298153678fc0e77e24a3ae3b333b1230136b2.zip
Digg + Twitter corrections
Digg engines, with thumbnails. Add pubdate for twitter.
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/digg.py 66
-rw-r--r-- searx/engines/twitter.py 22
2 files changed, 82 insertions, 6 deletions
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
new file mode 100644
index 000000000..4ebfe58c1
--- /dev/null
+++ b/searx/engines/digg.py
@@ -0,0 +1,66 @@
+## Digg (News, Social media)
+#
+# @website https://digg.com/
+# @provide-api no
+#
+# @using-api no
+# @results HTML (using search portal)
+# @stable no (HTML can change)
+# @parse url, title, content, publishedDate, thumbnail
+
+from urllib import quote_plus
+from json import loads
+from lxml import html
+from cgi import escape
+from dateutil import parser
+
+# engine dependent config
+categories = ['news', 'social media']
+paging = True  # request() turns params['pageno'] into the position parameter
+
+# search-url: JSON endpoint; response() parses the markup found in its 'html' field
+base_url = 'https://digg.com/'
+search_url = base_url+'api/search/{query}.json?position={position}&format=html'
+
+# specific xpath variables; the per-result ones are evaluated on each results_xpath match
+results_xpath = '//article'
+link_xpath = './/small[@class="time"]//a'  # NOTE(review): not referenced in this file; same value as twitter.py - confirm it is needed
+title_xpath = './/h2//a//text()'
+content_xpath = './/p//text()'
+pubdate_xpath = './/time'
+
+
+# do search-request: store the Digg search URL for the requested page in params
+def request(query, params):
+    # page 1 maps to position 0; every further page advances the position by 10
+    start = 10 * (params['pageno'] - 1)
+    params['url'] = search_url.format(query=quote_plus(query), position=start)
+    return params
+
+
+# get response from search-request
+def response(resp):
+    """Turn Digg's JSON reply into a list of searx result dicts.
+
+    The reply's 'html' field is parsed; each <article> becomes one result.
+    'thumbnail' and 'publishedDate' are only set when the markup has them.
+    """
+    results = []
+    markup = loads(resp.text).get('html')
+    if not markup:  # lxml refuses to parse an empty document
+        return results
+
+    for article in html.fromstring(markup).xpath(results_xpath):
+        entry = {'url': article.attrib.get('data-contenturl'),
+                 'title': ''.join(article.xpath(title_xpath)),
+                 'content': escape(''.join(article.xpath(content_xpath))),
+                 'template': 'videos.html'}
+        # an article without <img>/<time> must not abort the whole page
+        images = article.xpath('.//img')
+        if images:
+            entry['thumbnail'] = images[0].attrib.get('src')
+        stamps = article.xpath(pubdate_xpath)
+        if stamps and stamps[0].attrib.get('datetime'):
+            entry['publishedDate'] = parser.parse(stamps[0].attrib['datetime'])
+        results.append(entry)
+    return results
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 0689150c8..5a7046c83 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -1,6 +1,6 @@
## Twitter (Social media)
#
-# @website https://www.bing.com/news
+# @website https://twitter.com/
# @provide-api yes (https://dev.twitter.com/docs/using-search)
#
# @using-api no
@@ -14,6 +14,7 @@ from urlparse import urljoin
from urllib import urlencode
from lxml import html
from cgi import escape
+from datetime import datetime
# engine dependent config
categories = ['social media']
@@ -28,6 +29,7 @@ results_xpath = '//li[@data-item-type="tweet"]'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/span[@class="username js-action-profile-name"]//text()'
content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+timestamp_xpath = './/span[contains(@class,"_timestamp")]'
# do search-request
@@ -53,11 +55,19 @@ def response(resp):
url = urljoin(base_url, link.attrib.get('href'))
title = ''.join(tweet.xpath(title_xpath))
content = escape(''.join(tweet.xpath(content_xpath)))
-
- # append result
- results.append({'url': url,
- 'title': title,
- 'content': content})
+ pubdate = tweet.xpath(timestamp_xpath)
+ if len(pubdate) > 0:
+ publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None)
+ # append result
+ results.append({'url': url,
+ 'title': title,
+ 'content': content,
+ 'publishedDate': publishedDate})
+ else:
+ # append result
+ results.append({'url': url,
+ 'title': title,
+ 'content': content})
# return results
return results