summaryrefslogtreecommitdiff
path: root/searx/engines/twitter.py
diff options
context:
space:
mode:
author Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2015-02-04 19:39:31 +0100
committer Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2015-02-04 19:39:31 +0100
commit d6e511fc2f090a848fe5656382266fea816f7b01 (patch)
tree 5da2613e4e679d7c7632eae14c0f540c1b597782 /searx/engines/twitter.py
parent a96208be965bfc082524f3e22d9339364e2a9976 (diff)
download searxng-d6e511fc2f090a848fe5656382266fea816f7b01.tar.gz
searxng-d6e511fc2f090a848fe5656382266fea816f7b01.zip
Twitter's unit test
There is a commented-out line in a test that I didn't succeed in making work. It's an issue of Unicode, UTF-8, ASCII, Latin-1... I think I tried everything, but if you have an idea... I'm still a newbie in Python...
Diffstat (limited to 'searx/engines/twitter.py')
-rw-r--r-- searx/engines/twitter.py 13
1 files changed, 8 insertions, 5 deletions
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index bd9a8c2fc..0e35e6188 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -13,8 +13,8 @@
from urlparse import urljoin
from urllib import urlencode
from lxml import html
-from cgi import escape
from datetime import datetime
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['social media']
@@ -22,12 +22,12 @@ language_support = True
# search-url
base_url = 'https://twitter.com/'
-search_url = base_url+'search?'
+search_url = base_url + 'search?'
# specific xpath variables
results_xpath = '//li[@data-item-type="tweet"]'
link_xpath = './/small[@class="time"]//a'
-title_xpath = './/span[@class="username js-action-profile-name"]//text()'
+title_xpath = './/span[@class="username js-action-profile-name"]'
content_xpath = './/p[@class="js-tweet-text tweet-text"]'
timestamp_xpath = './/span[contains(@class,"_timestamp")]'
@@ -39,6 +39,8 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
params['cookies']['lang'] = params['language'].split('_')[0]
+ else:
+ params['cookies']['lang'] = 'en'
return params
@@ -53,8 +55,9 @@ def response(resp):
for tweet in dom.xpath(results_xpath):
link = tweet.xpath(link_xpath)[0]
url = urljoin(base_url, link.attrib.get('href'))
- title = ''.join(tweet.xpath(title_xpath))
- content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
+ title = extract_text(tweet.xpath(title_xpath))
+ content = extract_text(tweet.xpath(content_xpath)[0])
+
pubdate = tweet.xpath(timestamp_xpath)
if len(pubdate) > 0:
timestamp = float(pubdate[0].attrib.get('data-time'))