diff options
author | Alexandre Flament <alex@al-f.net> | 2017-02-12 14:58:49 +0100 |
---|---|---|
committer | Adam Tauber <asciimoo@gmail.com> | 2017-05-15 17:28:21 +0200 |
commit | f5128c7cb96d7564cc7bebeae13a319557b84aaa (patch) | |
tree | 8cdb0bfaacaf00cd193e6a5afffef2464a7152ec | |
parent | 4cffd78650c3f1dfce413ae0a1cd0453ebe6f277 (diff) | |
download | searxng-f5128c7cb96d7564cc7bebeae13a319557b84aaa.tar.gz searxng-f5128c7cb96d7564cc7bebeae13a319557b84aaa.zip |
[mod] add/modify image fetching for bing_news, qwant and twitter engines
-rw-r--r-- | searx/engines/bing_news.py | 5 | ||||
-rw-r--r-- | searx/engines/qwant.py | 19 | ||||
-rw-r--r-- | searx/engines/twitter.py | 7 | ||||
-rw-r--r-- | tests/unit/engines/test_bing_news.py | 6 |
4 files changed, 27 insertions, 10 deletions
```diff
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 8e3cc517e..0e2975814 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -112,12 +112,11 @@ def response(resp):
 
         # append result
         if thumbnail is not None:
-            results.append({'template': 'videos.html',
-                            'url': url,
+            results.append({'url': url,
                             'title': title,
                             'publishedDate': publishedDate,
                             'content': content,
-                            'thumbnail': thumbnail})
+                            'img_src': thumbnail})
         else:
             results.append({'url': url,
                             'title': title,
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index cb097eb38..3d266e228 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -96,14 +96,27 @@ def response(resp):
                             'thumbnail_src': thumbnail_src,
                             'img_src': img_src})
 
-        elif (category_to_keyword.get(categories[0], '') == 'news' or
-              category_to_keyword.get(categories[0], '') == 'social'):
+        elif category_to_keyword.get(categories[0], '') == 'social':
             published_date = datetime.fromtimestamp(result['date'], None)
+            img_src = result.get('img', None)
+            results.append({'url': res_url,
+                            'title': title,
+                            'publishedDate': published_date,
+                            'content': content,
+                            'img_src': img_src})
 
+        elif category_to_keyword.get(categories[0], '') == 'news':
+            published_date = datetime.fromtimestamp(result['date'], None)
+            media = result.get('media', [])
+            if len(media) > 0:
+                img_src = media[0].get('pict', {}).get('url', None)
+            else:
+                img_src = None
             results.append({'url': res_url,
                             'title': title,
                             'publishedDate': published_date,
-                            'content': content})
+                            'content': content,
+                            'img_src': img_src})
 
     return results
 
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 038cef47f..d2a8d2088 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -27,6 +27,7 @@ search_url = base_url + 'search?'
 
 # specific xpath variables
 results_xpath = '//li[@data-item-type="tweet"]'
+avatar_xpath = './/img[contains(@class, "avatar")]/@src'
 link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[contains(@class, "username")]'
 content_xpath = './/p[contains(@class, "tweet-text")]'
@@ -57,6 +58,8 @@ def response(resp):
         try:
             link = tweet.xpath(link_xpath)[0]
             content = extract_text(tweet.xpath(content_xpath)[0])
+            img_src = tweet.xpath(avatar_xpath)[0]
+            img_src = img_src.replace('_bigger', '_normal')
         except Exception:
             continue
 
@@ -71,12 +74,14 @@ def response(resp):
             results.append({'url': url,
                             'title': title,
                             'content': content,
+                            'img_src': img_src,
                             'publishedDate': publishedDate})
         else:
             # append result
             results.append({'url': url,
                             'title': title,
-                            'content': content})
+                            'content': content,
+                            'img_src': img_src})
 
     # return results
     return results
diff --git a/tests/unit/engines/test_bing_news.py b/tests/unit/engines/test_bing_news.py
index e571adcee..28ec7a3ed 100644
--- a/tests/unit/engines/test_bing_news.py
+++ b/tests/unit/engines/test_bing_news.py
@@ -81,11 +81,11 @@ class TestBingNewsEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], 'Title')
         self.assertEqual(results[0]['url'], 'http://url.of.article/')
         self.assertEqual(results[0]['content'], 'Article Content')
-        self.assertEqual(results[0]['thumbnail'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
+        self.assertEqual(results[0]['img_src'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
         self.assertEqual(results[1]['title'], 'Another Title')
         self.assertEqual(results[1]['url'], 'http://another.url.of.article/')
         self.assertEqual(results[1]['content'], 'Another Article Content')
-        self.assertNotIn('thumbnail', results[1])
+        self.assertNotIn('img_src', results[1])
 
         html = """<?xml version="1.0" encoding="utf-8" ?>
 <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
@@ -120,7 +120,7 @@ class TestBingNewsEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], 'Title')
         self.assertEqual(results[0]['url'], 'http://another.url.of.article/')
         self.assertEqual(results[0]['content'], 'Article Content')
-        self.assertEqual(results[0]['thumbnail'], 'http://another.bing.com/image')
+        self.assertEqual(results[0]['img_src'], 'http://another.bing.com/image')
 
         html = """<?xml version="1.0" encoding="utf-8" ?>
 <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
```