summary refs log tree commit diff
path: root/searx
diff options
context:
space:
mode:
author Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2015-05-31 00:25:59 +0200
committer Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2015-05-31 00:25:59 +0200
commit f965c978222cf48e8dd4b7dd6c9a28ccca9bc62f (patch)
tree c2bd2e7e17cc3090404390694f7127a580b589a8 /searx
parent aac8d3a7bfdd77a5369e52a4ece99b20669a4625 (diff)
download searxng-f965c978222cf48e8dd4b7dd6c9a28ccca9bc62f.tar.gz
searxng-f965c978222cf48e8dd4b7dd6c9a28ccca9bc62f.zip
Adds two engines: YouTube with or without the API
The API engine needs an API key. The no-API engine doesn't provide published dates.
Diffstat (limited to 'searx')
-rw-r--r-- searx/engines/youtube_api.py 83
-rw-r--r-- searx/engines/youtube_noapi.py 72
-rw-r--r-- searx/settings.yml 7
-rw-r--r-- searx/tests/engines/test_youtube_api.py 111
-rw-r--r-- searx/tests/engines/test_youtube_noapi.py 103
-rw-r--r-- searx/tests/test_engines.py 2
6 files changed, 377 insertions, 1 deletions
diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py
new file mode 100644
index 000000000..8fd939a25
--- /dev/null
+++ b/searx/engines/youtube_api.py
@@ -0,0 +1,83 @@
+# Youtube (Videos)
+#
+# @website https://www.youtube.com/
+# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
+#
+# @using-api yes
+# @results JSON
+# @stable yes
+# @parse url, title, content, publishedDate, thumbnail, embedded
+
+from json import loads
+from urllib import urlencode
+from dateutil import parser
+
+# engine dependent config
+categories = ['videos', 'music']
+paging = False
+language_support = True
+api_key = None
+
+# search-url
+base_url = 'https://www.googleapis.com/youtube/v3/search'
+search_url = base_url + '?part=snippet&{query}&maxResults=20&key={api_key}'
+
+embedded_url = '<iframe width="540" height="304" ' +\
+ 'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
+ 'frameborder="0" allowfullscreen></iframe>'
+
+base_youtube_url = 'https://www.youtube.com/watch?v='
+
+
+# do search-request
+def request(query, params):
+ params['url'] = search_url.format(query=urlencode({'q': query}),
+ api_key=api_key)
+
+ # add language tag if specified
+ if params['language'] != 'all':
+ params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0]
+
+ return params
+
+
+# get response from search-request
+def response(resp):
+ results = []
+
+ search_results = loads(resp.text)
+
+ # return empty array if there are no results
+ if 'items' not in search_results:
+ return []
+
+ # parse results
+ for result in search_results['items']:
+ videoid = result['id']['videoId']
+
+ title = result['snippet']['title']
+ content = ''
+ thumbnail = ''
+
+ pubdate = result['snippet']['publishedAt']
+ publishedDate = parser.parse(pubdate)
+
+ thumbnail = result['snippet']['thumbnails']['high']['url']
+
+ content = result['snippet']['description']
+
+ url = base_youtube_url + videoid
+
+ embedded = embedded_url.format(videoid=videoid)
+
+ # append result
+ results.append({'url': url,
+ 'title': title,
+ 'content': content,
+ 'template': 'videos.html',
+ 'publishedDate': publishedDate,
+ 'embedded': embedded,
+ 'thumbnail': thumbnail})
+
+ # return results
+ return results
diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py
new file mode 100644
index 000000000..f78e43f0f
--- /dev/null
+++ b/searx/engines/youtube_noapi.py
@@ -0,0 +1,72 @@
+# Youtube (Videos)
+#
+# @website https://www.youtube.com/
+# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
+#
+# @using-api no
+# @results HTML
+# @stable no
+# @parse url, title, content, thumbnail, embedded
+
+from urllib import quote_plus
+from lxml import html
+from searx.engines.xpath import extract_text
+
+# engine dependent config
+categories = ['videos', 'music']
+paging = True
+language_support = False
+
+# search-url
+base_url = 'https://www.youtube.com/results'
+search_url = base_url + '?search_query={query}&page={page}'
+
+embedded_url = '<iframe width="540" height="304" ' +\
+ 'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
+ 'frameborder="0" allowfullscreen></iframe>'
+
+base_youtube_url = 'https://www.youtube.com/watch?v='
+
+# specific xpath variables
+results_xpath = "//ol/li/div[contains(@class, 'yt-lockup yt-lockup-tile yt-lockup-video vve-check')]"
+url_xpath = './/h3/a/@href'
+title_xpath = './/div[@class="yt-lockup-content"]/h3/a'
+content_xpath = './/div[@class="yt-lockup-content"]/div[@class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2"]'
+
+
+# do search-request
+def request(query, params):
+ params['url'] = search_url.format(query=quote_plus(query),
+ page=params['pageno'])
+
+ return params
+
+
+# get response from search-request
+def response(resp):
+ results = []
+
+ dom = html.fromstring(resp.text)
+
+ # parse results
+ for result in dom.xpath(results_xpath):
+ videoid = result.xpath('@data-context-item-id')[0]
+
+ url = base_youtube_url + videoid
+ thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg'
+
+ title = extract_text(result.xpath(title_xpath)[0])
+ content = extract_text(result.xpath(content_xpath)[0])
+
+ embedded = embedded_url.format(videoid=videoid)
+
+ # append result
+ results.append({'url': url,
+ 'title': title,
+ 'content': content,
+ 'template': 'videos.html',
+ 'embedded': embedded,
+ 'thumbnail': thumbnail})
+
+ # return results
+ return results
diff --git a/searx/settings.yml b/searx/settings.yml
index d35b1378a..519ea8be1 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -242,8 +242,13 @@ engines:
shortcut : yhn
- name : youtube
- engine : youtube
shortcut : yt
+ # To use the official, stable API engine instead, you need an API key
+ # See: https://console.developers.google.com/project
+ # engine : youtube_api
+ # api_key: 'apikey' # required!
+ # Or you can use the HTML-scraping engine (not stable), which is enabled by default
+ engine : youtube_noapi
- name : dailymotion
engine : dailymotion
diff --git a/searx/tests/engines/test_youtube_api.py b/searx/tests/engines/test_youtube_api.py
new file mode 100644
index 000000000..0d4d478c3
--- /dev/null
+++ b/searx/tests/engines/test_youtube_api.py
@@ -0,0 +1,111 @@
+from collections import defaultdict
+import mock
+from searx.engines import youtube_api
+from searx.testing import SearxTestCase
+
+
+class TestYoutubeAPIEngine(SearxTestCase):
+
+ def test_request(self):
+ query = 'test_query'
+ dicto = defaultdict(dict)
+ dicto['pageno'] = 0
+ dicto['language'] = 'fr_FR'
+ params = youtube_api.request(query, dicto)
+ self.assertTrue('url' in params)
+ self.assertTrue(query in params['url'])
+ self.assertIn('googleapis.com', params['url'])
+ self.assertIn('youtube', params['url'])
+ self.assertIn('fr', params['url'])
+
+ dicto['language'] = 'all'
+ params = youtube_api.request(query, dicto)
+ self.assertFalse('fr' in params['url'])
+
+ def test_response(self):
+ self.assertRaises(AttributeError, youtube_api.response, None)
+ self.assertRaises(AttributeError, youtube_api.response, [])
+ self.assertRaises(AttributeError, youtube_api.response, '')
+ self.assertRaises(AttributeError, youtube_api.response, '[]')
+
+ response = mock.Mock(text='{}')
+ self.assertEqual(youtube_api.response(response), [])
+
+ response = mock.Mock(text='{"data": []}')
+ self.assertEqual(youtube_api.response(response), [])
+
+ json = """
+ {
+ "kind": "youtube#searchListResponse",
+ "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME",
+ "nextPageToken": "CAUQAA",
+ "pageInfo": {
+ "totalResults": 1000000,
+ "resultsPerPage": 20
+ },
+ "items": [
+ {
+ "kind": "youtube#searchResult",
+ "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/IbLO64BMhbHIgWLwLw7MDYe7Hs4",
+ "id": {
+ "kind": "youtube#video",
+ "videoId": "DIVZCPfAOeM"
+ },
+ "snippet": {
+ "publishedAt": "2015-05-29T22:41:04.000Z",
+ "channelId": "UCNodmx1ERIjKqvcJLtdzH5Q",
+ "title": "Title",
+ "description": "Description",
+ "thumbnails": {
+ "default": {
+ "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/default.jpg"
+ },
+ "medium": {
+ "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg"
+ },
+ "high": {
+ "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg"
+ }
+ },
+ "channelTitle": "MinecraftUniverse",
+ "liveBroadcastContent": "none"
+ }
+ }
+ ]
+ }
+ """
+ response = mock.Mock(text=json)
+ results = youtube_api.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'Title')
+ self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM')
+ self.assertEqual(results[0]['content'], 'Description')
+ self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg')
+ self.assertTrue('DIVZCPfAOeM' in results[0]['embedded'])
+
+ json = """
+ {
+ "kind": "youtube#searchListResponse",
+ "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME",
+ "nextPageToken": "CAUQAA",
+ "pageInfo": {
+ "totalResults": 1000000,
+ "resultsPerPage": 20
+ }
+ }
+ """
+ response = mock.Mock(text=json)
+ results = youtube_api.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 0)
+
+ json = """
+ {"toto":{"entry":[]
+ }
+ }
+ """
+ response = mock.Mock(text=json)
+ results = youtube_api.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 0)
diff --git a/searx/tests/engines/test_youtube_noapi.py b/searx/tests/engines/test_youtube_noapi.py
new file mode 100644
index 000000000..b715ed2f1
--- /dev/null
+++ b/searx/tests/engines/test_youtube_noapi.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import youtube_noapi
+from searx.testing import SearxTestCase
+
+
+class TestYoutubeNoAPIEngine(SearxTestCase):
+
+ def test_request(self):
+ query = 'test_query'
+ dicto = defaultdict(dict)
+ dicto['pageno'] = 0
+ params = youtube_noapi.request(query, dicto)
+ self.assertIn('url', params)
+ self.assertIn(query, params['url'])
+ self.assertIn('youtube.com', params['url'])
+
+ def test_response(self):
+ self.assertRaises(AttributeError, youtube_noapi.response, None)
+ self.assertRaises(AttributeError, youtube_noapi.response, [])
+ self.assertRaises(AttributeError, youtube_noapi.response, '')
+ self.assertRaises(AttributeError, youtube_noapi.response, '[]')
+
+ response = mock.Mock(text='<html></html>')
+ self.assertEqual(youtube_noapi.response(response), [])
+
+ html = """
+ <ol id="item-section-063864" class="item-section">
+ <li>
+ <div class="yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile"
+ data-context-item-id="DIVZCPfAOeM"
+ data-visibility-tracking="CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JECx_-GK5uqMpcIB">
+ <div class="yt-lockup-dismissable"><div class="yt-lockup-thumbnail contains-addto">
+ <a aria-hidden="true" href="/watch?v=DIVZCPfAOeM" class=" yt-uix-sessionlink pf-link"
+ data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA">
+ <div class="yt-thumb video-thumb"><img src="//i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg"
+ width="196" height="110"/></div><span class="video-time" aria-hidden="true">11:35</span></a>
+ <span class="thumb-menu dark-overflow-action-menu video-actions">
+ </span>
+ </div>
+ <div class="yt-lockup-content">
+ <h3 class="yt-lockup-title">
+ <a href="/watch?v=DIVZCPfAOeM"
+ class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link"
+ data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA"
+ title="Top Speed Test Kawasaki Ninja H2 (Thailand) By. MEHAY SUPERBIKE"
+ aria-describedby="description-id-259079" rel="spf-prefetch" dir="ltr">
+ Title
+ </a>
+ <span class="accessible-description" id="description-id-259079"> - Durée : 11:35.</span>
+ </h3>
+ <div class="yt-lockup-byline">de
+ <a href="/user/mheejapan" class=" yt-uix-sessionlink spf-link g-hovercard"
+ data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JA" data-ytid="UCzEesu54Hjs0uRKmpy66qeA"
+ data-name="">MEHAY SUPERBIKE</a></div><div class="yt-lockup-meta">
+ <ul class="yt-lockup-meta-info">
+ <li>il y a 20 heures</li>
+ <li>8 424 vues</li>
+ </ul>
+ </div>
+ <div class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2" dir="ltr">
+ Description
+ </div>
+ <div class="yt-lockup-badges">
+ <ul class="yt-badge-list ">
+ <li class="yt-badge-item" >
+ <span class="yt-badge">Nouveauté</span>
+ </li>
+ <li class="yt-badge-item" ><span class="yt-badge " >HD</span></li>
+ </ul>
+ </div>
+ <div class="yt-lockup-action-menu yt-uix-menu-container">
+ <div class="yt-uix-menu yt-uix-videoactionmenu hide-until-delayloaded"
+ data-video-id="DIVZCPfAOeM" data-menu-content-id="yt-uix-videoactionmenu-menu">
+ </div>
+ </div>
+ </div>
+ </div>
+ </div>
+ </li>
+ </ol>
+ """
+ response = mock.Mock(text=html)
+ results = youtube_noapi.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'Title')
+ self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM')
+ self.assertEqual(results[0]['content'], 'Description')
+ self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg')
+ self.assertTrue('DIVZCPfAOeM' in results[0]['embedded'])
+
+ html = """
+ <ol id="item-section-063864" class="item-section">
+ <li>
+ </li>
+ </ol>
+ """
+ response = mock.Mock(text=html)
+ results = youtube_noapi.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 0)
diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py
index 5770458f3..d0a4de4b8 100644
--- a/searx/tests/test_engines.py
+++ b/searx/tests/test_engines.py
@@ -39,4 +39,6 @@ from searx.tests.engines.test_www500px import * # noqa
from searx.tests.engines.test_yacy import * # noqa
from searx.tests.engines.test_yahoo import * # noqa
from searx.tests.engines.test_youtube import * # noqa
+from searx.tests.engines.test_youtube_api import * # noqa
+from searx.tests.engines.test_youtube_noapi import * # noqa
from searx.tests.engines.test_yahoo_news import * # noqa