Merge pull request #1444 from Venca24/devel_google_videos

[fix] google videos engine
author: Noémi Ványi <kvch@users.noreply.github.com> 2019-01-05 18:08:05 +0100
committer: GitHub <noreply@github.com> 2019-01-05 18:08:05 +0100
commit: abcbcec0b5ab9a3108ccc972876fdb60c7911e7a (patch)
tree: 65997b07ea9b2292662f9d5453c6fd388d10845a /searx/engines/google_videos.py
parent: 899ba5d6dee82faacb572b4d9bc4c58570628531 (diff)
parent: 2456b8f57199b0479b063fa3dfb16a585c6a40ed (diff)
download: searxng-abcbcec0b5ab9a3108ccc972876fdb60c7911e7a.tar.gz searxng-abcbcec0b5ab9a3108ccc972876fdb60c7911e7a.zip
1 files changed, 18 insertions, 6 deletions
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index 310b31490..9a41b2dfa 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -7,7 +7,7 @@
  @using-api   no
  @results     HTML
  @stable      no
- @parse       url, title, content
+ @parse       url, title, content, thumbnail
 """
 
 from datetime import date, timedelta
@@ -15,7 +15,7 @@ from json import loads
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.url_utils import urlencode
-
+import re
 
 # engine dependent config
 categories = ['videos']
@@ -25,7 +25,7 @@ time_range_support = True
 number_of_results = 10
 
 search_url = 'https://www.google.com/search'\
-    '?{query}'\
+    '?q={query}'\
     '&tbm=vid'\
     '&{search_options}'
 time_range_attr = "qdr:{range}"
@@ -69,15 +69,27 @@ def response(resp):
     # parse results
     for result in dom.xpath('//div[@class="g"]'):
 
-        title = extract_text(result.xpath('.//h3/a'))
-        url = result.xpath('.//h3/a/@href')[0]
+        title = extract_text(result.xpath('.//h3'))
+        url = result.xpath('.//div[@class="r"]/a/@href')[0]
         content = extract_text(result.xpath('.//span[@class="st"]'))
 
+        # get thumbnails
+        script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
+        id = result.xpath('.//div[@class="s"]//img/@id')[0]
+        thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
+                                     script)
+        tmp = []
+        if len(thumbnails_data) != 0:
+            tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
+        thumbnail = ''
+        if len(tmp) != 0:
+            thumbnail = tmp[-1]
+
         # append result
         results.append({'url': url,
                         'title': title,
                         'content': content,
-                        'thumbnail': '',
+                        'thumbnail': thumbnail,
                         'template': 'videos.html'})
 
     return results
author	Noémi Ványi <kvch@users.noreply.github.com>	2019-01-05 18:08:05 +0100
committer	GitHub <noreply@github.com>	2019-01-05 18:08:05 +0100
commit	abcbcec0b5ab9a3108ccc972876fdb60c7911e7a (patch)
tree	65997b07ea9b2292662f9d5453c6fd388d10845a /searx/engines/google_videos.py
parent	899ba5d6dee82faacb572b4d9bc4c58570628531 (diff)
parent	2456b8f57199b0479b063fa3dfb16a585c6a40ed (diff)
download	searxng-abcbcec0b5ab9a3108ccc972876fdb60c7911e7a.tar.gz searxng-abcbcec0b5ab9a3108ccc972876fdb60c7911e7a.zip