summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
author Alexandre Flament <alex@al-f.net> 2019-08-01 07:44:30 +0200
committer GitHub <noreply@github.com> 2019-08-01 07:44:30 +0200
commit 1bed39e6cb877f75681f82fc8891f7dd4efbb9b6 (patch)
tree 60321d6d247c752ab5ac2f3c242daffd6795aaf2 /searx/engines
parent 7f56c78876c3f1ccd73c13a1275249a42c2c2405 (diff)
parent 0c032c84291954da206e8084553d91add240afd2 (diff)
download searxng-1bed39e6cb877f75681f82fc8891f7dd4efbb9b6.tar.gz
searxng-1bed39e6cb877f75681f82fc8891f7dd4efbb9b6.zip
Merge pull request #1658 from dalf/video-fixes
Fix dailymotion, google_videos and youtube_noapi engines
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/dailymotion.py 4
-rw-r--r-- searx/engines/google_videos.py 20
-rw-r--r-- searx/engines/youtube_noapi.py 15
3 files changed, 22 insertions, 17 deletions
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 069aceaa3..1038e64bf 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -15,7 +15,7 @@
from json import loads
from datetime import datetime
from searx.url_utils import urlencode
-from searx.utils import match_language
+from searx.utils import match_language, html_to_text
# engine dependent config
categories = ['videos']
@@ -59,7 +59,7 @@ def response(resp):
for res in search_res['list']:
title = res['title']
url = res['url']
- content = res['description']
+ content = html_to_text(res['description'])
thumbnail = res['thumbnail_360_url']
publishedDate = datetime.fromtimestamp(res['created_time'], None)
embedded = embedded_url.format(videoid=res['id'])
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index 9a41b2dfa..fd6b2e3be 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -75,15 +75,17 @@ def response(resp):
# get thumbnails
script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
- id = result.xpath('.//div[@class="s"]//img/@id')[0]
- thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
- script)
- tmp = []
- if len(thumbnails_data) != 0:
- tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
- thumbnail = ''
- if len(tmp) != 0:
- thumbnail = tmp[-1]
+ ids = result.xpath('.//div[@class="s"]//img/@id')
+ if len(ids) > 0:
+ thumbnails_data = \
+ re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + ids[0],
+ script)
+ tmp = []
+ if len(thumbnails_data) != 0:
+ tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
+ thumbnail = ''
+ if len(tmp) != 0:
+ thumbnail = tmp[-1]
# append result
results.append({'url': url,
diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py
index 53a10bf35..49d0ae604 100644
--- a/searx/engines/youtube_noapi.py
+++ b/searx/engines/youtube_noapi.py
@@ -67,12 +67,8 @@ def response(resp):
if videoid is not None:
url = base_youtube_url + videoid
thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg'
- title = video.get('title', {}).get('simpleText', videoid)
- description_snippet = video.get('descriptionSnippet', {})
- if 'runs' in description_snippet:
- content = reduce(lambda a, b: a + b.get('text', ''), description_snippet.get('runs'), '')
- else:
- content = description_snippet.get('simpleText', '')
+ title = get_text_from_json(video.get('title', {}))
+ content = get_text_from_json(video.get('descriptionSnippet', {}))
embedded = embedded_url.format(videoid=videoid)
# append result
@@ -85,3 +81,10 @@ def response(resp):
# return results
return results
+
+
+def get_text_from_json(element):
+ if 'runs' in element:
+ return reduce(lambda a, b: a + b.get('text', ''), element.get('runs'), '')
+ else:
+ return element.get('simpleText', '')