diff options
author | Alexandre Flament <alex@al-f.net> | 2019-08-01 07:44:30 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-08-01 07:44:30 +0200 |
commit | 1bed39e6cb877f75681f82fc8891f7dd4efbb9b6 (patch) | |
tree | 60321d6d247c752ab5ac2f3c242daffd6795aaf2 /searx | |
parent | 7f56c78876c3f1ccd73c13a1275249a42c2c2405 (diff) | |
parent | 0c032c84291954da206e8084553d91add240afd2 (diff) | |
download | searxng-1bed39e6cb877f75681f82fc8891f7dd4efbb9b6.tar.gz searxng-1bed39e6cb877f75681f82fc8891f7dd4efbb9b6.zip |
Merge pull request #1658 from dalf/video-fixes
Fix dailymotion, google_videos and youtube_noapi engines
Diffstat (limited to 'searx')
-rw-r--r-- | searx/engines/dailymotion.py | 4 | ||||
-rw-r--r-- | searx/engines/google_videos.py | 20 | ||||
-rw-r--r-- | searx/engines/youtube_noapi.py | 15 |
3 files changed, 22 insertions, 17 deletions
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 069aceaa3..1038e64bf 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -15,7 +15,7 @@
 from json import loads
 from datetime import datetime
 from searx.url_utils import urlencode
-from searx.utils import match_language
+from searx.utils import match_language, html_to_text
 
 # engine dependent config
 categories = ['videos']
@@ -59,7 +59,7 @@ def response(resp):
     for res in search_res['list']:
         title = res['title']
         url = res['url']
-        content = res['description']
+        content = html_to_text(res['description'])
         thumbnail = res['thumbnail_360_url']
         publishedDate = datetime.fromtimestamp(res['created_time'], None)
         embedded = embedded_url.format(videoid=res['id'])
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index 9a41b2dfa..fd6b2e3be 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -75,15 +75,17 @@ def response(resp):
 
         # get thumbnails
         script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
-        id = result.xpath('.//div[@class="s"]//img/@id')[0]
-        thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
-                                     script)
-        tmp = []
-        if len(thumbnails_data) != 0:
-            tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
-        thumbnail = ''
-        if len(tmp) != 0:
-            thumbnail = tmp[-1]
+        ids = result.xpath('.//div[@class="s"]//img/@id')
+        if len(ids) > 0:
+            thumbnails_data = \
+                re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + ids[0],
+                           script)
+            tmp = []
+            if len(thumbnails_data) != 0:
+                tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
+            thumbnail = ''
+            if len(tmp) != 0:
+                thumbnail = tmp[-1]
 
         # append result
         results.append({'url': url,
diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py
index 53a10bf35..49d0ae604 100644
--- a/searx/engines/youtube_noapi.py
+++ b/searx/engines/youtube_noapi.py
@@ -67,12 +67,8 @@ def response(resp):
             if videoid is not None:
                 url = base_youtube_url + videoid
                 thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg'
-                title = video.get('title', {}).get('simpleText', videoid)
-                description_snippet = video.get('descriptionSnippet', {})
-                if 'runs' in description_snippet:
-                    content = reduce(lambda a, b: a + b.get('text', ''), description_snippet.get('runs'), '')
-                else:
-                    content = description_snippet.get('simpleText', '')
+                title = get_text_from_json(video.get('title', {}))
+                content = get_text_from_json(video.get('descriptionSnippet', {}))
                 embedded = embedded_url.format(videoid=videoid)
 
                 # append result
@@ -85,3 +81,10 @@ def response(resp):
 
     # return results
     return results
+
+
+def get_text_from_json(element):
+    if 'runs' in element:
+        return reduce(lambda a, b: a + b.get('text', ''), element.get('runs'), '')
+    else:
+        return element.get('simpleText', '')