summaryrefslogtreecommitdiff
path: root/searx/engines/dailymotion.py
blob: 03e1d7ffc2da1fc2d3c37683eed6333a0725a11c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from urllib import urlencode
from json import loads
from lxml import html

# Engine settings: result category and the localization value sent with
# every search request.
categories = ['videos']
locale = 'en_US'

# Search endpoint template; ``{pageno}`` and ``{query}`` are filled in by
# request().  Field reference:
# http://www.dailymotion.com/doc/api/obj-video.html
search_url = (
    'https://api.dailymotion.com/videos'
    '?fields=title,description,duration,url,thumbnail_360_url'
    '&sort=relevance&limit=25&page={pageno}&{query}'
)

# TODO use video result template
# HTML snippet for a linked thumbnail: {0} is the video URL, {1} the image URL.
content_tpl = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'

# request() places params['pageno'] into the URL, so paging is enabled.
paging = True


def request(query, params):
    """Store the Dailymotion search URL for *query* in ``params['url']``.

    The search terms and the module-level ``locale`` are URL-encoded into a
    query string, which is substituted into ``search_url`` together with
    ``params['pageno']``.  The same *params* dict is returned after being
    updated in place.
    """
    query_string = urlencode({'search': query, 'localization': locale})
    params['url'] = search_url.format(pageno=params['pageno'],
                                      query=query_string)
    return params


def response(resp):
    """Turn a Dailymotion API reply into a list of result dicts.

    ``resp.text`` is parsed as JSON.  If the decoded object has no ``'list'``
    entry an empty list is returned.  Otherwise one dict with the keys
    ``'url'``, ``'title'`` and ``'content'`` is produced per entry.

    ``'content'`` is an HTML snippet: a linked thumbnail (when the entry has
    a thumbnail URL) followed by at most 500 characters of the description
    with its markup removed.  Every value taken from the API is escaped
    before being placed into that snippet, because the text extracted from
    the description has its entities decoded and would otherwise be able to
    inject markup.

    ``'title'`` and ``'url'`` are required in each entry; the thumbnail and
    description are optional and may be missing, ``None`` or empty.
    """
    # Local import: ``escape`` is not in the file's import header, and the
    # name ``html`` is already bound to ``lxml.html`` in this module.
    from xml.sax.saxutils import escape

    results = []
    search_res = loads(resp.text)
    if 'list' not in search_res:
        return results

    # Values placed inside double-quoted attributes also need '"' escaped.
    attr_entities = {'"': '&quot;'}

    for res in search_res['list']:
        title = res['title']
        url = res['url']

        thumbnail = res.get('thumbnail_360_url')
        if thumbnail:
            content = content_tpl.format(escape(url, attr_entities),
                                         escape(thumbnail, attr_entities))
        else:
            content = ''

        description = res.get('description')
        if description:
            # Truncate the plain text first, then escape, so the cut never
            # lands in the middle of an entity.
            plain_text = text_content_from_html(description)
            content += escape(plain_text[:500])

        results.append({'url': url, 'title': title, 'content': content})
    return results


def text_content_from_html(html_string):
    """Return the text content of the HTML fragment *html_string*.

    The string is parsed with ``lxml.html.fragment_fromstring`` using
    ``create_parent=True``, and the ``text_content()`` of the resulting
    element is returned.
    """
    parsed = html.fragment_fromstring(
        html_string,
        create_parent=True,
    )
    return parsed.text_content()