# searx/engines/duckduckgo_definitions.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157

import json
from lxml import html
from re import compile
from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
from searx.utils import html_to_text

# DuckDuckGo API endpoint template; `{query}` is filled in by request() with
# the url-encoded `q=...` pair.
url = ('https://api.duckduckgo.com/'
       '?{query}&format=json&pretty=0&no_redirect=1&d=1')

# matches a leading "http:" scheme (used to rewrite infobox ids to "https:")
http_regex = compile(r'^http:')


def result_to_text(url, text, htmlResult):
    """Return the display text for a related-topic entry.

    The text of the first ``<a>`` element found in ``htmlResult`` is
    preferred; ``text`` is returned when the markup is missing, blank or
    contains no link.

    :param url: url of the entry (currently unused, kept for callers)
    :param text: plain-text fallback
    :param htmlResult: HTML snippet of the entry, may be ``None`` or empty
    :return: the link text, or ``text`` as fallback
    """
    # TODO : remove result ending with "Meaning" or "Category"

    # html.fromstring() cannot parse None or an empty document, and callers
    # pass whatever the API delivered (possibly nothing): fall back to text.
    if not htmlResult or not htmlResult.strip():
        return text

    dom = html.fromstring(htmlResult)
    links = dom.xpath('//a')
    if links:
        return extract_text(links[0])
    return text


def request(query, params):
    """Prepare the outgoing request for ``query``.

    Sets ``params['url']`` to the API url carrying the url-encoded query and
    ``params['headers']['Accept-Language']`` to the part of
    ``params['language']`` before the first ``-``.  Returns ``params``.
    """
    encoded_query = urlencode({'q': query})
    params['url'] = url.format(query=encoded_query)

    # only the leading subtag is sent, e.g. 'en' for 'en-US'
    primary_language = params['language'].split('-')[0]
    params['headers']['Accept-Language'] = primary_language

    return params


def response(resp):
    """Convert an API response into a list of searx result dicts.

    ``resp.text`` is parsed as JSON.  Depending on its content the returned
    list holds:

    * an ``answer`` entry when ``Answer`` is non-empty,
    * one ``title``/``url`` entry per ``Results`` item having a ``FirstURL``,
    * one ``suggestion`` entry per top-level ``RelatedTopics`` item having a
      ``FirstURL``, unless the suggestion equals the heading,
    * when ``Heading`` is non-empty: a plain result if a single url and
      nothing else was collected, otherwise an ``infobox`` entry.

    Missing, ``null`` and empty fields are treated alike.

    :param resp: response object exposing the JSON payload as ``resp.text``
    :return: list of result dicts
    """
    results = []

    search_res = json.loads(resp.text)

    heading = search_res.get('Heading') or ''
    attributes = []
    urls = []
    infobox_id = None
    relatedTopics = []

    # add answer if there is one
    answer = search_res.get('Answer')
    if answer:
        results.append({'answer': html_to_text(answer)})

    # infobox text: definition followed by abstract
    content = (search_res.get('Definition') or '') + (search_res.get('Abstract') or '')

    # image
    image = search_res.get('Image') or None

    # attributes: only an object-valued 'Infobox' can carry them, any other
    # value (e.g. an empty string or null) is ignored
    infobox = search_res.get('Infobox')
    if isinstance(infobox, dict):
        for info in infobox.get('content') or []:
            attributes.append({'label': info.get('label'),
                               'value': info.get('value')})

    # urls
    for ddg_result in search_res.get('Results') or []:
        if 'FirstURL' in ddg_result:
            firstURL = ddg_result.get('FirstURL', '')
            text = ddg_result.get('Text', '')
            urls.append({'title': text, 'url': firstURL})
            results.append({'title': heading, 'url': firstURL})

    # related topics
    for ddg_result in search_res.get('RelatedTopics') or []:
        if 'FirstURL' in ddg_result:
            # plain topic: offered as a search suggestion
            suggestion = result_to_text(ddg_result.get('FirstURL', None),
                                        ddg_result.get('Text', None),
                                        ddg_result.get('Result', None))
            if suggestion != heading:
                results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            # named group of topics: attached to the infobox
            suggestions = []
            relatedTopics.append({'name': ddg_result.get('Name', ''),
                                  'suggestions': suggestions})
            for topic_result in ddg_result.get('Topics') or []:
                suggestion = result_to_text(topic_result.get('FirstURL', None),
                                            topic_result.get('Text', None),
                                            topic_result.get('Result', None))
                if suggestion != heading:
                    suggestions.append(suggestion)

    # abstract
    abstractURL = search_res.get('AbstractURL')
    if abstractURL:
        # add as result ? problem always in english
        infobox_id = abstractURL
        urls.append({'title': search_res.get('AbstractSource'),
                     'url': abstractURL})

    # definition (its url takes precedence over the abstract url as id)
    definitionURL = search_res.get('DefinitionURL')
    if definitionURL:
        # add as result ? as answer ? problem always in english
        infobox_id = definitionURL
        urls.append({'title': search_res.get('DefinitionSource'),
                     'url': definitionURL})

    # to merge with wikidata's infobox
    if infobox_id:
        infobox_id = http_regex.sub('https:', infobox_id)

    # entity
    entity = search_res.get('Entity', None)
    # TODO continent / country / department / location / waterfall /
    #      mountain range :
    #      link to map search, get weather, nearby locations
    # TODO musician : link to music search
    # TODO concert tour : ??
    # TODO film / actor / television  / media franchise :
    #      links to IMDB / rottentomatoes (or scrape result)
    # TODO music : link to musicbrainz / last.fm
    # TODO book : ??
    # TODO artist / playwright : ??
    # TODO company : ??
    # TODO software / os : ??
    # TODO software engineer : ??
    # TODO prepared food : ??
    # TODO website : ??
    # TODO performing art : ??
    # TODO programming language : ??
    # TODO file format : ??

    if heading:
        # TODO get infobox.meta.value where .label='article_title'
        only_one_url = (image is None and not attributes and len(urls) == 1
                        and not relatedTopics and not content)
        if only_one_url:
            # nothing but a single link was collected: a plain result is enough
            results.append({
                'url': urls[0]['url'],
                'title': heading,
                'content': content
            })
        else:
            results.append({
                'infobox': heading,
                'id': infobox_id,
                'entity': entity,
                'content': content,
                'img_src': image,
                'attributes': attributes,
                'urls': urls,
                'relatedTopics': relatedTopics
            })

    return results