summaryrefslogtreecommitdiff
path: root/searx/engines/duckduckgo_definitions.py
diff options
context:
space:
mode:
authorDalf <alex@al-f.net>2014-09-28 16:51:41 +0200
committerDalf <alex@al-f.net>2014-09-28 16:51:41 +0200
commit6bfd5663539052a64c984f5bdb7135d0d652c923 (patch)
treeacf2049046d62c0b4849d0b36815dfe21117bcec /searx/engines/duckduckgo_definitions.py
parente39d9fe5423a0fceed1d15dc63c1f8aa30d72e44 (diff)
downloadsearxng-6bfd5663539052a64c984f5bdb7135d0d652c923.tar.gz
searxng-6bfd5663539052a64c984f5bdb7135d0d652c923.zip
[enh] add infoboxes and answers
Diffstat (limited to 'searx/engines/duckduckgo_definitions.py')
-rw-r--r--searx/engines/duckduckgo_definitions.py121
1 files changed, 114 insertions, 7 deletions
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 3037aae53..3da7352a4 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -1,10 +1,25 @@
import json
from urllib import urlencode
+from lxml import html
+from searx.engines.xpath import extract_text
-url = 'http://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1'
+url = 'https://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1&d=1'
+def result_to_text(url, text, htmlResult):
+ # TODO : remove result ending with "Meaning" or "Category"
+ dom = html.fromstring(htmlResult)
+ a = dom.xpath('//a')
+ if len(a)>=1:
+ return extract_text(a[0])
+ else:
+ return text
+
+def html_to_text(htmlFragment):
+ dom = html.fromstring(htmlFragment)
+ return extract_text(dom)
def request(query, params):
+ # TODO add kl={locale}
params['url'] = url.format(query=urlencode({'q': query}))
return params
@@ -12,12 +27,104 @@ def request(query, params):
def response(resp):
search_res = json.loads(resp.text)
results = []
+
+ content = ''
+ heading = search_res.get('Heading', '')
+ attributes = []
+ urls = []
+ infobox_id = None
+ relatedTopics = []
+
+ # add answer if there is one
+ answer = search_res.get('Answer', '')
+ if answer != '':
+ results.append({ 'answer' : html_to_text(answer) })
+
+ # add infobox
if 'Definition' in search_res:
- if search_res.get('AbstractURL'):
- res = {'title': search_res.get('Heading', ''),
- 'content': search_res.get('Definition', ''),
- 'url': search_res.get('AbstractURL', ''),
- 'class': 'definition_result'}
- results.append(res)
+ content = content + search_res.get('Definition', '')
+
+ if 'Abstract' in search_res:
+ content = content + search_res.get('Abstract', '')
+
+
+ # image
+ image = search_res.get('Image', '')
+ image = None if image == '' else image
+
+ # attributes
+ if 'Infobox' in search_res:
+ infobox = search_res.get('Infobox', None)
+ if 'content' in infobox:
+ for info in infobox.get('content'):
+ attributes.append({'label': info.get('label'), 'value': info.get('value')})
+
+ # urls
+ for ddg_result in search_res.get('Results', []):
+ if 'FirstURL' in ddg_result:
+ firstURL = ddg_result.get('FirstURL', '')
+ text = ddg_result.get('Text', '')
+ urls.append({'title':text, 'url':firstURL})
+ results.append({'title':heading, 'url': firstURL})
+
+ # related topics
+ for ddg_result in search_res.get('RelatedTopics', None):
+ if 'FirstURL' in ddg_result:
+ suggestion = result_to_text(ddg_result.get('FirstURL', None), ddg_result.get('Text', None), ddg_result.get('Result', None))
+ if suggestion != heading:
+ results.append({'suggestion': suggestion})
+ elif 'Topics' in ddg_result:
+ suggestions = []
+ relatedTopics.append({ 'name' : ddg_result.get('Name', ''), 'suggestions': suggestions })
+ for topic_result in ddg_result.get('Topics', []):
+ suggestion = result_to_text(topic_result.get('FirstURL', None), topic_result.get('Text', None), topic_result.get('Result', None))
+ if suggestion != heading:
+ suggestions.append(suggestion)
+
+ # abstract
+ abstractURL = search_res.get('AbstractURL', '')
+ if abstractURL != '':
+ # add as result ? problem always in english
+ infobox_id = abstractURL
+ urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL})
+
+ # definition
+ definitionURL = search_res.get('DefinitionURL', '')
+ if definitionURL != '':
+ # add as result ? as answer ? problem always in english
+ infobox_id = definitionURL
+ urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL})
+
+ # entity
+ entity = search_res.get('Entity', None)
+ # TODO continent / country / department / location / waterfall / mountain range : link to map search, get weather, nearby locations
+ # TODO musician : link to music search
+ # TODO concert tour : ??
+ # TODO film / actor / television / media franchise : links to IMDB / rottentomatoes (or scrape result)
+ # TODO music : link to musicbrainz / last.fm
+ # TODO book : ??
+ # TODO artist / playwright : ??
+ # TODO company : ??
+ # TODO software / os : ??
+ # TODO software engineer : ??
+ # TODO prepared food : ??
+ # TODO website : ??
+ # TODO performing art : ??
+ # TODO prepared food : ??
+ # TODO programming language : ??
+ # TODO file format : ??
+
+ if len(heading)>0:
+ # TODO get infobox.meta.value where .label='article_title'
+ results.append({
+ 'infobox': heading,
+ 'id': infobox_id,
+ 'entity': entity,
+ 'content': content,
+ 'img_src' : image,
+ 'attributes': attributes,
+ 'urls': urls,
+ 'relatedTopics': relatedTopics
+ })
return results