summary refs log tree commit diff
path: root/searx/engines/duckduckgo_images.py
diff options
context:
space:
mode:
author marc <a01200356@itesm.mx> 2017-05-20 22:33:08 -0500
committer Adam Tauber <asciimoo@gmail.com> 2017-05-23 20:07:09 +0200
commit c65a409f0d2728ba5a0c3ffa1a0cb05659033a71 (patch)
tree 7c56b415c5693544b08d32afcf1c1c073b273545 /searx/engines/duckduckgo_images.py
parent e60e98156f9aeeb40ca7272c883704c095d9f7d4 (diff)
download searxng-c65a409f0d2728ba5a0c3ffa1a0cb05659033a71.tar.gz
searxng-c65a409f0d2728ba5a0c3ffa1a0cb05659033a71.zip
add duckduckgo images engine
Diffstat (limited to 'searx/engines/duckduckgo_images.py')
-rw-r--r-- searx/engines/duckduckgo_images.py 91
1 files changed, 91 insertions, 0 deletions
diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py
new file mode 100644
index 000000000..f3555230e
--- /dev/null
+++ b/searx/engines/duckduckgo_images.py
@@ -0,0 +1,91 @@
+"""
+ DuckDuckGo (Images)
+
+ @website https://duckduckgo.com/
+ @provide-api yes (https://duckduckgo.com/api),
+ but images are not supported
+
+ @using-api no
+ @results JSON (site requires js to get images)
+ @stable no (JSON can change)
+ @parse url, title, img_src
+
+ @todo avoid extra request
+"""
+
+from requests import get
+from json import loads
+from searx.engines.xpath import extract_text
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, get_region_code
+from searx.url_utils import urlencode
+
+# engine dependent config
+# (module-level settings; presumably read by the searx engine loader -- confirm)
+categories = ['images']
+paging = True  # request() derives the `s` offset from params['pageno']
+language_support = True  # request() maps params['language'] to a region code
+safesearch = True  # request() sends params['safesearch'] - 1 as `p`
+
+# search-url
+# images_url: URL built by request(); its placeholders take the url-encoded
+# query arguments, the result offset, the safesearch level and the vqd token
+images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}'
+# site_url: page fetched by get_vqd() to read the vqd token out of its body
+site_url = 'https://duckduckgo.com/?{query}&iar=images&iax=1&ia=images'
+
+
+# run query in site to get vqd number needed for requesting images
+# TODO: find a way to get this number without an extra request (is it a hash of the query?)
+def get_vqd(query):
+    """Fetch the site page for ``query`` and return the vqd token found in it.
+
+    The token is the text between the first ``vqd='`` marker and the next
+    single quote in the response body.
+
+    Returns an empty string when the page holds no complete token.  The
+    previous slicing based on ``str.find`` returned an arbitrary piece of
+    the page when the marker was missing (``find`` yields -1) and dropped
+    the last character when the closing quote was missing.
+    """
+    res = get(site_url.format(query=urlencode({'q': query})))
+
+    # partition() returns an empty separator when it is not found, which
+    # lets both "no marker" and "no closing quote" be detected explicitly
+    _, marker, tail = res.text.partition('vqd=\'')
+    vqd, closing_quote, _ = tail.partition('\'')
+    if not (marker and closing_quote):
+        return ''
+    return vqd
+
+
+# do search-request
+def request(query, params):
+    """Store the image-search URL for ``query`` in ``params['url']``.
+
+    Reads ``params['pageno']``, ``params['safesearch']`` and
+    ``params['language']``, and returns the same ``params`` dict.
+    """
+    # to avoid running actual external requests when testing
+    vqd = '12345' if 'is_test' in params else get_vqd(query)
+
+    # each page advances the result offset by 50
+    offset = (params['pageno'] - 1) * 50
+    safesearch_level = params['safesearch'] - 1
+
+    # the region argument ('l') is only sent when a region code is available
+    query_args = {'q': query}
+    region_code = get_region_code(params['language'])
+    if region_code:
+        query_args['l'] = region_code
+
+    params['url'] = images_url.format(
+        query=urlencode(query_args),
+        offset=offset,
+        safesearch=safesearch_level,
+        vqd=vqd)
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    """Convert the JSON body of ``resp`` into a list of image results.
+
+    ``resp.text`` is parsed as JSON and every entry of its ``'results'``
+    list becomes one dict using the ``images.html`` template.
+
+    Returns an empty list when the body cannot be parsed as JSON.  A parsed
+    body without ``'results'``, or an entry without ``'title'``, ``'url'``,
+    ``'thumbnail'`` or ``'image'``, raises ``KeyError`` as before.
+    """
+    content = resp.text
+    try:
+        res_json = loads(content)
+    except (TypeError, ValueError):
+        # only parsing failures mean "no results"; a bare except here would
+        # also swallow KeyboardInterrupt and SystemExit
+        return []
+
+    results = []
+
+    # parse results
+    for result in res_json['results']:
+        # append result
+        results.append({'template': 'images.html',
+                        'title': result['title'],
+                        'content': '',
+                        'thumbnail_src': result['thumbnail'],
+                        'img_src': result['image'],
+                        'url': result['url']})
+
+    return results