diff options
author | marc <a01200356@itesm.mx> | 2017-05-20 22:33:08 -0500 |
---|---|---|
committer | Adam Tauber <asciimoo@gmail.com> | 2017-05-23 20:07:09 +0200 |
commit | c65a409f0d2728ba5a0c3ffa1a0cb05659033a71 (patch) | |
tree | 7c56b415c5693544b08d32afcf1c1c073b273545 /searx/engines/duckduckgo_images.py | |
parent | e60e98156f9aeeb40ca7272c883704c095d9f7d4 (diff) | |
download | searxng-c65a409f0d2728ba5a0c3ffa1a0cb05659033a71.tar.gz searxng-c65a409f0d2728ba5a0c3ffa1a0cb05659033a71.zip |
add duckduckgo images engine
Diffstat (limited to 'searx/engines/duckduckgo_images.py')
-rw-r--r-- | searx/engines/duckduckgo_images.py | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py new file mode 100644 index 000000000..f3555230e --- /dev/null +++ b/searx/engines/duckduckgo_images.py @@ -0,0 +1,91 @@ +""" + DuckDuckGo (Images) + + @website https://duckduckgo.com/ + @provide-api yes (https://duckduckgo.com/api), + but images are not supported + + @using-api no + @results JSON (site requires js to get images) + @stable no (JSON can change) + @parse url, title, img_src + + @todo avoid extra request +""" + +from requests import get +from json import loads +from searx.engines.xpath import extract_text +from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, get_region_code +from searx.url_utils import urlencode + +# engine dependent config +categories = ['images'] +paging = True +language_support = True +safesearch = True + +# search-url +images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}' +site_url = 'https://duckduckgo.com/?{query}&iar=images&iax=1&ia=images' + + +# run query in site to get vqd number needed for requesting images +# TODO: find a way to get this number without an extra request (is it a hash of the query?) 
def _extract_vqd(content):
    """Return the token that follows ``vqd='`` in *content*.

    An empty string is returned when the marker or its closing quote is
    missing, so that a chunk of unrelated page text is never used as a token.
    """
    _, marker, tail = content.partition('vqd=\'')
    if not marker:
        return ''
    token, closing_quote, _ = tail.partition('\'')
    return token if closing_quote else ''


def get_vqd(query):
    """Request the site page for *query* and return the ``vqd`` value in it.

    The value is needed to build the image request URL, which costs one
    extra HTTP request per search.

    :param query: search terms, url-encoded as the ``q`` parameter
    :returns: the text between ``vqd='`` and the next single quote, or an
              empty string if the page does not contain such a token
    """
    res = get(site_url.format(query=urlencode({'q': query})))
    return _extract_vqd(res.text)


# do search-request
def request(query, params):
    """Fill ``params['url']`` with the image search URL and return *params*.

    :param query: search terms
    :param params: request parameters; ``pageno``, ``safesearch`` and
                   ``language`` are read, ``url`` is written
    :returns: the same *params* dict
    """
    # to avoid running actual external requests when testing
    vqd = '12345' if 'is_test' in params else get_vqd(query)

    # results are requested in steps of 50 per page
    offset = (params['pageno'] - 1) * 50

    # the value sent as ``p`` is the configured safesearch level minus one
    safesearch_level = params['safesearch'] - 1

    # the region is only sent when a region code is available
    query_args = {'q': query}
    region_code = get_region_code(params['language'])
    if region_code:
        query_args['l'] = region_code

    params['url'] = images_url.format(
        query=urlencode(query_args), offset=offset, safesearch=safesearch_level, vqd=vqd)

    return params


# get response from search-request
def response(resp):
    """Convert the JSON body of *resp* into a list of image results.

    :param resp: response object whose ``text`` attribute holds the body
    :returns: list of result dicts using the ``images.html`` template; an
              empty list if the body is not valid JSON, is not a JSON
              object, or has no ``results`` entry
    """
    try:
        res_json = loads(resp.text)
    except ValueError:
        # invalid JSON (json.JSONDecodeError is a subclass of ValueError)
        return []

    if not isinstance(res_json, dict):
        return []

    # parse results
    return [{'template': 'images.html',
             'title': result['title'],
             'content': '',
             'thumbnail_src': result['thumbnail'],
             'img_src': result['image'],
             'url': result['url']}
            for result in res_json.get('results', [])]