diff options
author | marc <a01200356@itesm.mx> | 2017-05-20 22:33:08 -0500 |
---|---|---|
committer | Adam Tauber <asciimoo@gmail.com> | 2017-05-23 20:07:09 +0200 |
commit | c65a409f0d2728ba5a0c3ffa1a0cb05659033a71 (patch) | |
tree | 7c56b415c5693544b08d32afcf1c1c073b273545 /searx | |
parent | e60e98156f9aeeb40ca7272c883704c095d9f7d4 (diff) | |
download | searxng-c65a409f0d2728ba5a0c3ffa1a0cb05659033a71.tar.gz searxng-c65a409f0d2728ba5a0c3ffa1a0cb05659033a71.zip |
add duckduckgo images engine
Diffstat (limited to 'searx')
-rw-r--r-- | searx/engines/duckduckgo.py | 65 | ||||
-rw-r--r-- | searx/engines/duckduckgo_images.py | 91 | ||||
-rw-r--r-- | searx/settings.yml | 6 |
3 files changed, 132 insertions, 30 deletions
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 1872ab7d4..1c0587451 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -41,46 +41,51 @@ title_xpath = './/a[@class="result__a"]' content_xpath = './/a[@class="result__snippet"]' -# do search-request -def request(query, params): - if params['time_range'] and params['time_range'] not in time_range_dict: - return params - - offset = 30 + (params['pageno'] - 1) * 50 - dc_param = offset + 1 - +# match query's language to a region code that duckduckgo will accept +def get_region_code(lang): # custom fixes for languages - if params['language'] == 'all': - locale = None - elif params['language'][:2] == 'ja': - locale = 'jp-jp' - elif params['language'][:2] == 'sl': - locale = 'sl-sl' - elif params['language'] == 'zh-TW': - locale = 'tw-tzh' - elif params['language'] == 'zh-HK': - locale = 'hk-tzh' - elif params['language'][-2:] == 'SA': - locale = 'xa-' + params['language'].split('-')[0] - elif params['language'][-2:] == 'GB': - locale = 'uk-' + params['language'].split('-')[0] + if lang == 'all': + region_code = None + elif lang[:2] == 'ja': + region_code = 'jp-jp' + elif lang[:2] == 'sl': + region_code = 'sl-sl' + elif lang == 'zh-TW': + region_code = 'tw-tzh' + elif lang == 'zh-HK': + region_code = 'hk-tzh' + elif lang[-2:] == 'SA': + region_code = 'xa-' + lang.split('-')[0] + elif lang[-2:] == 'GB': + region_code = 'uk-' + lang.split('-')[0] else: - locale = params['language'].split('-') - if len(locale) == 2: + region_code = lang.split('-') + if len(region_code) == 2: # country code goes first - locale = locale[1].lower() + '-' + locale[0].lower() + region_code = region_code[1].lower() + '-' + region_code[0].lower() else: # tries to get a country code from language - locale = locale[0].lower() + region_code = region_code[0].lower() for lc in supported_languages: lc = lc.split('-') - if locale == lc[0]: - locale = lc[1].lower() + '-' + lc[0].lower() + if region_code == lc[0]: + region_code = lc[1].lower() + '-' + lc[0].lower() break + return region_code + + +# do search-request +def request(query, params): + if params['time_range'] and params['time_range'] not in time_range_dict: + return params + + offset = 30 + (params['pageno'] - 1) * 50 + dc_param = offset + 1 - if locale: + region_code = get_region_code(params['language']) + if region_code: params['url'] = url.format( - query=urlencode({'q': query, 'kl': locale}), offset=offset, dc_param=dc_param) + query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=dc_param) else: params['url'] = url.format( query=urlencode({'q': query}), offset=offset, dc_param=dc_param) diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py new file mode 100644 index 000000000..f3555230e --- /dev/null +++ b/searx/engines/duckduckgo_images.py @@ -0,0 +1,91 @@ +""" + DuckDuckGo (Images) + + @website https://duckduckgo.com/ + @provide-api yes (https://duckduckgo.com/api), + but images are not supported + + @using-api no + @results JSON (site requires js to get images) + @stable no (JSON can change) + @parse url, title, img_src + + @todo avoid extra request +""" + +from requests import get +from json import loads +from searx.engines.xpath import extract_text +from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, get_region_code +from searx.url_utils import urlencode + +# engine dependent config +categories = ['images'] +paging = True +language_support = True +safesearch = True + +# search-url +images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}' +site_url = 'https://duckduckgo.com/?{query}&iar=images&iax=1&ia=images' + + +# run query in site to get vqd number needed for requesting images +# TODO: find a way to get this number without an extra request (is it a hash of the query?) +def get_vqd(query): + res = get(site_url.format(query=urlencode({'q': query}))) + content = res.text + vqd = content[content.find('vqd=\'') + 5:] + vqd = vqd[:vqd.find('\'')] + return vqd + + +# do search-request +def request(query, params): + # to avoid running actual external requests when testing + if 'is_test' not in params: + vqd = get_vqd(query) + else: + vqd = '12345' + + offset = (params['pageno'] - 1) * 50 + + safesearch = params['safesearch'] - 1 + + region_code = get_region_code(params['language']) + if region_code: + params['url'] = images_url.format( + query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd) + else: + params['url'] = images_url.format( + query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd) + + return params + + +# get response from search-request +def response(resp): + results = [] + + content = resp.text + try: + res_json = loads(content) + except: + return [] + + # parse results + for result in res_json['results']: + title = result['title'] + url = result['url'] + thumbnail = result['thumbnail'] + image = result['image'] + + # append result + results.append({'template': 'images.html', + 'title': title, + 'content': '', + 'thumbnail_src': thumbnail, + 'img_src': image, + 'url': url}) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index af91a18f4..17b0bd580 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -167,6 +167,12 @@ engines: shortcut : ddg disabled : True + - name : duckduckgo images + engine : duckduckgo_images + shortcut : ddi + timeout: 3.0 + disabled : True + - name : etymonline engine : xpath paging : True |