summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
authormarc <a01200356@itesm.mx>2017-05-20 22:33:08 -0500
committerAdam Tauber <asciimoo@gmail.com>2017-05-23 20:07:09 +0200
commitc65a409f0d2728ba5a0c3ffa1a0cb05659033a71 (patch)
tree7c56b415c5693544b08d32afcf1c1c073b273545 /searx
parente60e98156f9aeeb40ca7272c883704c095d9f7d4 (diff)
downloadsearxng-c65a409f0d2728ba5a0c3ffa1a0cb05659033a71.tar.gz
searxng-c65a409f0d2728ba5a0c3ffa1a0cb05659033a71.zip
add duckduckgo images engine
Diffstat (limited to 'searx')
-rw-r--r--searx/engines/duckduckgo.py65
-rw-r--r--searx/engines/duckduckgo_images.py91
-rw-r--r--searx/settings.yml6
3 files changed, 132 insertions, 30 deletions
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 1872ab7d4..1c0587451 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -41,46 +41,51 @@ title_xpath = './/a[@class="result__a"]'
content_xpath = './/a[@class="result__snippet"]'
-# do search-request
-def request(query, params):
- if params['time_range'] and params['time_range'] not in time_range_dict:
- return params
-
- offset = 30 + (params['pageno'] - 1) * 50
- dc_param = offset + 1
-
+# match query's language to a region code that duckduckgo will accept
+def get_region_code(lang):
# custom fixes for languages
- if params['language'] == 'all':
- locale = None
- elif params['language'][:2] == 'ja':
- locale = 'jp-jp'
- elif params['language'][:2] == 'sl':
- locale = 'sl-sl'
- elif params['language'] == 'zh-TW':
- locale = 'tw-tzh'
- elif params['language'] == 'zh-HK':
- locale = 'hk-tzh'
- elif params['language'][-2:] == 'SA':
- locale = 'xa-' + params['language'].split('-')[0]
- elif params['language'][-2:] == 'GB':
- locale = 'uk-' + params['language'].split('-')[0]
+ if lang == 'all':
+ region_code = None
+ elif lang[:2] == 'ja':
+ region_code = 'jp-jp'
+ elif lang[:2] == 'sl':
+ region_code = 'sl-sl'
+ elif lang == 'zh-TW':
+ region_code = 'tw-tzh'
+ elif lang == 'zh-HK':
+ region_code = 'hk-tzh'
+ elif lang[-2:] == 'SA':
+ region_code = 'xa-' + lang.split('-')[0]
+ elif lang[-2:] == 'GB':
+ region_code = 'uk-' + lang.split('-')[0]
else:
- locale = params['language'].split('-')
- if len(locale) == 2:
+ region_code = lang.split('-')
+ if len(region_code) == 2:
# country code goes first
- locale = locale[1].lower() + '-' + locale[0].lower()
+ region_code = region_code[1].lower() + '-' + region_code[0].lower()
else:
# tries to get a country code from language
- locale = locale[0].lower()
+ region_code = region_code[0].lower()
for lc in supported_languages:
lc = lc.split('-')
- if locale == lc[0]:
- locale = lc[1].lower() + '-' + lc[0].lower()
+ if region_code == lc[0]:
+ region_code = lc[1].lower() + '-' + lc[0].lower()
break
+ return region_code
+
+
+# do search-request
+def request(query, params):
+ if params['time_range'] and params['time_range'] not in time_range_dict:
+ return params
+
+ offset = 30 + (params['pageno'] - 1) * 50
+ dc_param = offset + 1
- if locale:
+ region_code = get_region_code(params['language'])
+ if region_code:
params['url'] = url.format(
- query=urlencode({'q': query, 'kl': locale}), offset=offset, dc_param=dc_param)
+ query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=dc_param)
else:
params['url'] = url.format(
query=urlencode({'q': query}), offset=offset, dc_param=dc_param)
diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py
new file mode 100644
index 000000000..f3555230e
--- /dev/null
+++ b/searx/engines/duckduckgo_images.py
@@ -0,0 +1,91 @@
+"""
+ DuckDuckGo (Images)
+
+ @website https://duckduckgo.com/
+ @provide-api yes (https://duckduckgo.com/api),
+ but images are not supported
+
+ @using-api no
+ @results JSON (site requires js to get images)
+ @stable no (JSON can change)
+ @parse url, title, img_src
+
+ @todo avoid extra request
+"""
+
+from requests import get
+from json import loads
+from searx.engines.xpath import extract_text
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, get_region_code
+from searx.url_utils import urlencode
+
+# engine dependent config
+categories = ['images']  # engine category
+paging = True  # request() turns params['pageno'] into the `s` offset (50 per page)
+language_support = True  # request() maps params['language'] to a region code
+safesearch = True  # request() sends params['safesearch'] - 1 as the `p` argument
+
+# search-url
+images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}'  # filled in by request()
+site_url = 'https://duckduckgo.com/?{query}&iar=images&iax=1&ia=images'  # page fetched by get_vqd() to read the vqd token
+
+
+# run query in site to get vqd number needed for requesting images
+# TODO: find a way to get this number without an extra request (is it a hash of the query?)
def get_vqd(query):
    """Fetch the ``vqd`` token needed to request images for *query*.

    Loads ``site_url`` for the query and reads the token from the
    ``vqd='...'`` fragment found in the returned page text.

    :param query: search terms, passed through ``urlencode`` as ``q``
    :returns: the token as a string, or ``''`` when the page holds no
              complete ``vqd='...'`` fragment
    """
    res = get(site_url.format(query=urlencode({'q': query})))

    # partition() reports a missing separator explicitly; the previous
    # find()-based slicing got -1 in that case and returned an arbitrary
    # piece of the page as if it were the token
    _, marker, tail = res.text.partition('vqd=\'')
    token, closing_quote, _ = tail.partition('\'')
    if not marker or not closing_quote:
        return ''
    return token
+
+
+# do search-request
def request(query, params):
    """Build the image search URL and store it in ``params['url']``.

    Reads ``pageno``, ``safesearch`` and ``language`` from *params*; the
    ``vqd`` token is fetched with ``get_vqd`` unless ``'is_test'`` is
    present in *params*. Returns the same *params* dict.
    """
    # a fixed token keeps tests from issuing a real external request
    vqd = '12345' if 'is_test' in params else get_vqd(query)

    page_offset = (params['pageno'] - 1) * 50
    safesearch_level = params['safesearch'] - 1

    # the region argument is only sent when a region code is available
    region_code = get_region_code(params['language'])
    query_args = {'q': query, 'l': region_code} if region_code else {'q': query}

    params['url'] = images_url.format(
        query=urlencode(query_args),
        offset=page_offset,
        safesearch=safesearch_level,
        vqd=vqd)

    return params
+
+
+# get response from search-request
def response(resp):
    """Turn the JSON body of an image search response into result dicts.

    :param resp: response object; only its ``text`` attribute is read
    :returns: a list of dicts using the ``images.html`` template, one per
              usable entry under the ``results`` key. An empty list is
              returned when the body is not JSON, is not a JSON object, or
              has no ``results``.
    """
    try:
        res_json = loads(resp.text)
    except (ValueError, TypeError):
        # body is not JSON (json decoding errors are ValueErrors)
        return []

    if not isinstance(res_json, dict):
        return []

    results = []

    # parse results
    for result in res_json.get('results') or []:
        try:
            entry = {'template': 'images.html',
                     'title': result['title'],
                     'content': '',
                     'thumbnail_src': result['thumbnail'],
                     'img_src': result['image'],
                     'url': result['url']}
        except (KeyError, TypeError):
            # the JSON layout is not a stable API: drop an incomplete entry
            # instead of losing every other result of the page
            continue

        # append result
        results.append(entry)

    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index af91a18f4..17b0bd580 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -167,6 +167,12 @@ engines:
shortcut : ddg
disabled : True
+ - name : duckduckgo images
+ engine : duckduckgo_images
+ shortcut : ddi
+ timeout: 3.0
+ disabled : True
+
- name : etymonline
engine : xpath
paging : True