diff options
author | Thomas Pointhuber <thomas.pointhuber@gmx.at> | 2015-02-08 14:12:14 +0100 |
---|---|---|
committer | Thomas Pointhuber <thomas.pointhuber@gmx.at> | 2015-02-08 14:12:14 +0100 |
commit | 04f7118d0a0693906ef57fa83f01d29eb366a45e (patch) | |
tree | 1328e80350a8861ddd8c630bb09bafa8b849bd38 /searx/engines/gigablast.py | |
parent | 7c075aa73197030d01b210054488ce99ec861d70 (diff) | |
download | searxng-04f7118d0a0693906ef57fa83f01d29eb366a45e.tar.gz searxng-04f7118d0a0693906ef57fa83f01d29eb366a45e.zip |
[enh] add gigablast engine
Diffstat (limited to 'searx/engines/gigablast.py')
-rw-r--r-- | searx/engines/gigablast.py | 63 |
1 files changed, 63 insertions, 0 deletions
## Gigablast (Web)
#
# @website http://gigablast.com
# @provide-api yes (http://gigablast.com/api.html)
#
# @using-api yes
# @results XML
# @stable yes
# @parse url, title, content

try:
    from urllib.parse import urlencode
except ImportError:  # Python 2
    from urllib import urlencode

try:
    from html import escape
except ImportError:  # Python 2
    from cgi import escape

from lxml import etree

# engine dependent config
categories = ['general']
paging = True
# results requested per page; sent as `n` and used to compute the offset `s`
number_of_results = 5

# search-url
base_url = 'http://gigablast.com/'
search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'

# specific xpath variables
results_xpath = '//response//result'
url_xpath = './/url'
title_xpath = './/title'
content_xpath = './/sum'


def _first_text(node, xpath):
    """Return the text of the first element matching *xpath* below *node*.

    Returns an empty string when nothing matches or when the matched
    element has no text (lxml reports that as ``None``).
    """
    matches = node.xpath(xpath)
    if not matches:
        return ''
    return matches[0].text or ''


# do search-request
def request(query, params):
    """Fill ``params['url']`` with the Gigablast search URL and return params.

    ``params['pageno']`` is 1-based; it is converted into the result offset
    expected by the ``s`` query parameter. *query* is URL-encoded as the
    ``q`` parameter.
    """
    offset = (params['pageno'] - 1) * number_of_results

    search_path = search_string.format(
        query=urlencode({'q': query}),
        offset=offset,
        number_of_results=number_of_results)

    params['url'] = base_url + search_path

    return params


# get response from search-request
def response(resp):
    """Parse the XML in ``resp.content`` into a list of result dicts.

    Each dict has the keys ``url``, ``title`` and ``content``; ``content``
    is HTML-escaped. Results without a URL are skipped, and a missing or
    empty title/summary yields an empty string instead of raising.
    """
    results = []

    dom = etree.fromstring(resp.content)

    # parse results
    for result in dom.xpath(results_xpath):
        url = _first_text(result, url_xpath)
        if not url:
            # a result that cannot be linked to is of no use
            continue

        title = _first_text(result, title_xpath)
        # quote=False keeps the output identical to the cgi.escape default
        # (html.escape would otherwise also escape quote characters)
        content = escape(_first_text(result, content_xpath), quote=False)

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content})

    # return results
    return results