diff options
author | a01200356 <a01200356@itesm.mx> | 2015-12-29 20:59:51 -0600 |
---|---|---|
committer | a01200356 <a01200356@itesm.mx> | 2015-12-29 20:59:51 -0600 |
commit | b51ba32f619e6b7a927444475b0ee986d4d13a60 (patch) | |
tree | c2f7c804b0718f87279bcaa0a81d967c6b050e8d /searx/engines/wolframalpha_noapi.py | |
parent | d4b6ab2be47c7b7ac6ae222905a8848fc428cd69 (diff) | |
download | searxng-b51ba32f619e6b7a927444475b0ee986d4d13a60.tar.gz searxng-b51ba32f619e6b7a927444475b0ee986d4d13a60.zip |
Wolfram Alpha (no API needed now)
Diffstat (limited to 'searx/engines/wolframalpha_noapi.py')
-rw-r--r-- | searx/engines/wolframalpha_noapi.py | 66 |
1 files changed, 66 insertions, 0 deletions
# WolframAlpha (Maths)
#
# @website http://www.wolframalpha.com/
#
# @using-api no
# @results HTML, JS
# @stable no
# @parse answer

import re
import json
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text

# search-url
url = 'http://www.wolframalpha.com/'
search_url = url + 'input/?{query}'


# do search-request
def request(query, params):
    """Fill in the request URL for a WolframAlpha query.

    Args:
        query: the search terms; sent url-encoded as the ``i`` parameter.
        params: request parameter dict; its ``'url'`` key is set here.

    Returns:
        The same ``params`` dict, updated in place.
    """
    params['url'] = search_url.format(query=urlencode({'i': query}))

    return params


# tries to find answer under the pattern given
def extract_answer(script_list, pattern):
    """Return the answer text found in the scripts that match ``pattern``.

    Args:
        script_list: iterable of elements exposing ``text_content()``
            (``response`` passes the page's ``<script>`` elements).
        pattern: regular expression whose first group captures the text
            that follows the pod's ``push`` call on the same line.

    Returns:
        The decoded ``'stringified'`` value of the matching script, or
        ``None`` when no script matches. When several scripts match, the
        last one wins.
    """
    answer = None

    # get line that matches the pattern
    for script in script_list:
        match = re.search(pattern, script.text_content())
        if match is None:
            # this script does not mention the pod we are looking for
            continue
        line = match.group(1)

        # extract answer from json: keep only the outermost '{' ... '}'
        payload = line[line.find('{'):line.rfind('}') + 1]
        # NOTE: the unicode-escape round trip below depends on Python 2
        # string semantics (``.decode`` on the parsed value); this module
        # targets Python 2, as the ``urllib`` import above shows.
        payload = json.loads(payload.encode('unicode-escape'))
        answer = payload['stringified'].decode('unicode-escape')

    return answer


# get response from search-request
def response(resp):
    """Parse a WolframAlpha result page into a list of searx results.

    Args:
        resp: response object; only its ``text`` attribute (the page HTML)
            is read.

    Returns:
        A list holding a single ``{'answer': ...}`` dict, or an empty list
        when neither pod yields an answer.
    """
    dom = html.fromstring(resp.text)

    # the answer is inside a js script
    scripts = dom.xpath('//script')

    # answer can be located in different 'pods', although by default it
    # should be in pod_0200; pod_0100 is only consulted as a fallback
    answer = (extract_answer(scripts, r'pod_0200\.push(.*)\n')
              or extract_answer(scripts, r'pod_0100\.push(.*)\n'))

    results = []
    # only report an answer that was actually found; never emit
    # {'answer': None}
    if answer:
        results.append({'answer': answer})

    return results