summaryrefslogtreecommitdiff
path: root/searx/engines/wolframalpha_noapi.py
diff options
context:
space:
mode:
author a01200356 <a01200356@itesm.mx> 2015-12-29 20:59:51 -0600
committer a01200356 <a01200356@itesm.mx> 2015-12-29 20:59:51 -0600
commit b51ba32f619e6b7a927444475b0ee986d4d13a60 (patch)
tree c2f7c804b0718f87279bcaa0a81d967c6b050e8d /searx/engines/wolframalpha_noapi.py
parent d4b6ab2be47c7b7ac6ae222905a8848fc428cd69 (diff)
download searxng-b51ba32f619e6b7a927444475b0ee986d4d13a60.tar.gz
searxng-b51ba32f619e6b7a927444475b0ee986d4d13a60.zip
Wolfram Alpha (no API needed now)
Diffstat (limited to 'searx/engines/wolframalpha_noapi.py')
-rw-r--r-- searx/engines/wolframalpha_noapi.py 66
1 files changed, 66 insertions, 0 deletions
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
new file mode 100644
index 000000000..1ce2aa1ff
--- /dev/null
+++ b/searx/engines/wolframalpha_noapi.py
@@ -0,0 +1,66 @@
+# WolframAlpha (Maths)
+#
+# @website http://www.wolframalpha.com/
+#
+# @using-api no
+# @results HTML, JS
+# @stable no
+# @parse answer
+
+import re
+import json
+from urllib import urlencode
+from lxml import html
+from searx.engines.xpath import extract_text
+
+# search-url
+url = 'http://www.wolframalpha.com/'
+search_url = url+'input/?{query}'
+
+
+# do search-request
def request(query, params):
    """Fill in the request URL for a WolframAlpha search.

    The query is URL-encoded as the ``i`` parameter and substituted into
    ``search_url``. ``params`` is updated in place and returned.
    """
    encoded_query = urlencode({'i': query})
    params['url'] = search_url.format(query=encoded_query)
    return params
+
+
+# tries to find answer under the pattern given
def extract_answer(script_list, pattern):
    """Return the answer text found in the given scripts, or None.

    script_list: iterable of objects exposing ``text_content()``.
    pattern: regular expression whose first group captures the text that
        holds the pod's ``{...}`` payload.

    A script is skipped (instead of raising) when it does not match, when
    the captured text has no ``{...}`` span, when the payload cannot be
    parsed, or when it has no string under ``'stringified'``. If several
    scripts yield an answer, the last one wins.
    """
    regex = re.compile(pattern)
    answer = None

    # get line that matches the pattern
    for script in script_list:
        match = regex.search(script.text_content())
        if match is None:
            continue

        # the payload is the outermost {...} span of the captured text
        line = match.group(1) or ''
        start = line.find('{')
        end = line.rfind('}')
        if start == -1 or end < start:
            continue

        found = _stringified_from_payload(line[start:end + 1])
        if found is not None:
            answer = found

    return answer


def _stringified_from_payload(payload):
    """Return the 'stringified' text of one pod payload, or None if unusable."""
    text_type = type(u'')

    # Well-formed JSON: the parser already resolves every escape sequence,
    # so no extra decoding step is needed (or correct) afterwards.
    try:
        data = json.loads(payload)
    except ValueError:
        pass
    else:
        value = data.get('stringified') if isinstance(data, dict) else None
        return value if isinstance(value, text_type) else None

    # Fallback for JavaScript-only escapes such as \' that JSON rejects:
    # 'unicode-escape' doubles every backslash so the parser keeps the
    # sequences literally; they are resolved after parsing.
    try:
        data = json.loads(payload.encode('unicode-escape').decode('ascii'))
    except ValueError:
        return None

    value = data.get('stringified') if isinstance(data, dict) else None
    if not isinstance(value, text_type):
        return None

    try:
        # latin-1 + backslashreplace keeps non-ASCII characters intact
        # through the bytes round trip required by the codec.
        return value.encode('latin-1', 'backslashreplace').decode('unicode-escape')
    except UnicodeDecodeError:
        return None
+
+
+# get response from search-request
def response(resp):
    """Build the result list from a WolframAlpha result page.

    resp: response object whose ``text`` attribute holds the page HTML.

    Returns ``[{'answer': <text>}]`` when an answer is found in one of the
    page's ``<script>`` elements, otherwise an empty list.
    """
    dom = html.fromstring(resp.text)

    # the answer is inside a js script
    scripts = dom.xpath('//script')

    # answer can be located in different 'pods', although by default it
    # should be in pod_0200; pod_0100 is only tried when that one is empty.
    # Raw strings keep '\.' from being an invalid string escape.
    pod_patterns = (
        r'pod_0200\.push(.*)\n',
        r'pod_0100\.push(.*)\n',
    )
    for pod_pattern in pod_patterns:
        answer = extract_answer(scripts, pod_pattern)
        if answer:
            return [{'answer': answer}]

    return []