diff options
author | a01200356 <a01200356@itesm.mx> | 2015-12-29 21:11:49 -0600 |
---|---|---|
committer | a01200356 <a01200356@itesm.mx> | 2015-12-29 21:11:49 -0600 |
commit | d827fc49a11b6f84bba3d006b54a70a6a05757fd (patch) | |
tree | ceb7c4be92644c74fb583a7c3f3f0d7490b237ac /searx/engines/wolframalpha_noapi.py | |
parent | b51ba32f619e6b7a927444475b0ee986d4d13a60 (diff) | |
download | searxng-d827fc49a11b6f84bba3d006b54a70a6a05757fd.tar.gz searxng-d827fc49a11b6f84bba3d006b54a70a6a05757fd.zip |
Remove unnecessary code in wolframalpha_noapi engine
The answer is scraped from a js function, so parsing the html tree
doesn't achieve anything here.
Diffstat (limited to 'searx/engines/wolframalpha_noapi.py')
-rw-r--r-- | searx/engines/wolframalpha_noapi.py | 49 |
1 files changed, 18 insertions, 31 deletions
```diff
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 1ce2aa1ff..29600ca1f 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -10,8 +10,6 @@
 import re
 import json
 from urllib import urlencode
-from lxml import html
-from searx.engines.xpath import extract_text
 
 # search-url
 url = 'http://www.wolframalpha.com/'
@@ -25,42 +23,31 @@ def request(query, params):
     return params
 
 
-# tries to find answer under the pattern given
-def extract_answer(script_list, pattern):
-    answer = None
+# get response from search-request
+def response(resp):
+    results = []
+
+    # the answer is inside a js function
+    # answer can be located in different 'pods', although by default it should be in pod_0200
+    possible_locations = ['pod_0200\.push(.*)\n',
+                          'pod_0100\.push(.*)\n']
 
     # get line that matches the pattern
-    for script in script_list:
+    for pattern in possible_locations:
         try:
-            line = re.search(pattern, script.text_content()).group(1)
+            line = re.search(pattern, resp.text).group(1)
+            break
         except AttributeError:
             continue
 
-        # extract answer from json
-        answer = line[line.find('{') : line.rfind('}')+1]
-        answer = json.loads(answer.encode('unicode-escape'))
-        answer = answer['stringified'].decode('unicode-escape')
-
-        return answer
-
+    if not line:
+        return results
 
-# get response from search-request
-def response(resp):
-
-    dom = html.fromstring(resp.text)
-
-    # the answer is inside a js script
-    scripts = dom.xpath('//script')
+    # extract answer from json
+    answer = line[line.find('{') : line.rfind('}')+1]
+    answer = json.loads(answer.encode('unicode-escape'))
+    answer = answer['stringified'].decode('unicode-escape')
 
-    results = []
-
-    # answer can be located in different 'pods', although by default it should be in pod_0200
-    answer = extract_answer(scripts, 'pod_0200\.push(.*)\n')
-    if not answer:
-        answer = extract_answer(scripts, 'pod_0100\.push(.*)\n')
-        if answer:
-            results.append({'answer': answer})
-    else:
-        results.append({'answer': answer})
+    results.append({'answer': answer})
 
     return results
```