Remove unnecessary code in wolframalpha_noapi engine

The answer is scraped from a JavaScript function embedded in the page, so parsing the HTML tree achieves nothing here; the patterns are now matched directly against the raw response text.
author: a01200356 <a01200356@itesm.mx> 2015-12-29 21:11:49 -0600
committer: a01200356 <a01200356@itesm.mx> 2015-12-29 21:11:49 -0600
commit: d827fc49a11b6f84bba3d006b54a70a6a05757fd (patch)
tree: ceb7c4be92644c74fb583a7c3f3f0d7490b237ac /searx/engines/wolframalpha_noapi.py
parent: b51ba32f619e6b7a927444475b0ee986d4d13a60 (diff)
download: searxng-d827fc49a11b6f84bba3d006b54a70a6a05757fd.tar.gz
searxng-d827fc49a11b6f84bba3d006b54a70a6a05757fd.zip
1 files changed, 18 insertions, 31 deletions
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 1ce2aa1ff..29600ca1f 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -10,8 +10,6 @@
 import re
 import json
 from urllib import urlencode
-from lxml import html
-from searx.engines.xpath import extract_text
 
 # search-url
 url = 'http://www.wolframalpha.com/'
@@ -25,42 +23,31 @@ def request(query, params):
     return params
 
 
-# tries to find answer under the pattern given
-def extract_answer(script_list, pattern):
-    answer = None
+# get response from search-request
+def response(resp):
+    results = []
+    
+    # the answer is inside a js function
+    # answer can be located in different 'pods', although by default it should be in pod_0200
+    possible_locations = ['pod_0200\.push(.*)\n',
+                          'pod_0100\.push(.*)\n']
 
     # get line that matches the pattern
-    for script in script_list:
+    for pattern in possible_locations:
         try:
-            line = re.search(pattern, script.text_content()).group(1)
+            line = re.search(pattern, resp.text).group(1)
+            break
         except AttributeError:
             continue
 
-        # extract answer from json
-        answer = line[line.find('{') : line.rfind('}')+1]
-        answer = json.loads(answer.encode('unicode-escape'))
-        answer = answer['stringified'].decode('unicode-escape')
-
-    return answer
-
+    if not line:
+        return results
 
-# get response from search-request
-def response(resp):
-
-    dom = html.fromstring(resp.text)
-
-    # the answer is inside a js script
-    scripts = dom.xpath('//script')
+    # extract answer from json
+    answer = line[line.find('{') : line.rfind('}')+1]
+    answer = json.loads(answer.encode('unicode-escape'))
+    answer = answer['stringified'].decode('unicode-escape')
 
-    results = []
-
-    # answer can be located in different 'pods', although by default it should be in pod_0200
-    answer = extract_answer(scripts, 'pod_0200\.push(.*)\n')
-    if not answer:
-        answer = extract_answer(scripts, 'pod_0100\.push(.*)\n')
-        if answer:
-            results.append({'answer': answer})
-    else:
-        results.append({'answer': answer})
+    results.append({'answer': answer})
     
     return results
author	a01200356 <a01200356@itesm.mx>	2015-12-29 21:11:49 -0600
committer	a01200356 <a01200356@itesm.mx>	2015-12-29 21:11:49 -0600
commit	d827fc49a11b6f84bba3d006b54a70a6a05757fd (patch)
tree	ceb7c4be92644c74fb583a7c3f3f0d7490b237ac /searx/engines/wolframalpha_noapi.py
parent	b51ba32f619e6b7a927444475b0ee986d4d13a60 (diff)
download	searxng-d827fc49a11b6f84bba3d006b54a70a6a05757fd.tar.gz searxng-d827fc49a11b6f84bba3d006b54a70a6a05757fd.zip