author     a01200356 <a01200356@itesm.mx>    2016-01-02 01:49:32 -0600
committer  a01200356 <a01200356@itesm.mx>    2016-01-02 01:49:32 -0600
commit     19d025f0e7ef9a5f41b81fc6c1a9a7114bdae78c (patch)
tree       6f3b9e9f807d28bdabdf0944c97951116655850d /searx/engines/wolframalpha_noapi.py
parent     e9d35c1309f05a0b214fb323049909ee7ec62ab8 (diff)
download   searxng-19d025f0e7ef9a5f41b81fc6c1a9a7114bdae78c.tar.gz
           searxng-19d025f0e7ef9a5f41b81fc6c1a9a7114bdae78c.zip
[fix] pass wolframalpha_noapi tests
Diffstat (limited to 'searx/engines/wolframalpha_noapi.py')
-rw-r--r--  searx/engines/wolframalpha_noapi.py | 43
1 file changed, 27 insertions(+), 16 deletions(-)
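The commit message is about the engine's unit tests. Purely as an illustration (this sketch is not part of the commit or of the searx test suite), a request() test in plain unittest could look like the following; the module path searx.engines.wolframalpha_noapi comes from the file touched above, while the test class, query, and assertions are invented here.

# Hypothetical sketch of a request() unit test; not taken from the repository.
from collections import defaultdict
import unittest

from searx.engines import wolframalpha_noapi


class TestWolframAlphaNoAPIRequest(unittest.TestCase):

    def test_request_builds_search_url(self):
        # request() fills params['url'] with the /input/ URL and the
        # url-encoded query ('+' becomes %2B).
        params = wolframalpha_noapi.request('1+1', defaultdict(dict))
        self.assertIn('url', params)
        self.assertIn('wolframalpha.com/input/', params['url'])
        self.assertIn('i=1%2B1', params['url'])


if __name__ == '__main__':
    unittest.main()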
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index d7442db5d..a730ed60b 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -8,60 +8,71 @@
# @stable no
# @parse answer
-from re import search
+from re import search, sub
from json import loads
from urllib import urlencode
+from lxml import html
# search-url
url = 'http://www.wolframalpha.com/'
search_url = url+'input/?{query}'
-search_query = ''
+
+# xpath variables
+scripts_xpath = '//script'
+title_xpath = '//title'
+failure_xpath = '//p[attribute::class="pfail"]'
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'i': query}))
- # used in response
- global search_query
- search_query = query
-
return params
# get response from search-request
def response(resp):
results = []
- webpage = resp.text
line = None
+ dom = html.fromstring(resp.text)
+ scripts = dom.xpath(scripts_xpath)
+
# the answer is inside a js function
# answer can be located in different 'pods', although by default it should be in pod_0200
possible_locations = ['pod_0200\.push(.*)\n',
'pod_0100\.push(.*)\n']
+ # failed result
+ if dom.xpath(failure_xpath):
+ return results
+
# get line that matches the pattern
for pattern in possible_locations:
- try:
- line = search(pattern, webpage).group(1)
+ for script in scripts:
+ try:
+ line = search(pattern, script.text_content()).group(1)
+ break
+ except AttributeError:
+ continue
+ if line:
break
- except AttributeError:
- continue
if line:
# extract answer from json
answer = line[line.find('{'):line.rfind('}')+1]
answer = loads(answer.encode('unicode-escape'))
answer = answer['stringified'].decode('unicode-escape')
+ answer = sub(r'\\', '', answer)
results.append({'answer': answer})
- # failed result
- elif search('pfail', webpage):
- return results
+ # user input is in first part of title
+ title = dom.xpath(title_xpath)[0].text
+ result_url = request(title[:-16], {})['url']
# append result
- results.append({'url': request(search_query, {})['url'],
- 'title': search_query + ' - Wolfram|Alpha'})
+ results.append({'url': result_url,
+ 'title': title})
return results
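For reference, here is a standalone sketch (separate from the commit) of the pod extraction that response() performs: locate the pod_0200.push(...) call inside a <script> tag, slice out the JSON object between the outermost braces, and read its 'stringified' field. The HTML snippet below is fabricated for the example; the real result page is more complex.

# Standalone sketch of the pod_0200 extraction; the page below is made up.
from re import search
from json import loads
from lxml import html

fake_page = """<html>
  <head><title>2+2 - Wolfram|Alpha</title></head>
  <body>
    <script>
      pod_0200.push({"stringified": "4"})
    </script>
  </body>
</html>"""

dom = html.fromstring(fake_page)

line = None
for script in dom.xpath('//script'):
    match = search(r'pod_0200\.push(.*)\n', script.text_content())
    if match:
        line = match.group(1)
        break

if line:
    # keep only the JSON object between the outermost braces
    answer = loads(line[line.find('{'):line.rfind('}') + 1])
    print(answer['stringified'])  # -> 4

The patched engine does the same thing per <script> element, trying pod_0200 first and falling back to pod_0100, and bails out early when the page contains a "pfail" paragraph.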