diff options
author | a01200356 <a01200356@itesm.mx> | 2016-01-03 15:58:01 -0600 |
---|---|---|
committer | a01200356 <a01200356@itesm.mx> | 2016-01-03 15:58:01 -0600 |
commit | 576d37f256649b570a9c8591a795acd85ac499bc (patch) | |
tree | a38b1d6ff497158a6bb971bbc6003e7890d21f28 /searx/engines/wolframalpha_noapi.py | |
parent | 16d6e758d73ed5b369a4cf70830b5ebf0d0196ba (diff) | |
download | searxng-576d37f256649b570a9c8591a795acd85ac499bc.tar.gz searxng-576d37f256649b570a9c8591a795acd85ac499bc.zip |
[fix] unescape htmlentities in wolframalpha_noapi's answer
Diffstat (limited to 'searx/engines/wolframalpha_noapi.py')
-rw-r--r-- | searx/engines/wolframalpha_noapi.py | 7 |
1 file changed, 6 insertions, 1 deletion
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index a730ed60b..0f0315630 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -12,6 +12,7 @@ from re import search, sub
 from json import loads
 from urllib import urlencode
 from lxml import html
+import HTMLParser
 
 # search-url
 url = 'http://www.wolframalpha.com/'
@@ -62,7 +63,11 @@ def response(resp):
         # extract answer from json
         answer = line[line.find('{'):line.rfind('}')+1]
         answer = loads(answer.encode('unicode-escape'))
-        answer = answer['stringified'].decode('unicode-escape')
+        answer = answer['stringified']
+
+        # clean plaintext answer
+        h = HTMLParser.HTMLParser()
+        answer = h.unescape(answer.decode('unicode-escape'))
         answer = sub(r'\\', '', answer)
 
         results.append({'answer': answer})