diff options
author | asciimoo <asciimoo@gmail.com> | 2014-01-23 11:08:08 +0100 |
---|---|---|
committer | asciimoo <asciimoo@gmail.com> | 2014-01-23 11:08:08 +0100 |
commit | 59eeeaab87951fd6fa3302ec240db98902a20b2c (patch) | |
tree | 393114f41b487eea4b71dd4073903726310a1257 /searx/engines/xpath.py | |
parent | ba0f818e89b32ddd7c4d5d9c5f2f8fb2d6703a94 (diff) | |
download | searxng-59eeeaab87951fd6fa3302ec240db98902a20b2c.tar.gz searxng-59eeeaab87951fd6fa3302ec240db98902a20b2c.zip |
[fix] html tag removal
Diffstat (limited to 'searx/engines/xpath.py')
-rw-r--r-- | searx/engines/xpath.py | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index a7d24e2a2..8960b5f21 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -2,6 +2,7 @@ from lxml import html
 from urllib import urlencode, unquote
 from urlparse import urlparse, urljoin
 from lxml.etree import _ElementStringResult
+from searx.utils import html_to_text
 
 search_url = None
 url_xpath = None
@@ -33,7 +34,7 @@ def extract_text(xpath_results):
         return ''.join(xpath_results)
     else:
         # it's a element
-        return xpath_results.text_content()
+        return html_to_text(xpath_results.text_content())
 
 
 def extract_url(xpath_results):
```