diff options
author | Adam Tauber <asciimoo@gmail.com> | 2017-05-15 14:23:23 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-15 14:23:23 +0200 |
commit | 4cffd78650c3f1dfce413ae0a1cd0453ebe6f277 (patch) | |
tree | ac65990c72156def2d49e81d981f0b3beda4fd2e /searx/engines/soundcloud.py | |
parent | 46a2c63f8e1c3819cceff2d61fe9106051e8ecee (diff) | |
parent | 52e615dede8538c36f569d2cf07835427a9a0db6 (diff) | |
download | searxng-4cffd78650c3f1dfce413ae0a1cd0453ebe6f277.tar.gz searxng-4cffd78650c3f1dfce413ae0a1cd0453ebe6f277.zip |
Merge pull request #913 from asciimoo/py3
Add Python3 compatibility
Diffstat (limited to 'searx/engines/soundcloud.py')
-rw-r--r-- | searx/engines/soundcloud.py | 19 |
1 files changed, 12 insertions, 7 deletions
```diff
diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
index 62b03ac03..41b40da61 100644
--- a/searx/engines/soundcloud.py
+++ b/searx/engines/soundcloud.py
@@ -11,13 +11,17 @@
 """
 
 import re
-from StringIO import StringIO
 from json import loads
-from lxml import etree
-from urllib import urlencode, quote_plus
+from lxml import html
 from dateutil import parser
 from searx import logger
 from searx.poolrequests import get as http_get
+from searx.url_utils import quote_plus, urlencode
+
+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
 
 # engine dependent config
 categories = ['music']
@@ -36,14 +40,15 @@ embedded_url = '<iframe width="100%" height="166" ' +\
     'scrolling="no" frameborder="no" ' +\
     'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
 
+cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
+
 
 def get_client_id():
     response = http_get("https://soundcloud.com")
-    rx_namespace = {"re": "http://exslt.org/regular-expressions"}
 
     if response.ok:
-        tree = etree.parse(StringIO(response.content), etree.HTMLParser())
-        script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
+        tree = html.fromstring(response.content)
+        script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
         app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
 
         # extracts valid app_js urls from soundcloud.com content
@@ -51,7 +56,7 @@ def get_client_id():
             # gets app_js and searches for the clientid
             response = http_get(app_js_url)
             if response.ok:
-                cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
+                cids = cid_re.search(response.text)
                 if cids is not None and len(cids.groups()):
                     return cids.groups()[0]
     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
```