summary | refs | log | tree | commit | diff
path: root/searx/engines/google_videos.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines/google_videos.py')
-rw-r--r-- searx/engines/google_videos.py | 76
1 file changed, 39 insertions(+), 37 deletions(-)
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index 77b0ab260..049f9138c 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -38,10 +38,8 @@ from searx.engines.google import (
)
# pylint: disable=unused-import
-from searx.engines.google import (
- supported_languages_url
- , _fetch_supported_languages
-)
+from searx.engines.google import supported_languages_url, _fetch_supported_languages
+
# pylint: enable=unused-import
# about
@@ -65,6 +63,7 @@ safesearch = True
RE_CACHE = {}
+
def _re(regexpr):
"""returns compiled regular expression"""
RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr))
@@ -77,18 +76,17 @@ def scrap_out_thumbs_src(dom):
for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'):
_script = script.text
# "dimg_35":"https://i.ytimg.c....",
- _dimurl = _re("s='([^']*)").findall( _script)
- for k,v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)' ).findall(_script):
- v = v.replace(r'\u003d','=')
- v = v.replace(r'\u0026','&')
+ _dimurl = _re("s='([^']*)").findall(_script)
+ for k, v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)').findall(_script):
+ v = v.replace(r'\u003d', '=')
+ v = v.replace(r'\u0026', '&')
ret_val[k] = v
logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys())
return ret_val
def scrap_out_thumbs(dom):
- """Scrap out thumbnail data from <script> tags.
- """
+ """Scrap out thumbnail data from <script> tags."""
ret_val = {}
thumb_name = 'dimg_'
@@ -96,7 +94,7 @@ def scrap_out_thumbs(dom):
_script = script.text
# var s='data:image/jpeg;base64, ...'
- _imgdata = _re("s='([^']*)").findall( _script)
+ _imgdata = _re("s='([^']*)").findall(_script)
if not _imgdata:
continue
@@ -112,19 +110,24 @@ def scrap_out_thumbs(dom):
def request(query, params):
"""Google-Video search request"""
- lang_info = get_lang_info(
- params, supported_languages, language_aliases, False
+ lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+ logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
+
+ query_url = (
+ 'https://'
+ + lang_info['subdomain']
+ + '/search'
+ + "?"
+ + urlencode(
+ {
+ 'q': query,
+ 'tbm': "vid",
+ **lang_info['params'],
+ 'ie': "utf8",
+ 'oe': "utf8",
+ }
+ )
)
- logger.debug(
- "HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
-
- query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({
- 'q': query,
- 'tbm': "vid",
- **lang_info['params'],
- 'ie': "utf8",
- 'oe': "utf8",
- })
if params['time_range'] in time_range_dict:
query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
@@ -133,9 +136,7 @@ def request(query, params):
params['url'] = query_url
params['headers'].update(lang_info['headers'])
- params['headers']['Accept'] = (
- 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
- )
+ params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
return params
@@ -171,21 +172,22 @@ def response(resp):
title = extract_text(eval_xpath_getindex(result, title_xpath, 0))
url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0)
- length = extract_text(eval_xpath(
- result, './/div[contains(@class, "P7xzyf")]/span/span'))
+ length = extract_text(eval_xpath(result, './/div[contains(@class, "P7xzyf")]/span/span'))
c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0)
content = extract_text(c_node)
pub_info = extract_text(eval_xpath(result, './/div[@class="Zg1NU"]'))
- results.append({
- 'url': url,
- 'title': title,
- 'content': content,
- 'length': length,
- 'author': pub_info,
- 'thumbnail': img_src,
- 'template': 'videos.html',
- })
+ results.append(
+ {
+ 'url': url,
+ 'title': title,
+ 'content': content,
+ 'length': length,
+ 'author': pub_info,
+ 'thumbnail': img_src,
+ 'template': 'videos.html',
+ }
+ )
# parse suggestion
for suggestion in eval_xpath_list(dom, suggestion_xpath):