diff options
Diffstat (limited to 'searx/engines/google_videos.py')
-rw-r--r-- | searx/engines/google_videos.py | 76 |
1 files changed, 39 insertions, 37 deletions
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index 77b0ab260..049f9138c 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -38,10 +38,8 @@ from searx.engines.google import ( ) # pylint: disable=unused-import -from searx.engines.google import ( - supported_languages_url - , _fetch_supported_languages -) +from searx.engines.google import supported_languages_url, _fetch_supported_languages + # pylint: enable=unused-import # about @@ -65,6 +63,7 @@ safesearch = True RE_CACHE = {} + def _re(regexpr): """returns compiled regular expression""" RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr)) @@ -77,18 +76,17 @@ def scrap_out_thumbs_src(dom): for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'): _script = script.text # "dimg_35":"https://i.ytimg.c....", - _dimurl = _re("s='([^']*)").findall( _script) - for k,v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)' ).findall(_script): - v = v.replace(r'\u003d','=') - v = v.replace(r'\u0026','&') + _dimurl = _re("s='([^']*)").findall(_script) + for k, v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)').findall(_script): + v = v.replace(r'\u003d', '=') + v = v.replace(r'\u0026', '&') ret_val[k] = v logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys()) return ret_val def scrap_out_thumbs(dom): - """Scrap out thumbnail data from <script> tags. - """ + """Scrap out thumbnail data from <script> tags.""" ret_val = {} thumb_name = 'dimg_' @@ -96,7 +94,7 @@ def scrap_out_thumbs(dom): _script = script.text # var s='data:image/jpeg;base64, ...' - _imgdata = _re("s='([^']*)").findall( _script) + _imgdata = _re("s='([^']*)").findall(_script) if not _imgdata: continue @@ -112,19 +110,24 @@ def scrap_out_thumbs(dom): def request(query, params): """Google-Video search request""" - lang_info = get_lang_info( - params, supported_languages, language_aliases, False + lang_info = get_lang_info(params, supported_languages, language_aliases, False) + logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) + + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/search' + + "?" + + urlencode( + { + 'q': query, + 'tbm': "vid", + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + } + ) ) - logger.debug( - "HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) - - query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({ - 'q': query, - 'tbm': "vid", - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - }) if params['time_range'] in time_range_dict: query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) @@ -133,9 +136,7 @@ def request(query, params): params['url'] = query_url params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' return params @@ -171,21 +172,22 @@ def response(resp): title = extract_text(eval_xpath_getindex(result, title_xpath, 0)) url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0) - length = extract_text(eval_xpath( - result, './/div[contains(@class, "P7xzyf")]/span/span')) + length = extract_text(eval_xpath(result, './/div[contains(@class, "P7xzyf")]/span/span')) c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0) content = extract_text(c_node) pub_info = extract_text(eval_xpath(result, './/div[@class="Zg1NU"]')) - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'length': length, - 'author': pub_info, - 'thumbnail': img_src, - 'template': 'videos.html', - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'length': length, + 'author': pub_info, + 'thumbnail': img_src, + 'template': 'videos.html', + } + ) # parse suggestion for suggestion in eval_xpath_list(dom, suggestion_xpath): |