summary refs log tree commit diff
path: root/searx/engines/google_news.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines/google_news.py')
-rw-r--r-- searx/engines/google_news.py 52
1 files changed, 30 insertions, 22 deletions
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 87ac9a19d..162e4348e 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -32,6 +32,7 @@ from searx.engines.google import (
supported_languages_url,
_fetch_supported_languages,
)
+
# pylint: enable=unused-import
from searx.engines.google import (
@@ -71,14 +72,12 @@ time_range_support = True
# safesearch : results are identical for safesearch=0 and safesearch=2
safesearch = False
+
def request(query, params):
"""Google-News search request"""
- lang_info = get_lang_info(
- params, supported_languages, language_aliases, False
- )
- logger.debug(
- "HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
+ lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+ logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
# google news has only one domain
lang_info['subdomain'] = 'news.google.com'
@@ -94,19 +93,26 @@ def request(query, params):
if params['time_range']:
query += ' ' + time_range_dict[params['time_range']]
- query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({
- 'q': query,
- **lang_info['params'],
- 'ie': "utf8",
- 'oe': "utf8",
- 'gl': lang_info['country'],
- }) + ('&ceid=%s' % ceid) # ceid includes a ':' character which must not be urlencoded
+ query_url = (
+ 'https://'
+ + lang_info['subdomain']
+ + '/search'
+ + "?"
+ + urlencode(
+ {
+ 'q': query,
+ **lang_info['params'],
+ 'ie': "utf8",
+ 'oe': "utf8",
+ 'gl': lang_info['country'],
+ }
+ )
+ + ('&ceid=%s' % ceid)
+ ) # ceid includes a ':' character which must not be urlencoded
params['url'] = query_url
params['headers'].update(lang_info['headers'])
- params['headers']['Accept'] = (
- 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
- )
+ params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
params['headers']['Cookie'] = "CONSENT=YES+cb.%s-14-p0.en+F+941;" % datetime.now().strftime("%Y%m%d")
return params
@@ -141,7 +147,7 @@ def response(resp):
# jslog="95014; 5:W251bGwsbnVsbCxudW...giXQ==; track:click"
jslog = jslog.split(";")[1].split(':')[1].strip()
try:
- padding = (4 -(len(jslog) % 4)) * "="
+ padding = (4 - (len(jslog) % 4)) * "="
jslog = b64decode(jslog + padding)
except binascii.Error:
# URL can't be read, skip this result
@@ -178,12 +184,14 @@ def response(resp):
img_src = extract_text(result.xpath('preceding-sibling::a/figure/img/@src'))
- results.append({
- 'url': url,
- 'title': title,
- 'content': content,
- 'img_src': img_src,
- })
+ results.append(
+ {
+ 'url': url,
+ 'title': title,
+ 'content': content,
+ 'img_src': img_src,
+ }
+ )
# return results
return results