diff options
author | Adam Tauber <asciimoo@gmail.com> | 2018-08-19 13:22:22 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-19 13:22:22 +0200 |
commit | b75f1b6cc39a94989a74d52eb0f1267c3e3c665e (patch) | |
tree | d3bab81ca2071196b1b4223d6d2db7d408b79bf2 /searx/engines/google.py | |
parent | e7f7eda18cc69287f30c512a98b4e90453bcd8e7 (diff) | |
parent | 931c1bb0f663bc13998f5a78ae7cd9485d37453c (diff) | |
download | searxng-b75f1b6cc39a94989a74d52eb0f1267c3e3c665e.tar.gz searxng-b75f1b6cc39a94989a74d52eb0f1267c3e3c665e.zip |
Merge branch 'master' into patch-2
Diffstat (limited to 'searx/engines/google.py')
-rw-r--r-- | searx/engines/google.py | 34 |
1 file changed, 15 insertions, 19 deletions
```diff
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 0a8678362..62e7d1170 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -14,6 +14,7 @@ from lxml import html, etree
 from searx.engines.xpath import extract_text, extract_url
 from searx import logger
 from searx.url_utils import urlencode, urlparse, parse_qsl
+from searx.utils import match_language
 
 logger = logger.getChild('google engine')
 
@@ -72,7 +73,7 @@ country_to_hostname = {
     'RO': 'www.google.ro',  # Romania
     'RU': 'www.google.ru',  # Russia
     'SK': 'www.google.sk',  # Slovakia
-    'SL': 'www.google.si',  # Slovenia (SL -> si)
+    'SI': 'www.google.si',  # Slovenia
     'SE': 'www.google.se',  # Sweden
     'TH': 'www.google.co.th',  # Thailand
     'TR': 'www.google.com.tr',  # Turkey
@@ -90,7 +91,7 @@ url_map = 'https://www.openstreetmap.org/'\
 search_path = '/search'
 search_url = ('https://{hostname}' +
               search_path +
-              '?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&ei=x')
+              '?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&hl={lang_short}&ei=x')
 
 time_range_search = "&tbs=qdr:{range}"
 time_range_dict = {'day': 'd',
@@ -165,22 +166,16 @@ def extract_text_from_dom(result, xpath):
 def request(query, params):
     offset = (params['pageno'] - 1) * 10
 
-    # temporary fix until a way of supporting en-US is found
-    if params['language'] == 'en-US':
-        params['language'] = 'en-GB'
-
-    if params['language'][:2] == 'jv':
-        language = 'jw'
-        country = 'ID'
-        url_lang = 'lang_jw'
+    language = match_language(params['language'], supported_languages)
+    language_array = language.split('-')
+    if params['language'].find('-') > 0:
+        country = params['language'].split('-')[1]
+    elif len(language_array) == 2:
+        country = language_array[1]
     else:
-        language_array = params['language'].lower().split('-')
-        if len(language_array) == 2:
-            country = language_array[1]
-        else:
-            country = 'US'
-        language = language_array[0] + ',' + language_array[0] + '-' + country
-        url_lang = 'lang_' + language_array[0]
+        country = 'US'
+
+    url_lang = 'lang_' + language
 
     if use_locale_domain:
         google_hostname = country_to_hostname.get(country.upper(), default_hostname)
@@ -192,11 +187,12 @@ def request(query, params):
     params['url'] = search_url.format(offset=offset,
                                       query=urlencode({'q': query}),
                                       hostname=google_hostname,
-                                      lang=url_lang)
+                                      lang=url_lang,
+                                      lang_short=language)
     if params['time_range'] in time_range_dict:
         params['url'] += time_range_search.format(range=time_range_dict[params['time_range']])
 
-    params['headers']['Accept-Language'] = language
+    params['headers']['Accept-Language'] = language + ',' + language + '-' + country
     params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
 
     params['google_hostname'] = google_hostname
```