diff options
author | marc <a01200356@itesm.mx> | 2017-10-10 16:49:49 -0500 |
---|---|---|
committer | marc <a01200356@itesm.mx> | 2017-10-10 16:49:49 -0500 |
commit | a524dbb823e88482a762d56ac1ed352641f3f0c3 (patch) | |
tree | 21604320f131f5eddde172426685c7c5f4f8a98a /searx | |
parent | 1adc8d6e2604be1a159c936b0fd77efdd09c555e (diff) | |
download | searxng-a524dbb823e88482a762d56ac1ed352641f3f0c3.tar.gz searxng-a524dbb823e88482a762d56ac1ed352641f3f0c3.zip |
[fix] language support for bing images and videos
Diffstat (limited to 'searx')
-rw-r--r-- | searx/engines/bing_images.py | 56 | ||||
-rw-r--r-- | searx/engines/bing_videos.py | 5 |
2 files changed, 51 insertions, 10 deletions
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 6300c94e4..15679056c 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -18,7 +18,6 @@
 from lxml import html
 from json import loads
 import re
-from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 from searx.url_utils import urlencode
 
 # engine dependent config
@@ -26,6 +25,8 @@ categories = ['images']
 paging = True
 safesearch = True
 time_range_support = True
+language_support = True
+supported_languages_url = 'https://www.bing.com/account/general'
 
 # search-url
 base_url = 'https://www.bing.com/'
@@ -45,23 +46,41 @@ safesearch_types = {2: 'STRICT',
 _quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U)
 
 
+# get supported region code
+def get_region_code(lang, lang_list=None):
+    region = None
+    if lang in (lang_list or supported_languages):
+        region = lang
+    elif lang.startswith('no'):
+        region = 'nb-NO'
+    else:
+        # try to get a supported country code with language
+        lang = lang.split('-')[0]
+        for lc in (lang_list or supported_languages):
+            if lang == lc.split('-')[0]:
+                region = lc
+                break
+    if region:
+        return region.lower()
+    else:
+        return 'en-us'
+
+
 # do search-request
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
 
-    # required for cookie
-    if params['language'] == 'all':
-        language = 'en-US'
-    else:
-        language = params['language']
-
     search_path = search_string.format(
         query=urlencode({'q': query}),
         offset=offset)
 
+    language = get_region_code(params['language'])
+
     params['cookies']['SRCHHPGUSR'] = \
-        'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] +\
-        '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
+        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
+
+    params['cookies']['_EDGE_S'] = 'mkt=' + language +\
+        '&ui=' + language + '&F=1'
 
     params['url'] = base_url + search_path
     if params['time_range'] in time_range_dict:
@@ -106,3 +125,22 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = html.fromstring(resp.text)
+
+    regions_xpath = '//div[@id="region-section-content"]' \
+                    + '//ul[@class="b_vList"]/li/a/@href'
+
+    regions = dom.xpath(regions_xpath)
+    for region in regions:
+        code = re.search('setmkt=[^\&]+', region).group()[7:]
+        if code == 'nb-NO':
+            code = 'no-NO'
+
+        supported_languages.append(code)
+
+    return supported_languages
diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py
index 918064c9b..bd91bce37 100644
--- a/searx/engines/bing_videos.py
+++ b/searx/engines/bing_videos.py
@@ -12,6 +12,7 @@
 
 from json import loads
 from lxml import html
+from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url, get_region_code
 from searx.engines.xpath import extract_text
 from searx.url_utils import urlencode
 
@@ -21,6 +22,7 @@ paging = True
 safesearch = True
 time_range_support = True
 number_of_results = 10
+language_support = True
 
 search_url = 'https://www.bing.com/videos/asyncv2?{query}&async=content&'\
              'first={offset}&count={number_of_results}&CW=1366&CH=25&FORM=R5VR5'
@@ -45,7 +47,8 @@ def request(query, params):
         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
 
     # language cookie
-    params['cookies']['_EDGE_S'] = 'mkt=' + params['language'].lower() + '&F=1'
+    region = get_region_code(params['language'], lang_list=supported_languages)
+    params['cookies']['_EDGE_S'] = 'mkt=' + region + '&F=1'
 
     # query and paging
     params['url'] = search_url.format(query=urlencode({'q': query}),