summary | refs | log | tree | commit | diff
path: root/utils
diff options
context:
space:
mode:
Diffstat (limited to 'utils')
-rw-r--r-- utils/fetch_languages.py 42
1 files changed, 29 insertions, 13 deletions
diff --git a/utils/fetch_languages.py b/utils/fetch_languages.py
index 3510a3234..5632c90ba 100644
--- a/utils/fetch_languages.py
+++ b/utils/fetch_languages.py
@@ -25,7 +25,6 @@ engines_languages_file = 'engines_languages.json'
languages_file = 'languages.py'
engines_languages = {}
-languages = {}
# To filter out invalid codes and dialects.
@@ -93,22 +92,36 @@ def fetch_supported_languages():
# Join all language lists.
# Iterate all languages supported by each engine.
def join_language_lists():
+ global languages
# include wikipedia first for more accurate language names
- languages.update({code: lang for code, lang
- in engines_languages['wikipedia'].iteritems()
- if valid_code(code)})
+ languages = {code: lang for code, lang
+ in engines_languages['wikipedia'].iteritems()
+ if valid_code(code)}
for engine_name in engines_languages:
for locale in engines_languages[engine_name]:
- if not valid_code(locale):
- continue
-
- # if language is not on list or if it has no name yet
- if locale not in languages or not languages[locale].get('name'):
- if isinstance(engines_languages[engine_name], dict):
- languages[locale] = engines_languages[engine_name][locale]
- else:
- languages[locale] = {}
+ if valid_code(locale):
+ # if language is not on list or if it has no name yet
+ if locale not in languages or not languages[locale].get('name'):
+ if isinstance(engines_languages[engine_name], dict):
+ languages[locale] = engines_languages[engine_name][locale]
+ else:
+ languages[locale] = {}
+
+ # add to counter of engines that support given language
+ lang = locale.split('-')[0]
+ if lang in languages:
+ if 'counter' not in languages[lang]:
+ languages[lang]['counter'] = [engine_name]
+ elif engine_name not in languages[lang]['counter']:
+ languages[lang]['counter'].append(engine_name)
+
+ # filter list to include only languages supported by most engines
+ min_supported_engines = int(0.75 * len(engines_languages))
+ languages = {code: lang for code, lang
+ in languages.iteritems()
+ if len(lang.get('counter', [])) >= min_supported_engines or
+ len(languages.get(code.split('-')[0], {}).get('counter', [])) >= min_supported_engines}
# get locales that have no name or country yet
for locale in languages.keys():
@@ -134,6 +147,7 @@ def join_language_lists():
# Remove countryless language if language is featured in only one country.
def filter_single_country_languages():
prev_lang = None
+ prev_code = None
for code in sorted(languages):
lang = code.split('-')[0]
if lang == prev_lang:
@@ -141,8 +155,10 @@ def filter_single_country_languages():
else:
if prev_lang is not None and countries == 1:
del languages[prev_lang]
+ languages[prev_code]['country'] = ''
countries = 0
prev_lang = lang
+ prev_code = code
# Write languages.py.