summaryrefslogtreecommitdiff
path: root/searx/engines
diff options
context:
space:
mode:
authormarc <a01200356@itesm.mx>2016-08-05 23:34:56 -0500
committermarc <a01200356@itesm.mx>2016-12-13 19:32:00 -0600
commit149802c56926bf48520c98932c4c36b8152b3d2d (patch)
treef450a584a785c31a1c118be29b3039f779a0cb70 /searx/engines
parente58949b76fac7aa93341523ff0e2f35e0a03e057 (diff)
downloadsearxng-149802c56926bf48520c98932c4c36b8152b3d2d.tar.gz
searxng-149802c56926bf48520c98932c4c36b8152b3d2d.zip
[enh] add supported_languages on engines and auto-generate languages.py
Diffstat (limited to 'searx/engines')
-rw-r--r--searx/engines/__init__.py1
-rw-r--r--searx/engines/archlinux.py5
-rw-r--r--searx/engines/bing.py2
-rw-r--r--searx/engines/bing_images.py3
-rw-r--r--searx/engines/bing_news.py3
-rw-r--r--searx/engines/duckduckgo.py42
-rw-r--r--searx/engines/duckduckgo_definitions.py3
-rw-r--r--searx/engines/gigablast.py2
-rw-r--r--searx/engines/google.py14
-rw-r--r--searx/engines/google_news.py4
-rw-r--r--searx/engines/mediawiki.py3
-rw-r--r--searx/engines/photon.py4
-rw-r--r--searx/engines/startpage.py2
-rw-r--r--searx/engines/subtitleseeker.py9
-rw-r--r--searx/engines/swisscows.py4
-rw-r--r--searx/engines/twitter.py2
-rw-r--r--searx/engines/wikidata.py2
-rw-r--r--searx/engines/wikipedia.py35
-rw-r--r--searx/engines/yacy.py2
-rw-r--r--searx/engines/yahoo.py12
-rw-r--r--searx/engines/yahoo_news.py2
-rw-r--r--searx/engines/yandex.py2
-rw-r--r--searx/engines/youtube_api.py2
23 files changed, 134 insertions, 26 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 87b1b0eb4..ab3677984 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -38,6 +38,7 @@ engine_shortcuts = {}
engine_default_args = {'paging': False,
'categories': ['general'],
'language_support': True,
+ 'supported_languages': [],
'safesearch': False,
'timeout': settings['outgoing']['request_timeout'],
'shortcut': '-',
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index 5ba512766..dca825790 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -29,8 +29,8 @@ xpath_link = './/div[@class="mw-search-result-heading"]/a'
# cut 'en' from 'en_US', 'de' from 'de_CH', and so on
def locale_to_lang_code(locale):
- if locale.find('_') >= 0:
- locale = locale.split('_')[0]
+ if locale.find('-') >= 0:
+ locale = locale.split('-')[0]
return locale
@@ -95,6 +95,7 @@ main_langs = {
'uk': 'Українська',
'zh': '简体中文'
}
+supported_languages = dict(lang_urls, **main_langs)
# do search-request
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 58db61251..052b66448 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -32,7 +32,7 @@ def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
if params['language'] != 'all':
- query = u'language:{} {}'.format(params['language'].split('_')[0].upper(),
+ query = u'language:{} {}'.format(params['language'].split('-')[0].upper(),
query.decode('utf-8')).encode('utf-8')
search_path = search_string.format(
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 4dd362cb3..c0deaf6b2 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -19,6 +19,7 @@ from urllib import urlencode
from lxml import html
from json import loads
import re
+from searx.engines.bing import supported_languages
# engine dependent config
categories = ['images']
@@ -53,7 +54,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en-US'
else:
- language = params['language'].replace('_', '-')
+ language = params['language']
search_path = search_string.format(
query=urlencode({'q': query}),
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 4e7c33129..4bac5bbce 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -17,6 +17,7 @@ from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get
+from searx.engines.bing import supported_languages
# engine dependent config
categories = ['news']
@@ -74,7 +75,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en-US'
else:
- language = params['language'].replace('_', '-')
+ language = params['language']
params['url'] = _get_url(query, language, offset, params['time_range'])
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 9959a52e6..a1cb5882c 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -22,6 +22,13 @@ from searx.languages import language_codes
categories = ['general']
paging = True
language_support = True
+supported_languages = ["es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "bg-BG", "en-CA", "fr-CA", "ca-CT",
+ "es-CL", "zh-CN", "es-CO", "hr-HR", "cs-CZ", "da-DK", "et-EE", "fi-FI", "fr-FR", "de-DE",
+ "el-GR", "tzh-HK", "hu-HU", "en-IN", "id-ID", "en-ID", "en-IE", "he-IL", "it-IT", "jp-JP",
+ "kr-KR", "es-XL", "lv-LV", "lt-LT", "ms-MY", "en-MY", "es-MX", "nl-NL", "en-NZ", "no-NO",
+ "es-PE", "en-PH", "tl-PH", "pl-PL", "pt-PT", "ro-RO", "ru-RU", "ar-XA", "en-XA", "en-SG",
+ "sk-SK", "sl-SL", "en-ZA", "es-ES", "ca-ES", "sv-SE", "de-CH", "fr-CH", "it-CH", "tzh-TW",
+ "th-TH", "tr-TR", "uk-UA", "en-UK", "en-US", "es-US", "vi-VN"]
time_range_support = True
# search-url
@@ -46,10 +53,23 @@ def request(query, params):
offset = (params['pageno'] - 1) * 30
+ # custom fixes for languages
if params['language'] == 'all':
locale = None
+ elif params['language'][:2] == 'ja':
+ locale = 'jp-jp'
+ elif params['language'] == 'zh-TW':
+ locale = 'tw-tzh'
+ elif params['language'] == 'zh-HK':
+ locale = 'hk-tzh'
+ elif params['language'][-2:] == 'SA':
+ locale = 'xa' + params['language'].split('-')[0]
+ elif params['language'][-2:] == 'GB':
+ locale = 'uk' + params['language'].split('-')[0]
+ elif params['language'] == 'es-419':
+ locale = 'xl-es'
else:
- locale = params['language'].split('_')
+ locale = params['language'].split('-')
if len(locale) == 2:
# country code goes first
locale = locale[1].lower() + '-' + locale[0].lower()
@@ -58,7 +78,25 @@ def request(query, params):
locale = locale[0].lower()
lang_codes = [x[0] for x in language_codes]
for lc in lang_codes:
- lc = lc.split('_')
+ lc = lc.split('-')
+ if locale == lc[0] and len(lc) == 2:
+ locale = lc[1].lower() + '-' + lc[0].lower()
+ break
+
+ if locale:
+ params['url'] = url.format(
+ query=urlencode({'q': query, 'kl': locale}), offset=offset)
+ else:
+ locale = params['language'].split('-')
+ if len(locale) == 2:
+ # country code goes first
+ locale = locale[1].lower() + '-' + locale[0].lower()
+ else:
+ # tries to get a country code from language
+ locale = locale[0].lower()
+ lang_codes = [x[0] for x in language_codes]
+ for lc in lang_codes:
+ lc = lc.split('-')
if locale == lc[0]:
locale = lc[1].lower() + '-' + lc[0].lower()
break
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 208ccca28..23a2f3be3 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -4,6 +4,7 @@ from re import compile, sub
from lxml import html
from searx.utils import html_to_text
from searx.engines.xpath import extract_text
+from searx.engines.duckduckgo import supported_languages
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
@@ -23,7 +24,7 @@ def result_to_text(url, text, htmlResult):
def request(query, params):
params['url'] = url.format(query=urlencode({'q': query}))
- params['headers']['Accept-Language'] = params['language']
+ params['headers']['Accept-Language'] = params['language'].split('-')[0]
return params
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index 5430eb3ba..e139842fa 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -48,7 +48,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'xx'
else:
- language = params['language'][0:2]
+ language = params['language'].split('-')[0]
if params['safesearch'] >= 1:
safesearch = 1
diff --git a/searx/engines/google.py b/searx/engines/google.py
index a02b6940e..375e627ba 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -23,6 +23,20 @@ categories = ['general']
paging = True
language_support = True
use_locale_domain = True
+supported_languages = ['de', 'en', 'es', 'es_419', 'fr', 'hr', 'it', 'nl', 'pl', 'pt-BR',
+ 'pt-PT', 'vi', 'tr', 'ru', 'ar', 'th', 'ko', 'zh-CN', 'zh-TW', 'ja',
+ 'ach', 'af', 'ak', 'az', 'ms', 'ban', 'xx_bork', 'bs', 'br', 'ca',
+ 'ceb', 'ckb', 'cs', 'sn', 'co', 'cy', 'da', 'yo', 'et', 'xx_elmer',
+ 'eo', 'eu', 'ee', 'tl', 'fo', 'gaa', 'ga', 'gd', 'gl', 'gn', 'xx_hacker',
+ 'ht', 'ha', 'haw', 'bem', 'ig', 'rn', 'id', 'ia', 'zu', 'is', 'jw', 'rw',
+ 'sw', 'tlh', 'kg', 'mfe', 'kri', 'la', 'lv', 'to', 'lt', 'ln', 'loz',
+ 'lua', 'lg', 'hu', 'mg', 'mt', 'mi', 'pcm', 'no', 'nso', 'ny', 'nn',
+ 'uz', 'oc', 'om', 'xx_pirate', 'pt', 'ro', 'mo', 'rm', 'qu', 'nyn', 'crs',
+ 'sq', 'sd', 'sk', 'sl', 'so', 'st', 'sr_ME', 'sr_Latn', 'su', 'fi', 'sv',
+ 'tg', 'tt', 'tn', 'tum', 'tk', 'tw', 'fy', 'wo', 'xh', 'el', 'be', 'bg',
+ 'ky', 'kk', 'mk', 'mn', 'sr', 'uk', 'ka', 'hy', 'yi', 'iw', 'ug', 'ur',
+ 'ps', 'fa', 'ti', 'am', 'ne', 'mr', 'hi', 'bn', 'pa', 'gu', 'or', 'ta',
+ 'te', 'kn', 'ml', 'si', 'lo', 'my', 'km', 'chr']
time_range_support = True
# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 37253c6a7..6d1430248 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -12,6 +12,8 @@
from lxml import html
from urllib import urlencode
+from json import loads
+from searx.engines.google import supported_languages
# search-url
categories = ['news']
@@ -50,7 +52,7 @@ def request(query, params):
search_options=urlencode(search_options))
if params['language'] != 'all':
- language_array = params['language'].lower().split('_')
+ language_array = params['language'].lower().split('-')
params['url'] += '&lr=lang_' + language_array[0]
return params
diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py
index 26d3720d9..b17cb38e4 100644
--- a/searx/engines/mediawiki.py
+++ b/searx/engines/mediawiki.py
@@ -15,6 +15,7 @@
from json import loads
from string import Formatter
from urllib import urlencode, quote
+from searx.engines.wikipedia import supported_languages
# engine dependent config
categories = ['general']
@@ -46,7 +47,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en'
else:
- language = params['language'].split('_')[0]
+ language = params['language'].split('-')[0]
# format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
if any(x[1] == 'language' for x in format_strings):
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
index 2197005e5..a029bbfef 100644
--- a/searx/engines/photon.py
+++ b/searx/engines/photon.py
@@ -26,7 +26,7 @@ search_string = 'api/?{query}&limit={limit}'
result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
# list of supported languages
-allowed_languages = ['de', 'en', 'fr', 'it']
+supported_languages = ['de', 'en', 'fr', 'it']
# do search-request
@@ -37,7 +37,7 @@ def request(query, params):
if params['language'] != 'all':
language = params['language'].split('_')[0]
- if language in allowed_languages:
+ if language in supported_languages:
params['url'] = params['url'] + "&lang=" + language
# using searx User-Agent
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 6f6eae1cf..54aafdee5 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -47,7 +47,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
- params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
+ params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
return params
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index daba68be7..2c0a94f08 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -43,8 +43,13 @@ def response(resp):
search_lang = ""
- if resp.search_params['language'] != 'all':
- search_lang = [lc[1]
+    # dirty fix for languages named differently on their site
+ if resp.search_params['language'][:2] == 'fa':
+ search_lang = 'Farsi'
+    elif resp.search_params['language'] == 'pt-BR':
+ search_lang = 'Brazilian'
+ elif resp.search_params['language'] != 'all':
+ search_lang = [lc[3]
for lc in language_codes
if lc[0][:2] == resp.search_params['language'].split('-')[0]][0]
diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py
index 72184e428..68632a15a 100644
--- a/searx/engines/swisscows.py
+++ b/searx/engines/swisscows.py
@@ -36,8 +36,8 @@ def request(query, params):
ui_language = 'browser'
region = 'browser'
else:
- region = params['language'].replace('_', '-')
- ui_language = params['language'].split('_')[0]
+ region = params['language']
+ ui_language = params['language'].split('-')[0]
search_path = search_string.format(
query=urlencode({'query': query,
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 36efac186..6cca05f70 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -40,7 +40,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
- params['cookies']['lang'] = params['language'].split('_')[0]
+ params['cookies']['lang'] = params['language'].split('-')[0]
else:
params['cookies']['lang'] = 'en'
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 91040e218..edb6d75fe 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -14,6 +14,8 @@
from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
+from searx.utils import format_date_by_locale
+from searx.engines.wikipedia import supported_languages
from json import loads
from lxml.html import fromstring
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 70191d22b..fdba5ed68 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -13,6 +13,36 @@
from json import loads
from urllib import urlencode, quote
+supported_languages = ["en", "sv", "ceb", "de", "nl", "fr", "ru", "it", "es", "war",
+ "pl", "vi", "ja", "pt", "zh", "uk", "ca", "fa", "no", "sh",
+ "ar", "fi", "hu", "id", "ro", "cs", "ko", "sr", "ms", "tr",
+ "eu", "eo", "min", "bg", "da", "kk", "sk", "hy", "he", "zh-min-nan",
+ "lt", "hr", "sl", "et", "ce", "gl", "nn", "uz", "la", "vo",
+ "el", "simple", "be", "az", "th", "ur", "ka", "hi", "oc", "ta",
+ "mk", "mg", "new", "lv", "cy", "bs", "tt", "tl", "te", "pms",
+ "be-tarask", "br", "sq", "ky", "ht", "jv", "tg", "ast", "zh-yue", "lb",
+ "mr", "ml", "bn", "pnb", "is", "af", "sco", "ga", "ba", "fy",
+ "cv", "lmo", "sw", "my", "an", "yo", "ne", "io", "gu", "nds",
+ "scn", "bpy", "pa", "ku", "als", "kn", "bar", "ia", "qu", "su",
+ "ckb", "bat-smg", "mn", "arz", "nap", "wa", "bug", "gd", "yi", "map-bms",
+ "am", "mzn", "fo", "si", "nah", "li", "sah", "vec", "hsb", "or",
+ "os", "mrj", "sa", "hif", "mhr", "roa-tara", "azb", "pam", "ilo",
+ "sd", "ps", "se", "mi", "bh", "eml", "bcl", "xmf", "diq", "hak",
+ "gan", "glk", "vls", "nds-nl", "rue", "bo", "fiu-vro", "co", "sc",
+ "tk", "csb", "lrc", "vep", "wuu", "km", "szl", "gv", "crh", "kv",
+ "zh-classical", "frr", "zea", "as", "so", "kw", "nso", "ay", "stq",
+ "udm", "cdo", "nrm", "ie", "koi", "rm", "pcd", "myv", "mt", "fur",
+ "ace", "lad", "gn", "lij", "dsb", "dv", "cbk-zam", "ext", "gom",
+ "kab", "ksh", "ang", "mai", "mwl", "lez", "gag", "ln", "ug", "pi",
+ "pag", "frp", "sn", "nv", "av", "pfl", "haw", "xal", "krc", "kaa",
+ "rw", "bxr", "pdc", "to", "kl", "nov", "arc", "kbd", "lo", "bjn",
+ "pap", "ha", "tet", "ki", "tyv", "tpi", "na", "lbe", "ig", "jbo",
+ "roa-rup", "ty", "jam", "za", "kg", "mdf", "lg", "wo", "srn", "ab",
+ "ltg", "zu", "sm", "chr", "om", "tn", "chy", "rmy", "cu", "tw", "tum",
+ "xh", "bi", "rn", "pih", "got", "ss", "pnt", "bm", "ch", "mo", "ts",
+ "ady", "iu", "st", "ee", "ny", "fj", "ks", "ak", "ik", "sg", "ve",
+ "dz", "ff", "ti", "cr", "ng", "cho", "kj", "mh", "ho", "ii", "aa", "mus", "hz", "kr"]
+
# search-url
base_url = 'https://{language}.wikipedia.org/'
search_postfix = 'w/api.php?'\
@@ -28,10 +58,11 @@ search_postfix = 'w/api.php?'\
# set language in base_url
def url_lang(lang):
- if lang == 'all':
+ lang = lang.split('-')[0]
+ if lang == 'all' or lang not in supported_languages:
language = 'en'
else:
- language = lang.split('_')[0]
+ language = lang
return base_url.format(language=language)
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index 92cf881c0..7b1b6b35d 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -53,7 +53,7 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
- params['url'] += '&lr=lang_' + params['language'].split('_')[0]
+ params['url'] += '&lr=lang_' + params['language'].split('-')[0]
return params
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index 2bb34b83d..c00e42368 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -20,6 +20,10 @@ from searx.engines.xpath import extract_text, extract_url
categories = ['general']
paging = True
language_support = True
+supported_languages = ["ar", "bg", "ca", "szh", "tzh", "hr", "cs", "da", "nl", "en",
+ "et", "fi", "fr", "de", "el", "he", "hu", "is", "id", "it", "ja",
+ "ko", "lv", "lt", "no", "fa", "pl", "pt", "ro", "ru", "sk", "sr",
+ "sl", "es", "sv", "th", "tr"]
time_range_support = True
# search-url
@@ -72,7 +76,13 @@ def _get_url(query, offset, language, time_range):
def _get_language(params):
if params['language'] == 'all':
return 'en'
- return params['language'].split('_')[0]
+ elif params['language'][:2] == 'zh':
+        if params['language'] == 'zh' or params['language'] == 'zh-CN':
+ return 'szh'
+ else:
+ return 'tzh'
+ else:
+ return params['language'].split('-')[0]
# do search-request
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index e91c1d34e..613513e59 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -12,7 +12,7 @@
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url
+from searx.engines.yahoo import parse_url, supported_languages
from datetime import datetime, timedelta
import re
from dateutil import parser
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
index b83a747f9..eee345c45 100644
--- a/searx/engines/yandex.py
+++ b/searx/engines/yandex.py
@@ -36,7 +36,7 @@ content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m orga
def request(query, params):
- lang = params['language'].split('_')[0]
+ lang = params['language'].split('-')[0]
host = base_url.format(tld=language_map.get(lang) or default_tld)
params['url'] = host + search_url.format(page=params['pageno'] - 1,
query=urlencode({'text': query}))
diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py
index 8fd939a25..1dfca5166 100644
--- a/searx/engines/youtube_api.py
+++ b/searx/engines/youtube_api.py
@@ -36,7 +36,7 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
- params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0]
+ params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
return params