author    | marc <a01200356@itesm.mx> | 2016-12-15 00:34:43 -0600
committer | marc <a01200356@itesm.mx> | 2016-12-15 00:40:21 -0600
commit    | af35eee10b98940c51c6e5e18629de514b4bd48d (patch)
tree      | 804b0a4cfe08bb897541e9e8571b921a78e07992 /searx/engines
parent    | e0c270bd72f7b2a40222e3ed264e25d36cb0fc30 (diff)
download  | searxng-af35eee10b98940c51c6e5e18629de514b4bd48d.tar.gz searxng-af35eee10b98940c51c6e5e18629de514b4bd48d.zip
tests for _fetch_supported_languages in engines
and refactor method to make it testable without making requests
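The key to the refactor is that each engine's `_fetch_supported_languages` now receives an already-fetched response object and only parses it, so a unit test can hand it a canned object exposing a `.text` attribute instead of performing an HTTP request. A rough sketch of such an offline test against the Bing parser (the `FakeResponse` helper and the sample HTML below are illustrative, not taken from this commit):

```python
from searx.engines import bing


class FakeResponse(object):
    """Illustrative stand-in for a requests response object."""
    def __init__(self, text):
        self.text = text  # the only attribute _fetch_supported_languages reads here


def test_bing_fetch_supported_languages_offline():
    # canned HTML shaped like Bing's language-settings page; no network involved
    page = (
        '<html><body><div id="limit-languages">'
        '<input id="en_US"/><input id="de_DE"/>'
        '</div></body></html>'
    )
    languages = bing._fetch_supported_languages(FakeResponse(page))
    assert 'en-US' in languages
    assert 'de-DE' in languages
```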
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/.yandex.py.swp | bin | 12288 -> 0 bytes
-rw-r--r-- | searx/engines/__init__.py | 13
-rw-r--r-- | searx/engines/bing.py | 6
-rw-r--r-- | searx/engines/bing_images.py | 2
-rw-r--r-- | searx/engines/bing_news.py | 2
-rw-r--r-- | searx/engines/dailymotion.py | 5
-rw-r--r-- | searx/engines/duckduckgo.py | 5
-rw-r--r-- | searx/engines/duckduckgo_definitions.py | 2
-rw-r--r-- | searx/engines/gigablast.py | 6
-rw-r--r-- | searx/engines/google.py | 12
-rw-r--r-- | searx/engines/google_news.py | 2
-rw-r--r-- | searx/engines/swisscows.py | 8
-rw-r--r-- | searx/engines/wikidata.py | 2
-rw-r--r-- | searx/engines/wikipedia.py | 6
-rw-r--r-- | searx/engines/yahoo.py | 8
-rw-r--r-- | searx/engines/yahoo_news.py | 2
16 files changed, 38 insertions, 43 deletions
diff --git a/searx/engines/.yandex.py.swp b/searx/engines/.yandex.py.swp
deleted file mode 100644
index ff2a8f648..000000000
--- a/searx/engines/.yandex.py.swp
+++ /dev/null
Binary files differ
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 7a64fd25b..cc200a0d1 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -21,6 +21,7 @@ import sys
 from flask_babel import gettext
 from operator import itemgetter
 from json import loads
+from requests import get
 from searx import settings
 from searx import logger
 from searx.utils import load_module
@@ -79,9 +80,6 @@ def load_engine(engine_data):
         if not hasattr(engine, arg_name):
             setattr(engine, arg_name, arg_value)
 
-    if engine_data['name'] in languages:
-        setattr(engine, 'supported_languages', languages[engine_data['name']])
-
     # checking required variables
     for engine_attr in dir(engine):
         if engine_attr.startswith('_'):
@@ -91,6 +89,15 @@
                          .format(engine.name, engine_attr))
             sys.exit(1)
 
+    # assign supported languages from json file
+    if engine_data['name'] in languages:
+        setattr(engine, 'supported_languages', languages[engine_data['name']])
+
+    # assign language fetching method if auxiliary method exists
+    if hasattr(engine, '_fetch_supported_languages'):
+        setattr(engine, 'fetch_supported_languages',
+                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
+
     engine.stats = {
         'result_count': 0,
         'search_count': 0,
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 354003399..b2ad7b6cf 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -15,7 +15,6 @@ from urllib import urlencode
 from lxml import html
-from requests import get
 from searx.engines.xpath import extract_text
 
 # engine dependent config
@@ -86,10 +85,9 @@ def response(resp):
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(supported_languages_url)
-    dom = html.fromstring(response.text)
+    dom = html.fromstring(resp.text)
     options = dom.xpath('//div[@id="limit-languages"]//input')
     for option in options:
         code = option.xpath('./@id')[0].replace('_', '-')
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 746d3abc4..97f6dca37 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -19,7 +19,7 @@ from urllib import urlencode
 from lxml import html
 from json import loads
 import re
-from searx.engines.bing import fetch_supported_languages
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 
 # engine dependent config
 categories = ['images']
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 2d936fa53..765bcd38e 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -17,7 +17,7 @@ from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
-from searx.engines.bing import fetch_supported_languages
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 
 # engine dependent config
 categories = ['news']
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 813dd951f..8c69aafe0 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -80,11 +80,10 @@ def response(resp):
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = {}
-    response = get(supported_languages_url)
-    response_json = loads(response.text)
+    response_json = loads(resp.text)
 
     for language in response_json['list']:
         supported_languages[language['code']] = {}
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 9cf5fb339..df230222d 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -119,11 +119,10 @@ def response(resp):
 
 # get supported languages from their site
-def fetch_supported_languages():
-    response = get(supported_languages_url)
+def _fetch_supported_languages(resp):
 
     # response is a js file with regions as an embedded object
-    response_page = response.text
+    response_page = resp.text
     response_page = response_page[response_page.find('regions:{') + 8:]
     response_page = response_page[:response_page.find('}') + 1]
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index b965c02e9..dd3f12e1e 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -4,7 +4,7 @@ from re import compile, sub
 from lxml import html
 from searx.utils import html_to_text
 from searx.engines.xpath import extract_text
-from searx.engines.duckduckgo import fetch_supported_languages
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
 
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index e598e55c4..827b9cd03 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -14,7 +14,6 @@ from json import loads
 from random import randint
 from time import time
 from urllib import urlencode
-from requests import get
 from lxml.html import fromstring
 
 # engine dependent config
@@ -91,10 +90,9 @@ def response(resp):
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(supported_languages_url)
-    dom = fromstring(response.text)
+    dom = fromstring(resp.text)
     links = dom.xpath('//span[@id="menu2"]/a')
     for link in links:
         code = link.xpath('./@href')[0][-2:]
diff --git a/searx/engines/google.py b/searx/engines/google.py
index a82a0b5a7..803cd307e 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -12,7 +12,6 @@ import re
 from urllib import urlencode
 from urlparse import urlparse, parse_qsl
 from lxml import html, etree
-from requests import get
 from searx.engines.xpath import extract_text, extract_url
 from searx.search import logger
@@ -364,14 +363,13 @@ def attributes_to_html(attributes):
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = {}
-    response = get(supported_languages_url)
-    dom = html.fromstring(response.text)
-    options = dom.xpath('//select[@name="hl"]/option')
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//table//td/font/label/span')
     for option in options:
-        code = option.xpath('./@value')[0].split('-')[0]
-        name = option.text[:-1].title()
+        code = option.xpath('./@id')[0][1:]
+        name = option.text.title()
         supported_languages[code] = {"name": name}
 
     return supported_languages
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index d138f99f5..ddacd1a61 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -13,7 +13,7 @@ from lxml import html
 from urllib import urlencode
 from json import loads
-from searx.engines.google import fetch_supported_languages
+from searx.engines.google import _fetch_supported_languages, supported_languages_url
 
 # search-url
 categories = ['news']
diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py
index 7f85019a6..d8a454039 100644
--- a/searx/engines/swisscows.py
+++ b/searx/engines/swisscows.py
@@ -13,7 +13,6 @@ from json import loads
 from urllib import urlencode, unquote
 import re
-from requests import get
 from lxml.html import fromstring
 
 # engine dependent config
@@ -25,6 +24,8 @@ language_support = True
 base_url = 'https://swisscows.ch/'
 search_string = '?{query}&page={page}'
 
+supported_languages_url = base_url
+
 # regex
 regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
 regex_json_remove_start = re.compile(r'^initialData:\s*')
@@ -113,10 +114,9 @@ def response(resp):
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(base_url)
-    dom = fromstring(response.text)
+    dom = fromstring(resp.text)
     options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
     for option in options:
         code = option.xpath('./@data-val')[0]
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 9c0a768e0..3f849bc7d 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -15,7 +15,7 @@ from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
 from searx.utils import format_date_by_locale
-from searx.engines.wikipedia import fetch_supported_languages
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
 
 from json import loads
 from lxml.html import fromstring
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 0dee325a7..322e8d128 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -12,7 +12,6 @@ from json import loads
 from urllib import urlencode, quote
-from requests import get
 from lxml.html import fromstring
 
@@ -119,10 +118,9 @@ def response(resp):
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = {}
-    response = get(supported_languages_url)
-    dom = fromstring(response.text)
+    dom = fromstring(resp.text)
     tables = dom.xpath('//table[contains(@class,"sortable")]')
     for table in tables:
         # exclude header row
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index db10c8939..5c62c2ed8 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -14,7 +14,6 @@ from urllib import urlencode
 from urlparse import unquote
 from lxml import html
-from requests import get
 from searx.engines.xpath import extract_text, extract_url
 
 # engine dependent config
@@ -144,13 +143,12 @@ def response(resp):
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(supported_languages_url)
-    dom = html.fromstring(response.text)
+    dom = html.fromstring(resp.text)
     options = dom.xpath('//div[@id="yschlang"]/span/label/input')
     for option in options:
-        code = option.xpath('./@value')[0][5:]
+        code = option.xpath('./@value')[0][5:].replace('_', '-')
         supported_languages.append(code)
 
     return supported_languages
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index bc7b5c368..3e4cf02eb 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -12,7 +12,7 @@ from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url, fetch_supported_languages
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
 from datetime import datetime, timedelta
 import re
 from dateutil import parser
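The other half of the change is the wiring in searx/engines/__init__.py: for any engine module that defines the private parser, the loader builds the public fetch_supported_languages, keeping the single HTTP request in one place. Roughly, the pattern amounts to the following (a simplified sketch of the loader's lambda assignment; the helper name is illustrative, not part of the commit):

```python
from requests import get


def bind_language_fetcher(engine):
    # only done when the module provides the request-free parser
    if hasattr(engine, '_fetch_supported_languages'):
        # the sole place a network request happens; parsing stays in the engine module
        engine.fetch_supported_languages = (
            lambda: engine._fetch_supported_languages(get(engine.supported_languages_url))
        )
```

Splitting the request from the parsing is what lets tests call `_fetch_supported_languages` directly with a recorded page, while production code still gets a one-call `fetch_supported_languages()`. Dependent engines such as bing_images, bing_news, google_news, duckduckgo_definitions, wikidata and yahoo_news simply re-import `_fetch_supported_languages` and `supported_languages_url` from their parent engine, so they inherit the same behaviour without duplicating the parsing code.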