summary | refs | log | tree | commit | diff
path: root/searx/engines
diff options
context:
space:
mode:
author marc <a01200356@itesm.mx> 2016-12-15 00:34:43 -0600
committer marc <a01200356@itesm.mx> 2016-12-15 00:40:21 -0600
commit af35eee10b98940c51c6e5e18629de514b4bd48d (patch)
tree 804b0a4cfe08bb897541e9e8571b921a78e07992 /searx/engines
parent e0c270bd72f7b2a40222e3ed264e25d36cb0fc30 (diff)
download searxng-af35eee10b98940c51c6e5e18629de514b4bd48d.tar.gz
searxng-af35eee10b98940c51c6e5e18629de514b4bd48d.zip
tests for _fetch_supported_languages in engines
and refactor method to make it testable without making requests
Diffstat (limited to 'searx/engines')
-rw-r--r-- searx/engines/.yandex.py.swp bin 12288 -> 0 bytes
-rw-r--r-- searx/engines/__init__.py 13
-rw-r--r-- searx/engines/bing.py 6
-rw-r--r-- searx/engines/bing_images.py 2
-rw-r--r-- searx/engines/bing_news.py 2
-rw-r--r-- searx/engines/dailymotion.py 5
-rw-r--r-- searx/engines/duckduckgo.py 5
-rw-r--r-- searx/engines/duckduckgo_definitions.py 2
-rw-r--r-- searx/engines/gigablast.py 6
-rw-r--r-- searx/engines/google.py 12
-rw-r--r-- searx/engines/google_news.py 2
-rw-r--r-- searx/engines/swisscows.py 8
-rw-r--r-- searx/engines/wikidata.py 2
-rw-r--r-- searx/engines/wikipedia.py 6
-rw-r--r-- searx/engines/yahoo.py 8
-rw-r--r-- searx/engines/yahoo_news.py 2
16 files changed, 38 insertions, 43 deletions
diff --git a/searx/engines/.yandex.py.swp b/searx/engines/.yandex.py.swp
deleted file mode 100644
index ff2a8f648..000000000
--- a/searx/engines/.yandex.py.swp
+++ /dev/null
Binary files differ
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 7a64fd25b..cc200a0d1 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -21,6 +21,7 @@ import sys
from flask_babel import gettext
from operator import itemgetter
from json import loads
+from requests import get
from searx import settings
from searx import logger
from searx.utils import load_module
@@ -79,9 +80,6 @@ def load_engine(engine_data):
if not hasattr(engine, arg_name):
setattr(engine, arg_name, arg_value)
- if engine_data['name'] in languages:
- setattr(engine, 'supported_languages', languages[engine_data['name']])
-
# checking required variables
for engine_attr in dir(engine):
if engine_attr.startswith('_'):
@@ -91,6 +89,15 @@ def load_engine(engine_data):
.format(engine.name, engine_attr))
sys.exit(1)
+ # assign supported languages from json file
+ if engine_data['name'] in languages:
+ setattr(engine, 'supported_languages', languages[engine_data['name']])
+
+ # assign language fetching method if auxiliary method exists
+ if hasattr(engine, '_fetch_supported_languages'):
+ setattr(engine, 'fetch_supported_languages',
+ lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
+
engine.stats = {
'result_count': 0,
'search_count': 0,
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 354003399..b2ad7b6cf 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -15,7 +15,6 @@
from urllib import urlencode
from lxml import html
-from requests import get
from searx.engines.xpath import extract_text
# engine dependent config
@@ -86,10 +85,9 @@ def response(resp):
# get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
supported_languages = []
- response = get(supported_languages_url)
- dom = html.fromstring(response.text)
+ dom = html.fromstring(resp.text)
options = dom.xpath('//div[@id="limit-languages"]//input')
for option in options:
code = option.xpath('./@id')[0].replace('_', '-')
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 746d3abc4..97f6dca37 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -19,7 +19,7 @@ from urllib import urlencode
from lxml import html
from json import loads
import re
-from searx.engines.bing import fetch_supported_languages
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['images']
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 2d936fa53..765bcd38e 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -17,7 +17,7 @@ from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get
-from searx.engines.bing import fetch_supported_languages
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['news']
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 813dd951f..8c69aafe0 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -80,11 +80,10 @@ def response(resp):
# get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
supported_languages = {}
- response = get(supported_languages_url)
- response_json = loads(response.text)
+ response_json = loads(resp.text)
for language in response_json['list']:
supported_languages[language['code']] = {}
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 9cf5fb339..df230222d 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -119,11 +119,10 @@ def response(resp):
# get supported languages from their site
-def fetch_supported_languages():
- response = get(supported_languages_url)
+def _fetch_supported_languages(resp):
# response is a js file with regions as an embedded object
- response_page = response.text
+ response_page = resp.text
response_page = response_page[response_page.find('regions:{') + 8:]
response_page = response_page[:response_page.find('}') + 1]
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index b965c02e9..dd3f12e1e 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -4,7 +4,7 @@ from re import compile, sub
from lxml import html
from searx.utils import html_to_text
from searx.engines.xpath import extract_text
-from searx.engines.duckduckgo import fetch_supported_languages
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index e598e55c4..827b9cd03 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -14,7 +14,6 @@ from json import loads
from random import randint
from time import time
from urllib import urlencode
-from requests import get
from lxml.html import fromstring
# engine dependent config
@@ -91,10 +90,9 @@ def response(resp):
# get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
supported_languages = []
- response = get(supported_languages_url)
- dom = fromstring(response.text)
+ dom = fromstring(resp.text)
links = dom.xpath('//span[@id="menu2"]/a')
for link in links:
code = link.xpath('./@href')[0][-2:]
diff --git a/searx/engines/google.py b/searx/engines/google.py
index a82a0b5a7..803cd307e 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -12,7 +12,6 @@ import re
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from lxml import html, etree
-from requests import get
from searx.engines.xpath import extract_text, extract_url
from searx.search import logger
@@ -364,14 +363,13 @@ def attributes_to_html(attributes):
# get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
supported_languages = {}
- response = get(supported_languages_url)
- dom = html.fromstring(response.text)
- options = dom.xpath('//select[@name="hl"]/option')
+ dom = html.fromstring(resp.text)
+ options = dom.xpath('//table//td/font/label/span')
for option in options:
- code = option.xpath('./@value')[0].split('-')[0]
- name = option.text[:-1].title()
+ code = option.xpath('./@id')[0][1:]
+ name = option.text.title()
supported_languages[code] = {"name": name}
return supported_languages
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index d138f99f5..ddacd1a61 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -13,7 +13,7 @@
from lxml import html
from urllib import urlencode
from json import loads
-from searx.engines.google import fetch_supported_languages
+from searx.engines.google import _fetch_supported_languages, supported_languages_url
# search-url
categories = ['news']
diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py
index 7f85019a6..d8a454039 100644
--- a/searx/engines/swisscows.py
+++ b/searx/engines/swisscows.py
@@ -13,7 +13,6 @@
from json import loads
from urllib import urlencode, unquote
import re
-from requests import get
from lxml.html import fromstring
# engine dependent config
@@ -25,6 +24,8 @@ language_support = True
base_url = 'https://swisscows.ch/'
search_string = '?{query}&page={page}'
+supported_languages_url = base_url
+
# regex
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
regex_json_remove_start = re.compile(r'^initialData:\s*')
@@ -113,10 +114,9 @@ def response(resp):
# get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
supported_languages = []
- response = get(base_url)
- dom = fromstring(response.text)
+ dom = fromstring(resp.text)
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
for option in options:
code = option.xpath('./@data-val')[0]
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 9c0a768e0..3f849bc7d 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -15,7 +15,7 @@ from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
from searx.utils import format_date_by_locale
-from searx.engines.wikipedia import fetch_supported_languages
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from json import loads
from lxml.html import fromstring
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 0dee325a7..322e8d128 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -12,7 +12,6 @@
from json import loads
from urllib import urlencode, quote
-from requests import get
from lxml.html import fromstring
@@ -119,10 +118,9 @@ def response(resp):
# get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
supported_languages = {}
- response = get(supported_languages_url)
- dom = fromstring(response.text)
+ dom = fromstring(resp.text)
tables = dom.xpath('//table[contains(@class,"sortable")]')
for table in tables:
# exclude header row
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index db10c8939..5c62c2ed8 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -14,7 +14,6 @@
from urllib import urlencode
from urlparse import unquote
from lxml import html
-from requests import get
from searx.engines.xpath import extract_text, extract_url
# engine dependent config
@@ -144,13 +143,12 @@ def response(resp):
# get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
supported_languages = []
- response = get(supported_languages_url)
- dom = html.fromstring(response.text)
+ dom = html.fromstring(resp.text)
options = dom.xpath('//div[@id="yschlang"]/span/label/input')
for option in options:
- code = option.xpath('./@value')[0][5:]
+ code = option.xpath('./@value')[0][5:].replace('_', '-')
supported_languages.append(code)
return supported_languages
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index bc7b5c368..3e4cf02eb 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -12,7 +12,7 @@
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url, fetch_supported_languages
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
from datetime import datetime, timedelta
import re
from dateutil import parser