Diffstat (limited to 'searx/engines')
-rw-r--r--  searx/engines/__init__.py   14
-rw-r--r--  searx/engines/ahmia.py      82
-rw-r--r--  searx/engines/not_evil.py   64
-rw-r--r--  searx/engines/xpath.py      36
4 files changed, 188 insertions, 8 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 9cdca47b7..00be89412 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -142,6 +142,17 @@ def load_engine(engine_data):
    engine.stats['page_load_time'] = 0
    engine.stats['page_load_count'] = 0

+    # tor related settings
+    if settings['outgoing'].get('using_tor_proxy'):
+        # use onion url if using tor.
+        if hasattr(engine, 'onion_url'):
+            engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
+    elif 'onions' in engine.categories:
+        # exclude onion engines if not using tor.
+        return None
+
+    engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)
+
    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)
@@ -252,8 +263,9 @@ def get_engines_stats(preferences):


def load_engines(engine_list):
-    global engines
+    global engines, engine_shortcuts
    engines.clear()
+    engine_shortcuts.clear()
    for engine_data in engine_list:
        engine = load_engine(engine_data)
        if engine is not None:
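
Note: with the loader change above, an engine module only has to expose an onion_url
(and optionally a search_path) to be reachable over Tor, while any engine in the
'onions' category is dropped entirely when outgoing.using_tor_proxy is not set. A
minimal sketch of such a module; only the attribute names come from the diff, the
onion address and search path are made up:

    # hypothetical onion engine module
    categories = ['onions']                       # engine is skipped when Tor is off
    onion_url = 'http://examplesearchabcd.onion'  # made-up address, used when Tor is on
    search_path = '/search?q={query}'             # optional; appended to onion_url

    # load_engine() then effectively sets
    #   engine.search_url = onion_url + search_path
    # and adds outgoing.extra_proxy_timeout (default 0) to the engine timeout.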
diff --git a/searx/engines/ahmia.py b/searx/engines/ahmia.py
new file mode 100644
index 000000000..d9fcc6ca7
--- /dev/null
+++ b/searx/engines/ahmia.py
@@ -0,0 +1,82 @@
+"""
+ Ahmia (Onions)
+
+ @website http://msydqstlz2kzerdg.onion
+ @provide-api no
+
+ @using-api no
+ @results HTML
+ @stable no
+ @parse url, title, content
+"""
+
+from urllib.parse import urlencode, urlparse, parse_qs
+from lxml.html import fromstring
+from searx.engines.xpath import extract_url, extract_text
+
+# engine config
+categories = ['onions']
+paging = True
+page_size = 10
+
+# search url
+search_url = 'http://msydqstlz2kzerdg.onion/search/?{query}'
+time_range_support = True
+time_range_dict = {'day': 1,
+                   'week': 7,
+                   'month': 30}
+
+# xpaths
+results_xpath = '//li[@class="result"]'
+url_xpath = './h4/a/@href'
+title_xpath = './h4/a[1]'
+content_xpath = './/p[1]'
+correction_xpath = '//*[@id="didYouMean"]//a'
+number_of_results_xpath = '//*[@id="totalResults"]'
+
+
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'q': query}))
+
+    if params['time_range'] in time_range_dict:
+        params['url'] += '&' + urlencode({'d': time_range_dict[params['time_range']]})
+
+    return params
+
+
+def response(resp):
+    results = []
+    dom = fromstring(resp.text)
+
+    # Ahmia returns all matches at once, so slice them to emulate paging
+    first_result_index = page_size * (resp.search_params.get('pageno', 1) - 1)
+    all_results = dom.xpath(results_xpath)
+    trimmed_results = all_results[first_result_index:first_result_index + page_size]
+
+    # get results
+    for result in trimmed_results:
+        # strip Ahmia's redirect wrapper and extract the result's actual url
+        raw_url = extract_url(result.xpath(url_xpath), search_url)
+        cleaned_url = parse_qs(urlparse(raw_url).query).get('redirect_url', [''])[0]
+
+        title = extract_text(result.xpath(title_xpath))
+        content = extract_text(result.xpath(content_xpath))
+
+        results.append({'url': cleaned_url,
+                        'title': title,
+                        'content': content,
+                        'is_onion': True})
+
+    # get spelling corrections
+    for correction in dom.xpath(correction_xpath):
+        results.append({'correction': extract_text(correction)})
+
+    # get number of results
+    number_of_results = dom.xpath(number_of_results_xpath)
+    if number_of_results:
+        try:
+            results.append({'number_of_results': int(extract_text(number_of_results))})
+        except ValueError:
+            pass
+
+    return results
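
Note: Ahmia wraps every hit in a redirect link, so response() pulls the real target
out of the redirect_url query parameter. A standalone sketch of that step, using a
made-up redirect link (the exact link format on Ahmia may differ):

    from urllib.parse import urlparse, parse_qs

    # made-up example of an Ahmia-style redirect link
    raw_url = ('http://msydqstlz2kzerdg.onion/search/redirect?'
               'search_term=example&redirect_url=http://exampleresultxyz.onion/')
    cleaned_url = parse_qs(urlparse(raw_url).query).get('redirect_url', [''])[0]
    print(cleaned_url)  # -> http://exampleresultxyz.onion/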
diff --git a/searx/engines/not_evil.py b/searx/engines/not_evil.py
new file mode 100644
index 000000000..e84f153bd
--- /dev/null
+++ b/searx/engines/not_evil.py
@@ -0,0 +1,64 @@
+"""
+ not Evil (Onions)
+
+ @website http://hss3uro2hsxfogfq.onion
+ @provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm)
+
+ @using-api no
+ @results HTML
+ @stable no
+ @parse url, title, content
+"""
+
+from urllib.parse import urlencode
+from lxml import html
+from searx.engines.xpath import extract_text
+
+# engine dependent config
+categories = ['onions']
+paging = True
+page_size = 20
+
+# search-url
+base_url = 'http://hss3uro2hsxfogfq.onion/'
+search_url = 'index.php?{query}&hostLimit=20&start={pageno}&numRows={page_size}'
+
+# specific xpath variables
+results_xpath = '//*[@id="content"]/div/p'
+url_xpath = './span[1]'
+title_xpath = './a[1]'
+content_xpath = './text()'
+
+
+# do search-request
+def request(query, params):
+    offset = (params['pageno'] - 1) * page_size
+
+    params['url'] = base_url + search_url.format(pageno=offset,
+                                                 query=urlencode({'q': query}),
+                                                 page_size=page_size)
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    # needed because otherwise requests guesses wrong encoding
+    resp.encoding = 'utf8'
+    dom = html.fromstring(resp.text)
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        url = extract_text(result.xpath(url_xpath)[0])
+        title = extract_text(result.xpath(title_xpath)[0])
+        content = extract_text(result.xpath(content_xpath))
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'content': content,
+                        'is_onion': True})
+
+    return results
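
Note: not Evil's request() builds an offset-based URL: the start parameter (filled
through the pageno placeholder) carries a result offset, not a page number. For
instance, page 2 with page_size = 20 produces start=20:

    from urllib.parse import urlencode

    base_url = 'http://hss3uro2hsxfogfq.onion/'
    search_url = 'index.php?{query}&hostLimit=20&start={pageno}&numRows={page_size}'
    page_size = 20

    offset = (2 - 1) * page_size  # pageno == 2
    url = base_url + search_url.format(pageno=offset,
                                       query=urlencode({'q': 'example'}),
                                       page_size=page_size)
    print(url)
    # http://hss3uro2hsxfogfq.onion/index.php?q=example&hostLimit=20&start=20&numRows=20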
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index a269253d7..81c2747fb 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -10,6 +10,8 @@ thumbnail_xpath = False
paging = False
suggestion_xpath = ''
results_xpath = ''
+cached_xpath = ''
+cached_url = ''
# parameters for engines with paging support
#
@@ -36,6 +38,8 @@ def request(query, params):
def response(resp):
    results = []
    dom = html.fromstring(resp.text)
+    is_onion = True if 'onions' in categories else False
+
    if results_xpath:
        for result in eval_xpath(dom, results_xpath):
            url = extract_url(eval_xpath(result, url_xpath), search_url)
@@ -49,15 +53,33 @@ def response(resp):
                if len(thumbnail_xpath_result) > 0:
                    tmp_result['img_src'] = extract_url(thumbnail_xpath_result, search_url)

+            # add alternative cached url if available
+            if cached_xpath:
+                tmp_result['cached_url'] = cached_url + extract_text(result.xpath(cached_xpath))
+
+            if is_onion:
+                tmp_result['is_onion'] = True
+
            results.append(tmp_result)
    else:
-        for url, title, content in zip(
-            (extract_url(x, search_url) for
-             x in eval_xpath(dom, url_xpath)),
-            map(extract_text, eval_xpath(dom, title_xpath)),
-            map(extract_text, eval_xpath(dom, content_xpath))
-        ):
-            results.append({'url': url, 'title': title, 'content': content})
+        if cached_xpath:
+            for url, title, content, cached in zip(
+                (extract_url(x, search_url) for
+                 x in dom.xpath(url_xpath)),
+                map(extract_text, dom.xpath(title_xpath)),
+                map(extract_text, dom.xpath(content_xpath)),
+                map(extract_text, dom.xpath(cached_xpath))
+            ):
+                results.append({'url': url, 'title': title, 'content': content,
+                                'cached_url': cached_url + cached, 'is_onion': is_onion})
+        else:
+            for url, title, content in zip(
+                (extract_url(x, search_url) for
+                 x in dom.xpath(url_xpath)),
+                map(extract_text, dom.xpath(title_xpath)),
+                map(extract_text, dom.xpath(content_xpath))
+            ):
+                results.append({'url': url, 'title': title, 'content': content, 'is_onion': is_onion})

    if not suggestion_xpath:
        return results
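
Note: with the xpath engine changes above, an xpath-based engine placed in the
'onions' category gets is_onion set on every result, and setting cached_xpath /
cached_url adds a cached_url field built as the prefix plus the extracted value. A
rough sketch of the settings entry such an engine would need, written as the dict
that load_engine() receives; everything except the cached_* keys and the 'onions'
category is made up:

    engine_data = {
        'name': 'example onion search',
        'engine': 'xpath',
        'shortcut': 'eos',
        'categories': 'onions',
        'search_url': 'http://examplesearchabcd.onion/search?q={query}',
        'url_xpath': '//a[@class="result"]/@href',
        'title_xpath': '//a[@class="result"]',
        'content_xpath': '//p[@class="snippet"]',
        'cached_xpath': '//a[@class="cached"]/@href',    # enables the cached-link branch
        'cached_url': 'http://examplesearchabcd.onion',  # prefix for the extracted value
    }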