summary refs log tree commit diff
diff options
context:
space:
mode:
author Guilhem Bonnefille <guilhem.bonnefille@gmail.com> 2016-04-03 22:03:41 +0200
committer Guilhem Bonnefille <guilhem.bonnefille@gmail.com> 2016-04-03 22:03:41 +0200
commit cf09b500f35fd1bca3fc9cc853bd7ea932220e4e (patch)
tree bc0b56378caed891646d8d35457d1871edb409bd
parent 5cbe4c53329a1fd2b949660fda25ff7a4ce6f254 (diff)
download searxng-cf09b500f35fd1bca3fc9cc853bd7ea932220e4e.tar.gz
searxng-cf09b500f35fd1bca3fc9cc853bd7ea932220e4e.zip
Add support for dokuwiki engine
-rw-r--r-- searx/engines/doku.py 83
-rw-r--r-- tests/unit/engines/test_doku.py 86
2 files changed, 169 insertions, 0 deletions
diff --git a/searx/engines/doku.py b/searx/engines/doku.py
new file mode 100644
index 000000000..18abe75e5
--- /dev/null
+++ b/searx/engines/doku.py
@@ -0,0 +1,83 @@
+# Doku Wiki
+#
+# @website https://www.dokuwiki.org/
+# @provide-api yes
+# (https://www.dokuwiki.org/devel:xmlrpc)
+#
+# @using-api no
+# @results HTML
+# @stable yes
+# @parse (general) url, title, content
+
+from urllib import urlencode
+from lxml.html import fromstring
+from searx.engines.xpath import extract_text
+
+# engine dependent config
+categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
+paging = False
+language_support = False
+number_of_results = 5
+
+# search-url
+# Doku is OpenSearch compatible
+base_url = 'http://localhost:8090'
+search_url = '/?do=search'\
+ '&id={query}'
+# TODO '&startRecord={offset}'\
+# TODO '&maximumRecords={limit}'\
+
+# do search-request (NOTE: urlencode({'query': q}) yields 'query=q', so the URL ends in '&id=query=q')
+def request(query, params):
+
+ params['url'] = base_url +\
+ search_url.format(query=urlencode({'query': query}))
+
+ return params
+
+
+# get response from search-request: parse the returned HTML page into a list of result dicts
+def response(resp):
+ results = []
+
+ doc = fromstring(resp.text)
+
+ # parse results
+ # Quick hits: links listed under div.search_quickresult; they carry a title and URL but no content
+ for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'):
+ try:
+ res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
+ except:
+ continue
+
+ if not res_url:
+ continue
+
+ title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
+
+ # append result
+ results.append({'title': title,
+ 'content': "",
+ 'url': base_url + res_url})
+
+ # Search results: each <dt> supplies the URL and title, each <dd> supplies the content
+ for r in doc.xpath('//dl[@class="search_results"]/*'):
+ try:
+ if r.tag == "dt":
+ res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
+ title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
+ elif r.tag == "dd":
+ content = extract_text(r.xpath('.'))
+
+ # append result
+ results.append({'title': title,
+ 'content': content,
+ 'url': base_url + res_url})
+ except:
+ continue
+
+ if not res_url:
+ continue
+
+ # return results
+ return results
diff --git a/tests/unit/engines/test_doku.py b/tests/unit/engines/test_doku.py
new file mode 100644
index 000000000..331671eeb
--- /dev/null
+++ b/tests/unit/engines/test_doku.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import doku
+from searx.testing import SearxTestCase
+
+
+class TestDokuEngine(SearxTestCase):
+
+ def test_request(self):
+ query = 'test_query'
+ dicto = defaultdict(dict)
+ params = doku.request(query, dicto)
+ self.assertIn('url', params)
+ self.assertIn(query, params['url'])
+
+ def test_response(self):
+ self.assertRaises(AttributeError, doku.response, None)
+ self.assertRaises(AttributeError, doku.response, [])
+ self.assertRaises(AttributeError, doku.response, '')
+ self.assertRaises(AttributeError, doku.response, '[]')
+
+ response = mock.Mock(text='<html></html>')
+ self.assertEqual(doku.response(response), [])
+
+ html = u"""
+ <div class="search_quickresult">
+ <h3>Pages trouvées :</h3>
+ <ul class="search_quickhits">
+ <li> <a href="/xfconf-query" class="wikilink1" title="xfconf-query">xfconf-query</a></li>
+ </ul>
+ <div class="clearer"></div>
+ </div>
+ """
+ response = mock.Mock(text=html)
+ results = doku.response(response)
+ self.assertEqual(doku.response(response), [{'content': '', 'title': 'xfconf-query', 'url': 'http://localhost:8090/xfconf-query'}])
+
+ html = u"""
+ <dl class="search_results">
+ <dt><a href="/xvnc?s[]=query" class="wikilink1" title="xvnc">xvnc</a>: 40 Occurrences trouvées</dt>
+ <dd>er = /usr/bin/Xvnc
+ server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 640x480 -depth 8 -Secur... er = /usr/bin/Xvnc
+ server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 800x600 -depth 8 -Secur... er = /usr/bin/Xvnc
+ server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 1024x768 -depth 8 -Secu... er = /usr/bin/Xvnc
+ server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 1280x1024 -depth 8 -Sec</dd>
+ <dt><a href="/postfix_mysql_tls_sasl_1404?s[]=query" class="wikilink1" title="postfix_mysql_tls_sasl_1404">postfix_mysql_tls_sasl_1404</a>: 14 Occurrences trouvées</dt>
+ <dd>tdepasse
+ hosts = 127.0.0.1
+ dbname = postfix
+ <strong class="search_hit">query</strong> = SELECT goto FROM alias WHERE address='%s' AND a... tdepasse
+ hosts = 127.0.0.1
+ dbname = postfix
+ <strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s'
+ #optional <strong class="search_hit">query</strong> to use when relaying for backup MX
+ #<strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s' and backupmx =</dd><dt><a href="/tutoriel/comment_creer_un_terminal_x_ou_recycler_une_vieille_machine?s[]=query" class="wikilink1" title="tutoriel:comment_creer_un_terminal_x_ou_recycler_une_vieille_machine">tutoriel:comment_creer_un_terminal_x_ou_recycler_une_vieille_machine</a>: 13 Occurrences trouvées</dt><dd>z gdm (ubuntu) tapez sudo /etc/init.d/gdm stop
+X -<strong class="search_hit">query</strong> 192.168.1.2
+&lt;/code&gt;
+:)
+Si vous désirez, sur la mê... ans une console (tjs sur le vieil ordi)
+&lt;code&gt;
+X -<strong class="search_hit">query</strong> 192.168.1.2 :1
+&lt;/code&gt;
+Un écran de login devrait ... ure.
+&lt;note tip&gt;Rajouter "-once" à la commande "X -<strong class="search_hit">query</strong> 192.168.1.2 :1" permet de quitter la session et r... d'une ubuntu/kubuntu\\
+Testez d'abord que le //X -<strong class="search_hit">query</strong> ...// fonctionne, dans une console (CTRL-ALT-F1) </dd>
+ <dt><a href="/bind9?s[]=query" class="wikilink1" title="bind9">bind9</a>: 12 Occurrences trouvées</dt>
+ <dd> printcmd
+;; Got answer:
+;; -&gt;&gt;HEADER&lt;&lt;- opcode: <strong class="search_hit">QUERY</strong>, status: NOERROR, id: 13427
+;; flags: qr aa rd ra; <strong class="search_hit">QUERY</strong>: 1, ANSWER: 1, AUTHORITY: 1, ADDITIONAL: 1
+
+[...]
+
+;; <strong class="search_hit">Query</strong> time: 1 msec
+;; SERVER: 127.0.0.1#53(127.0.0.1)
+;... ne énorme diminution du temps mis par la requête (<strong class="search_hit">Query</strong> time) , entre la première et la deuxième requête.</dd>
+ </dl>
+ """
+ response = mock.Mock(text=html)
+ results = doku.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 4)
+ self.assertEqual(results[0]['title'], 'xvnc')
+# FIXME (placeholder URL) self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
+# FIXME (placeholder content) self.assertEqual(results[0]['content'], 'This should be the content.')