summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kirill Isakov <ukwt@ya.ru> 2016-03-25 00:38:48 +0600
committer: Kirill Isakov <ukwt@ya.ru> 2016-03-25 00:38:48 +0600
commit: 8b7dc2acb9f670ba65e4b98eb310f04e4c212bd8 (patch)
tree: 7e084da807052c86423804242ef0b0ecd3687028
parent: d748b8419ad1ef875f34783bbbcf773ebc4cfb5e (diff)
download: searxng-8b7dc2acb9f670ba65e4b98eb310f04e4c212bd8.tar.gz
download: searxng-8b7dc2acb9f670ba65e4b98eb310f04e4c212bd8.zip
Remove content field from ArchWiki results; reformat code in archlinux.py
The content field in Arch Wiki search results is of no real use: more often than not it contains no usable information and includes too many markup tags, which makes the text unreadable. It is safe to remove it.
-rw-r--r-- searx/engines/archlinux.py 17
-rw-r--r-- tests/unit/engines/test_archlinux.py 17
2 files changed, 13 insertions, 21 deletions
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index f12c4328a..84e0d0fba 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -3,12 +3,12 @@
"""
Arch Linux Wiki
- @website https://wiki.archlinux.org
- @provide-api no (Mediawiki provides API, but Arch Wiki blocks access to it
- @using-api no
- @results HTML
- @stable no (HTML can change)
- @parse url, title, content
+ @website https://wiki.archlinux.org
+ @provide-api no (Mediawiki provides API, but Arch Wiki blocks access to it
+ @using-api no
+ @results HTML
+ @stable no (HTML can change)
+ @parse url, title
"""
from urlparse import urljoin
@@ -26,7 +26,6 @@ base_url = 'https://wiki.archlinux.org'
# xpath queries
xpath_results = '//ul[@class="mw-search-results"]/li'
xpath_link = './/div[@class="mw-search-result-heading"]/a'
-xpath_content = './/div[@class="searchresult"]'
# cut 'en' from 'en_US', 'de' from 'de_CH', and so on
@@ -135,10 +134,8 @@ def response(resp):
link = result.xpath(xpath_link)[0]
href = urljoin(base_url, link.attrib.get('href'))
title = escape(extract_text(link))
- content = escape(extract_text(result.xpath(xpath_content)))
results.append({'url': href,
- 'title': title,
- 'content': content})
+ 'title': title})
return results
diff --git a/tests/unit/engines/test_archlinux.py b/tests/unit/engines/test_archlinux.py
index 66959857a..d0009d63a 100644
--- a/tests/unit/engines/test_archlinux.py
+++ b/tests/unit/engines/test_archlinux.py
@@ -18,7 +18,7 @@ class TestArchLinuxEngine(SearxTestCase):
def test_request(self):
query = 'test_query'
dic = defaultdict(dict)
- dic['pageno'] = 0
+ dic['pageno'] = 1
dic['language'] = 'en_US'
params = archlinux.request(query, dic)
self.assertTrue('url' in params)
@@ -31,10 +31,8 @@ class TestArchLinuxEngine(SearxTestCase):
self.assertTrue(domain in params['url'])
def test_response(self):
- response = mock.Mock(text='<html></html>')
- response.search_params = {
- 'language': 'en_US'
- }
+ response = mock.Mock(text='<html></html>',
+ search_params={'language': 'en_US'})
self.assertEqual(archlinux.response(response), [])
html = """
@@ -79,18 +77,15 @@ class TestArchLinuxEngine(SearxTestCase):
expected = [
{
'title': 'ATI',
- 'url': 'https://wiki.archlinux.org/index.php/ATI',
- 'content': 'Lorem ipsum dolor sit amet'
+ 'url': 'https://wiki.archlinux.org/index.php/ATI'
},
{
'title': 'Frequently asked questions',
- 'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions',
- 'content': 'CPUs with AMDs instruction set "AMD64"'
+ 'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions'
},
{
'title': 'CPU frequency scaling',
- 'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling',
- 'content': 'ondemand for AMD and older Intel CPU'
+ 'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling'
}
]