summary | refs | log | tree | commit | diff
path: root/searx/engines/www1x.py
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarIT.de>, 2019-12-24 17:45:13 +0100
committer: GitHub <noreply@github.com>, 2019-12-24 17:45:13 +0100
commit: ecb054a7a058a1f62a536e5cac88eed8926b107d (patch)
tree: 925594876f18580732d2c8a438ff8f3bea8d9092 /searx/engines/www1x.py
parent: cc8d4b958e274eb9e154db5c319d2e50da561d61 (diff)
parent: 5a0a66e9bc34af2b6404231efc7cf02f389bdfcb (diff)
download: searxng-ecb054a7a058a1f62a536e5cac88eed8926b107d.tar.gz
searxng-ecb054a7a058a1f62a536e5cac88eed8926b107d.zip
Merge branch 'master' into patch-1
Diffstat (limited to 'searx/engines/www1x.py')
-rw-r--r-- searx/engines/www1x.py | 35
1 files changed, 6 insertions, 29 deletions
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
index 508803240..f1154b16d 100644
--- a/searx/engines/www1x.py
+++ b/searx/engines/www1x.py
@@ -11,8 +11,8 @@
"""
from lxml import html
-import re
from searx.url_utils import urlencode, urljoin
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['images']
@@ -34,41 +34,18 @@ def request(query, params):
def response(resp):
results = []
- # get links from result-text
- regex = re.compile('(</a>|<a)')
- results_parts = re.split(regex, resp.text)
-
- cur_element = ''
-
- # iterate over link parts
- for result_part in results_parts:
+ dom = html.fromstring(resp.text)
+ for res in dom.xpath('//div[@class="List-item MainListing"]'):
# processed start and end of link
- if result_part == '<a':
- cur_element = result_part
- continue
- elif result_part != '</a>':
- cur_element += result_part
- continue
-
- cur_element += result_part
-
- # fix xml-error
- cur_element = cur_element.replace('"></a>', '"/></a>')
-
- dom = html.fromstring(cur_element)
- link = dom.xpath('//a')[0]
+ link = res.xpath('//a')[0]
url = urljoin(base_url, link.attrib.get('href'))
- title = link.attrib.get('title', '')
+ title = extract_text(link)
- thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
+ thumbnail_src = urljoin(base_url, res.xpath('.//img')[0].attrib['src'])
# TODO: get image with higher resolution
img_src = thumbnail_src
- # check if url is showing to a photo
- if '/photo/' not in url:
- continue
-
# append result
results.append({'url': url,
'title': title,