diff options
author | Alexandre Flament <alex@al-f.net> | 2020-12-07 20:24:47 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-07 20:24:47 +0100 |
commit | 62073c0e1dcb41a9b6f01fa5321baaca291adbaa (patch) | |
tree | 3489796cdf19f732b84585b10dd00296be036d59 /searx/engines/www1x.py | |
parent | 923bc02c17a657fbcd332c7b6f2655176a34fd43 (diff) | |
parent | 4ec810749b45c02462a9be98090374b759db8151 (diff) | |
download | searxng-62073c0e1dcb41a9b6f01fa5321baaca291adbaa.tar.gz searxng-62073c0e1dcb41a9b6f01fa5321baaca291adbaa.zip |
Merge pull request #2361 from dalf/fix-1x
[fix] 1x engine
Diffstat (limited to 'searx/engines/www1x.py')
-rw-r--r-- | searx/engines/www1x.py | 24 |
1 file changed, 10 insertions, 14 deletions
```diff
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
index 8d691c852..b8f111a50 100644
--- a/searx/engines/www1x.py
+++ b/searx/engines/www1x.py
@@ -7,12 +7,12 @@
  @using-api no
  @results HTML
  @stable no (HTML can change)
- @parse url, title, thumbnail, img_src, content
+ @parse url, title, thumbnail
 """
 
-from lxml import html
+from lxml import html, etree
 from urllib.parse import urlencode, urljoin
-from searx.utils import extract_text
+from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 
 # engine dependent config
 categories = ['images']
@@ -21,6 +21,7 @@ paging = False
 # search-url
 base_url = 'https://1x.com'
 search_url = base_url + '/backend/search.php?{query}'
+gallery_url = 'https://gallery.1x.com/'
 
 
 # do search-request
@@ -33,23 +34,18 @@ def request(query, params):
 # get response from search-request
 def response(resp):
     results = []
-
-    dom = html.fromstring(resp.text)
-    for res in dom.xpath('//div[@class="List-item MainListing"]'):
-        # processed start and end of link
-        link = res.xpath('//a')[0]
-
+    xmldom = etree.fromstring(resp.content)
+    xmlsearchresult = eval_xpath_getindex(xmldom, '//searchresult', 0)
+    dom = html.fragment_fromstring(xmlsearchresult.text, create_parent='div')
+    for link in eval_xpath_list(dom, '/div/table/tr/td/div[2]//a'):
         url = urljoin(base_url, link.attrib.get('href'))
         title = extract_text(link)
-
-        thumbnail_src = urljoin(base_url, res.xpath('.//img')[0].attrib['src'])
-        # TODO: get image with higher resolution
-        img_src = thumbnail_src
+        thumbnail_src = urljoin(gallery_url, eval_xpath_getindex(link, './/img', 0).attrib['src'])
 
         # append result
         results.append({'url': url,
                         'title': title,
-                        'img_src': img_src,
+                        'img_src': thumbnail_src,
                         'content': '',
                         'thumbnail_src': thumbnail_src,
                         'template': 'images.html'})
```