summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Pointhuber <thomas.pointhuber@gmx.at> 2015-02-01 11:27:28 +0100
committer Thomas Pointhuber <thomas.pointhuber@gmx.at> 2015-02-01 11:27:28 +0100
commit 6042f2bc53d2b6f0d03e6b882db83377b27029be (patch)
tree b9a9a61f8b2c95d06f2120d967c5b6059a71687d
parent 78828efdb0ea28efa057dbd82b240af1112f085a (diff)
download searxng-6042f2bc53d2b6f0d03e6b882db83377b27029be.tar.gz
searxng-6042f2bc53d2b6f0d03e6b882db83377b27029be.zip
[enh] add 1x.com engine
* Deactivated by default, because of the large number of results
-rw-r--r-- searx/engines/www1x.py 81
-rw-r--r-- searx/settings.yml 5
2 files changed, 86 insertions, 0 deletions
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
new file mode 100644
index 000000000..d10c4ca37
--- /dev/null
+++ b/searx/engines/www1x.py
@@ -0,0 +1,81 @@
+## 1x (Images)
+#
+# @website http://1x.com/
+# @provide-api no
+#
+# @using-api no
+# @results HTML
+# @stable no (HTML can change)
+# @parse url, title, thumbnail, img_src, content
+
+
+from urllib import urlencode
+from urlparse import urljoin
+from lxml import html
+import string
+import re
+
+# engine dependent config
+categories = ['images']
+paging = False
+
+# search-url
+base_url = 'http://1x.com'
+search_url = base_url+'/backend/search.php?{query}'
+
+
+# do search-request
+def request(query, params):
+ params['url'] = search_url.format(query=urlencode({'q': query}))
+
+ return params
+
+
+# get response from search-request
+def response(resp):
+ results = []
+
+ # get links from result-text
+ results_parts = re.split(r'(</a>|<a)', resp.text)
+
+ cur_element = ''
+
+ # iterate over link parts
+ for result_part in results_parts:
+ # processed start and end of link
+ if result_part == '<a':
+ cur_element = result_part
+ continue
+ elif result_part != '</a>':
+ cur_element += result_part
+ continue
+
+ cur_element += result_part
+
+ # fix xml-error
+ cur_element = string.replace(cur_element, '"></a>', '"/></a>')
+
+ dom = html.fromstring(cur_element)
+ link = dom.xpath('//a')[0]
+
+ url = urljoin(base_url, link.attrib.get('href'))
+ title = link.attrib.get('title', '')
+
+ thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
+ # TODO: get image with higher resolution
+ img_src = thumbnail_src
+
+ # check if url is showing to a photo
+ if '/photo/' not in url:
+ continue
+
+ # append result
+ results.append({'url': url,
+ 'title': title,
+ 'img_src': img_src,
+ 'content': '',
+ 'thumbnail_src': thumbnail_src,
+ 'template': 'images.html'})
+
+ # return results
+ return results
diff --git a/searx/settings.yml b/searx/settings.yml
index ebae8af62..f4fca8985 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -83,6 +83,11 @@ engines:
engine : www500px
shortcut : px
+ - name : 1x
+ engine : www1x
+ shortcut : 1x
+ disabled : True
+
- name : flickr
categories : images
shortcut : fl