summaryrefslogtreecommitdiff
path: root/searx
diff options
context:
space:
mode:
authorAlexandre Flament <alex@al-f.net>2021-04-19 09:35:12 +0200
committerAlexandre Flament <alex@al-f.net>2021-04-19 09:35:12 +0200
commit48720e20a8acc07d8614bfbf503c254acd604836 (patch)
tree23b141503745e4582243648842e45e4e36079edc /searx
parent01cefffbf6efa8a027e0e7d720970fffadb6337a (diff)
parent8362257b9ad446fff270559fd15eb20e6a7b45b3 (diff)
downloadsearxng-48720e20a8acc07d8614bfbf503c254acd604836.tar.gz
searxng-48720e20a8acc07d8614bfbf503c254acd604836.zip
Merge remote-tracking branch 'searx/master'
Diffstat (limited to 'searx')
-rw-r--r--searx/engines/bandcamp.py73
-rw-r--r--searx/engines/google.py2
-rw-r--r--searx/engines/sjp.py92
-rw-r--r--searx/settings.yml13
-rw-r--r--searx/static/themes/oscar/img/icons/bandcamp.pngbin0 -> 919 bytes
-rw-r--r--searx/templates/oscar/result_templates/default.html4
6 files changed, 181 insertions, 3 deletions
diff --git a/searx/engines/bandcamp.py b/searx/engines/bandcamp.py
new file mode 100644
index 000000000..dafb3ee16
--- /dev/null
+++ b/searx/engines/bandcamp.py
@@ -0,0 +1,73 @@
+"""
+Bandcamp (Music)
+
+@website https://bandcamp.com/
+@provide-api no
+@results HTML
+@parse url, title, content, publishedDate, embedded, thumbnail
+"""
+
+from urllib.parse import urlencode, urlparse, parse_qs
+from dateutil.parser import parse as dateparse
+from lxml import html
+from searx.utils import extract_text
+
+# Category list for this engine.
+categories = ['music']
+# request() puts params['pageno'] into the search URL.
+paging = True
+
+base_url = "https://bandcamp.com/"
+# Appended to base_url by request(): {query} receives an already URL-encoded
+# "q=..." pair, {page} the page number.
+search_string = 'search?{query}&page={page}'
+# Player markup stored under a result's "embedded" key by response();
+# {type} is "album" or "track", {result_id} the item id of the result.
+embedded_url = '''<iframe width="100%" height="166"
+ scrolling="no" frameborder="no"
+ data-src="https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=ffffff/linkcol=0687f5/tracklist=false/artwork=small/transparent=true/"
+></iframe>'''
+
+
+def request(query, params):
+    '''Fill in the search URL for a Bandcamp query.
+
+    query  : search terms; URL-encoded here as the "q" parameter
+    params : request parameters dict; 'pageno' is read and 'url' is written
+
+    Returns the same params dict.
+    '''
+    encoded_query = urlencode({'q': query})
+    page_number = params['pageno']
+    params['url'] = base_url + search_string.format(query=encoded_query, page=page_number)
+    return params
+
+
+def response(resp):
+    '''post-response callback
+
+    resp: response object; its text attribute is parsed as HTML
+
+    Returns a list of result dicts with the keys url, title and content,
+    plus publishedDate, thumbnail and embedded when the page provides them.
+    '''
+    results = []
+    tree = html.fromstring(resp.text)
+    search_results = tree.xpath('//li[contains(@class, "searchresult")]')
+    for result in search_results:
+        links = result.xpath('.//div[@class="itemurl"]/a')
+        if not links:
+            # without a link there is no URL to offer, skip the entry
+            continue
+        link = links[0]
+        title = result.xpath('.//div[@class="heading"]/a/text()')
+        content = result.xpath('.//div[@class="subhead"]/text()')
+        new_result = {
+            "url": extract_text(link),
+            "title": extract_text(title),
+            "content": extract_text(content),
+        }
+
+        # The leading "." keeps the lookup inside this result; a bare "//"
+        # searches the whole document and would hand every result the
+        # release date of the first one on the page.
+        released = result.xpath('.//div[@class="released"]/text()')
+        if released:
+            try:
+                new_result["publishedDate"] = dateparse(released[0].replace("released ", ""))
+            except (ValueError, OverflowError):
+                # unparsable date text: keep the result, drop the date
+                pass
+
+        thumbnail = result.xpath('.//div[@class="art"]/img/@src')
+        if thumbnail:
+            new_result['thumbnail'] = thumbnail[0]
+
+        # The item id is carried in the "search_item_id" query parameter of
+        # the result link; without it no player can be embedded.
+        item_ids = parse_qs(urlparse(link.get('href', '')).query).get("search_item_id")
+        if item_ids:
+            for item_type in ('album', 'track'):
+                if item_type in result.classes:
+                    new_result["embedded"] = embedded_url.format(type=item_type, result_id=item_ids[0])
+                    break
+        results.append(new_result)
+    return results
diff --git a/searx/engines/google.py b/searx/engines/google.py
index dcb65df57..a4aee5c20 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -242,7 +242,7 @@ def response(resp):
if answer:
results.append({'answer': ' '.join(answer)})
else:
- logger.debug("did not found 'answer'")
+ logger.debug("did not find 'answer'")
# results --> number_of_results
try:
diff --git a/searx/engines/sjp.py b/searx/engines/sjp.py
new file mode 100644
index 000000000..eff7b7092
--- /dev/null
+++ b/searx/engines/sjp.py
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Słownik Języka Polskiego (general)
+
+"""
+
+from lxml.html import fromstring
+from searx import logger
+from searx.utils import extract_text
+from searx.raise_for_httperror import raise_for_httperror
+
+logger = logger.getChild('sjp engine')
+
+# Engine metadata.
+about = dict(
+    website='https://sjp.pwn.pl',
+    wikidata_id='Q55117369',
+    official_api_documentation=None,
+    use_official_api=False,
+    require_api_key=False,
+    results='HTML',
+)
+
+categories = ['general']
+paging = False
+
+URL = 'https://sjp.pwn.pl'
+# {query} is filled in by request().
+SEARCH_URL = f'{URL}/szukaj/{{query}}.html'
+
+# Element holding the looked-up word.
+word_xpath = '//div[@class="query"]'
+# One XPath per dictionary section of the result page; the class names differ
+# only in their prefix.
+dict_xpath = [
+    f'//div[@class="wyniki {prefix}-wyniki {prefix}-anchor"]'
+    for prefix in ('sjp-so', 'sjp', 'sjp-doroszewski')
+]
+
+
+def request(query, params):
+    '''Set the URL of the dictionary page for a query.
+
+    query  : word to look up; percent-encoded because it becomes a path
+             segment of the URL
+    params : request parameters dict; 'url' is written
+
+    Returns the same params dict.
+    '''
+    # Local import: quote is not among the names imported at module level.
+    from urllib.parse import quote
+
+    # safe='' also encodes "/", so "/", "?" and "#" in the query cannot
+    # change the structure of the URL.
+    params['url'] = SEARCH_URL.format(query=quote(query, safe=''))
+    logger.debug("query_url --> %s", params['url'])
+    return params
+
+
+def _render_infobox(definitions):
+    '''Render the collected definitions as the HTML of the infobox content.
+
+    definitions: list of (source_title, entries) pairs; every entry is a
+    (text, sub_definitions) pair. All strings are inserted as they are, so
+    the caller has to pass HTML-safe text.
+    '''
+    parts = []
+    for src_text, src_defs in definitions:
+        parts.append(f"<div><small>{src_text}</small>")
+        parts.append("<ul>")
+        for def_text, sub_defs in src_defs:
+            parts.append(f"<li>{def_text}</li>")
+            if sub_defs:
+                parts.append("<ol>")
+                parts.extend(f"<li>{sub_def_text}</li>" for sub_def_text in sub_defs)
+                parts.append("</ol>")
+        parts.append("</ul></div>")
+    return ''.join(parts)
+
+
+def response(resp):
+    '''Build an infobox result from a search result page.
+
+    resp: response object; it is passed to raise_for_httperror() and its
+          text attribute is parsed as HTML
+
+    Returns a list with a single dict (keys 'infobox' and 'content'), or an
+    empty list when none of the sections listed in dict_xpath is found.
+    '''
+    # Local import: escape is not among the names imported at module level.
+    from html import escape
+
+    results = []
+
+    raise_for_httperror(resp)
+    dom = fromstring(resp.text)
+    word = extract_text(dom.xpath(word_xpath))
+
+    definitions = []
+
+    # Text scraped from the page is escaped before it is placed into the
+    # hand-built HTML below.
+    for dict_src in dict_xpath:
+        for src in dom.xpath(dict_src):
+            src_text = escape(extract_text(src.xpath('.//span[@class="entry-head-title"]/text()')).strip())
+
+            src_defs = []
+            for def_item in src.xpath('.//div[contains(@class, "ribbon-element")]'):
+                sub_items = def_item.xpath('./div[@class="znacz"]')
+                if sub_items:
+                    # numbered meanings: drop the leading "1. " style counter
+                    sub_defs = [
+                        escape(extract_text(sub_item).lstrip('0123456789. '))
+                        for sub_item in sub_items
+                    ]
+                    src_defs.append((escape(word), sub_defs))
+                else:
+                    def_text = escape(extract_text(def_item).strip())
+                    def_link = def_item.xpath('./span/a/@href')
+                    # an entry may come without a link, so check before indexing
+                    if def_link and 'doroszewski' in def_link[0]:
+                        def_text = f"<a href='{escape(def_link[0])}'>{def_text}</a>"
+                    src_defs.append((def_text, ''))
+
+            definitions.append((src_text, src_defs))
+
+    if not definitions:
+        return results
+
+    results.append({
+        'infobox': word,
+        'content': _render_infobox(definitions),
+    })
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 60e6dd5aa..1940739bc 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -196,6 +196,11 @@ engines:
# engine : base
# shortcut : bs
+ - name: bandcamp
+ engine: bandcamp
+ shortcut: bc
+ categories: music
+
- name : wikipedia
engine : wikipedia
shortcut : wp
@@ -1280,6 +1285,14 @@ engines:
timeout: 5.0
disabled: True
+ - name: słownik języka polskiego
+ engine: sjp
+ shortcut: sjp
+ base_url: https://sjp.pwn.pl/
+ categories: general
+ timeout: 5.0
+ disabled: True
+
# Doku engine lets you access to any Doku wiki instance:
# A public one or a private/corporate one.
# - name : ubuntuwiki
diff --git a/searx/static/themes/oscar/img/icons/bandcamp.png b/searx/static/themes/oscar/img/icons/bandcamp.png
new file mode 100644
index 000000000..2de405afe
--- /dev/null
+++ b/searx/static/themes/oscar/img/icons/bandcamp.png
Binary files differ
diff --git a/searx/templates/oscar/result_templates/default.html b/searx/templates/oscar/result_templates/default.html
index d743f928e..53cfee5cb 100644
--- a/searx/templates/oscar/result_templates/default.html
+++ b/searx/templates/oscar/result_templates/default.html
@@ -13,10 +13,10 @@
</div>
{%- endif -%}
-{%- if result.img_src -%}
+{%- if result.img_src or result.thumbnail -%}
<div class="container-fluid">{{- "" -}}
<div class="row">{{- "" -}}
- <img src="{{ image_proxify(result.img_src) }}" title="{{ result.title|striptags }}" style="width: auto; max-height: 60px; min-height: 60px;" class="col-xs-2 col-sm-4 col-md-4 result-content">
+ <img src="{{ image_proxify(result.img_src or result.thumbnail) }}" title="{{ result.title|striptags }}" style="width: auto; max-height: 60px; min-height: 60px;" class="col-xs-2 col-sm-4 col-md-4 result-content">
{%- if result.content %}<p class="result-content col-xs-8 col-sm-8 col-md-8">{{ result.content|safe }}</p>{% endif -%}
</div>{{- "" -}}
</div>