Merge pull request #2735 from plague-doctor/wordnik

Add new engine: Wordnik.com
author: Noémi Ványi <kvch@users.noreply.github.com> 2021-04-08 19:48:13 +0200
committer: GitHub <noreply@github.com> 2021-04-08 19:48:13 +0200
commit: cc359345a879297c1db116bb178f457d967c0744 (patch)
tree: 867547f4dce13e7974231d6b924f6cf188744955 /searx
parent: a9a51ceb4887ef3d012cb5645d2cd6e6432c0cd2 (diff)
parent: 7035bed4ee2c8aa40c80b41e11cc538583e2b2de (diff)
download: searxng-cc359345a879297c1db116bb178f457d967c0744.tar.gz
searxng-cc359345a879297c1db116bb178f457d967c0744.zip
2 files changed, 85 insertions, 0 deletions
diff --git a/searx/engines/wordnik.py b/searx/engines/wordnik.py
new file mode 100644
index 000000000..3abe9efa2
--- /dev/null
+++ b/searx/engines/wordnik.py
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Wordnik (general)
+
+"""
+
+from html import escape
+from urllib.parse import quote
+
+from lxml.html import fromstring
+from searx import logger
+from searx.utils import extract_text
+from searx.raise_for_httperror import raise_for_httperror
+
+logger = logger.getChild('Wordnik engine')
+
+# about
+about = {
+    "website": 'https://www.wordnik.com',
+    "wikidata_id": 'Q8034401',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
+categories = ['general']
+paging = False
+
+URL = 'https://www.wordnik.com'
+SEARCH_URL = URL + '/words/{query}'
+
+
+def request(query, params):
+    """Build the Wordnik lookup URL for *query* and store it in *params*.
+
+    The query is inserted into the path of ``SEARCH_URL``, so it is
+    percent-encoded first; otherwise characters such as ``/``, ``?`` or ``#``
+    would be interpreted as URL delimiters and cut the looked-up word short.
+
+    :param query: the search term to look up
+    :param params: request parameters; its ``'url'`` key is set here
+    :return: the same *params* object, with ``'url'`` filled in
+    """
+    # safe='' so that '/' is encoded too: the query is a single path segment.
+    params['url'] = SEARCH_URL.format(query=quote(query, safe=''))
+    logger.debug("query_url --> %s", params['url'])
+    return params
+
+
+def _definitions_to_html(definitions):
+    """Render ``[(source, [(abbr, text), ...]), ...]`` as an HTML fragment."""
+    parts = []
+    for src_text, src_defs in definitions:
+        # Every string here was scraped from the remote page, so it is escaped
+        # before being placed inside markup.
+        parts.append(f"<small>{escape(src_text)}</small>")
+        parts.append("<ul>")
+        for def_abbr, def_text in src_defs:
+            label = f"{escape(def_abbr)}: " if def_abbr else ""
+            parts.append(f"<li><i>{label}</i> {escape(def_text)}</li>")
+        parts.append("</ul>")
+    return "".join(parts)
+
+
+def response(resp):
+    """Turn a Wordnik word page into a single infobox result.
+
+    ``raise_for_httperror(resp)`` is called before parsing (presumably it
+    raises on an HTTP error status -- see its definition).  Definitions are
+    read from the ``#define`` section: each ``h3.source`` heading names a
+    dictionary, and the ``<li>`` items of the first ``<ul>`` following it are
+    that dictionary's definitions, optionally prefixed by an ``<abbr>``
+    (e.g. the part of speech).
+
+    :param resp: HTTP response whose ``text`` attribute holds the page HTML
+    :return: an empty list when no definitions are found, otherwise a list
+        holding one dict with the keys ``'infobox'`` (the headword) and
+        ``'content'`` (an HTML fragment listing the definitions by source)
+    """
+    results = []
+
+    raise_for_httperror(resp)
+    dom = fromstring(resp.text)
+    word = extract_text(dom.xpath('//*[@id="headword"]/text()'))
+
+    definitions = []
+    for src in dom.xpath('//*[@id="define"]//h3[@class="source"]'):
+        src_text = extract_text(src).strip()
+        # Headings read "from <dictionary name>"; keep only the name.
+        if src_text.startswith('from '):
+            src_text = src_text[len('from '):]
+
+        src_defs = []
+        for def_item in src.xpath('following-sibling::ul[1]/li'):
+            def_abbr = extract_text(def_item.xpath('.//abbr')).strip()
+            def_text = extract_text(def_item).strip()
+            if def_abbr:
+                # The item text starts with the abbreviation; drop it so it is
+                # not shown twice.
+                def_text = def_text[len(def_abbr):].strip()
+            src_defs.append((def_abbr, def_text))
+
+        definitions.append((src_text, src_defs))
+
+    if not definitions:
+        return results
+
+    results.append({
+        'infobox': word,
+        'content': _definitions_to_html(definitions),
+    })
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 3428b2ec5..c289cde5c 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1271,6 +1271,14 @@ engines:
     categories: videos
     disabled : True
 
+  - name: wordnik
+    engine: wordnik
+    shortcut: def
+    base_url: https://www.wordnik.com/
+    categories: general
+    timeout: 5.0
+    disabled: True
+
# Doku engine lets you access any DokuWiki instance:
# a public one or a private/corporate one.
 #  - name : ubuntuwiki
author	Noémi Ványi <kvch@users.noreply.github.com>	2021-04-08 19:48:13 +0200
committer	GitHub <noreply@github.com>	2021-04-08 19:48:13 +0200
commit	cc359345a879297c1db116bb178f457d967c0744 (patch)
tree	867547f4dce13e7974231d6b924f6cf188744955 /searx
parent	a9a51ceb4887ef3d012cb5645d2cd6e6432c0cd2 (diff)
parent	7035bed4ee2c8aa40c80b41e11cc538583e2b2de (diff)
download	searxng-cc359345a879297c1db116bb178f457d967c0744.tar.gz searxng-cc359345a879297c1db116bb178f457d967c0744.zip