summaryrefslogtreecommitdiff
path: root/searx/engines/jisho.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines/jisho.py')
-rw-r--r--searx/engines/jisho.py136
1 files changed, 136 insertions, 0 deletions
diff --git a/searx/engines/jisho.py b/searx/engines/jisho.py
new file mode 100644
index 000000000..87bbe983d
--- /dev/null
+++ b/searx/engines/jisho.py
@@ -0,0 +1,136 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Jisho (the Japanese-English dictionary)
+"""
+
+from urllib.parse import urlencode, urljoin
+
+# about
+about = {
+ "website": 'https://jisho.org',
+ "wikidata_id": 'Q24568389',
+ "official_api_documentation": "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api",
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": 'JSON',
+ "language": 'ja',
+}
+
+categories = ['dictionaries']
+paging = False
+
+URL = 'https://jisho.org'
+BASE_URL = 'https://jisho.org/word/'
+SEARCH_URL = URL + '/api/v1/search/words?{query}'
+
+
+def request(query, params):
+ query = urlencode({'keyword': query})
+ params['url'] = SEARCH_URL.format(query=query)
+ logger.debug(f"query_url --> {params['url']}")
+ return params
+
+
+def response(resp):
+ results = []
+ first_result = True
+
+ search_results = resp.json()
+
+ for page in search_results.get('data', []):
+ # Entries that are purely from Wikipedia are excluded.
+ parts_of_speech = page.get('senses') and page['senses'][0].get('parts_of_speech')
+ if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition':
+ pass
+
+ # Process alternative forms
+ alt_forms = []
+ for title_raw in page['japanese']:
+ if 'word' not in title_raw:
+ alt_forms.append(title_raw['reading'])
+ else:
+ title = title_raw['word']
+ if 'reading' in title_raw:
+ title += ' (' + title_raw['reading'] + ')'
+ alt_forms.append(title)
+
+ #
+ result_url = urljoin(BASE_URL, page['slug'])
+ definitions = get_definitions(page)
+
+ # For results, we'll return the URL, all alternative forms (as title),
+ # and all definitions (as description) truncated to 300 characters.
+ content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
+ results.append({
+ 'url': result_url,
+ 'title': ", ".join(alt_forms),
+ 'content': content[:300] + (content[300:] and '...')
+ })
+
+ # Like Wordnik, we'll return the first result in an infobox too.
+ if first_result:
+ first_result = False
+ results.append(get_infobox(alt_forms, result_url, definitions))
+
+ return results
+
+
+def get_definitions(page):
+ # Process definitions
+ definitions = []
+ for defn_raw in page['senses']:
+ extra = []
+ # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
+ if defn_raw.get('tags'):
+ if defn_raw.get('info'):
+ # "usually written as kana: <kana>"
+ extra.append(defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ')
+ else:
+ # abbreviation, archaism, etc.
+ extra.append(', '.join(defn_raw['tags']) + '. ')
+ elif defn_raw.get('info'):
+ # inconsistent
+ extra.append(', '.join(defn_raw['info']).capitalize() + '. ')
+ if defn_raw.get('restrictions'):
+ extra.append('Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. ')
+ definitions.append((
+ ', '.join(defn_raw['parts_of_speech']),
+ '; '.join(defn_raw['english_definitions']),
+ ''.join(extra)[:-1],
+ ))
+ return definitions
+
+
+def get_infobox(alt_forms, result_url, definitions):
+ infobox_content = []
+ # title & alt_forms
+ infobox_title = alt_forms[0]
+ if len(alt_forms) > 1:
+ infobox_content.append(f'<p><i>Other forms:</i> {", ".join(alt_forms[1:])}</p>')
+
+ # definitions
+ infobox_content.append('''
+ <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
+ and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
+ by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>
+ <ul>
+ ''')
+ for pos, engdef, extra in definitions:
+ if pos == 'Wikipedia definition':
+ infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
+ pos = f'<i>{pos}</i>: ' if pos else ''
+ extra = f' ({extra})' if extra else ''
+ infobox_content.append(f'<li>{pos}{engdef}{extra}</li>')
+ infobox_content.append('</ul>')
+
+ #
+ return {
+ 'infobox': infobox_title,
+ 'content': ''.join(infobox_content),
+ 'urls': [
+ {
+ 'title': 'Jisho.org',
+ 'url': result_url,
+ }
+ ]
+ }