diff options
Diffstat (limited to 'searx/engines/jisho.py')
-rw-r--r-- | searx/engines/jisho.py | 136 |
1 files changed, 136 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Jisho (the Japanese-English dictionary)
"""

from html import escape
from urllib.parse import urlencode, urljoin

# about
about = {
    "website": 'https://jisho.org',
    "wikidata_id": 'Q24568389',
    "official_api_documentation": "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api",
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
    "language": 'ja',
}

categories = ['dictionaries']
paging = False

URL = 'https://jisho.org'
BASE_URL = 'https://jisho.org/word/'
SEARCH_URL = URL + '/api/v1/search/words?{query}'

# Maximum number of characters of the joined definitions shown in a result.
_MAX_CONTENT_LENGTH = 300


def request(query, params):
    """Store the Jisho word-search URL for *query* in ``params['url']``.

    :param query: the search terms; URL-encoded as the ``keyword`` parameter.
    :param params: request parameter dict, updated in place.
    :returns: the same *params* dict.
    """
    params['url'] = SEARCH_URL.format(query=urlencode({'keyword': query}))
    # NOTE(review): ``logger`` is not defined in this module; presumably it is
    # injected by the engine loader -- confirm.
    logger.debug("query_url --> %s", params['url'])
    return params


def response(resp):
    """Convert a Jisho API response into a list of result dicts.

    Every dictionary entry yields one ``url`` / ``title`` / ``content``
    result; the first entry kept is additionally returned as an infobox.

    :param resp: response object whose ``json()`` method returns the decoded
        API payload (entries are read from its ``data`` list).
    :returns: list of result dicts, empty when there is no usable entry.
    """
    results = []
    first_result = True

    for page in resp.json().get('data', []):
        # Entries that are purely from Wikipedia are excluded.
        senses = page.get('senses')
        parts_of_speech = senses and senses[0].get('parts_of_speech')
        if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition':
            continue

        # Process alternative forms: "word (reading)", or whichever of the
        # two is available.
        alt_forms = []
        for form in page.get('japanese', []):
            word = form.get('word')
            reading = form.get('reading')
            if word and reading:
                alt_forms.append(f'{word} ({reading})')
            elif word or reading:
                alt_forms.append(word or reading)
        if not alt_forms:
            # Without any written form there is nothing to use as a title.
            continue

        result_url = urljoin(BASE_URL, page['slug'])
        definitions = get_definitions(page)

        # For results, we'll return the URL, all alternative forms (as title),
        # and all definitions (as description) truncated to 300 characters.
        content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
        if len(content) > _MAX_CONTENT_LENGTH:
            content = content[:_MAX_CONTENT_LENGTH] + '...'
        results.append({
            'url': result_url,
            'title': ", ".join(alt_forms),
            'content': content,
        })

        # Like Wordnik, we'll return the first result in an infobox too.
        if first_result:
            first_result = False
            results.append(get_infobox(alt_forms, result_url, definitions))

    return results


def get_definitions(page):
    """Extract the definitions of one dictionary entry.

    :param page: one entry of the API payload; its ``senses`` list is read.
    :returns: list of ``(parts_of_speech, english_definitions, extra)`` string
        tuples, one per sense.  *extra* is a (possibly empty) note built from
        the sense's ``tags``, ``info`` and ``restrictions`` fields.
    """
    definitions = []
    for sense in page.get('senses') or []:
        # Extra data. Since they're not documented, this implementation is
        # based solely on the author's assumptions.
        notes = []
        tags = sense.get('tags')
        info = sense.get('info')
        if tags and info:
            # "usually written as kana: <kana>"
            notes.append(f'{tags[0]}, {info[0]}.')
        elif tags:
            # abbreviation, archaism, etc.
            notes.append(', '.join(tags) + '.')
        elif info:
            # inconsistent
            notes.append(', '.join(info).capitalize() + '.')
        restrictions = sense.get('restrictions')
        if restrictions:
            notes.append('Only applies to: ' + ', '.join(restrictions) + '.')

        definitions.append((
            ', '.join(sense.get('parts_of_speech') or []),
            '; '.join(sense.get('english_definitions') or []),
            ' '.join(notes),
        ))
    return definitions


def get_infobox(alt_forms, result_url, definitions):
    """Build the infobox result for one dictionary entry.

    The ``content`` value is assembled as an HTML fragment, so every piece of
    text taken from the API is HTML-escaped before it is interpolated.

    :param alt_forms: non-empty list of written forms; the first one becomes
        the infobox title, the others are listed as "Other forms".
    :param result_url: URL of the entry on jisho.org.
    :param definitions: tuples as returned by :func:`get_definitions`.
    :returns: dict with the keys ``infobox``, ``content`` and ``urls``.
    """
    infobox_content = []

    # title & alt_forms
    infobox_title = alt_forms[0]
    if len(alt_forms) > 1:
        other_forms = escape(", ".join(alt_forms[1:]), quote=False)
        infobox_content.append(f'<p><i>Other forms:</i> {other_forms}</p>')

    # definitions
    infobox_content.append('''
        <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
        and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
        by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>
        <ul>
        ''')
    for pos, engdef, extra in definitions:
        if pos == 'Wikipedia definition':
            # Wikipedia senses carry their own attribution: close the current
            # list and open a new one below the notice.
            infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
        pos_html = f'<i>{escape(pos, quote=False)}</i>: ' if pos else ''
        extra_html = f' ({escape(extra, quote=False)})' if extra else ''
        infobox_content.append(f'<li>{pos_html}{escape(engdef, quote=False)}{extra_html}</li>')
    infobox_content.append('</ul>')

    return {
        'infobox': infobox_title,
        'content': ''.join(infobox_content),
        'urls': [
            {
                'title': 'Jisho.org',
                'url': result_url,
            }
        ],
    }