diff options
author | Austin Huang <im@austinhuang.me> | 2022-04-01 09:18:19 -0400 |
---|---|---|
committer | Austin Huang <im@austinhuang.me> | 2022-04-01 09:18:19 -0400 |
commit | a399248f56e6975c78f617defc5ce7df2f62a828 (patch) | |
tree | e5fb515653299791e77dfbbea5b189a75bf31193 /searx/engines/jisho.py | |
parent | 934ae4e086a26d1c9c8d25946b43789e55696478 (diff) | |
download | searxng-a399248f56e6975c78f617defc5ce7df2f62a828.tar.gz searxng-a399248f56e6975c78f617defc5ce7df2f62a828.zip |
update jisho.py according to suggestions
Diffstat (limited to 'searx/engines/jisho.py')
-rw-r--r-- | searx/engines/jisho.py | 165 |
1 file changed, 84 insertions, 81 deletions
diff --git a/searx/engines/jisho.py b/searx/engines/jisho.py index 6fab054e0..c1324635b 100644 --- a/searx/engines/jisho.py +++ b/searx/engines/jisho.py @@ -14,9 +14,11 @@ about = { "use_official_api": True, "require_api_key": False, "results": 'JSON', + "language": 'ja', } categories = ['dictionaries'] +engine_type = 'online_dictionary' paging = False URL = 'https://jisho.org' @@ -35,91 +37,92 @@ def response(resp): results = [] infoboxed = False - search_results = json.loads(resp.text) + search_results = resp.json() pages = search_results.get('data', []) for page in pages: # Entries that are purely from Wikipedia are excluded. - if page['senses'][0]['parts_of_speech'][0] != 'Wikipedia definition': - # Process alternative forms - japanese = page['japanese'] - alt_forms = [] - for title_raw in japanese: - if 'word' not in title_raw: - alt_forms.append(title_raw['reading']) - else: - title = title_raw['word'] - if 'reading' in title_raw: - title += ' (' + title_raw['reading'] + ')' - alt_forms.append(title) - # Process definitions - definitions = [] - def_raw = page['senses'] - for defn_raw in def_raw: - extra = '' - if not infoboxed: - # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions. - if defn_raw['tags'] != []: - if defn_raw['info'] != []: - extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>" - else: - extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc. - elif defn_raw['info'] != []: - extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent - if defn_raw['restrictions'] != []: - extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. 
' - extra = extra[:-1] - definitions.append(( - ', '.join(defn_raw['parts_of_speech']), - '; '.join(defn_raw['english_definitions']), - extra - )) - content = '' - infobox_content = ''' - <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a> - and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a> - by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul> - ''' - for pos, engdef, extra in definitions: - if pos == 'Wikipedia definition': - infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>' - if pos == '': - infobox_content += f"<li>{engdef}" - else: - infobox_content += f"<li><i>{pos}</i>: {engdef}" - if extra != '': - infobox_content += f" ({extra})" - infobox_content += '</li>' - content += f"{engdef}. " - infobox_content += '</ul>' - - # For results, we'll return the URL, all alternative forms (as title), - # and all definitions (as description) truncated to 300 characters. - results.append({ - 'url': urljoin(BASE_URL, page['slug']), - 'title': ", ".join(alt_forms), - 'content': content[:300] + (content[300:] and '...') - }) - - # Like Wordnik, we'll return the first result in an infobox too. 
+ if page['senses'][0]['parts_of_speech'] != [] and page['senses'][0]['parts_of_speech'][0] == 'Wikipedia definition': + pass + # Process alternative forms + japanese = page['japanese'] + alt_forms = [] + for title_raw in japanese: + if 'word' not in title_raw: + alt_forms.append(title_raw['reading']) + else: + title = title_raw['word'] + if 'reading' in title_raw: + title += ' (' + title_raw['reading'] + ')' + alt_forms.append(title) + # Process definitions + definitions = [] + def_raw = page['senses'] + for defn_raw in def_raw: + extra = '' if not infoboxed: - infoboxed = True - infobox_urls = [] - infobox_urls.append({ - 'title': 'Jisho.org', - 'url': urljoin(BASE_URL, page['slug']) - }) - infobox = { - 'infobox': alt_forms[0], - 'urls': infobox_urls - } - alt_forms.pop(0) - alt_content = '' - if len(alt_forms) > 0: - alt_content = '<p><i>Other forms:</i> ' - alt_content += ", ".join(alt_forms) - alt_content += '</p>' - infobox['content'] = alt_content + infobox_content - results.append(infobox) + # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions. + if defn_raw['tags'] != []: + if defn_raw['info'] != []: + extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>" + else: + extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc. + elif defn_raw['info'] != []: + extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent + if defn_raw['restrictions'] != []: + extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. 
' + extra = extra[:-1] + definitions.append(( + ', '.join(defn_raw['parts_of_speech']), + '; '.join(defn_raw['english_definitions']), + extra + )) + content = '' + infobox_content = ''' + <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a> + and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a> + by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul> + ''' + for pos, engdef, extra in definitions: + if pos == 'Wikipedia definition': + infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>' + if pos == '': + infobox_content += f"<li>{engdef}" + else: + infobox_content += f"<li><i>{pos}</i>: {engdef}" + if extra != '': + infobox_content += f" ({extra})" + infobox_content += '</li>' + content += f"{engdef}. " + infobox_content += '</ul>' + + # For results, we'll return the URL, all alternative forms (as title), + # and all definitions (as description) truncated to 300 characters. + results.append({ + 'url': urljoin(BASE_URL, page['slug']), + 'title': ", ".join(alt_forms), + 'content': content[:300] + (content[300:] and '...') + }) + + # Like Wordnik, we'll return the first result in an infobox too. + if not infoboxed: + infoboxed = True + infobox_urls = [] + infobox_urls.append({ + 'title': 'Jisho.org', + 'url': urljoin(BASE_URL, page['slug']) + }) + infobox = { + 'infobox': alt_forms[0], + 'urls': infobox_urls + } + alt_forms.pop(0) + alt_content = '' + if len(alt_forms) > 0: + alt_content = '<p><i>Other forms:</i> ' + alt_content += ", ".join(alt_forms) + alt_content += '</p>' + infobox['content'] = alt_content + infobox_content + results.append(infobox) return results |