1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Jisho (the Japanese-English dictionary)
"""
from urllib.parse import urlencode, urljoin
# about
about = {
"website": 'https://jisho.org',
"wikidata_id": 'Q24568389',
"official_api_documentation": "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api",
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
"language": 'ja',
}
categories = ['dictionaries']
paging = False
URL = 'https://jisho.org'
BASE_URL = 'https://jisho.org/word/'
SEARCH_URL = URL + '/api/v1/search/words?{query}'
def request(query, params):
query = urlencode({'keyword': query})
params['url'] = SEARCH_URL.format(query=query)
logger.debug(f"query_url --> {params['url']}")
return params
def response(resp):
results = []
first_result = True
search_results = resp.json()
for page in search_results.get('data', []):
# Entries that are purely from Wikipedia are excluded.
parts_of_speech = page.get('senses') and page['senses'][0].get('parts_of_speech')
if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition':
pass
# Process alternative forms
alt_forms = []
for title_raw in page['japanese']:
if 'word' not in title_raw:
alt_forms.append(title_raw['reading'])
else:
title = title_raw['word']
if 'reading' in title_raw:
title += ' (' + title_raw['reading'] + ')'
alt_forms.append(title)
result_url = urljoin(BASE_URL, page['slug'])
definitions = get_definitions(page)
# For results, we'll return the URL, all alternative forms (as title),
# and all definitions (as description) truncated to 300 characters.
content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
results.append(
{'url': result_url, 'title': ", ".join(alt_forms), 'content': content[:300] + (content[300:] and '...')}
)
# Like Wordnik, we'll return the first result in an infobox too.
if first_result:
first_result = False
results.append(get_infobox(alt_forms, result_url, definitions))
return results
def get_definitions(page):
# Process definitions
definitions = []
for defn_raw in page['senses']:
extra = []
# Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
if defn_raw.get('tags'):
if defn_raw.get('info'):
# "usually written as kana: <kana>"
extra.append(defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ')
else:
# abbreviation, archaism, etc.
extra.append(', '.join(defn_raw['tags']) + '. ')
elif defn_raw.get('info'):
# inconsistent
extra.append(', '.join(defn_raw['info']).capitalize() + '. ')
if defn_raw.get('restrictions'):
extra.append('Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. ')
definitions.append(
(
', '.join(defn_raw['parts_of_speech']),
'; '.join(defn_raw['english_definitions']),
''.join(extra)[:-1],
)
)
return definitions
def get_infobox(alt_forms, result_url, definitions):
infobox_content = []
# title & alt_forms
infobox_title = alt_forms[0]
if len(alt_forms) > 1:
infobox_content.append(f'<p><i>Other forms:</i> {", ".join(alt_forms[1:])}</p>')
# definitions
infobox_content.append(
'''
<small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>
<ul>
'''
)
for pos, engdef, extra in definitions:
if pos == 'Wikipedia definition':
infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
pos = f'<i>{pos}</i>: ' if pos else ''
extra = f' ({extra})' if extra else ''
infobox_content.append(f'<li>{pos}{engdef}{extra}</li>')
infobox_content.append('</ul>')
#
return {
'infobox': infobox_title,
'content': ''.join(infobox_content),
'urls': [
{
'title': 'Jisho.org',
'url': result_url,
}
],
}
|