summary | refs | log | tree | commit | diff
path: root/searx
diff options
context:
space:
mode:
author Alexandre Flament <alex@al-f.net> 2022-04-02 15:21:58 +0200
committer Alexandre Flament <alex@al-f.net> 2022-04-09 18:01:57 +0200
commit 74c7aee9ec52e6b954e48817501a334f23a40e25 (patch)
tree f1ac4584c317655a3f712a7fdeba08a48766cea0 /searx
parent 19fa0095a0ab12ed1f7a79d91edf862faf6fdfcf (diff)
download searxng-74c7aee9ec52e6b954e48817501a334f23a40e25.tar.gz
searxng-74c7aee9ec52e6b954e48817501a334f23a40e25.zip
jisho : code refactoring
Diffstat (limited to 'searx')
-rw-r--r-- searx/engines/jisho.py 143
1 files changed, 76 insertions, 67 deletions
diff --git a/searx/engines/jisho.py b/searx/engines/jisho.py
index a34d8e421..87bbe983d 100644
--- a/searx/engines/jisho.py
+++ b/searx/engines/jisho.py
@@ -17,7 +17,6 @@ about = {
}
categories = ['dictionaries']
-engine_type = 'online_dictionary'
paging = False
URL = 'https://jisho.org'
@@ -34,19 +33,19 @@ def request(query, params):
def response(resp):
results = []
- infoboxed = False
+ first_result = True
search_results = resp.json()
- pages = search_results.get('data', [])
- for page in pages:
+ for page in search_results.get('data', []):
# Entries that are purely from Wikipedia are excluded.
- if page['senses'][0]['parts_of_speech'] != [] and page['senses'][0]['parts_of_speech'][0] == 'Wikipedia definition':
+ parts_of_speech = page.get('senses') and page['senses'][0].get('parts_of_speech')
+ if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition':
pass
+
# Process alternative forms
- japanese = page['japanese']
alt_forms = []
- for title_raw in japanese:
+ for title_raw in page['japanese']:
if 'word' not in title_raw:
alt_forms.append(title_raw['reading'])
else:
@@ -54,74 +53,84 @@ def response(resp):
if 'reading' in title_raw:
title += ' (' + title_raw['reading'] + ')'
alt_forms.append(title)
- # Process definitions
- definitions = []
- def_raw = page['senses']
- for defn_raw in def_raw:
- extra = ''
- if not infoboxed:
- # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
- if defn_raw['tags'] != []:
- if defn_raw['info'] != []:
- extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>"
- else:
- extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc.
- elif defn_raw['info'] != []:
- extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent
- if defn_raw['restrictions'] != []:
- extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
- extra = extra[:-1]
- definitions.append((
- ', '.join(defn_raw['parts_of_speech']),
- '; '.join(defn_raw['english_definitions']),
- extra
- ))
- content = ''
- infobox_content = '''
- <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
- and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
- by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
- '''
- for pos, engdef, extra in definitions:
- if pos == 'Wikipedia definition':
- infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
- if pos == '':
- infobox_content += f"<li>{engdef}"
- else:
- infobox_content += f"<li><i>{pos}</i>: {engdef}"
- if extra != '':
- infobox_content += f" ({extra})"
- infobox_content += '</li>'
- content += f"{engdef}. "
- infobox_content += '</ul>'
+ #
+ result_url = urljoin(BASE_URL, page['slug'])
+ definitions = get_definitions(page)
+
# For results, we'll return the URL, all alternative forms (as title),
# and all definitions (as description) truncated to 300 characters.
+ content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
results.append({
- 'url': urljoin(BASE_URL, page['slug']),
+ 'url': result_url,
'title': ", ".join(alt_forms),
'content': content[:300] + (content[300:] and '...')
})
# Like Wordnik, we'll return the first result in an infobox too.
- if not infoboxed:
- infoboxed = True
- infobox_urls = []
- infobox_urls.append({
- 'title': 'Jisho.org',
- 'url': urljoin(BASE_URL, page['slug'])
- })
- infobox = {
- 'infobox': alt_forms[0],
- 'urls': infobox_urls
- }
- alt_forms.pop(0)
- alt_content = ''
- if len(alt_forms) > 0:
- alt_content = '<p><i>Other forms:</i> '
- alt_content += ", ".join(alt_forms)
- alt_content += '</p>'
- infobox['content'] = alt_content + infobox_content
- results.append(infobox)
+ if first_result:
+ first_result = False
+ results.append(get_infobox(alt_forms, result_url, definitions))
return results
+
+
+def get_definitions(page):
+ # Process definitions
+ definitions = []
+ for defn_raw in page['senses']:
+ extra = []
+ # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
+ if defn_raw.get('tags'):
+ if defn_raw.get('info'):
+ # "usually written as kana: <kana>"
+ extra.append(defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ')
+ else:
+ # abbreviation, archaism, etc.
+ extra.append(', '.join(defn_raw['tags']) + '. ')
+ elif defn_raw.get('info'):
+ # inconsistent
+ extra.append(', '.join(defn_raw['info']).capitalize() + '. ')
+ if defn_raw.get('restrictions'):
+ extra.append('Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. ')
+ definitions.append((
+ ', '.join(defn_raw['parts_of_speech']),
+ '; '.join(defn_raw['english_definitions']),
+ ''.join(extra)[:-1],
+ ))
+ return definitions
+
+
+def get_infobox(alt_forms, result_url, definitions):
+ infobox_content = []
+ # title & alt_forms
+ infobox_title = alt_forms[0]
+ if len(alt_forms) > 1:
+ infobox_content.append(f'<p><i>Other forms:</i> {", ".join(alt_forms[1:])}</p>')
+
+ # definitions
+ infobox_content.append('''
+ <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
+ and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
+ by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>
+ <ul>
+ ''')
+ for pos, engdef, extra in definitions:
+ if pos == 'Wikipedia definition':
+ infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
+ pos = f'<i>{pos}</i>: ' if pos else ''
+ extra = f' ({extra})' if extra else ''
+ infobox_content.append(f'<li>{pos}{engdef}{extra}</li>')
+ infobox_content.append('</ul>')
+
+ #
+ return {
+ 'infobox': infobox_title,
+ 'content': ''.join(infobox_content),
+ 'urls': [
+ {
+ 'title': 'Jisho.org',
+ 'url': result_url,
+ }
+ ]
+ }