summary | refs | log | tree | commit | diff
path: root/searx
diff options
context:
space:
mode:
author: Émilien (perso) <4016501+unixfox@users.noreply.github.com> 2023-09-19 10:31:02 +0200
committer: GitHub <noreply@github.com> 2023-09-19 10:31:02 +0200
commit: ad725ce7d7df5afc2fc2f63405195b6bcfc8a8a7 (patch)
tree: 24a5153f321853fd5c93f2bc9c834fc551519662 /searx
parent: dcee82334548ad8849391b5c29cdcd868b65daad (diff)
download: searxng-ad725ce7d7df5afc2fc2f63405195b6bcfc8a8a7.tar.gz
searxng-ad725ce7d7df5afc2fc2f63405195b6bcfc8a8a7.zip
wikipedia wikidata infobox + disable wikisource (#2806)
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx')
-rw-r--r-- searx/engines/wikidata.py 19
-rw-r--r-- searx/engines/wikipedia.py 34
-rw-r--r-- searx/settings.yml 7
3 files changed, 45 insertions, 15 deletions
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 5779daa0b..8fa3a97d3 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -41,6 +41,12 @@ about = {
"results": 'JSON',
}
+display_type = ["infobox"]
+"""A list of display types composed from ``infobox`` and ``list``. The latter
+one will add a hit to the result list. The first one will show a hit in the
+info box. Both values can be set, or one of the two can be set."""
+
+
# SPARQL
SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'
@@ -268,8 +274,9 @@ def get_results(attribute_result, attributes, language):
for url in value.split(', '):
infobox_urls.append({'title': attribute.get_label(language), 'url': url, **attribute.kwargs})
# "normal" results (not infobox) include official website and Wikipedia links.
- if attribute.kwargs.get('official') or attribute_type == WDArticle:
+ if "list" in display_type and (attribute.kwargs.get('official') or attribute_type == WDArticle):
results.append({'title': infobox_title, 'url': url, "content": infobox_content})
+
# update the infobox_id with the wikipedia URL
# first the local wikipedia URL, and as fallback the english wikipedia URL
if attribute_type == WDArticle and (
@@ -305,9 +312,15 @@ def get_results(attribute_result, attributes, language):
# add the wikidata URL at the end
infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']})
- if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and len(infobox_content) == 0:
+ if (
+ "list" in display_type
+ and img_src is None
+ and len(infobox_attributes) == 0
+ and len(infobox_urls) == 1
+ and len(infobox_content) == 0
+ ):
results.append({'url': infobox_urls[0]['url'], 'title': infobox_title, 'content': infobox_content})
- else:
+ elif "infobox" in display_type:
results.append(
{
'infobox': infobox_title,
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index b4b70208d..d825cbdea 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -77,6 +77,11 @@ about = {
"results": 'JSON',
}
+display_type = ["infobox"]
+"""A list of display types composed from ``infobox`` and ``list``. The latter
+one will add a hit to the result list. The first one will show a hit in the
+info box. Both values can be set, or one of the two can be set."""
+
send_accept_language_header = True
"""The HTTP ``Accept-Language`` header is needed for wikis where
LanguageConverter_ is enabled."""
@@ -185,18 +190,23 @@ def response(resp):
api_result = resp.json()
title = utils.html_to_text(api_result.get('titles', {}).get('display') or api_result.get('title'))
wikipedia_link = api_result['content_urls']['desktop']['page']
- results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')})
-
- if api_result.get('type') == 'standard':
- results.append(
- {
- 'infobox': title,
- 'id': wikipedia_link,
- 'content': api_result.get('extract', ''),
- 'img_src': api_result.get('thumbnail', {}).get('source'),
- 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}],
- }
- )
+
+ if "list" in display_type or api_result.get('type') != 'standard':
+ # show item in the result list if 'list' is in the display options or it
+ # is an item that can't be displayed in an infobox.
+ results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')})
+
+ if "infobox" in display_type:
+ if api_result.get('type') == 'standard':
+ results.append(
+ {
+ 'infobox': title,
+ 'id': wikipedia_link,
+ 'content': api_result.get('extract', ''),
+ 'img_src': api_result.get('thumbnail', {}).get('source'),
+ 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}],
+ }
+ )
return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 3990ebaf3..4d8899caa 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -371,7 +371,10 @@ engines:
- name: wikipedia
engine: wikipedia
shortcut: wp
+ # add "list" to the array to get results in the results list
+ display_type: ["infobox"]
base_url: 'https://{language}.wikipedia.org/'
+ categories: [general]
- name: bilibili
engine: bilibili
@@ -584,7 +587,10 @@ engines:
shortcut: wd
timeout: 3.0
weight: 2
+ # add "list" to the array to get results in the results list
+ display_type: ["infobox"]
tests: *tests_infobox
+ categories: [general]
- name: duckduckgo
engine: duckduckgo
@@ -1622,6 +1628,7 @@ engines:
categories: [general, wikimedia]
base_url: "https://{language}.wikisource.org/"
search_type: text
+ disabled: true
about:
website: https://www.wikisource.org/
wikidata_id: Q263