summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorallendema_searxng_pi <pi@raspberrypi.local>2023-08-09 18:16:58 +0200
committerMarkus Heiser <markus.heiser@darmarIT.de>2024-06-07 10:16:09 +0200
commitee146dbc0762a06397d380eb87b57d8f65975f39 (patch)
tree7385f69d2d7d9c85eb744f5fb1def36b3f92a7f5
parent91882aedf112cfa8a68c4dac6e699f49a8cb8a40 (diff)
downloadsearxng-ee146dbc0762a06397d380eb87b57d8f65975f39.tar.gz
searxng-ee146dbc0762a06397d380eb87b57d8f65975f39.zip
[enh] Add engine for discourse forums
-rw-r--r--docs/dev/engines/online/discourse.rst8
-rw-r--r--searx/engines/discourse.py153
-rw-r--r--searx/searxng.msg3
3 files changed, 164 insertions, 0 deletions
diff --git a/docs/dev/engines/online/discourse.rst b/docs/dev/engines/online/discourse.rst
new file mode 100644
index 000000000..1fab9e34c
--- /dev/null
+++ b/docs/dev/engines/online/discourse.rst
@@ -0,0 +1,8 @@
+.. _discourse engine:
+
+================
+Discourse Forums
+================
+
+.. automodule:: searx.engines.discourse
+ :members:
diff --git a/searx/engines/discourse.py b/searx/engines/discourse.py
new file mode 100644
index 000000000..298dd7ccf
--- /dev/null
+++ b/searx/engines/discourse.py
@@ -0,0 +1,153 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+""".. sidebar:: info
+
+ - `builtwith.com Discourse <https://trends.builtwith.com/websitelist/Discourse>`_
+
+Discourse is an open source Internet forum system. To search in a forum this
+engine offers some additional settings:
+
+- :py:obj:`base_url`
+- :py:obj:`api_order`
+- :py:obj:`search_endpoint`
+- :py:obj:`show_avatar`
+
+Example
+=======
+
+To search in your favorite Discourse forum, add a configuration like shown here
+for the ``paddling.com`` forum:
+
+.. code:: yaml
+
+ - name: paddling
+ engine: discourse
+ shortcut: paddle
+ base_url: 'https://forums.paddling.com/'
+ api_order: views
+ categories: ['social media', 'sports']
+ show_avatar: true
+
+
+Implementations
+===============
+
+"""
+
+from urllib.parse import urlencode
+from datetime import datetime, timedelta
+import html
+
+from dateutil import parser
+
+from flask_babel import gettext
+
+about = {  # descriptive metadata; not read by request() or response() in this module
+ "website": "https://discourse.org/",
+ "wikidata_id": "Q15054354",
+ "official_api_documentation": "https://docs.discourse.org/",
+ "use_official_api": True,
+ "require_api_key": False,
+ "results": "JSON",
+}
+
+base_url: str = None # type: ignore
+"""URL of the Discourse forum."""
+
+search_endpoint = '/search.json'  # appended to base_url when request() builds the search URL
+"""URL path of the `search endpoint`_.
+
+.. _search endpoint: https://docs.discourse.org/#tag/Search
+"""
+
+api_order = 'likes'  # sent as the ``order:`` term of the search query built in request()
+"""Order method, valid values are: ``latest``, ``likes``, ``views``, ``latest_topic``"""
+
+show_avatar = False  # when true, response() adds a 'thumbnail' built from the post's avatar_template
+"""Show avatar of the user who send the post."""
+
+paging = True  # request() forwards params['pageno'] as the ``page`` argument
+time_range_support = True  # request() turns params['time_range'] into an ``after:`` date filter
+
+AGO_TIMEDELTA = {  # look-back span per time_range value, subtracted from now() in request()
+ 'day': timedelta(days=1),
+ 'week': timedelta(days=7),
+ 'month': timedelta(days=31),  # a month is approximated as 31 days
+ 'year': timedelta(days=365),  # a year is approximated as 365 days
+}
+
+
+def request(query, params):
+    """Fill ``params`` with the URL and headers of a Discourse search request.
+
+    The search term sent to the forum is the user's ``query`` followed by an
+    ``order:<api_order>`` filter and, when ``params['time_range']`` is set, an
+    ``after:<YYYY-MM-DD>`` filter computed from :py:obj:`AGO_TIMEDELTA`.
+
+    :param query: search terms entered by the user
+    :param params: request parameters; ``pageno`` and (optionally)
+        ``time_range`` are read, ``url`` and ``headers`` are written
+    :return: the updated ``params``, or ``None`` (``params`` untouched) when
+        ``query`` is two characters long or shorter
+    :raises KeyError: if ``time_range`` is not a key of :py:obj:`AGO_TIMEDELTA`
+    """
+
+    if len(query) <= 2:
+        return None
+
+    q = [query, f'order:{api_order}']
+    time_range = params.get('time_range')
+    if time_range:
+        after_date = datetime.now() - AGO_TIMEDELTA[time_range]
+        q.append('after:' + after_date.strftime('%Y-%m-%d'))
+
+    args = {
+        'q': ' '.join(q),
+        'page': params['pageno'],
+    }
+
+    # base_url may be configured with a trailing slash (the example in the
+    # module docstring does so); join both parts with exactly one slash so
+    # the request path never starts with '//'.
+    endpoint = f"{base_url.rstrip('/')}/{search_endpoint.lstrip('/')}"
+    params['url'] = f'{endpoint}?{urlencode(args)}'
+    params['headers'] = {
+        'Accept': 'application/json, text/javascript, */*; q=0.01',
+        'X-Requested-With': 'XMLHttpRequest',
+    }
+
+    return params
+
+
+def response(resp):
+    """Convert the JSON answer of the search endpoint into a result list.
+
+    One result is built for every entry of ``posts`` whose ``topic_id`` refers
+    to an entry of ``topics``: title, creation date, post count and the
+    closed / answered state are taken from the topic; blurb, user name and
+    avatar are taken from the post.
+
+    :param resp: HTTP response whose ``json()`` method returns the decoded
+        payload
+    :return: list of result dicts followed by one
+        ``{'number_of_results': ...}`` item; an empty list when the payload
+        lacks ``topics`` or ``posts``
+    """
+
+    json_data = resp.json()
+
+    # Both keys are indexed below.  Note that ``('topics' or 'posts')``
+    # evaluates to just ``'topics'``, so each key is tested on its own.
+    if 'topics' not in json_data or 'posts' not in json_data:
+        return []
+
+    # Tolerate a base_url configured with a trailing slash.
+    forum_url = base_url.rstrip('/')
+    topics = {item['id']: item for item in json_data['topics']}
+    results = []
+
+    for post in json_data['posts']:
+        topic = topics.get(post['topic_id'])
+        if topic is None:
+            # Title and date come from the topic; without it no meaningful
+            # result can be built, so skip the post instead of failing.
+            continue
+
+        comments = topic.get('posts_count', 0)
+        has_replies = int(comments) > 1
+
+        metadata = ['@' + post.get('username', '')]
+        if has_replies:
+            metadata.append(f'{gettext("comments")}: {comments}')
+
+        # "answered" takes precedence; open/closed is only shown for topics
+        # that have more than one post.
+        if topic.get('has_accepted_answer'):
+            metadata.append(gettext("answered"))
+        elif has_replies:
+            metadata.append(gettext("closed") if topic.get('closed', '') else gettext("open"))
+
+        result = {
+            'url': f"{forum_url}/p/{post['id']}",
+            'title': html.unescape(topic['title']),
+            'content': html.unescape(post.get('blurb', '')),
+            'metadata': ' | '.join(metadata),
+            'publishedDate': parser.parse(topic['created_at']),
+            'upstream': {'topics': topic},
+        }
+
+        avatar = (post.get('avatar_template') or '').replace('{size}', '96')
+        if show_avatar and avatar:
+            if avatar.startswith(('http://', 'https://')):
+                # Already an absolute URL: prefixing the forum URL would
+                # produce a broken link.
+                result['thumbnail'] = avatar
+            else:
+                result['thumbnail'] = f"{forum_url}/{avatar.lstrip('/')}"
+
+        results.append(result)
+
+    results.append({'number_of_results': len(json_data['topics'])})
+
+    return results
diff --git a/searx/searxng.msg b/searx/searxng.msg
index 17c38d3ae..013be2aff 100644
--- a/searx/searxng.msg
+++ b/searx/searxng.msg
@@ -94,4 +94,7 @@ SOCIAL_MEDIA_TERMS = {
'POINTS': 'points',
'TITLE': 'title',
'AUTHOR': 'author',
+ 'THREAD OPEN': 'open',
+ 'THREAD CLOSED': 'closed',
+ 'THREAD ANSWERED': 'answered',
}