diff options
author | Noémi Ványi <kvch@users.noreply.github.com> | 2020-11-30 08:35:15 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-11-30 08:35:15 +0100 |
commit | 4a36a3044d6e39bc60d026d99ed7a010f6505a5f (patch) | |
tree | 7037ea2a1e86ec7d308a5d231eae3aa0897e0f87 | |
parent | 93c2603561c039fb43137c251493e77032f91743 (diff) | |
download | searxng-4a36a3044d6e39bc60d026d99ed7a010f6505a5f.tar.gz searxng-4a36a3044d6e39bc60d026d99ed7a010f6505a5f.zip |
Add recoll engine (#2325)
recoll is a local search engine based on Xapian:
http://www.lesbonscomptes.com/recoll/
By itself recoll does not offer web or API access;
this can be achieved using recoll-webui:
https://framagit.org/medoc92/recollwebui.git
This engine uses a custom 'files' result template.
Set `base_url` to the location where recoll-webui can be reached.
Set `dl_prefix` to a location where the file hierarchy as indexed by recoll can be reached.
Set `search_dir` to the part of the indexed file hierarchy to be searched; use an empty string to search the entire search domain.
-rw-r--r-- | docs/admin/engines.rst | 25 | ||||
-rw-r--r-- | docs/admin/engines/recoll.rst | 50 | ||||
-rw-r--r-- | docs/dev/engine_overview.rst | 3 | ||||
-rw-r--r-- | searx/engines/recoll.py | 104 | ||||
-rw-r--r-- | searx/settings.yml | 22 | ||||
-rw-r--r-- | searx/static/themes/oscar/less/logicodev/results.less | 19 | ||||
-rw-r--r-- | searx/templates/oscar/macros.html | 26 | ||||
-rw-r--r-- | searx/templates/oscar/result_templates/files.html | 55 |
8 files changed, 298 insertions, 6 deletions
diff --git a/docs/admin/engines.rst b/docs/admin/engines.rst index 4d1872dfc..f1ac03699 100644 --- a/docs/admin/engines.rst +++ b/docs/admin/engines.rst @@ -1,14 +1,28 @@ -.. _engines generic: - ======= Engines ======= +Special Engine Settings +======================= + .. sidebar:: Further reading .. - :ref:`settings engine` - - :ref:`engine settings` - - :ref:`engine file` + - :ref:`engine settings` & :ref:`engine file` + +.. toctree:: + :maxdepth: 1 + + engines/recoll.rst + + +.. _engines generic: + +General Engine Settings +======================= + +Explanation of the :ref:`general engine configuration` shown in the table +:ref:`configured engines`. ============= =========== ==================== ============ :ref:`engine settings` :ref:`engine file` @@ -30,8 +44,6 @@ Disabled **D** Show errors **DE** ============= =========== ================================= -Configuration defaults (at built time): - .. _configured engines: .. jinja:: webapp @@ -73,3 +85,4 @@ Configuration defaults (at built time): - {{(mod.display_error_messages and "y") or ""}} {% endfor %} + diff --git a/docs/admin/engines/recoll.rst b/docs/admin/engines/recoll.rst new file mode 100644 index 000000000..cba2e81f7 --- /dev/null +++ b/docs/admin/engines/recoll.rst @@ -0,0 +1,50 @@ +.. _engine recoll: + +====== +Recoll +====== + +.. sidebar:: info + + - `Recoll <https://www.lesbonscomptes.com/recoll/>`_ + - `recoll-webui <https://framagit.org/medoc92/recollwebui.git>`_ + +Recoll_ is a desktop full-text search tool based on Xapian. By itself Recoll_ +does not offer web or API access, this can be achieved using recoll-webui_ + + + +Configuration +============= + +You must configure the following settings: + +``base_url``: + Location where recoll-webui can be reached. + +``mount_prefix``: + Location where the file hierarchy is mounted on your *local* filesystem. + +``dl_prefix``: + Location where the file hierarchy as indexed by recoll can be reached. 
+ +``search_dir``: + Part of the indexed file hierarchy to be searched, if empty the full domain is + searched. + + +Example +======= + +Scenario: + +#. Recoll indexes a local filesystem mounted in ``/export/documents/reference``, +#. the Recoll search interface can be reached at https://recoll.example.org/ and +#. the contents of this filesystem can be reached through https://download.example.org/reference + +.. code:: yaml + + base_url: https://recoll.example.org/ + mount_prefix: /export/documents + dl_prefix: https://download.example.org + search_dir: '' diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst index 268995a45..5e3483fd7 100644 --- a/docs/dev/engine_overview.rst +++ b/docs/dev/engine_overview.rst @@ -18,6 +18,9 @@ engines. Adapters are stored under the folder :origin:`searx/engines`. :depth: 3 :backlinks: entry + +.. _general engine configuration: + general engine configuration ============================ diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py new file mode 100644 index 000000000..5a956b8bf --- /dev/null +++ b/searx/engines/recoll.py @@ -0,0 +1,104 @@ +""" + Recoll (local search engine) + + @using-api yes + @results JSON + @stable yes + @parse url, content, size, abstract, author, mtype, subtype, time, \ + filename, label, type, embedded +""" + +from datetime import date, timedelta +from json import loads +from urllib.parse import urlencode, quote + +# engine dependent config +time_range_support = True + +# parameters from settings.yml +base_url = None +search_dir = '' +mount_prefix = None +dl_prefix = None + +# embedded +embedded_url = '<{ttype} controls height="166px" ' +\ + 'src="{url}" type="{mtype}"></{ttype}>' + + +# helper functions +def get_time_range(time_range): + sw = { + 'day': 1, + 'week': 7, + 'month': 30, + 'year': 365 + } + + offset = sw.get(time_range, 0) + if not offset: + return '' + + return (date.today() - timedelta(days=offset)).isoformat() + + +# do search-request +def request(query, 
params): + search_after = get_time_range(params['time_range']) + search_url = base_url + 'json?{query}&highlight=0' + params['url'] = search_url.format(query=urlencode({ + 'query': query, + 'after': search_after, + 'dir': search_dir})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + response_json = loads(resp.text) + + if not response_json: + return [] + + for result in response_json.get('results', []): + title = result['label'] + url = result['url'].replace('file://' + mount_prefix, dl_prefix) + content = '{}'.format(result['snippet']) + + # append result + item = {'url': url, + 'title': title, + 'content': content, + 'template': 'files.html'} + + if result['size']: + item['size'] = int(result['size']) + + for parameter in ['filename', 'abstract', 'author', 'mtype', 'time']: + if result[parameter]: + item[parameter] = result[parameter] + + # facilitate preview support for known mime types + if 'mtype' in result and '/' in result['mtype']: + (mtype, subtype) = result['mtype'].split('/') + item['mtype'] = mtype + item['subtype'] = subtype + + if mtype in ['audio', 'video']: + item['embedded'] = embedded_url.format( + ttype=mtype, + url=quote(url.encode('utf8'), '/:'), + mtype=result['mtype']) + + if mtype in ['image'] and subtype in ['bmp', 'gif', 'jpeg', 'png']: + item['img_src'] = url + + results.append(item) + + if 'nres' in response_json: + results.append({'number_of_results': response_json['nres']}) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index 8af1a17f1..33ae234d5 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -650,6 +650,28 @@ engines: shortcut : qws categories : social media +# - name: library +# engine: recoll +# shortcut: lib +# base_url: 'https://recoll.example.org/' +# search_dir: '' +# mount_prefix: /export +# dl_prefix: 'https://download.example.org' +# timeout: 30.0 +# categories: files +# disabled: True + +# - name: recoll library reference +# engine: 
recoll +# base_url: 'https://recoll.example.org/' +# search_dir: reference +# mount_prefix: /export +# dl_prefix: 'https://download.example.org' +# shortcut: libr +# timeout: 30.0 +# categories: files +# disabled: True + - name : reddit engine : reddit shortcut : re diff --git a/searx/static/themes/oscar/less/logicodev/results.less b/searx/static/themes/oscar/less/logicodev/results.less index 9926d6e53..33965fb33 100644 --- a/searx/static/themes/oscar/less/logicodev/results.less +++ b/searx/static/themes/oscar/less/logicodev/results.less @@ -51,6 +51,11 @@ float: right; } +.result-abstract { + margin-top: 0.5em; + margin-bottom: 0.8em; +} + .external-link { color: @dark-green; font-size: 12px; @@ -124,6 +129,20 @@ } } +.result-metadata { + clear: both; + margin: 1em; + + td { + padding-right: 1em; + color: @gray; + } + + td:first-of-type { + color: @dark-gray; + } +} + // map formating of results .result-map { clear: both; diff --git a/searx/templates/oscar/macros.html b/searx/templates/oscar/macros.html index 57a90aaa2..2bc1e7805 100644 --- a/searx/templates/oscar/macros.html +++ b/searx/templates/oscar/macros.html @@ -47,6 +47,20 @@ {%- endif -%} {%- endmacro %} +<!-- Draw result footer without cache link --> +{% macro result_footer_nocache(result) -%} + <div class="clearfix"></div> + <div class="pull-right"> + {% for engine in result.engines %} + <span class="label label-default">{{ engine }}</span> + {% endfor %} + {% if proxify %} + <small>{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }}</small> + {% endif %} +</div> +<div class="external-link">{{ result.pretty_url }}</div> +{%- endmacro %} + <!-- Draw result footer --> {% macro result_footer_rtl(result, id) -%} <div class="clearfix"></div>{{- "" -}} @@ -68,6 +82,18 @@ {%- endif %} {%- endmacro %} +<!-- Draw result footer without cache link --> +{% macro result_footer_nocache_rtl(result) -%} + <div class="clearfix"></div> + {% for engine in result.engines %} + <span class="label 
label-default">{{ engine }}</span> + {% endfor %} + {% if proxify %} + <small>{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }}</small> + {% endif %} + <div class="external-link">{{ result.pretty_url }}</div> +{%- endmacro %} + {% macro preferences_item_header(info, label, rtl, id) -%} {% if rtl %} <div class="row form-group"> diff --git a/searx/templates/oscar/result_templates/files.html b/searx/templates/oscar/result_templates/files.html new file mode 100644 index 000000000..5e3894e0a --- /dev/null +++ b/searx/templates/oscar/result_templates/files.html @@ -0,0 +1,55 @@ +{% from 'oscar/macros.html' import result_header, result_sub_header, result_footer_nocache, result_footer_nocache_rtl, icon with context %}
+
+{# Result template for file results: header, optional media preview, abstract, #}
+{# optional thumbnail with content, a metadata table, and a footer without cache link. #}
+{# NOTE(review): several fields below are rendered with |safe, which bypasses #}
+{# autoescaping; confirm they are sanitized before they reach this template. #}
+{{ result_header(result, favicons) }}
+{{ result_sub_header(result) }}
+
+{# Toggle link for the media preview; only shown when the result has embedded markup. #}
+{# Its data-target points at the collapsible #result-media-<index> container below. #}
+{% if result.embedded %}
+ <small> • <a class="text-info btn-collapse collapsed cursor-pointer media-loader disabled_if_nojs" data-toggle="collapse" data-target="#result-media-{{ index }}" data-btn-text-collapsed="{{ _('show media') }}" data-btn-text-not-collapsed="{{ _('hide media') }}">
+ {# Icon depends on the media type: music for audio, film for video, none otherwise. #}
+ {% if result.mtype == 'audio' %}{{ icon('music') }}
+ {% elif result.mtype == 'video' %} {{ icon('film') }}
+ {% endif %} {{ _('show media') }}</a></small>
+{% endif %}
+
+{# Collapsed container holding the embedded markup, emitted as-is (not escaped). #}
+{% if result.embedded %}
+<div id="result-media-{{ index }}" class="collapse">
+ {{ result.embedded|safe }}
+</div>
+{% endif %}
+
+{# Abstract paragraph, only when the result provides one. #}
+{% if result.abstract %}<p class="result-content result-abstract">{{ result.abstract|safe }}</p>{% endif %}
+
+{# With an image source: proxied thumbnail (alt/title from the tag-stripped title) #}
+{# next to the content. Without one: the content paragraph alone. #}
+{% if result.img_src %}
+<div class="container-fluid">
+ <div class="row">
+<img src="{{ image_proxify(result.img_src) }}" alt="{{ result.title|striptags }}" title="{{ result.title|striptags }}" style="width: auto; max-height: 60px; min-height: 60px;" class="col-xs-2 col-sm-4 col-md-4 result-content">
+{% if result.content %}<p class="result-content col-xs-8 col-sm-8 col-md-8">{{ result.content|safe }}</p>{% endif %}
+ </div>
+</div>
+{% else %}
+{% if result.content %}<p class="result-content">{{ result.content|safe }}</p>{% endif %}
+{% endif %}
+
+{# Metadata table: each row is emitted only when the corresponding field is set. #}
+<table class="result-metadata result-content">
+{% if result.author %}<tr><td>{{ _('Author') }}</td><td>{{ result.author|safe }}</td></tr>{% endif %}
+
+{% if result.filename %}<tr><td>{{ _('Filename') }}</td><td>{{ result.filename|safe }}</td></tr>{% endif %}
+
+{# File size: plain bytes below 1024, otherwise the largest fitting unit #}
+{# (kiB, MiB, GiB, TiB; powers of 1024) formatted with two decimals. #}
+{% if result.size %}<tr><td>{{ _('Filesize') }}</td><td>
+ {% if result.size < 1024 %}{{ result.size }} {{ _('Bytes') }}
+ {% elif result.size < 1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024) }} {{ _('kiB') }}
+ {% elif result.size < 1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024) }} {{ _('MiB') }}
+ {% elif result.size < 1024*1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024) }} {{ _('GiB') }}
+ {% else %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024/1024) }} {{ _('TiB') }}{% endif %}
+ </td></tr>
+{% endif %}
+
+{% if result.time %}<tr><td>{{ _('Date') }}</td><td>{{ result.time|safe }}</td></tr>{% endif %}
+
+{# NOTE(review): the row is gated on mtype only, yet also prints subtype; #}
+{# verify subtype is always set whenever mtype is, or the cell ends in a bare "/". #}
+{% if result.mtype %}<tr><td>{{ _('Type') }}</td><td>{{ result.mtype|safe }}/{{ result.subtype|safe }}</td></tr>{% endif %}
+</table>
+
+{# Footer without cache link; the rtl flag selects the right-to-left variant. #}
+{% if rtl %}
+{{ result_footer_nocache_rtl(result) }}
+{% else %}
+{{ result_footer_nocache(result) }}
+{% endif %}
|