summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2022-01-12 18:08:48 +0100
committerMarkus Heiser <markus.heiser@darmarit.de>2022-01-12 19:37:13 +0100
commit7cdd31440e621937550072c3f73e68f644554842 (patch)
tree621fbc3794b25a8de5803bfe350f662cf9b99f4b
parent6d7e86eece852a525ac2ec3c191aca57712ac1a7 (diff)
downloadsearxng-7cdd31440e621937550072c3f73e68f644554842.tar.gz
searxng-7cdd31440e621937550072c3f73e68f644554842.zip
[fix] external bangs: don't overwrite Bangs in data trie
Bangs with a `*` suffix (e.g. `!!d*`) overwrite Bangs with the same prefix (e.g. `!!d`) [1]. This can be avoided when a non-printable character is used to tag a LEAF_KEY. [1] https://github.com/searxng/searxng/pull/740#issuecomment-1010411888 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
-rw-r--r--searx/external_bang.py6
-rwxr-xr-xsearxng_extra/update/update_external_bangs.py36
-rw-r--r--tests/unit/test_external_bangs.py20
3 files changed, 37 insertions, 25 deletions
diff --git a/searx/external_bang.py b/searx/external_bang.py
index ec5a46ed0..a56737c73 100644
--- a/searx/external_bang.py
+++ b/searx/external_bang.py
@@ -2,6 +2,8 @@
from searx.data import EXTERNAL_BANGS
+LEAF_KEY = chr(16)
+
def get_node(external_bangs_db, bang):
node = external_bangs_db['trie']
@@ -26,8 +28,8 @@ def get_bang_definition_and_ac(external_bangs_db, bang):
if k.startswith(after):
bang_ac_list.append(before + k)
elif isinstance(node, dict):
- bang_definition = node.get('*')
- bang_ac_list = [before + k for k in node.keys() if k != '*']
+ bang_definition = node.get(LEAF_KEY)
+ bang_ac_list = [before + k for k in node.keys() if k != LEAF_KEY]
elif isinstance(node, str):
bang_definition = node
bang_ac_list = []
diff --git a/searxng_extra/update/update_external_bangs.py b/searxng_extra/update/update_external_bangs.py
index be3aade0f..7b8262653 100755
--- a/searxng_extra/update/update_external_bangs.py
+++ b/searxng_extra/update/update_external_bangs.py
@@ -25,7 +25,7 @@ from os.path import join
import httpx
from searx import searx_dir # pylint: disable=E0401 C0413
-
+from searx.external_bang import LEAF_KEY
# from https://duckduckgo.com/newbang
URL_BV1 = 'https://duckduckgo.com/bv1.js'
@@ -51,18 +51,22 @@ def fetch_ddg_bangs(url):
def merge_when_no_leaf(node):
"""Minimize the number of nodes
- A -> B -> C
- B is child of A
- C is child of B
+ ``A -> B -> C``
+
+ - ``B`` is child of ``A``
+ - ``C`` is child of ``B``
+
+ If no ``C`` is equal to ``<LEAF_KEY>``, then each ``C`` is merged
+ into ``A``. For example (5 nodes)::
+
+ d -> d -> g -> <LEAF_KEY> (ddg)
+ -> i -> g -> <LEAF_KEY> (dig)
+
+ becomes (3 nodes)::
- If there are no C equals to '*', then each C are merged into A
+ d -> dg -> <LEAF_KEY>
+ -> ig -> <LEAF_KEY>
- For example:
- d -> d -> g -> * (ddg*)
- -> i -> g -> * (dig*)
- becomes
- d -> dg -> *
- -> ig -> *
"""
restart = False
if not isinstance(node, dict):
@@ -72,12 +76,12 @@ def merge_when_no_leaf(node):
keys = list(node.keys())
for key in keys:
- if key == '*':
+ if key == LEAF_KEY:
continue
value = node[key]
value_keys = list(value.keys())
- if '*' not in value_keys:
+ if LEAF_KEY not in value_keys:
for value_key in value_keys:
node[key + value_key] = value[value_key]
merge_when_no_leaf(node[key + value_key])
@@ -94,8 +98,8 @@ def optimize_leaf(parent, parent_key, node):
if not isinstance(node, dict):
return
- if len(node) == 1 and '*' in node and parent is not None:
- parent[parent_key] = node['*']
+ if len(node) == 1 and LEAF_KEY in node and parent is not None:
+ parent[parent_key] = node[LEAF_KEY]
else:
for key, value in node.items():
optimize_leaf(node, key, value)
@@ -138,7 +142,7 @@ def parse_ddg_bangs(ddg_bangs):
t = bang_trie
for bang_letter in bang:
t = t.setdefault(bang_letter, {})
- t = t.setdefault('*', bang_def_output)
+ t = t.setdefault(LEAF_KEY, bang_def_output)
# optimize the trie
merge_when_no_leaf(bang_trie)
diff --git a/tests/unit/test_external_bangs.py b/tests/unit/test_external_bangs.py
index 698ce36c6..794edf159 100644
--- a/tests/unit/test_external_bangs.py
+++ b/tests/unit/test_external_bangs.py
@@ -1,4 +1,10 @@
-from searx.external_bang import get_node, resolve_bang_definition, get_bang_url, get_bang_definition_and_autocomplete
+from searx.external_bang import (
+ get_node,
+ resolve_bang_definition,
+ get_bang_url,
+ get_bang_definition_and_autocomplete,
+ LEAF_KEY,
+)
from searx.search import SearchQuery, EngineRef
from tests import SearxTestCase
@@ -7,12 +13,12 @@ TEST_DB = {
'trie': {
'exam': {
'ple': '//example.com/' + chr(2) + chr(1) + '0',
- '*': '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0',
+ LEAF_KEY: '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0',
},
'sea': {
- '*': 'sea' + chr(2) + chr(1) + '0',
+ LEAF_KEY: 'sea' + chr(2) + chr(1) + '0',
'rch': {
- '*': 'search' + chr(2) + chr(1) + '0',
+ LEAF_KEY: 'search' + chr(2) + chr(1) + '0',
'ing': 'searching' + chr(2) + chr(1) + '0',
},
's': {
@@ -31,7 +37,7 @@ class TestGetNode(SearxTestCase):
'trie': {
'exam': {
'ple': 'test',
- '*': 'not used',
+ LEAF_KEY: 'not used',
}
}
}
@@ -71,7 +77,7 @@ class TestResolveBangDefinition(SearxTestCase):
class TestGetBangDefinitionAndAutocomplete(SearxTestCase):
def test_found(self):
bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('exam', external_bangs_db=TEST_DB)
- self.assertEqual(bang_definition, TEST_DB['trie']['exam']['*'])
+ self.assertEqual(bang_definition, TEST_DB['trie']['exam'][LEAF_KEY])
self.assertEqual(new_autocomplete, ['example'])
def test_found_optimized(self):
@@ -86,7 +92,7 @@ class TestGetBangDefinitionAndAutocomplete(SearxTestCase):
def test_partial2(self):
bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('sea', external_bangs_db=TEST_DB)
- self.assertEqual(bang_definition, TEST_DB['trie']['sea']['*'])
+ self.assertEqual(bang_definition, TEST_DB['trie']['sea'][LEAF_KEY])
self.assertEqual(new_autocomplete, ['search', 'searching', 'seascapes', 'season'])
def test_error(self):