summaryrefslogtreecommitdiff
path: root/searx/engines/command.py
diff options
context:
space:
mode:
authorNoémi Ványi <kvch@users.noreply.github.com>2020-09-08 09:51:53 +0200
committerGitHub <noreply@github.com>2020-09-08 09:51:53 +0200
commitf0ca1c34833e2c0c79af68e699e646d77167a269 (patch)
treeddd94c298ee654f94b8fee8cc53c293efb34455e /searx/engines/command.py
parent339738275446dc185e761bdc3f4714cf29d33fd8 (diff)
downloadsearxng-f0ca1c34833e2c0c79af68e699e646d77167a269.tar.gz
searxng-f0ca1c34833e2c0c79af68e699e646d77167a269.zip
[enh] Add command line engines: git grep, find, etc. (#2128)
A new "base" engine called command is introduced. It is the foundation for all command line engines for now. You can use this engine to create your own command line engine.

Add some engines (commented out to make sure no one enables anything accidentally):
* git grep: This engine lets you grep in the searx repo.
* locate: If locate is installed and initialized, you can search on the FS.
* find: You can find files with a specific name from where you started searx.
* pattern search in files: This engine utilizes the command fgrep.
* regex search in files: This engine runs `grep` to find a file based on its contents.
Diffstat (limited to 'searx/engines/command.py')
-rw-r--r--searx/engines/command.py184
1 files changed, 184 insertions, 0 deletions
diff --git a/searx/engines/command.py b/searx/engines/command.py
new file mode 100644
index 000000000..b9e672ffa
--- /dev/null
+++ b/searx/engines/command.py
@@ -0,0 +1,184 @@
+'''
+searx is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+searx is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
+'''
+
+
import re
from os.path import commonpath, commonprefix, expanduser, isabs, realpath
from re import MULTILINE, search as re_search
from shlex import split as shlex_split
from subprocess import Popen, PIPE
from time import time
from threading import Thread
+
+from searx import logger
+
+
# ---------------------------------------------------------------------------
# Module-level engine configuration.  init() overwrites command, working_dir,
# parse_regex, delimiter and environment_variables from the engine settings.
# ---------------------------------------------------------------------------

# Flags declared by this engine module.
offline = True
paging = True

# Argument list of the command to run; an element equal to '{{QUERY}}' is
# replaced by the parsed query parameters (see _get_command_to_run).
command = []
# Environment handed to the spawned process.
environment_variables = {}
# Directory that queried paths must stay below when query_type is 'path'.
working_dir = realpath('.')
# Seconds granted both to joining the reader thread and to process.wait().
timeout = 4.0

# Query validation: 'path' and 'enum' are the values __check_query_params
# acts on; query_enum lists the parameters allowed for 'enum'.
query_type = ''
query_enum = []

# Output parsing: exactly one of delimiter / parse_regex has to be configured
# (enforced by check_parsing_options).
delimiter = {}
parse_regex = {}
# String on which the command output is split into individual raw results.
result_separator = '\n'
# Value stored under the 'template' key of every returned result.
result_template = 'key-value.html'

# Internal state.
_compiled_parse_regex = {}
_command_logger = logger.getChild('command')
+
+
def init(engine_settings):
    """ Configures the engine from its settings

    Validates the parsing options, then copies ``command`` (mandatory) and the
    optional ``working_dir``, ``parse_regex``, ``delimiter`` and
    ``environment_variables`` keys into the module-level configuration.

    Raises ValueError if the parsing options are inconsistent or the
    ``command`` key is missing.
    """
    global command, working_dir, delimiter, parse_regex, environment_variables

    check_parsing_options(engine_settings)

    if 'command' not in engine_settings:
        raise ValueError('engine command : missing configuration key: command')

    command = engine_settings['command']

    if 'working_dir' in engine_settings:
        working_dir = engine_settings['working_dir']
        # a relative directory is resolved once, here
        if not isabs(working_dir):
            working_dir = realpath(working_dir)

    if 'parse_regex' in engine_settings:
        parse_regex = engine_settings['parse_regex']
        # drop patterns compiled by an earlier call so they cannot leak into
        # the new configuration
        _compiled_parse_regex.clear()
        for result_key, regex in parse_regex.items():
            # needs the `re` module itself: only MULTILINE and search are
            # imported by name at the top of the file
            _compiled_parse_regex[result_key] = re.compile(regex, flags=MULTILINE)

    if 'delimiter' in engine_settings:
        delimiter = engine_settings['delimiter']

    if 'environment_variables' in engine_settings:
        environment_variables = engine_settings['environment_variables']
+
+
def search(query, params):
    """ Runs the configured command for the query and returns its results

    The command is executed on a separate thread which is joined for at most
    ``timeout`` seconds; the list returned holds whatever the thread has
    appended to it. ``params['pageno']`` selects the page of results.
    """
    argv = _get_command_to_run(query)
    if not argv:
        return []

    collected = []
    worker = Thread(
        target=_get_results_from_process,
        args=(collected, argv, params['pageno']),
    )
    worker.start()
    worker.join(timeout=timeout)

    return collected
+
+
def _get_command_to_run(query):
    """ Builds the argument list to execute for a query

    The query (bytes or str) is split with shell-like rules, validated by
    ``__check_query_params`` and spliced into ``command`` wherever an element
    equals '{{QUERY}}'. All other elements are copied unchanged.

    Raises ValueError if the query parameters are rejected by the validation.
    """
    # accept both raw bytes and already decoded text
    if isinstance(query, bytes):
        query = query.decode('utf-8')

    params = shlex_split(query)
    __check_query_params(params)

    cmd = []
    for part in command:
        if part == '{{QUERY}}':
            cmd.extend(params)
        else:
            cmd.append(part)

    return cmd
+
+
def _get_results_from_process(results, cmd, pageno):
    """ Runs cmd and appends the parsed results of the requested page to results

    The standard output of the process is read line by line, split on
    ``result_separator`` and every raw result is parsed with
    ``__parse_single_result``. Raw results that cannot be parsed are skipped
    and do not count towards paging. Reading stops as soon as the requested
    page is complete.

    Returns the ``results`` list that was passed in.
    Raises RuntimeError if the command finishes with a non-zero return code.
    """
    start, end = __get_results_limits(pageno)
    count = 0
    leftover = ''

    def _collect(raw_result):
        """ Parses one raw result; returns True once the page is complete """
        nonlocal count
        result = __parse_single_result(raw_result)
        # the parser signals failure with an empty value: never emit it
        if not result:
            _command_logger.debug('skipped result: %s', raw_result)
            return False

        if start <= count <= end:
            result['template'] = result_template
            results.append(result)

        count += 1
        return end < count

    with Popen(cmd, stdout=PIPE, stderr=PIPE, env=environment_variables) as process:
        for line in iter(process.stdout.readline, b''):
            buf = leftover + line.decode('utf-8')
            # everything after the last separator is incomplete; always carry
            # it over (resetting it to '' when the buffer ended on a separator)
            *raw_results, leftover = buf.split(result_separator)

            for raw_result in raw_results:
                if _collect(raw_result):
                    return results

        # output that did not end with a separator still holds one last result
        if leftover and _collect(leftover):
            return results

        return_code = process.wait(timeout=timeout)
        if return_code != 0:
            raise RuntimeError('non-zero return code when running command', cmd, return_code)

    return results
+
+
def __get_results_limits(pageno):
    """ Returns the inclusive (first, last) result indexes of a 1-based page

    Pages hold ten results each: page 1 covers 0..9, page 2 covers 10..19.
    """
    page_size = 10
    first = page_size * (pageno - 1)
    last = first + page_size - 1
    return first, last
+
+
def __check_query_params(params):
    """ Validates the query parameters according to query_type

    * no ``query_type``: nothing is checked
    * ``'path'``: the last parameter must resolve to a location inside
      ``working_dir``
    * ``'enum'``: every parameter must be listed in ``query_enum`` (only
      checked when ``query_enum`` is not empty)

    Raises ValueError if a parameter is rejected.
    """
    if not query_type:
        return

    if query_type == 'path':
        if not params:
            raise ValueError('missing path in query')

        root = realpath(working_dir)
        query_path = realpath(expanduser(params[-1]))
        # compare whole path components: a character based prefix check would
        # accept '/data-other' for a working directory of '/data'
        try:
            inside = commonpath([query_path, root]) == root
        except ValueError:
            # no common path at all, e.g. different drives
            inside = False
        if not inside:
            raise ValueError('requested path is outside of configured working directory')
    elif query_type == 'enum' and len(query_enum) > 0:
        for param in params:
            if param not in query_enum:
                raise ValueError('submitted query params is not allowed', param, 'allowed params:', query_enum)
+
+
def check_parsing_options(engine_settings):
    """ Checks if delimiter based parsing or regex parsing is configured correctly

    Exactly one of the keys ``delimiter`` and ``parse_regex`` must be present
    in engine_settings. A ``delimiter`` must contain both ``chars`` and
    ``keys``.

    Raises ValueError describing the problem otherwise.
    """
    has_delimiter = 'delimiter' in engine_settings
    has_parse_regex = 'parse_regex' in engine_settings

    if not has_delimiter and not has_parse_regex:
        raise ValueError('failed to init settings for parsing lines: missing delimiter or parse_regex')
    if has_delimiter and has_parse_regex:
        raise ValueError('failed to init settings for parsing lines: too many settings')

    if has_delimiter:
        missing = [key for key in ('chars', 'keys') if key not in engine_settings['delimiter']]
        if missing:
            # say which sub-key is absent instead of raising a bare ValueError
            raise ValueError('failed to init settings for parsing lines: delimiter is missing ' + ', '.join(missing))
+
+
def __parse_single_result(raw_result):
    """ Parses command line output based on configuration

    With a ``delimiter`` configured, raw_result is split on its ``chars`` into
    at most as many elements as there are ``keys`` and the elements are stored
    under those keys. With ``parse_regex`` configured, the text matched by
    each compiled pattern is stored under the pattern's key.

    Returns the result dict, or None if raw_result does not yield the expected
    number of elements or one of the patterns does not match.
    """
    result = {}

    if delimiter:
        keys = delimiter['keys']
        elements = raw_result.split(delimiter['chars'], maxsplit=len(keys) - 1)
        if len(elements) != len(keys):
            # None (not an empty dict) so that an `is None` check skips it
            return None
        result.update(zip(keys, elements))

    if parse_regex:
        for result_key, regex in _compiled_parse_regex.items():
            found = regex.search(raw_result)
            if not found:
                return None
            result[result_key] = found.group(0)

    return result