summaryrefslogtreecommitdiff
path: root/searx/search/checker/background.py
blob: f16e4c25c849f97ef5149d92f1b52533ee3caef8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-module-docstring
# pyright: basic

import json
import time
import threading
import os
import signal
from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union

import redis.exceptions

from searx import logger, settings, searx_debug
from searx.redisdb import client as get_redis_client
from searx.exceptions import SearxSettingsException
from searx.search.processors import PROCESSORS
from searx.search.checker import Checker
from searx.search.checker.scheduler import scheduler_function


# Redis key under which the JSON-serialized checker result is stored
# (written by _set_result(), read back by get_result())
REDIS_RESULT_KEY = 'SearXNG_checker_result'
# name of the Redis lock held by run() so that two checks never overlap
REDIS_LOCK_KEY = 'SearXNG_checker_lock'


# every shape a checker result can take; this is the return type of get_result()
CheckerResult = Union['CheckerOk', 'CheckerErr', 'CheckerOther']


class CheckerOk(TypedDict):
    """Checking the engines succeeded

    This is the result built by run() when the whole check completed without
    raising; individual engines may still have failed (see ``engines``).
    """

    # discriminator: always 'ok' for this variant
    status: Literal['ok']
    # outcome of each engine, keyed by the engine name used in PROCESSORS
    engines: Dict[str, 'EngineResult']
    # value of _timestamp() taken when the check started (Unix time truncated to the hour)
    timestamp: int


class CheckerErr(TypedDict):
    """Checking the engines failed

    Stored by run() when the check raised an exception (including a Redis
    ``LockError``); no per-engine details are available in that case.
    """

    # discriminator: always 'error' for this variant
    status: Literal['error']
    # value of _timestamp() taken when the failure was handled (Unix time truncated to the hour)
    timestamp: int


class CheckerOther(TypedDict):
    """The status is unknown or disabled

    Returned by get_result() when there is nothing to read from Redis.
    """

    # 'disabled': no Redis client is available
    # 'unknown': the Redis result key does not exist
    status: Literal['unknown', 'disabled']


# outcome of checking a single engine; these are the values of CheckerOk['engines']
EngineResult = Union['EngineOk', 'EngineErr']


class EngineOk(TypedDict):
    """Checking the engine succeeded

    Recorded by run() when ``checker.test_results.successful`` is true.
    """

    # discriminator: always True for this variant
    success: Literal[True]


class EngineErr(TypedDict):
    """Checking the engine failed

    Recorded by run() when ``checker.test_results.successful`` is false.
    """

    # discriminator: always False for this variant
    success: Literal[False]
    # copy of ``checker.test_results.errors``
    # NOTE(review): presumably maps a test name to its error messages -- confirm against Checker
    errors: Dict[str, List[str]]


def _get_interval(every: Any, error_msg: str) -> Tuple[int, int]:
    """Normalize an interval setting to a ``(first, second)`` tuple of ints.

    :param every: either a single ``int`` (used for both bounds) or a
        ``tuple`` / ``list`` holding exactly two ``int`` values.
    :param error_msg: message of the exception raised for any other value.
    :return: ``(every, every)`` for an int, ``(every[0], every[1])`` for a pair.
    :raises SearxSettingsException: when ``every`` has neither of these shapes.
    """
    if isinstance(every, int):
        # a scalar stands for a degenerate range
        return (every, every)

    is_pair = isinstance(every, (tuple, list)) and len(every) == 2
    if is_pair and all(isinstance(bound, int) for bound in every):
        return (every[0], every[1])

    raise SearxSettingsException(error_msg, None)


def get_result() -> CheckerResult:
    """Return the checker result stored in Redis.

    :return: ``{'status': 'disabled'}`` when no Redis client is available,
        ``{'status': 'unknown'}`` when the result key does not exist, otherwise
        the JSON-decoded value found under ``REDIS_RESULT_KEY``.
    """
    redis_client = get_redis_client()
    if redis_client is None:
        # no Redis means no checker at all
        return {'status': 'disabled'}

    raw: Optional[bytes] = redis_client.get(REDIS_RESULT_KEY)
    if raw is not None:
        return json.loads(raw)

    # nothing has been stored under the key (yet)
    return {'status': 'unknown'}


def _set_result(result: CheckerResult):
    """Store ``result`` as JSON under ``REDIS_RESULT_KEY``.

    Silently does nothing when no Redis client is available.

    :param result: the checker result to serialize and save.
    """
    redis_client = get_redis_client()
    if redis_client is not None:
        redis_client.set(REDIS_RESULT_KEY, json.dumps(result))


def _timestamp():
    """Return the current Unix time truncated to a whole hour, as an ``int``."""
    seconds_per_hour = 3600
    elapsed_hours = int(time.time() / seconds_per_hour)
    return elapsed_hours * seconds_per_hour


def run():
    """Check every engine of ``PROCESSORS`` and store the outcome in Redis.

    The whole run is guarded by the Redis lock ``REDIS_LOCK_KEY`` so that two
    checkers do not execute at the same time.  On success a ``CheckerOk`` result
    holding one entry per engine is saved with ``_set_result``.

    Exceptions raised during the run -- including ``redis.exceptions.LockError``
    from the lock -- are logged and recorded as ``{'status': 'error', ...}``.

    Without a Redis client there is nothing to lock and nowhere to store the
    result, so the run is skipped and an explicit error is logged instead of
    failing with an ``AttributeError`` on ``None``.
    """
    try:
        client = get_redis_client()
        if client is None:
            # initialize() installs the SIGUSR1 handler before it checks for Redis,
            # so a signal-triggered run can reach this point without a client
            logger.error('The checker requires Redis')
            return

        # use a Redis lock to make sure there is no checker running at the same time
        # (this should not happen, this is a safety measure)
        with client.lock(REDIS_LOCK_KEY, blocking_timeout=60, timeout=3600):
            logger.info('Starting checker')
            result: CheckerOk = {'status': 'ok', 'engines': {}, 'timestamp': _timestamp()}
            for name, processor in PROCESSORS.items():
                logger.debug('Checking %s engine', name)
                checker = Checker(processor)
                checker.run()
                if checker.test_results.successful:
                    result['engines'][name] = {'success': True}
                else:
                    result['engines'][name] = {'success': False, 'errors': checker.test_results.errors}

            _set_result(result)
            logger.info('Check done')
    except redis.exceptions.LockError:
        # raised by the lock itself
        _set_result({'status': 'error', 'timestamp': _timestamp()})
        logger.exception('Error while running the checker')
    except Exception:  # pylint: disable=broad-except
        # runs in a background thread: record the failure rather than let it escape
        _set_result({'status': 'error', 'timestamp': _timestamp()})
        logger.exception('Error while running the checker')


def _signal_handler(_signum: int, _frame: Any):
    """Signal handler that starts ``run`` in a new daemon thread.

    Both arguments are required by the signal handler signature and are ignored.
    The handler returns as soon as the thread has been started.
    """
    threading.Thread(target=run, daemon=True).start()


def initialize():
    """Install the SIGUSR1 trigger and start the background checker scheduler.

    Steps, in order:

    1. when the platform defines ``SIGUSR1``, register ``_signal_handler`` for it
       and log the pid to send the signal to;
    2. return early when debug mode is active and ``checker.off_when_debug`` is set;
    3. return early when ``checker.scheduling`` is empty or ``None``;
    4. return early (with an error log) when no Redis client is available;
    5. read the ``every`` and ``start_after`` ranges (default ``(300, 1800)``) and
       start a daemon thread named ``checker_scheduler`` running
       ``scheduler_function`` with these ranges and ``run``.

    :raises SearxSettingsException: when ``every`` or ``start_after`` is neither
        an int nor a pair of ints.
    """
    # SIGUSR1 is not available on every platform (Windows has none)
    if hasattr(signal, 'SIGUSR1'):
        logger.info('Send SIGUSR1 signal to pid %i to start the checker', os.getpid())
        signal.signal(signal.SIGUSR1, _signal_handler)

    checker_settings = settings['checker']

    # debug mode can switch the scheduled checker off
    if searx_debug and checker_settings['off_when_debug']:
        logger.info('debug mode: checker is disabled')
        return

    # a missing or empty scheduling section disables the scheduler
    scheduling = checker_settings['scheduling']
    if not scheduling:
        logger.info('Checker scheduler is disabled')
        return

    # results are stored in Redis, so the scheduler is useless without it
    if get_redis_client() is None:
        logger.error('The checker requires Redis')
        return

    # validate the ranges, then hand them to the scheduler thread
    every_first, every_second = _get_interval(
        scheduling.get('every', (300, 1800)), 'checker.scheduling.every is not a int or list'
    )
    start_first, start_second = _get_interval(
        scheduling.get('start_after', (300, 1800)), 'checker.scheduling.start_after is not a int or list'
    )
    scheduler_thread = threading.Thread(
        target=scheduler_function,
        args=(start_first, start_second, every_first, every_second, run),
        name='checker_scheduler',
        daemon=True,
    )
    scheduler_thread.start()