diff options
Diffstat (limited to 'src/searx.botdetection.html')
-rw-r--r-- | src/searx.botdetection.html | 581 |
1 files changed, 581 insertions, 0 deletions
diff --git a/src/searx.botdetection.html b/src/searx.botdetection.html new file mode 100644 index 000000000..0ab20721a --- /dev/null +++ b/src/searx.botdetection.html @@ -0,0 +1,581 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>Bot Detection — SearXNG Documentation (2024.5.17+ec41b5358)</title> + <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../_static/searxng.css?v=52e4ff28" /> + <link rel="stylesheet" type="text/css" href="../_static/tabs.css?v=a5c4661c" /> + <script src="../_static/documentation_options.js?v=619ad1c8"></script> + <script src="../_static/doctools.js?v=9a2dae69"></script> + <script src="../_static/sphinx_highlight.js?v=dc90522c"></script> + <link rel="index" title="Index" href="../genindex.html" /> + <link rel="search" title="Search" href="../search.html" /> + <link rel="next" title="SearXNG Exceptions" href="searx.exceptions.html" /> + <link rel="prev" title="Custom message extractor (i18n)" href="searx.babel_extract.html" /> + </head><body> + <div class="related" role="navigation" aria-label="related navigation"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="right" > + <a href="searx.exceptions.html" title="SearXNG Exceptions" + accesskey="N">next</a> |</li> + <li class="right" > + <a href="searx.babel_extract.html" title="Custom message extractor (i18n)" + accesskey="P">previous</a> |</li> + <li class="nav-item nav-item-0"><a href="../index.html">SearXNG Documentation (2024.5.17+ec41b5358)</a> »</li> + <li class="nav-item 
nav-item-1"><a href="index.html" accesskey="U">Source-Code</a> »</li> + <li class="nav-item nav-item-this"><a href="">Bot Detection</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <section id="bot-detection"> +<span id="botdetection"></span><h1>Bot Detection<a class="headerlink" href="#bot-detection" title="Link to this heading">¶</a></h1> +<nav class="contents local" id="contents"> +<ul class="simple"> +<li><p><a class="reference internal" href="#module-searx.botdetection.ip_lists" id="id4">IP lists</a></p> +<ul> +<li><p><a class="reference internal" href="#method-ip-lists" id="id5">Method <code class="docutils literal notranslate"><span class="pre">ip_lists</span></code></a></p></li> +</ul> +</li> +<li><p><a class="reference internal" href="#module-searx.botdetection.ip_limit" id="id6">Rate limit</a></p> +<ul> +<li><p><a class="reference internal" href="#method-ip-limit" id="id7">Method <code class="docutils literal notranslate"><span class="pre">ip_limit</span></code></a></p></li> +<li><p><a class="reference internal" href="#method-link-token" id="id8">Method <code class="docutils literal notranslate"><span class="pre">link_token</span></code></a></p></li> +</ul> +</li> +<li><p><a class="reference internal" href="#module-searx.botdetection.http_accept" id="id9">Probe HTTP headers</a></p> +<ul> +<li><p><a class="reference internal" href="#method-http-accept" id="id10">Method <code class="docutils literal notranslate"><span class="pre">http_accept</span></code></a></p></li> +<li><p><a class="reference internal" href="#method-http-accept-encoding" id="id11">Method <code class="docutils literal notranslate"><span class="pre">http_accept_encoding</span></code></a></p></li> +<li><p><a class="reference internal" href="#method-http-accept-language" id="id12">Method <code class="docutils literal notranslate"><span 
class="pre">http_accept_language</span></code></a></p></li> +<li><p><a class="reference internal" href="#method-http-connection" id="id13">Method <code class="docutils literal notranslate"><span class="pre">http_connection</span></code></a></p></li> +<li><p><a class="reference internal" href="#method-http-user-agent" id="id14">Method <code class="docutils literal notranslate"><span class="pre">http_user_agent</span></code></a></p></li> +</ul> +</li> +<li><p><a class="reference internal" href="#module-searx.botdetection.config" id="id15">Config</a></p></li> +</ul> +</nav> +<p id="botdetection-src"><span id="module-searx.botdetection"></span>Implementations used for bot detection.</p> +<dl class="py function"> +<dt class="sig sig-object py" id="searx.botdetection.get_network"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.</span></span><span class="sig-name descname"><span class="pre">get_network</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">real_ip</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">IPv4Address</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">IPv6Address</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">cfg</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#searx.botdetection.config.Config" title="searx.botdetection.config.Config"><span class="pre">config.Config</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">IPv4Network</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">IPv6Network</span></span></span><a class="reference 
internal" href="../_modules/searx/botdetection/_helpers.html#get_network"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#searx.botdetection.get_network" title="Link to this definition">¶</a></dt> +<dd><p>Returns the (client) network that the real_ip is part of.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="searx.botdetection.get_real_ip"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.</span></span><span class="sig-name descname"><span class="pre">get_real_ip</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">request</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://flask.palletsprojects.com/en/3.0.x/api/#flask.Request" title="(in Flask v3.0.x)"><span class="pre">Request</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><span class="pre">str</span></a></span></span><a class="reference internal" href="../_modules/searx/botdetection/_helpers.html#get_real_ip"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#searx.botdetection.get_real_ip" title="Link to this definition">¶</a></dt> +<dd><p>Returns the real IP of the request. 
Since not all proxies set all the HTTP +headers and incoming headers can be faked it may happen that the IP cannot +be determined correctly.</p> +<aside class="sidebar"> +<p class="sidebar-title"><a class="reference external" href="https://flask.palletsprojects.com/en/3.0.x/api/#flask.Request.remote_addr" title="(in Flask v3.0.x)"><code class="xref py py-obj docutils literal notranslate"><span class="pre">flask.Request.remote_addr</span></code></a></p> +<p>SearXNG uses Werkzeug’s <a class="reference external" href="https://werkzeug.palletsprojects.com/middleware/proxy_fix/">ProxyFix</a> (with its default <code class="docutils literal notranslate"><span class="pre">x_for=1</span></code>).</p> +</aside> +<p>This function tries to get the remote IP in the order listed below, +additionally some tests are done and if inconsistencies or errors are +detected, they are logged.</p> +<p>The remote IP of the request is taken from (first match):</p> +<ul class="simple"> +<li><p><a class="reference external" href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For">X-Forwarded-For</a> header</p></li> +<li><p><a class="reference external" href="https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516">X-real-IP header</a></p></li> +<li><p><a class="reference external" href="https://flask.palletsprojects.com/en/3.0.x/api/#flask.Request.remote_addr" title="(in Flask v3.0.x)"><code class="xref py py-obj docutils literal notranslate"><span class="pre">flask.Request.remote_addr</span></code></a></p></li> +</ul> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="searx.botdetection.too_many_requests"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.</span></span><span class="sig-name descname"><span class="pre">too_many_requests</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">network</span></span><span class="p"><span 
class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">IPv4Network</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">IPv6Network</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">log_msg</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><span class="pre">str</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">werkzeug.Response</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)"><span class="pre">None</span></a></span></span><a class="reference internal" href="../_modules/searx/botdetection/_helpers.html#too_many_requests"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#searx.botdetection.too_many_requests" title="Link to this definition">¶</a></dt> +<dd><p>Returns a HTTP 429 response object and writes a ERROR message to the +‘botdetection’ logger. 
This function is used in part by the filter methods +to return the default <code class="docutils literal notranslate"><span class="pre">Too</span> <span class="pre">Many</span> <span class="pre">Requests</span></code> response.</p> +</dd></dl> + +<section id="module-searx.botdetection.ip_lists"> +<span id="ip-lists"></span><span id="botdetection-ip-lists"></span><h2><a class="toc-backref" href="#id4" role="doc-backlink">IP lists</a><a class="headerlink" href="#module-searx.botdetection.ip_lists" title="Link to this heading">¶</a></h2> +<section id="method-ip-lists"> +<span id="id1"></span><h3><a class="toc-backref" href="#id5" role="doc-backlink">Method <code class="docutils literal notranslate"><span class="pre">ip_lists</span></code></a><a class="headerlink" href="#method-ip-lists" title="Link to this heading">¶</a></h3> +<p>The <code class="docutils literal notranslate"><span class="pre">ip_lists</span></code> method implements IP <a class="reference internal" href="#searx.botdetection.ip_lists.block_ip" title="searx.botdetection.ip_lists.block_ip"><code class="xref py py-obj docutils literal notranslate"><span class="pre">block-</span></code></a> and +<a class="reference internal" href="#searx.botdetection.ip_lists.pass_ip" title="searx.botdetection.ip_lists.pass_ip"><code class="xref py py-obj docutils literal notranslate"><span class="pre">pass-lists</span></code></a>.</p> +<div class="highlight-toml notranslate"><div class="highlight"><pre><span></span><span class="k">[botdetection.ip_lists]</span> + +<span class="n">pass_ip</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[</span> +<span class="w"> </span><span class="s1">'167.235.158.251'</span><span class="p">,</span><span class="w"> </span><span class="c1"># IPv4 of check.searx.space</span> +<span class="w"> </span><span class="s1">'192.168.0.0/16'</span><span class="p">,</span><span class="w"> </span><span class="c1"># IPv4 private network</span> +<span 
class="w"> </span><span class="s1">'fe80::/10'</span><span class="w"> </span><span class="c1"># IPv6 linklocal</span> +<span class="p">]</span> +<span class="n">block_ip</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[</span> +<span class="w"> </span><span class="s1">'93.184.216.34'</span><span class="p">,</span><span class="w"> </span><span class="c1"># IPv4 of example.org</span> +<span class="w"> </span><span class="s1">'257.1.1.1'</span><span class="p">,</span><span class="w"> </span><span class="c1"># invalid IP --> will be ignored, logged in ERROR class</span> +<span class="p">]</span> +</pre></div> +</div> +</section> +<dl class="py function"> +<dt class="sig sig-object py" id="searx.botdetection.ip_lists.block_ip"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_lists.</span></span><span class="sig-name descname"><span class="pre">block_ip</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">real_ip</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">IPv4Address</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">IPv6Address</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">cfg</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#searx.botdetection.config.Config" title="searx.botdetection.config.Config"><span class="pre">config.Config</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><span 
class="pre">bool</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><span class="pre">str</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="../_modules/searx/botdetection/ip_lists.html#block_ip"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#searx.botdetection.ip_lists.block_ip" title="Link to this definition">¶</a></dt> +<dd><p>Checks if the IP on the subnet is in one of the members of the +<code class="docutils literal notranslate"><span class="pre">botdetection.ip_lists.block_ip</span></code> list.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="searx.botdetection.ip_lists.pass_ip"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_lists.</span></span><span class="sig-name descname"><span class="pre">pass_ip</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">real_ip</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">IPv4Address</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">IPv6Address</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">cfg</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#searx.botdetection.config.Config" title="searx.botdetection.config.Config"><span class="pre">config.Config</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><a class="reference 
external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><span class="pre">bool</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><span class="pre">str</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="../_modules/searx/botdetection/ip_lists.html#pass_ip"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#searx.botdetection.ip_lists.pass_ip" title="Link to this definition">¶</a></dt> +<dd><p>Checks if the IP on the subnet is in one of the members of the +<code class="docutils literal notranslate"><span class="pre">botdetection.ip_lists.pass_ip</span></code> list.</p> +</dd></dl> + +<dl class="py data"> +<dt class="sig sig-object py" id="searx.botdetection.ip_lists.SEARXNG_ORG"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_lists.</span></span><span class="sig-name descname"><span class="pre">SEARXNG_ORG</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">['167.235.158.251',</span> <span class="pre">'2a01:04f8:1c1c:8fc2::/64']</span></em><a class="headerlink" href="#searx.botdetection.ip_lists.SEARXNG_ORG" title="Link to this definition">¶</a></dt> +<dd><p>Passlist of IPs from the SearXNG organization, e.g. 
<cite>check.searx.space</cite>.</p> +</dd></dl> + +</section> +<section id="module-searx.botdetection.ip_limit"> +<span id="rate-limit"></span><span id="botdetection-rate-limit"></span><h2><a class="toc-backref" href="#id6" role="doc-backlink">Rate limit</a><a class="headerlink" href="#module-searx.botdetection.ip_limit" title="Link to this heading">¶</a></h2> +<section id="method-ip-limit"> +<span id="botdetection-ip-limit"></span><h3><a class="toc-backref" href="#id7" role="doc-backlink">Method <code class="docutils literal notranslate"><span class="pre">ip_limit</span></code></a><a class="headerlink" href="#method-ip-limit" title="Link to this heading">¶</a></h3> +<p>The <code class="docutils literal notranslate"><span class="pre">ip_limit</span></code> method counts requests from an IP in <em>sliding windows</em>. If +there are too many requests in a sliding window, the request is evaluated as a +bot request. This method requires a redis DB and needs an HTTP <a class="reference external" href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For">X-Forwarded-For</a> +header. To protect privacy, only the hash value of an IP is stored in the redis DB +and at least for a maximum of 10 minutes.</p> +<p>The <a class="reference internal" href="#module-searx.botdetection.link_token" title="searx.botdetection.link_token"><code class="xref py py-obj docutils literal notranslate"><span class="pre">link_token</span></code></a> method can be used to investigate whether a request is +<em>suspicious</em>. 
To activate the <a class="reference internal" href="#module-searx.botdetection.link_token" title="searx.botdetection.link_token"><code class="xref py py-obj docutils literal notranslate"><span class="pre">link_token</span></code></a> method in the +<a class="reference internal" href="#module-searx.botdetection.ip_limit" title="searx.botdetection.ip_limit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ip_limit</span></code></a> method add the following configuration:</p> +<div class="highlight-toml notranslate"><div class="highlight"><pre><span></span><span class="k">[botdetection.ip_limit]</span> +<span class="n">link_token</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="kc">true</span> +</pre></div> +</div> +<p>If the <a class="reference internal" href="#module-searx.botdetection.link_token" title="searx.botdetection.link_token"><code class="xref py py-obj docutils literal notranslate"><span class="pre">link_token</span></code></a> method is activated and a request is <em>suspicious</em> +the request rates are reduced:</p> +<ul class="simple"> +<li><p><a class="reference internal" href="#searx.botdetection.ip_limit.BURST_MAX" title="searx.botdetection.ip_limit.BURST_MAX"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BURST_MAX</span></code></a> -> <a class="reference internal" href="#searx.botdetection.ip_limit.BURST_MAX_SUSPICIOUS" title="searx.botdetection.ip_limit.BURST_MAX_SUSPICIOUS"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BURST_MAX_SUSPICIOUS</span></code></a></p></li> +<li><p><a class="reference internal" href="#searx.botdetection.ip_limit.LONG_MAX" title="searx.botdetection.ip_limit.LONG_MAX"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LONG_MAX</span></code></a> -> <a class="reference internal" href="#searx.botdetection.ip_limit.LONG_MAX_SUSPICIOUS" 
title="searx.botdetection.ip_limit.LONG_MAX_SUSPICIOUS"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LONG_MAX_SUSPICIOUS</span></code></a></p></li> +</ul> +<p>To intercept bots that get their IPs from a range of IPs, there is a +<a class="reference internal" href="#searx.botdetection.ip_limit.SUSPICIOUS_IP_WINDOW" title="searx.botdetection.ip_limit.SUSPICIOUS_IP_WINDOW"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SUSPICIOUS_IP_WINDOW</span></code></a>. In this window the suspicious IPs are stored +for a longer time. IPs stored in this sliding window have a maximum of +<a class="reference internal" href="#searx.botdetection.ip_limit.SUSPICIOUS_IP_MAX" title="searx.botdetection.ip_limit.SUSPICIOUS_IP_MAX"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SUSPICIOUS_IP_MAX</span></code></a> accesses before they are blocked. As soon as the IP +makes a request that is not suspicious, the sliding window for this IP is +dropped.</p> +</section> +<dl class="py data"> +<dt class="sig sig-object py" id="searx.botdetection.ip_limit.API_MAX"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_limit.</span></span><span class="sig-name descname"><span class="pre">API_MAX</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">4</span></em><a class="headerlink" href="#searx.botdetection.ip_limit.API_MAX" title="Link to this definition">¶</a></dt> +<dd><p>Maximum requests from one IP in the <a class="reference internal" href="#searx.botdetection.ip_limit.API_WONDOW" title="searx.botdetection.ip_limit.API_WONDOW"><code class="xref py py-obj docutils literal notranslate"><span class="pre">API_WONDOW</span></code></a></p> +</dd></dl> + +<dl class="py data"> +<dt class="sig sig-object py" id="searx.botdetection.ip_limit.API_WONDOW"> +<span class="sig-prename 
descclassname"><span class="pre">searx.botdetection.ip_limit.</span></span><span class="sig-name descname"><span class="pre">API_WONDOW</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">3600</span></em><a class="headerlink" href="#searx.botdetection.ip_limit.API_WONDOW" title="Link to this definition">¶</a></dt> +<dd><p>Time (sec) before sliding window for API requests (format != html) expires.</p> +</dd></dl> + +<dl class="py data"> +<dt class="sig sig-object py" id="searx.botdetection.ip_limit.BURST_MAX"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_limit.</span></span><span class="sig-name descname"><span class="pre">BURST_MAX</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">15</span></em><a class="headerlink" href="#searx.botdetection.ip_limit.BURST_MAX" title="Link to this definition">¶</a></dt> +<dd><p>Maximum requests from one IP in the <a class="reference internal" href="#searx.botdetection.ip_limit.BURST_WINDOW" title="searx.botdetection.ip_limit.BURST_WINDOW"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BURST_WINDOW</span></code></a></p> +</dd></dl> + +<dl class="py data"> +<dt class="sig sig-object py" id="searx.botdetection.ip_limit.BURST_MAX_SUSPICIOUS"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_limit.</span></span><span class="sig-name descname"><span class="pre">BURST_MAX_SUSPICIOUS</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2</span></em><a class="headerlink" href="#searx.botdetection.ip_limit.BURST_MAX_SUSPICIOUS" title="Link to this definition">¶</a></dt> +<dd><p>Maximum of suspicious requests from one IP in the <a class="reference internal" 
href="#searx.botdetection.ip_limit.BURST_WINDOW" title="searx.botdetection.ip_limit.BURST_WINDOW"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BURST_WINDOW</span></code></a></p> +</dd></dl> + +<dl class="py data"> +<dt class="sig sig-object py" id="searx.botdetection.ip_limit.BURST_WINDOW"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_limit.</span></span><span class="sig-name descname"><span class="pre">BURST_WINDOW</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">20</span></em><a class="headerlink" href="#searx.botdetection.ip_limit.BURST_WINDOW" title="Link to this definition">¶</a></dt> +<dd><p>Time (sec) before sliding window for <em>burst</em> requests expires.</p> +</dd></dl> + +<dl class="py data"> +<dt class="sig sig-object py" id="searx.botdetection.ip_limit.LONG_MAX"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_limit.</span></span><span class="sig-name descname"><span class="pre">LONG_MAX</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">150</span></em><a class="headerlink" href="#searx.botdetection.ip_limit.LONG_MAX" title="Link to this definition">¶</a></dt> +<dd><p>Maximum requests from one IP in the <a class="reference internal" href="#searx.botdetection.ip_limit.LONG_WINDOW" title="searx.botdetection.ip_limit.LONG_WINDOW"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LONG_WINDOW</span></code></a></p> +</dd></dl> + +<dl class="py data"> +<dt class="sig sig-object py" id="searx.botdetection.ip_limit.LONG_MAX_SUSPICIOUS"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_limit.</span></span><span class="sig-name descname"><span class="pre">LONG_MAX_SUSPICIOUS</span></span><em 
class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">10</span></em><a class="headerlink" href="#searx.botdetection.ip_limit.LONG_MAX_SUSPICIOUS" title="Link to this definition">¶</a></dt> +<dd><p>Maximum suspicious requests from one IP in the <a class="reference internal" href="#searx.botdetection.ip_limit.LONG_WINDOW" title="searx.botdetection.ip_limit.LONG_WINDOW"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LONG_WINDOW</span></code></a></p> +</dd></dl> + +<dl class="py data"> +<dt class="sig sig-object py" id="searx.botdetection.ip_limit.LONG_WINDOW"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_limit.</span></span><span class="sig-name descname"><span class="pre">LONG_WINDOW</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">600</span></em><a class="headerlink" href="#searx.botdetection.ip_limit.LONG_WINDOW" title="Link to this definition">¶</a></dt> +<dd><p>Time (sec) before the longer sliding window expires.</p> +</dd></dl> + +<dl class="py data"> +<dt class="sig sig-object py" id="searx.botdetection.ip_limit.SUSPICIOUS_IP_MAX"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_limit.</span></span><span class="sig-name descname"><span class="pre">SUSPICIOUS_IP_MAX</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">3</span></em><a class="headerlink" href="#searx.botdetection.ip_limit.SUSPICIOUS_IP_MAX" title="Link to this definition">¶</a></dt> +<dd><p>Maximum requests from one suspicious IP in the <a class="reference internal" href="#searx.botdetection.ip_limit.SUSPICIOUS_IP_WINDOW" title="searx.botdetection.ip_limit.SUSPICIOUS_IP_WINDOW"><code class="xref py py-obj docutils literal 
notranslate"><span class="pre">SUSPICIOUS_IP_WINDOW</span></code></a>.</p> +</dd></dl> + +<dl class="py data"> +<dt class="sig sig-object py" id="searx.botdetection.ip_limit.SUSPICIOUS_IP_WINDOW"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.ip_limit.</span></span><span class="sig-name descname"><span class="pre">SUSPICIOUS_IP_WINDOW</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2592000</span></em><a class="headerlink" href="#searx.botdetection.ip_limit.SUSPICIOUS_IP_WINDOW" title="Link to this definition">¶</a></dt> +<dd><p>Time (sec) before sliding window for one suspicious IP expires.</p> +</dd></dl> + +<section id="method-link-token"> +<span id="module-searx.botdetection.link_token"></span><h3><a class="toc-backref" href="#id8" role="doc-backlink">Method <code class="docutils literal notranslate"><span class="pre">link_token</span></code></a><a class="headerlink" href="#method-link-token" title="Link to this heading">¶</a></h3> +<p>The <code class="docutils literal notranslate"><span class="pre">link_token</span></code> method evaluates a request as <a class="reference internal" href="#searx.botdetection.link_token.is_suspicious" title="searx.botdetection.link_token.is_suspicious"><code class="xref py py-obj docutils literal notranslate"><span class="pre">suspicious</span></code></a> if the URL <code class="docutils literal notranslate"><span class="pre">/client<token>.css</span></code> is not requested by the +client. 
By adding a random component (the token) in the URL, a bot cannot send +a ping by requesting a static URL.</p> +<div class="admonition note"> +<p class="admonition-title">Note</p> +<p>This method requires a redis DB and needs an HTTP <a class="reference external" href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For">X-Forwarded-For</a> header.</p> +</div> +<p>To make use of this method, a flask URL route needs to be added:</p> +<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s1">'/client<token>.css'</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s1">'GET'</span><span class="p">,</span> <span class="s1">'POST'</span><span class="p">])</span> +<span class="k">def</span> <span class="nf">client_token</span><span class="p">(</span><span class="n">token</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> + <span class="n">link_token</span><span class="o">.</span><span class="n">ping</span><span class="p">(</span><span class="n">request</span><span class="p">,</span> <span class="n">token</span><span class="p">)</span> + <span class="k">return</span> <span class="n">Response</span><span class="p">(</span><span class="s1">''</span><span class="p">,</span> <span class="n">mimetype</span><span class="o">=</span><span class="s1">'text/css'</span><span class="p">)</span> +</pre></div> +</div> +<p>And in the HTML template from flask a stylesheet link is needed (the value of +<code class="docutils literal notranslate"><span class="pre">link_token</span></code> comes from <a class="reference internal" href="#searx.botdetection.link_token.get_token" title="searx.botdetection.link_token.get_token"><code class="xref py py-obj docutils literal notranslate"><span 
class="pre">get_token</span></code></a>):</p> +<div class="highlight-html notranslate"><div class="highlight"><pre><span></span><span class="p"><</span><span class="nt">link</span> <span class="na">rel</span><span class="o">=</span><span class="s">"stylesheet"</span> + <span class="na">href</span><span class="o">=</span><span class="s">"{{ url_for('client_token', token=link_token) }}"</span> + <span class="na">type</span><span class="o">=</span><span class="s">"text/css"</span> <span class="p">/></span> +</pre></div> +</div> +</section> +<dl class="py function"> +<dt class="sig sig-object py" id="searx.botdetection.link_token.get_ping_key"> +<span class="sig-prename descclassname"><span class="pre">searx.botdetection.link_token.</span></span><span class="sig-name descname"><span class="pre">get_ping_key</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">network</span></span><sp |