diff options
author | return42 <markus.heiser@darmarIT.de> | 2025-01-10 07:15:40 +0000 |
---|---|---|
committer | return42 <markus.heiser@darmarIT.de> | 2025-01-10 07:15:40 +0000 |
commit | bf98d6b3f17d56ff760d9da5e5b4858ea104ffa9 (patch) | |
tree | 67261a258d820bf1272e33547b975e20acdfe61e /_modules | |
download | searxng-gh-pages.tar.gz searxng-gh-pages.zip |
[doc] build from commit 94a0b415ef587e013df9e7350667b752a3822e90gh-pages
Diffstat (limited to '_modules')
66 files changed, 22725 insertions, 0 deletions
diff --git a/_modules/index.html b/_modules/index.html new file mode 100644 index 000000000..571f03cb9 --- /dev/null +++ b/_modules/index.html @@ -0,0 +1,162 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>Overview: module code — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../_static/searxng.css?v=52e4ff28" /> + <script src="../_static/documentation_options.js?v=532e341d"></script> + <script src="../_static/doctools.js?v=9a2dae69"></script> + <script src="../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../genindex.html" /> + <link rel="search" title="Search" href="../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-this"><a href="">Overview: module code</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>All modules for which code is available</h1> +<ul><li><a href="searx/autocomplete.html">searx.autocomplete</a></li> +<li><a href="searx/babel_extract.html">searx.babel_extract</a></li> +<li><a 
href="searx/botdetection/_helpers.html">searx.botdetection._helpers</a></li> +<li><a href="searx/botdetection/config.html">searx.botdetection.config</a></li> +<li><a href="searx/botdetection/ip_lists.html">searx.botdetection.ip_lists</a></li> +<li><a href="searx/botdetection/link_token.html">searx.botdetection.link_token</a></li> +<li><a href="searx/enginelib.html">searx.enginelib</a></li> +<ul><li><a href="searx/enginelib/traits.html">searx.enginelib.traits</a></li> +</ul><li><a href="searx/engines.html">searx.engines</a></li> +<ul><li><a href="searx/engines/annas_archive.html">searx.engines.annas_archive</a></li> +<li><a href="searx/engines/archlinux.html">searx.engines.archlinux</a></li> +<li><a href="searx/engines/bing.html">searx.engines.bing</a></li> +<li><a href="searx/engines/bing_images.html">searx.engines.bing_images</a></li> +<li><a href="searx/engines/bing_news.html">searx.engines.bing_news</a></li> +<li><a href="searx/engines/bing_videos.html">searx.engines.bing_videos</a></li> +<li><a href="searx/engines/brave.html">searx.engines.brave</a></li> +<li><a href="searx/engines/command.html">searx.engines.command</a></li> +<li><a href="searx/engines/dailymotion.html">searx.engines.dailymotion</a></li> +<li><a href="searx/engines/demo_offline.html">searx.engines.demo_offline</a></li> +<li><a href="searx/engines/demo_online.html">searx.engines.demo_online</a></li> +<li><a href="searx/engines/duckduckgo.html">searx.engines.duckduckgo</a></li> +<li><a href="searx/engines/duckduckgo_definitions.html">searx.engines.duckduckgo_definitions</a></li> +<li><a href="searx/engines/google.html">searx.engines.google</a></li> +<li><a href="searx/engines/google_images.html">searx.engines.google_images</a></li> +<li><a href="searx/engines/google_news.html">searx.engines.google_news</a></li> +<li><a href="searx/engines/google_scholar.html">searx.engines.google_scholar</a></li> +<li><a href="searx/engines/google_videos.html">searx.engines.google_videos</a></li> +<li><a 
href="searx/engines/mrs.html">searx.engines.mrs</a></li> +<li><a href="searx/engines/mullvad_leta.html">searx.engines.mullvad_leta</a></li> +<li><a href="searx/engines/odysee.html">searx.engines.odysee</a></li> +<li><a href="searx/engines/peertube.html">searx.engines.peertube</a></li> +<li><a href="searx/engines/qwant.html">searx.engines.qwant</a></li> +<li><a href="searx/engines/radio_browser.html">searx.engines.radio_browser</a></li> +<li><a href="searx/engines/sepiasearch.html">searx.engines.sepiasearch</a></li> +<li><a href="searx/engines/sqlite.html">searx.engines.sqlite</a></li> +<li><a href="searx/engines/startpage.html">searx.engines.startpage</a></li> +<li><a href="searx/engines/tineye.html">searx.engines.tineye</a></li> +<li><a href="searx/engines/torznab.html">searx.engines.torznab</a></li> +<li><a href="searx/engines/voidlinux.html">searx.engines.voidlinux</a></li> +<li><a href="searx/engines/wikidata.html">searx.engines.wikidata</a></li> +<li><a href="searx/engines/wikipedia.html">searx.engines.wikipedia</a></li> +<li><a href="searx/engines/xpath.html">searx.engines.xpath</a></li> +<li><a href="searx/engines/yahoo.html">searx.engines.yahoo</a></li> +<li><a href="searx/engines/zlibrary.html">searx.engines.zlibrary</a></li> +</ul><li><a href="searx/exceptions.html">searx.exceptions</a></li> +<li><a href="searx/favicons/cache.html">searx.favicons.cache</a></li> +<li><a href="searx/favicons/config.html">searx.favicons.config</a></li> +<li><a href="searx/favicons/proxy.html">searx.favicons.proxy</a></li> +<li><a href="searx/favicons/resolvers.html">searx.favicons.resolvers</a></li> +<li><a href="searx/infopage.html">searx.infopage</a></li> +<li><a href="searx/limiter.html">searx.limiter</a></li> +<li><a href="searx/locales.html">searx.locales</a></li> +<li><a href="searx/plugins/unit_converter.html">searx.plugins.unit_converter</a></li> +<li><a href="searx/redislib.html">searx.redislib</a></li> +<li><a href="searx/search.html">searx.search</a></li> 
+<ul><li><a href="searx/search/models.html">searx.search.models</a></li> +<li><a href="searx/search/processors/abstract.html">searx.search.processors.abstract</a></li> +<li><a href="searx/search/processors/offline.html">searx.search.processors.offline</a></li> +<li><a href="searx/search/processors/online.html">searx.search.processors.online</a></li> +<li><a href="searx/search/processors/online_currency.html">searx.search.processors.online_currency</a></li> +<li><a href="searx/search/processors/online_dictionary.html">searx.search.processors.online_dictionary</a></li> +<li><a href="searx/search/processors/online_url_search.html">searx.search.processors.online_url_search</a></li> +</ul><li><a href="searx/settings_loader.html">searx.settings_loader</a></li> +<li><a href="searx/sqlitedb.html">searx.sqlitedb</a></li> +<li><a href="searx/utils.html">searx.utils</a></li> +</ul> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../index.html"> + <img class="logo" src="../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a 
href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../index.html">Overview</a> + + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/autocomplete.html b/_modules/searx/autocomplete.html new file mode 100644 index 000000000..d028f4224 --- /dev/null +++ b/_modules/searx/autocomplete.html @@ -0,0 +1,373 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.autocomplete — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.autocomplete</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for 
searx.autocomplete</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This module implements functions needed for the autocompleter.</span> + +<span class="sd">"""</span> +<span class="c1"># pylint: disable=use-dict-literal</span> + +<span class="kn">import</span> <span class="nn">json</span> +<span class="kn">import</span> <span class="nn">html</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span><span class="p">,</span> <span class="n">quote_plus</span> + +<span class="kn">import</span> <span class="nn">lxml</span> +<span class="kn">from</span> <span class="nn">httpx</span> <span class="kn">import</span> <span class="n">HTTPError</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">settings</span> +<span class="kn">from</span> <span class="nn">searx.engines</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">engines</span><span class="p">,</span> + <span class="n">google</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="k">as</span> <span class="n">http_get</span><span class="p">,</span> <span class="n">post</span> <span class="k">as</span> <span class="n">http_post</span> +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineResponseException</span> + + +<span class="k">def</span> <span class="nf">update_kwargs</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> + <span class="k">if</span> <span class="s1">'timeout'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">kwargs</span><span class="p">:</span> + 
<span class="n">kwargs</span><span class="p">[</span><span class="s1">'timeout'</span><span class="p">]</span> <span class="o">=</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'outgoing'</span><span class="p">][</span><span class="s1">'request_timeout'</span><span class="p">]</span> + <span class="n">kwargs</span><span class="p">[</span><span class="s1">'raise_for_httperror'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span> + + +<span class="k">def</span> <span class="nf">get</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> + <span class="n">update_kwargs</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> + <span class="k">return</span> <span class="n">http_get</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> + + +<span class="k">def</span> <span class="nf">post</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> + <span class="n">update_kwargs</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> + <span class="k">return</span> <span class="n">http_post</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> + + +<span class="k">def</span> <span class="nf">brave</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">_lang</span><span class="p">):</span> + <span class="c1"># brave search autocompleter</span> + <span 
class="n">url</span> <span class="o">=</span> <span class="s1">'https://search.brave.com/api/suggest?'</span> + <span class="n">url</span> <span class="o">+=</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">})</span> + <span class="n">country</span> <span class="o">=</span> <span class="s1">'all'</span> + <span class="c1"># if lang in _brave:</span> + <span class="c1"># country = lang</span> + <span class="n">kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'cookies'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'country'</span><span class="p">:</span> <span class="n">country</span><span class="p">}}</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">data</span><span class="p">[</span><span class="mi">1</span><span class="p">]:</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> + <span class="k">return</span> <span class="n">results</span> + + +<span class="k">def</span> <span class="nf">dbpedia</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">_lang</span><span class="p">):</span> + 
<span class="c1"># dbpedia autocompleter, no HTTPS</span> + <span class="n">autocomplete_url</span> <span class="o">=</span> <span class="s1">'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'</span> + + <span class="n">response</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">autocomplete_url</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">QueryString</span><span class="o">=</span><span class="n">query</span><span class="p">)))</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">if</span> <span class="n">response</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">etree</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">content</span><span class="p">)</span> + <span class="n">results</span> <span class="o">=</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//Result/Label//text()'</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span> + + +<span class="k">def</span> <span class="nf">duckduckgo</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Autocomplete from DuckDuckGo. 
Supports DuckDuckGo's languages"""</span> + + <span class="n">traits</span> <span class="o">=</span> <span class="n">engines</span><span class="p">[</span><span class="s1">'duckduckgo'</span><span class="p">]</span><span class="o">.</span><span class="n">traits</span> + <span class="n">args</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'kl'</span><span class="p">:</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">),</span> + <span class="p">}</span> + + <span class="n">url</span> <span class="o">=</span> <span class="s1">'https://duckduckgo.com/ac/?type=list&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">args</span><span class="p">)</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">)</span> + + <span class="n">ret_val</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="n">j</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">j</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="n">ret_val</span> <span class="o">=</span> <span class="n">j</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> + <span class="k">return</span> <span 
class="n">ret_val</span> + + +<div class="viewcode-block" id="google_complete"> +<a class="viewcode-back" href="../../dev/engines/online/google.html#searx.autocomplete.google_complete">[docs]</a> +<span class="k">def</span> <span class="nf">google_complete</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Autocomplete from Google. Supports Google's languages and subdomains</span> +<span class="sd"> (:py:obj:`searx.engines.google.get_google_info`) by using the async REST</span> +<span class="sd"> API::</span> + +<span class="sd"> https://{subdomain}/complete/search?{args}</span> + +<span class="sd"> """</span> + + <span class="n">google_info</span> <span class="o">=</span> <span class="n">google</span><span class="o">.</span><span class="n">get_google_info</span><span class="p">({</span><span class="s1">'searxng_locale'</span><span class="p">:</span> <span class="n">sxng_locale</span><span class="p">},</span> <span class="n">engines</span><span class="p">[</span><span class="s1">'google'</span><span class="p">]</span><span class="o">.</span><span class="n">traits</span><span class="p">)</span> + + <span class="n">url</span> <span class="o">=</span> <span class="s1">'https://</span><span class="si">{subdomain}</span><span class="s1">/complete/search?</span><span class="si">{args}</span><span class="s1">'</span> + <span class="n">args</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'client'</span><span class="p">:</span> <span class="s1">'gws-wiz'</span><span class="p">,</span> + <span class="s1">'hl'</span><span class="p">:</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span 
class="p">][</span><span class="s1">'hl'</span><span class="p">],</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">subdomain</span><span class="o">=</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">],</span> <span class="n">args</span><span class="o">=</span><span class="n">args</span><span class="p">))</span> + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="n">json_txt</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">[</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'['</span><span class="p">)</span> <span class="p">:</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">']'</span><span class="p">,</span> <span class="o">-</span><span class="mi">3</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">json_txt</span><span class="p">)</span> + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">]:</span> + <span 
class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">.</span><span class="n">text_content</span><span class="p">())</span> + <span class="k">return</span> <span class="n">results</span></div> + + + +<div class="viewcode-block" id="mwmbl"> +<a class="viewcode-back" href="../../dev/engines/online/mwmbl.html#searx.autocomplete.mwmbl">[docs]</a> +<span class="k">def</span> <span class="nf">mwmbl</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">_lang</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Autocomplete from Mwmbl_."""</span> + + <span class="c1"># mwmbl autocompleter</span> + <span class="n">url</span> <span class="o">=</span> <span class="s1">'https://api.mwmbl.org/search/complete?</span><span class="si">{query}</span><span class="s1">'</span> + + <span class="n">results</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">urlencode</span><span class="p">({</span><span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">})))</span><span class="o">.</span><span class="n">json</span><span class="p">()[</span><span class="mi">1</span><span class="p">]</span> + + <span class="c1"># results starting with `go:` are direct urls and not useful for auto completion</span> + <span class="k">return</span> <span class="p">[</span><span class="n">result</span> <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> 
<span class="n">results</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">result</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"go: "</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">result</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"search: "</span><span class="p">)]</span></div> + + + +<span class="k">def</span> <span class="nf">seznam</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">_lang</span><span class="p">):</span> + <span class="c1"># seznam search autocompleter</span> + <span class="n">url</span> <span class="o">=</span> <span class="s1">'https://suggest.seznam.cz/fulltext/cs?</span><span class="si">{query}</span><span class="s1">'</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span> + <span class="n">url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> + <span class="n">query</span><span class="o">=</span><span class="n">urlencode</span><span class="p">(</span> + <span class="p">{</span><span class="s1">'phrase'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> <span class="s1">'cursorPosition'</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">query</span><span class="p">),</span> <span class="s1">'format'</span><span class="p">:</span> <span class="s1">'json-2'</span><span class="p">,</span> <span class="s1">'highlight'</span><span class="p">:</span> <span class="s1">'1'</span><span class="p">,</span> <span class="s1">'count'</span><span class="p">:</span> <span class="s1">'6'</span><span class="p">}</span> + <span class="p">)</span> + <span class="p">)</span> + <span class="p">)</span> + + <span class="k">if</span> <span 
class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="k">return</span> <span class="p">[]</span> + + <span class="n">data</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + <span class="k">return</span> <span class="p">[</span> + <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">part</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'text'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> <span class="k">for</span> <span class="n">part</span> <span class="ow">in</span> <span class="n">item</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'text'</span><span class="p">,</span> <span class="p">[])])</span> + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'result'</span><span class="p">,</span> <span class="p">[])</span> + <span class="k">if</span> <span class="n">item</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'itemType'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="o">==</span> <span class="s1">'ItemType.TEXT'</span> + <span class="p">]</span> + + +<span class="k">def</span> <span class="nf">stract</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">_lang</span><span class="p">):</span> + <span class="c1"># stract autocompleter (beta)</span> + <span class="n">url</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"https://stract.com/beta/api/autosuggest?q=</span><span 
class="si">{</span><span class="n">quote_plus</span><span class="p">(</span><span class="n">query</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">post</span><span class="p">(</span><span class="n">url</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="k">return</span> <span class="p">[]</span> + + <span class="k">return</span> <span class="p">[</span><span class="n">html</span><span class="o">.</span><span class="n">unescape</span><span class="p">(</span><span class="n">suggestion</span><span class="p">[</span><span class="s1">'raw'</span><span class="p">])</span> <span class="k">for</span> <span class="n">suggestion</span> <span class="ow">in</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()]</span> + + +<span class="k">def</span> <span class="nf">startpage</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Autocomplete from Startpage. 
Supports Startpage's languages"""</span> + <span class="n">lui</span> <span class="o">=</span> <span class="n">engines</span><span class="p">[</span><span class="s1">'startpage'</span><span class="p">]</span><span class="o">.</span><span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="s1">'english'</span><span class="p">)</span> + <span class="n">url</span> <span class="o">=</span> <span class="s1">'https://startpage.com/suggestions?</span><span class="si">{query}</span><span class="s1">'</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">urlencode</span><span class="p">({</span><span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> <span class="s1">'segment'</span><span class="p">:</span> <span class="s1">'startpage.udog'</span><span class="p">,</span> <span class="s1">'lui'</span><span class="p">:</span> <span class="n">lui</span><span class="p">})))</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + <span class="k">return</span> <span class="p">[</span><span class="n">e</span><span class="p">[</span><span class="s1">'text'</span><span class="p">]</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'suggestions'</span><span class="p">,</span> <span class="p">[])</span> <span class="k">if</span> <span class="s1">'text'</span> <span class="ow">in</span> <span class="n">e</span><span 
class="p">]</span> + + +<span class="k">def</span> <span class="nf">swisscows</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">_lang</span><span class="p">):</span> + <span class="c1"># swisscows autocompleter</span> + <span class="n">url</span> <span class="o">=</span> <span class="s1">'https://swisscows.ch/api/suggest?</span><span class="si">{query}</span><span class="s1">&itemsCount=5'</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">urlencode</span><span class="p">({</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">})))</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + <span class="k">return</span> <span class="n">resp</span> + + +<span class="k">def</span> <span class="nf">qwant</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Autocomplete from Qwant. 
Supports Qwant's regions."""</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="n">locale</span> <span class="o">=</span> <span class="n">engines</span><span class="p">[</span><span class="s1">'qwant'</span><span class="p">]</span><span class="o">.</span><span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="s1">'en_US'</span><span class="p">)</span> + <span class="n">url</span> <span class="o">=</span> <span class="s1">'https://api.qwant.com/v3/suggest?</span><span class="si">{query}</span><span class="s1">'</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">urlencode</span><span class="p">({</span><span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> <span class="s1">'locale'</span><span class="p">:</span> <span class="n">locale</span><span class="p">,</span> <span class="s1">'version'</span><span class="p">:</span> <span class="s1">'2'</span><span class="p">})))</span> + + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + <span class="k">if</span> <span class="n">data</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'success'</span><span class="p">:</span> + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">data</span><span 
class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'items'</span><span class="p">]:</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="s1">'value'</span><span class="p">])</span> + + <span class="k">return</span> <span class="n">results</span> + + +<span class="k">def</span> <span class="nf">wikipedia</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">eng_traits</span> <span class="o">=</span> <span class="n">engines</span><span class="p">[</span><span class="s1">'wikipedia'</span><span class="p">]</span><span class="o">.</span><span class="n">traits</span> + <span class="n">wiki_lang</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="s1">'en'</span><span class="p">)</span> + <span class="n">wiki_netloc</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">wiki_lang</span><span class="p">,</span> <span class="s1">'en.wikipedia.org'</span><span class="p">)</span> + + <span class="n">url</span> <span class="o">=</span> <span class="s1">'https://</span><span class="si">{wiki_netloc}</span><span class="s1">/w/api.php?</span><span class="si">{args}</span><span class="s1">'</span> + <span 
class="n">args</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'action'</span><span class="p">:</span> <span class="s1">'opensearch'</span><span class="p">,</span> + <span class="s1">'format'</span><span class="p">:</span> <span class="s1">'json'</span><span class="p">,</span> + <span class="s1">'formatversion'</span><span class="p">:</span> <span class="s1">'2'</span><span class="p">,</span> + <span class="s1">'search'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'namespace'</span><span class="p">:</span> <span class="s1">'0'</span><span class="p">,</span> + <span class="s1">'limit'</span><span class="p">:</span> <span class="s1">'10'</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">=</span><span class="n">args</span><span class="p">,</span> <span class="n">wiki_netloc</span><span class="o">=</span><span class="n">wiki_netloc</span><span class="p">))</span> + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="n">results</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> + + <span class="k">return</span> <span 
class="n">results</span> + + +<span class="k">def</span> <span class="nf">yandex</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">_lang</span><span class="p">):</span> + <span class="c1"># yandex autocompleter</span> + <span class="n">url</span> <span class="o">=</span> <span class="s2">"https://suggest.yandex.com/suggest-ff.cgi?</span><span class="si">{0}</span><span class="s2">"</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">urlencode</span><span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">part</span><span class="o">=</span><span class="n">query</span><span class="p">))))</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="k">return</span> <span class="n">resp</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> + <span class="k">return</span> <span class="p">[]</span> + + +<span class="n">backends</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'dbpedia'</span><span class="p">:</span> <span class="n">dbpedia</span><span class="p">,</span> + <span class="s1">'duckduckgo'</span><span class="p">:</span> <span class="n">duckduckgo</span><span class="p">,</span> + <span class="s1">'google'</span><span class="p">:</span> <span class="n">google_complete</span><span class="p">,</span> + <span class="s1">'mwmbl'</span><span class="p">:</span> <span class="n">mwmbl</span><span class="p">,</span> + 
<span class="s1">'seznam'</span><span class="p">:</span> <span class="n">seznam</span><span class="p">,</span> + <span class="s1">'startpage'</span><span class="p">:</span> <span class="n">startpage</span><span class="p">,</span> + <span class="s1">'stract'</span><span class="p">:</span> <span class="n">stract</span><span class="p">,</span> + <span class="s1">'swisscows'</span><span class="p">:</span> <span class="n">swisscows</span><span class="p">,</span> + <span class="s1">'qwant'</span><span class="p">:</span> <span class="n">qwant</span><span class="p">,</span> + <span class="s1">'wikipedia'</span><span class="p">:</span> <span class="n">wikipedia</span><span class="p">,</span> + <span class="s1">'brave'</span><span class="p">:</span> <span class="n">brave</span><span class="p">,</span> + <span class="s1">'yandex'</span><span class="p">:</span> <span class="n">yandex</span><span class="p">,</span> +<span class="p">}</span> + + +<span class="k">def</span> <span class="nf">search_autocomplete</span><span class="p">(</span><span class="n">backend_name</span><span class="p">,</span> <span class="n">query</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">):</span> + <span class="n">backend</span> <span class="o">=</span> <span class="n">backends</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">backend_name</span><span class="p">)</span> + <span class="k">if</span> <span class="n">backend</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="p">[]</span> + <span class="k">try</span><span class="p">:</span> + <span class="k">return</span> <span class="n">backend</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">)</span> + <span class="k">except</span> <span class="p">(</span><span class="n">HTTPError</span><span class="p">,</span> 
<span class="n">SearxEngineResponseException</span><span class="p">):</span> + <span class="k">return</span> <span class="p">[]</span> +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" 
autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/babel_extract.html b/_modules/searx/babel_extract.html new file mode 100644 index 000000000..bf8944a82 --- /dev/null +++ b/_modules/searx/babel_extract.html @@ -0,0 +1,157 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.babel_extract — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.babel_extract</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code 
for searx.babel_extract</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This module implements the :origin:`searxng_msg <babel.cfg>` extractor to</span> +<span class="sd">extract messages from:</span> + +<span class="sd">- :origin:`searx/searxng.msg`</span> + +<span class="sd">The ``searxng.msg`` files are selected by Babel_, see Babel's configuration in</span> +<span class="sd">:origin:`babel.cfg`::</span> + +<span class="sd"> searxng_msg = searx.babel_extract.extract</span> +<span class="sd"> ...</span> +<span class="sd"> [searxng_msg: **/searxng.msg]</span> + +<span class="sd">A ``searxng.msg`` file is a python file that is *executed* by the</span> +<span class="sd">:py:obj:`extract` function. Additional ``searxng.msg`` files can be added by:</span> + +<span class="sd">1. Adding a ``searxng.msg`` file in one of the SearXNG python packages and</span> +<span class="sd">2. implement a method in :py:obj:`extract` that yields messages from this file.</span> + +<span class="sd">.. 
_Babel: https://babel.pocoo.org/en/latest/index.html</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">os</span> <span class="kn">import</span> <span class="n">path</span> + +<span class="n">SEARXNG_MSG_FILE</span> <span class="o">=</span> <span class="s2">"searxng.msg"</span> +<span class="n">_MSG_FILES</span> <span class="o">=</span> <span class="p">[</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="vm">__file__</span><span class="p">),</span> <span class="n">SEARXNG_MSG_FILE</span><span class="p">)]</span> + + +<div class="viewcode-block" id="extract"> +<a class="viewcode-back" href="../../src/searx.babel_extract.html#searx.babel_extract.extract">[docs]</a> +<span class="k">def</span> <span class="nf">extract</span><span class="p">(</span> + <span class="c1"># pylint: disable=unused-argument</span> + <span class="n">fileobj</span><span class="p">,</span> + <span class="n">keywords</span><span class="p">,</span> + <span class="n">comment_tags</span><span class="p">,</span> + <span class="n">options</span><span class="p">,</span> +<span class="p">):</span> +<span class="w"> </span><span class="sd">"""Extract messages from ``searxng.msg`` files by a custom extractor_.</span> + +<span class="sd"> .. 
_extractor:</span> +<span class="sd"> https://babel.pocoo.org/en/latest/messages.html#writing-extraction-methods</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="n">fileobj</span><span class="o">.</span><span class="n">name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">_MSG_FILES</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"don't know how to extract messages from </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">fileobj</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + + <span class="n">namespace</span> <span class="o">=</span> <span class="p">{}</span> + <span class="n">exec</span><span class="p">(</span><span class="n">fileobj</span><span class="o">.</span><span class="n">read</span><span class="p">(),</span> <span class="p">{},</span> <span class="n">namespace</span><span class="p">)</span> <span class="c1"># pylint: disable=exec-used</span> + + <span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">namespace</span><span class="p">[</span><span class="s1">'__all__'</span><span class="p">]:</span> + <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">namespace</span><span class="p">[</span><span class="n">name</span><span class="p">]</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="k">yield</span> <span class="mi">0</span><span class="p">,</span> <span class="s1">'_'</span><span class="p">,</span> <span class="n">v</span><span class="p">,</span> <span class="p">[</span><span class="s2">"</span><span class="si">%s</span><span class="s2">['</span><span class="si">%s</span><span class="s2">']"</span> <span class="o">%</span> <span 
class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">k</span><span class="p">)]</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" 
autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/botdetection/_helpers.html b/_modules/searx/botdetection/_helpers.html new file mode 100644 index 000000000..b5884e5a8 --- /dev/null +++ b/_modules/searx/botdetection/_helpers.html @@ -0,0 +1,241 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.botdetection._helpers — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.botdetection._helpers</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + 
<div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.botdetection._helpers</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="c1"># pylint: disable=missing-module-docstring, invalid-name</span> +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="kn">from</span> <span class="nn">ipaddress</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">IPv4Network</span><span class="p">,</span> + <span class="n">IPv6Network</span><span class="p">,</span> + <span class="n">IPv4Address</span><span class="p">,</span> + <span class="n">IPv6Address</span><span class="p">,</span> + <span class="n">ip_network</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">import</span> <span class="nn">flask</span> +<span class="kn">import</span> <span class="nn">werkzeug</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">logger</span> +<span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">config</span> + +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'botdetection'</span><span class="p">)</span> + + +<span class="k">def</span> <span class="nf">dump_request</span><span class="p">(</span><span class="n">request</span><span class="p">:</span> <span class="n">flask</span><span class="o">.</span><span class="n">Request</span><span class="p">):</span> + <span class="k">return</span> <span class="p">(</span> + <span class="n">request</span><span class="o">.</span><span class="n">path</span> + <span class="o">+</span> <span class="s2">" || X-Forwarded-For: </span><span class="si">%s</span><span 
class="s2">"</span> <span class="o">%</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'X-Forwarded-For'</span><span class="p">)</span> + <span class="o">+</span> <span class="s2">" || X-Real-IP: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'X-Real-IP'</span><span class="p">)</span> + <span class="o">+</span> <span class="s2">" || form: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">request</span><span class="o">.</span><span class="n">form</span> + <span class="o">+</span> <span class="s2">" || Accept: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Accept'</span><span class="p">)</span> + <span class="o">+</span> <span class="s2">" || Accept-Language: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Accept-Language'</span><span class="p">)</span> + <span class="o">+</span> <span class="s2">" || Accept-Encoding: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Accept-Encoding'</span><span class="p">)</span> + <span class="o">+</span> <span 
class="s2">" || Content-Type: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Content-Type'</span><span class="p">)</span> + <span class="o">+</span> <span class="s2">" || Content-Length: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Content-Length'</span><span class="p">)</span> + <span class="o">+</span> <span class="s2">" || Connection: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Connection'</span><span class="p">)</span> + <span class="o">+</span> <span class="s2">" || User-Agent: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'User-Agent'</span><span class="p">)</span> + <span class="p">)</span> + + +<div class="viewcode-block" id="too_many_requests"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.too_many_requests">[docs]</a> +<span class="k">def</span> <span class="nf">too_many_requests</span><span class="p">(</span><span class="n">network</span><span class="p">:</span> <span class="n">IPv4Network</span> <span class="o">|</span> <span class="n">IPv6Network</span><span class="p">,</span> <span class="n">log_msg</span><span class="p">:</span> <span class="nb">str</span><span 
class="p">)</span> <span class="o">-></span> <span class="n">werkzeug</span><span class="o">.</span><span class="n">Response</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns a HTTP 429 response object and writes a ERROR message to the</span> +<span class="sd"> 'botdetection' logger. This function is used in part by the filter methods</span> +<span class="sd"> to return the default ``Too Many Requests`` response.</span> + +<span class="sd"> """</span> + + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"BLOCK </span><span class="si">%s</span><span class="s2">: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">network</span><span class="o">.</span><span class="n">compressed</span><span class="p">,</span> <span class="n">log_msg</span><span class="p">)</span> + <span class="k">return</span> <span class="n">flask</span><span class="o">.</span><span class="n">make_response</span><span class="p">((</span><span class="s1">'Too Many Requests'</span><span class="p">,</span> <span class="mi">429</span><span class="p">))</span></div> + + + +<div class="viewcode-block" id="get_network"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.get_network">[docs]</a> +<span class="k">def</span> <span class="nf">get_network</span><span class="p">(</span><span class="n">real_ip</span><span class="p">:</span> <span class="n">IPv4Address</span> <span class="o">|</span> <span class="n">IPv6Address</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> <span class="n">config</span><span class="o">.</span><span class="n">Config</span><span class="p">)</span> <span class="o">-></span> <span class="n">IPv4Network</span> <span class="o">|</span> <span class="n">IPv6Network</span><span class="p">:</span> +<span class="w"> 
</span><span class="sd">"""Returns the (client) network of whether the real_ip is part of."""</span> + + <span class="k">if</span> <span class="n">real_ip</span><span class="o">.</span><span class="n">version</span> <span class="o">==</span> <span class="mi">6</span><span class="p">:</span> + <span class="n">prefix</span> <span class="o">=</span> <span class="n">cfg</span><span class="p">[</span><span class="s1">'real_ip.ipv6_prefix'</span><span class="p">]</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">prefix</span> <span class="o">=</span> <span class="n">cfg</span><span class="p">[</span><span class="s1">'real_ip.ipv4_prefix'</span><span class="p">]</span> + <span class="n">network</span> <span class="o">=</span> <span class="n">ip_network</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">real_ip</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">prefix</span><span class="si">}</span><span class="s2">"</span><span class="p">,</span> <span class="n">strict</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> + <span class="c1"># logger.debug("get_network(): %s", network.compressed)</span> + <span class="k">return</span> <span class="n">network</span></div> + + + +<span class="n">_logged_errors</span> <span class="o">=</span> <span class="p">[]</span> + + +<span class="k">def</span> <span class="nf">_log_error_only_once</span><span class="p">(</span><span class="n">err_msg</span><span class="p">):</span> + <span class="k">if</span> <span class="n">err_msg</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">_logged_errors</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">err_msg</span><span class="p">)</span> + <span class="n">_logged_errors</span><span 
class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">err_msg</span><span class="p">)</span> + + +<div class="viewcode-block" id="get_real_ip"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.get_real_ip">[docs]</a> +<span class="k">def</span> <span class="nf">get_real_ip</span><span class="p">(</span><span class="n">request</span><span class="p">:</span> <span class="n">flask</span><span class="o">.</span><span class="n">Request</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns real IP of the request. Since not all proxies set all the HTTP</span> +<span class="sd"> headers and incoming headers can be faked it may happen that the IP cannot</span> +<span class="sd"> be determined correctly.</span> + +<span class="sd"> .. sidebar:: :py:obj:`flask.Request.remote_addr`</span> + +<span class="sd"> SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).</span> + +<span class="sd"> This function tries to get the remote IP in the order listed below,</span> +<span class="sd"> additional some tests are done and if inconsistencies or errors are</span> +<span class="sd"> detected, they are logged.</span> + +<span class="sd"> The remote IP of the request is taken from (first match):</span> + +<span class="sd"> - X-Forwarded-For_ header</span> +<span class="sd"> - `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__</span> +<span class="sd"> - :py:obj:`flask.Request.remote_addr`</span> + +<span class="sd"> .. _ProxyFix:</span> +<span class="sd"> https://werkzeug.palletsprojects.com/middleware/proxy_fix/</span> + +<span class="sd"> .. 
_X-Forwarded-For:</span> +<span class="sd"> https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For</span> + +<span class="sd"> """</span> + + <span class="n">forwarded_for</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"X-Forwarded-For"</span><span class="p">)</span> + <span class="n">real_ip</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'X-Real-IP'</span><span class="p">)</span> + <span class="n">remote_addr</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">remote_addr</span> + <span class="c1"># logger.debug(</span> + <span class="c1"># "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr</span> + <span class="c1"># )</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">forwarded_for</span><span class="p">:</span> + <span class="n">_log_error_only_once</span><span class="p">(</span><span class="s2">"X-Forwarded-For header is not set!"</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">cfg</span> <span class="c1"># pylint: disable=import-outside-toplevel, cyclic-import</span> + + <span class="n">forwarded_for</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">forwarded_for</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span 
class="s1">','</span><span class="p">)]</span> + <span class="n">x_for</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">cfg</span><span class="p">[</span><span class="s1">'real_ip.x_for'</span><span class="p">]</span> <span class="c1"># type: ignore</span> + <span class="n">forwarded_for</span> <span class="o">=</span> <span class="n">forwarded_for</span><span class="p">[</span><span class="o">-</span><span class="nb">min</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">forwarded_for</span><span class="p">),</span> <span class="n">x_for</span><span class="p">)]</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">real_ip</span><span class="p">:</span> + <span class="n">_log_error_only_once</span><span class="p">(</span><span class="s2">"X-Real-IP header is not set!"</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">forwarded_for</span> <span class="ow">and</span> <span class="n">real_ip</span> <span class="ow">and</span> <span class="n">forwarded_for</span> <span class="o">!=</span> <span class="n">real_ip</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"IP from X-Real-IP (</span><span class="si">%s</span><span class="s2">) is not equal to IP from X-Forwarded-For (</span><span class="si">%s</span><span class="s2">)"</span><span class="p">,</span> <span class="n">real_ip</span><span class="p">,</span> <span class="n">forwarded_for</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">forwarded_for</span> <span class="ow">and</span> <span class="n">remote_addr</span> <span class="ow">and</span> <span class="n">forwarded_for</span> <span class="o">!=</span> <span class="n">remote_addr</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span 
class="n">warning</span><span class="p">(</span> + <span class="s2">"IP from WSGI environment (</span><span class="si">%s</span><span class="s2">) is not equal to IP from X-Forwarded-For (</span><span class="si">%s</span><span class="s2">)"</span><span class="p">,</span> <span class="n">remote_addr</span><span class="p">,</span> <span class="n">forwarded_for</span> + <span class="p">)</span> + + <span class="k">if</span> <span class="n">real_ip</span> <span class="ow">and</span> <span class="n">remote_addr</span> <span class="ow">and</span> <span class="n">real_ip</span> <span class="o">!=</span> <span class="n">remote_addr</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"IP from WSGI environment (</span><span class="si">%s</span><span class="s2">) is not equal to IP from X-Real-IP (</span><span class="si">%s</span><span class="s2">)"</span><span class="p">,</span> <span class="n">remote_addr</span><span class="p">,</span> <span class="n">real_ip</span><span class="p">)</span> + + <span class="n">request_ip</span> <span class="o">=</span> <span class="n">forwarded_for</span> <span class="ow">or</span> <span class="n">real_ip</span> <span class="ow">or</span> <span class="n">remote_addr</span> <span class="ow">or</span> <span class="s1">'0.0.0.0'</span> + <span class="c1"># logger.debug("get_real_ip() -> %s", request_ip)</span> + <span class="k">return</span> <span class="n">request_ip</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a 
class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/botdetection/config.html b/_modules/searx/botdetection/config.html new file mode 100644 index 000000000..89d46c10c --- /dev/null +++ b/_modules/searx/botdetection/config.html @@ -0,0 +1,512 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.botdetection.config — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.botdetection.config</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div 
class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.botdetection.config</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Configuration class :py:class:`Config` with deep-update, schema validation</span> +<span class="sd">and deprecated names.</span> + +<span class="sd">The :py:class:`Config` class implements a configuration that is based on</span> +<span class="sd">structured dictionaries. The configuration schema is defined in a dictionary</span> +<span class="sd">structure and the configuration data is given in a dictionary structure.</span> +<span class="sd">"""</span> +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Any</span> + +<span class="kn">import</span> <span class="nn">copy</span> +<span class="kn">import</span> <span class="nn">typing</span> +<span class="kn">import</span> <span class="nn">logging</span> +<span class="kn">import</span> <span class="nn">pathlib</span> + +<span class="kn">from</span> <span class="nn">..compat</span> <span class="kn">import</span> <span class="n">tomllib</span> + +<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'Config'</span><span class="p">,</span> <span class="s1">'UNSET'</span><span class="p">,</span> <span class="s1">'SchemaIssue'</span><span class="p">]</span> + +<span class="n">log</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="vm">__name__</span><span class="p">)</span> + + +<span class="k">class</span> <span class="nc">FALSE</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Class of ``False`` singleton"""</span> + + <span 
class="c1"># pylint: disable=multiple-statements</span> + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">msg</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">msg</span> <span class="o">=</span> <span class="n">msg</span> + + <span class="k">def</span> <span class="fm">__bool__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="kc">False</span> + + <span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">msg</span> + + <span class="fm">__repr__</span> <span class="o">=</span> <span class="fm">__str__</span> + + +<span class="n">UNSET</span> <span class="o">=</span> <span class="n">FALSE</span><span class="p">(</span><span class="s1">'<UNSET>'</span><span class="p">)</span> + + +<div class="viewcode-block" id="SchemaIssue"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.config.SchemaIssue">[docs]</a> +<span class="k">class</span> <span class="nc">SchemaIssue</span><span class="p">(</span><span class="ne">ValueError</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Exception to store and/or raise a message from a schema issue."""</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">level</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Literal</span><span class="p">[</span><span class="s1">'warn'</span><span class="p">,</span> <span class="s1">'invalid'</span><span class="p">],</span> <span class="n">msg</span><span class="p">:</span> 
<span class="nb">str</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">level</span> <span class="o">=</span> <span class="n">level</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span> + + <span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="sa">f</span><span class="s2">"[cfg schema </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">level</span><span class="si">}</span><span class="s2">] </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="si">}</span><span class="s2">"</span></div> + + + +<div class="viewcode-block" id="Config"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.config.Config">[docs]</a> +<span class="k">class</span> <span class="nc">Config</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Base class used for configuration"""</span> + + <span class="n">UNSET</span> <span class="o">=</span> <span class="n">UNSET</span> + + <span class="nd">@classmethod</span> + <span class="k">def</span> <span class="nf">from_toml</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">schema_file</span><span class="p">:</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span><span class="p">,</span> <span class="n">cfg_file</span><span class="p">:</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span><span class="p">,</span> <span class="n">deprecated</span><span class="p">:</span> <span 
class="nb">dict</span><span class="p">)</span> <span class="o">-></span> <span class="n">Config</span><span class="p">:</span> + + <span class="c1"># init schema</span> + + <span class="n">log</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"load schema file: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">schema_file</span><span class="p">)</span> + <span class="n">cfg</span> <span class="o">=</span> <span class="bp">cls</span><span class="p">(</span><span class="n">cfg_schema</span><span class="o">=</span><span class="n">toml_load</span><span class="p">(</span><span class="n">schema_file</span><span class="p">),</span> <span class="n">deprecated</span><span class="o">=</span><span class="n">deprecated</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">cfg_file</span><span class="o">.</span><span class="n">exists</span><span class="p">():</span> + <span class="n">log</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"missing config file: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">cfg_file</span><span class="p">)</span> + <span class="k">return</span> <span class="n">cfg</span> + + <span class="c1"># load configuration</span> + + <span class="n">log</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"load config file: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">cfg_file</span><span class="p">)</span> + <span class="n">upd_cfg</span> <span class="o">=</span> <span class="n">toml_load</span><span class="p">(</span><span class="n">cfg_file</span><span class="p">)</span> + + <span class="n">is_valid</span><span class="p">,</span> <span class="n">issue_list</span> <span class="o">=</span> <span 
class="n">cfg</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">upd_cfg</span><span class="p">)</span> + <span class="k">for</span> <span class="n">msg</span> <span class="ow">in</span> <span class="n">issue_list</span><span class="p">:</span> + <span class="n">log</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">msg</span><span class="p">))</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">is_valid</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"schema of </span><span class="si">{</span><span class="n">cfg_file</span><span class="si">}</span><span class="s2"> is invalid!"</span><span class="p">)</span> + <span class="n">cfg</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">upd_cfg</span><span class="p">)</span> + <span class="k">return</span> <span class="n">cfg</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cfg_schema</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Dict</span><span class="p">,</span> <span class="n">deprecated</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]):</span> +<span class="w"> </span><span class="sd">"""Constructor of class Config.</span> + +<span class="sd"> :param cfg_schema: Schema of the configuration</span> +<span class="sd"> :param deprecated: dictionary that maps deprecated configuration names to a messages</span> + +<span class="sd"> These values are 
needed for validation, see :py:obj:`validate`.</span> + +<span class="sd"> """</span> + <span class="bp">self</span><span class="o">.</span><span class="n">cfg_schema</span> <span class="o">=</span> <span class="n">cfg_schema</span> + <span class="bp">self</span><span class="o">.</span><span class="n">deprecated</span> <span class="o">=</span> <span class="n">deprecated</span> + <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">deepcopy</span><span class="p">(</span><span class="n">cfg_schema</span><span class="p">)</span> + + <span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">Any</span><span class="p">:</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> + +<div class="viewcode-block" id="Config.validate"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.config.Config.validate">[docs]</a> + <span class="k">def</span> <span class="nf">validate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Validation of dictionary ``cfg`` on :py:obj:`Config.SCHEMA`.</span> +<span class="sd"> Validation is done by :py:obj:`validate`."""</span> + + <span class="k">return</span> <span class="n">validate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">cfg_schema</span><span class="p">,</span> <span class="n">cfg</span><span 
class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">deprecated</span><span class="p">)</span></div> + + +<div class="viewcode-block" id="Config.update"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.config.Config.update">[docs]</a> + <span class="k">def</span> <span class="nf">update</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">upd_cfg</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Update this configuration by ``upd_cfg``."""</span> + + <span class="n">dict_deepupdate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">cfg</span><span class="p">,</span> <span class="n">upd_cfg</span><span class="p">)</span></div> + + +<div class="viewcode-block" id="Config.default"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.config.Config.default">[docs]</a> + <span class="k">def</span> <span class="nf">default</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns default value of field ``name`` in ``self.cfg_schema``."""</span> + <span class="k">return</span> <span class="n">value</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">cfg_schema</span><span class="p">)</span></div> + + +<div class="viewcode-block" id="Config.get"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.config.Config.get">[docs]</a> + <span class="k">def</span> <span class="nf">get</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span 
class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">default</span><span class="p">:</span> <span class="n">Any</span> <span class="o">=</span> <span class="n">UNSET</span><span class="p">,</span> <span class="n">replace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="n">Any</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns the value to which ``name`` points in the configuration.</span> + +<span class="sd"> If there is no such ``name`` in the config and the ``default`` is</span> +<span class="sd"> :py:obj:`UNSET`, a :py:obj:`KeyError` is raised.</span> +<span class="sd"> """</span> + + <span class="n">parent</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_parent_dict</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="n">val</span> <span class="o">=</span> <span class="n">parent</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="n">UNSET</span><span class="p">)</span> + <span class="k">if</span> <span class="n">val</span> <span class="ow">is</span> <span class="n">UNSET</span><span class="p">:</span> + <span class="k">if</span> <span class="n">default</span> <span class="ow">is</span> <span class="n">UNSET</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="n">val</span> <span class="o">=</span> <span class="n">default</span> + + <span 
class="k">if</span> <span class="n">replace</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">val</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> + <span class="n">val</span> <span class="o">=</span> <span class="n">val</span> <span class="o">%</span> <span class="bp">self</span> + <span class="k">return</span> <span class="n">val</span></div> + + +<div class="viewcode-block" id="Config.set"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.config.Config.set">[docs]</a> + <span class="k">def</span> <span class="nf">set</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">val</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Set the value to which ``name`` points in the configuration.</span> + +<span class="sd"> If there is no such ``name`` in the config, a :py:obj:`KeyError` is</span> +<span class="sd"> raised.</span> +<span class="sd"> """</span> + <span class="n">parent</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_parent_dict</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="n">parent</span><span class="p">[</span><span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]]</span> <span class="o">=</span> <span class="n">val</span></div> + + + <span class="k">def</span> <span class="nf">_get_parent_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">):</span> + <span class="n">parent_name</span> <span 
class="o">=</span> <span class="s1">'.'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span> + <span class="k">if</span> <span class="n">parent_name</span><span class="p">:</span> + <span class="n">parent</span> <span class="o">=</span> <span class="n">value</span><span class="p">(</span><span class="n">parent_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">parent</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span> + <span class="k">if</span> <span class="p">(</span><span class="n">parent</span> <span class="ow">is</span> <span class="n">UNSET</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">parent</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)):</span> + <span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span><span class="n">parent_name</span><span class="p">)</span> + <span class="k">return</span> <span class="n">parent</span> + +<div class="viewcode-block" id="Config.path"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.config.Config.path">[docs]</a> + <span class="k">def</span> <span class="nf">path</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span 
class="n">UNSET</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Get a :py:class:`pathlib.Path` object from a config string."""</span> + + <span class="n">val</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">default</span><span class="p">)</span> + <span class="k">if</span> <span class="n">val</span> <span class="ow">is</span> <span class="n">UNSET</span><span class="p">:</span> + <span class="k">if</span> <span class="n">default</span> <span class="ow">is</span> <span class="n">UNSET</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="k">return</span> <span class="n">default</span> + <span class="k">return</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">val</span><span class="p">))</span></div> + + +<div class="viewcode-block" id="Config.pyobj"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.config.Config.pyobj">[docs]</a> + <span class="k">def</span> <span class="nf">pyobj</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="n">UNSET</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Get python object referred by full qualiffied name (FQN) in the config</span> +<span class="sd"> string."""</span> + + <span class="n">fqn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span 
class="n">default</span><span class="p">)</span> + <span class="k">if</span> <span class="n">fqn</span> <span class="ow">is</span> <span class="n">UNSET</span><span class="p">:</span> + <span class="k">if</span> <span class="n">default</span> <span class="ow">is</span> <span class="n">UNSET</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="k">return</span> <span class="n">default</span> + <span class="p">(</span><span class="n">modulename</span><span class="p">,</span> <span class="n">name</span><span class="p">)</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">fqn</span><span class="p">)</span><span class="o">.</span><span class="n">rsplit</span><span class="p">(</span><span class="s1">'.'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> + <span class="n">m</span> <span class="o">=</span> <span class="nb">__import__</span><span class="p">(</span><span class="n">modulename</span><span class="p">,</span> <span class="p">{},</span> <span class="p">{},</span> <span class="p">[</span><span class="n">name</span><span class="p">],</span> <span class="mi">0</span><span class="p">)</span> + <span class="k">return</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">name</span><span class="p">)</span></div> +</div> + + + +<span class="k">def</span> <span class="nf">toml_load</span><span class="p">(</span><span class="n">file_name</span><span class="p">):</span> + <span class="k">try</span><span class="p">:</span> + <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">file_name</span><span class="p">,</span> <span class="s2">"rb"</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span> + <span 
class="k">return</span> <span class="n">tomllib</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">f</span><span class="p">)</span> + <span class="k">except</span> <span class="n">tomllib</span><span class="o">.</span><span class="n">TOMLDecodeError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span> + <span class="n">msg</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">exc</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="se">\t</span><span class="s1">'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">,</span> <span class="s1">' '</span><span class="p">)</span> + <span class="n">log</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"</span><span class="si">%s</span><span class="s2">: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">file_name</span><span class="p">,</span> <span class="n">msg</span><span class="p">)</span> + <span class="k">raise</span> + + +<span class="c1"># working with dictionaries</span> + + +<span class="k">def</span> <span class="nf">value</span><span class="p">(</span><span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">data_dict</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns the value to which ``name`` points in the ``dat_dict``.</span> + +<span class="sd"> .. 
code: python</span> + +<span class="sd"> >>> data_dict = {</span> +<span class="sd"> "foo": {"bar": 1 },</span> +<span class="sd"> "bar": {"foo": 2 },</span> +<span class="sd"> "foobar": [1, 2, 3],</span> +<span class="sd"> }</span> +<span class="sd"> >>> value('foobar', data_dict)</span> +<span class="sd"> [1, 2, 3]</span> +<span class="sd"> >>> value('foo.bar', data_dict)</span> +<span class="sd"> 1</span> +<span class="sd"> >>> value('foo.bar.xxx', data_dict)</span> +<span class="sd"> <UNSET></span> + +<span class="sd"> """</span> + + <span class="n">ret_val</span> <span class="o">=</span> <span class="n">data_dict</span> + <span class="k">for</span> <span class="n">part</span> <span class="ow">in</span> <span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">):</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ret_val</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> + <span class="n">ret_val</span> <span class="o">=</span> <span class="n">ret_val</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">part</span><span class="p">,</span> <span class="n">UNSET</span><span class="p">)</span> + <span class="k">if</span> <span class="n">ret_val</span> <span class="ow">is</span> <span class="n">UNSET</span><span class="p">:</span> + <span class="k">break</span> + <span class="k">return</span> <span class="n">ret_val</span> + + +<span class="k">def</span> <span class="nf">validate</span><span class="p">(</span> + <span class="n">schema_dict</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Dict</span><span class="p">,</span> <span class="n">data_dict</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Dict</span><span class="p">,</span> <span 
class="n">deprecated</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> +<span class="p">)</span> <span class="o">-></span> <span class="n">typing</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">list</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Deep validation of dictionary in ``data_dict`` against dictionary in</span> +<span class="sd"> ``schema_dict``. Argument deprecated is a dictionary that maps deprecated</span> +<span class="sd"> configuration names to a messages::</span> + +<span class="sd"> deprecated = {</span> +<span class="sd"> "foo.bar" : "config 'foo.bar' is deprecated, use 'bar.foo'",</span> +<span class="sd"> "..." : "..."</span> +<span class="sd"> }</span> + +<span class="sd"> The function returns a python tuple ``(is_valid, issue_list)``:</span> + +<span class="sd"> ``is_valid``:</span> +<span class="sd"> A bool value indicating ``data_dict`` is valid or not.</span> + +<span class="sd"> ``issue_list``:</span> +<span class="sd"> A list of messages (:py:obj:`SchemaIssue`) from the validation::</span> + +<span class="sd"> [schema warn] data_dict: deprecated 'fontlib.foo': <DEPRECATED['foo.bar']></span> +<span class="sd"> [schema invalid] data_dict: key unknown 'fontlib.foo'</span> +<span class="sd"> [schema invalid] data_dict: type mismatch 'fontlib.foo': expected ..., is ...</span> + +<span class="sd"> If ``schema_dict`` or ``data_dict`` is not a dictionary type a</span> +<span class="sd"> :py:obj:`SchemaIssue` is raised.</span> + +<span class="sd"> """</span> + <span class="n">names</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">is_valid</span> <span class="o">=</span> <span class="kc">True</span> + 
<span class="n">issue_list</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema_dict</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> + <span class="k">raise</span> <span class="n">SchemaIssue</span><span class="p">(</span><span class="s1">'invalid'</span><span class="p">,</span> <span class="s2">"schema_dict is not a dict type"</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data_dict</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> + <span class="k">raise</span> <span class="n">SchemaIssue</span><span class="p">(</span><span class="s1">'invalid'</span><span class="p">,</span> <span class="sa">f</span><span class="s2">"data_dict issue</span><span class="si">{</span><span class="s1">'.'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">names</span><span class="p">)</span><span class="si">}</span><span class="s2"> is not a dict type"</span><span class="p">)</span> + + <span class="n">is_valid</span><span class="p">,</span> <span class="n">issue_list</span> <span class="o">=</span> <span class="n">_validate</span><span class="p">(</span><span class="n">names</span><span class="p">,</span> <span class="n">issue_list</span><span class="p">,</span> <span class="n">schema_dict</span><span class="p">,</span> <span class="n">data_dict</span><span class="p">,</span> <span class="n">deprecated</span><span class="p">)</span> + <span class="k">return</span> <span class="n">is_valid</span><span class="p">,</span> <span class="n">issue_list</span> + + +<span class="k">def</span> <span class="nf">_validate</span><span class="p">(</span> + <span class="n">names</span><span class="p">:</span> <span 
class="n">typing</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> + <span class="n">issue_list</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">List</span><span class="p">,</span> + <span class="n">schema_dict</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Dict</span><span class="p">,</span> + <span class="n">data_dict</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Dict</span><span class="p">,</span> + <span class="n">deprecated</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span> +<span class="p">)</span> <span class="o">-></span> <span class="n">typing</span><span class="o">.</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="n">typing</span><span class="o">.</span><span class="n">List</span><span class="p">]:</span> + + <span class="n">is_valid</span> <span class="o">=</span> <span class="kc">True</span> + + <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">data_value</span> <span class="ow">in</span> <span class="n">data_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + + <span class="n">names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> + <span class="n">name</span> <span class="o">=</span> <span class="s1">'.'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">names</span><span class="p">)</span> + + <span class="n">deprecated_msg</span> <span class="o">=</span> <span class="n">deprecated</span><span 
class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="c1"># print("XXX %s: key %s // data_value: %s" % (name, key, data_value))</span> + <span class="k">if</span> <span class="n">deprecated_msg</span><span class="p">:</span> + <span class="n">issue_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">SchemaIssue</span><span class="p">(</span><span class="s1">'warn'</span><span class="p">,</span> <span class="sa">f</span><span class="s2">"data_dict '</span><span class="si">{</span><span class="n">name</span><span class="si">}</span><span class="s2">': deprecated - </span><span class="si">{</span><span class="n">deprecated_msg</span><span class="si">}</span><span class="s2">"</span><span class="p">))</span> + + <span class="n">schema_value</span> <span class="o">=</span> <span class="n">value</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">schema_dict</span><span class="p">)</span> + <span class="c1"># print("YYY %s: key %s // schema_value: %s" % (name, key, schema_value))</span> + <span class="k">if</span> <span class="n">schema_value</span> <span class="ow">is</span> <span class="n">UNSET</span><span class="p">:</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">deprecated_msg</span><span class="p">:</span> + <span class="n">issue_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">SchemaIssue</span><span class="p">(</span><span class="s1">'invalid'</span><span class="p">,</span> <span class="sa">f</span><span class="s2">"data_dict '</span><span class="si">{</span><span class="n">name</span><span class="si">}</span><span class="s2">': key unknown in schema_dict"</span><span class="p">))</span> + <span class="n">is_valid</span> <span class="o">=</span> <span class="kc">False</span> + + <span 
class="k">elif</span> <span class="nb">type</span><span class="p">(</span><span class="n">schema_value</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">type</span><span class="p">(</span><span class="n">data_value</span><span class="p">):</span> <span class="c1"># pylint: disable=unidiomatic-typecheck</span> + <span class="n">issue_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="n">SchemaIssue</span><span class="p">(</span> + <span class="s1">'invalid'</span><span class="p">,</span> + <span class="p">(</span><span class="sa">f</span><span class="s2">"data_dict: type mismatch '</span><span class="si">{</span><span class="n">name</span><span class="si">}</span><span class="s2">':"</span> <span class="sa">f</span><span class="s2">" expected </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">schema_value</span><span class="p">)</span><span class="si">}</span><span class="s2">, is: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">data_value</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span><span class="p">),</span> + <span class="p">)</span> + <span class="p">)</span> + <span class="n">is_valid</span> <span class="o">=</span> <span class="kc">False</span> + + <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data_value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> + <span class="n">_valid</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">_validate</span><span class="p">(</span><span class="n">names</span><span class="p">,</span> <span class="n">issue_list</span><span class="p">,</span> <span class="n">schema_dict</span><span class="p">,</span> <span class="n">data_value</span><span class="p">,</span> <span 
class="n">deprecated</span><span class="p">)</span> + <span class="n">is_valid</span> <span class="o">=</span> <span class="n">is_valid</span> <span class="ow">and</span> <span class="n">_valid</span> + <span class="n">names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span> + + <span class="k">return</span> <span class="n">is_valid</span><span class="p">,</span> <span class="n">issue_list</span> + + +<span class="k">def</span> <span class="nf">dict_deepupdate</span><span class="p">(</span><span class="n">base_dict</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">upd_dict</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">names</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Deep-update of dictionary in ``base_dict`` by dictionary in ``upd_dict``.</span> + +<span class="sd"> For each ``upd_key`` & ``upd_val`` pair in ``upd_dict``:</span> + +<span class="sd"> 0. If types of ``base_dict[upd_key]`` and ``upd_val`` do not match raise a</span> +<span class="sd"> :py:obj:`TypeError`.</span> + +<span class="sd"> 1. If ``base_dict[upd_key]`` is a dict: recursively deep-update it by ``upd_val``.</span> + +<span class="sd"> 2. If ``base_dict[upd_key]`` not exist: set ``base_dict[upd_key]`` from a</span> +<span class="sd"> (deep-) copy of ``upd_val``.</span> + +<span class="sd"> 3. If ``upd_val`` is a list, extend list in ``base_dict[upd_key]`` by the</span> +<span class="sd"> list in ``upd_val``.</span> + +<span class="sd"> 4. 
If ``upd_val`` is a set, update set in ``base_dict[upd_key]`` by set in</span> +<span class="sd"> ``upd_val``.</span> +<span class="sd"> """</span> + <span class="c1"># pylint: disable=too-many-branches</span> + <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">base_dict</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"argument 'base_dict' is not a ditionary type"</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">upd_dict</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"argument 'upd_dict' is not a ditionary type"</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">names</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">names</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">for</span> <span class="n">upd_key</span><span class="p">,</span> <span class="n">upd_val</span> <span class="ow">in</span> <span class="n">upd_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="c1"># For each upd_key & upd_val pair in upd_dict:</span> + + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">upd_val</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> + + <span class="k">if</span> <span class="n">upd_key</span> <span class="ow">in</span> <span class="n">base_dict</span><span class="p">:</span> + <span class="c1"># if base_dict[upd_key] exists, recursively deep-update it</span> 
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">base_dict</span><span class="p">[</span><span class="n">upd_key</span><span class="p">],</span> <span class="nb">dict</span><span class="p">):</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"type mismatch </span><span class="si">{</span><span class="s1">'.'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">names</span><span class="p">)</span><span class="si">}</span><span class="s2">: is not a dict type in base_dict"</span><span class="p">)</span> + <span class="n">dict_deepupdate</span><span class="p">(</span> + <span class="n">base_dict</span><span class="p">[</span><span class="n">upd_key</span><span class="p">],</span> + <span class="n">upd_val</span><span class="p">,</span> + <span class="n">names</span> + <span class="o">+</span> <span class="p">[</span> + <span class="n">upd_key</span><span class="p">,</span> + <span class="p">],</span> + <span class="p">)</span> + + <span class="k">else</span><span class="p">:</span> + <span class="c1"># if base_dict[upd_key] not exist, set base_dict[upd_key] from deepcopy of upd_val</span> + <span class="n">base_dict</span><span class="p">[</span><span class="n">upd_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">deepcopy</span><span class="p">(</span><span class="n">upd_val</span><span class="p">)</span> + + <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">upd_val</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span> + + <span class="k">if</span> <span class="n">upd_key</span> <span class="ow">in</span> <span class="n">base_dict</span><span class="p">:</span> + <span class="c1"># if 
base_dict[upd_key] exists, base_dict[up_key] is extended by</span> + <span class="c1"># the list from upd_val</span> + <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">base_dict</span><span class="p">[</span><span class="n">upd_key</span><span class="p">],</span> <span class="nb">list</span><span class="p">):</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"type mismatch </span><span class="si">{</span><span class="s1">'.'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">names</span><span class="p">)</span><span class="si">}</span><span class="s2">: is not a list type in base_dict"</span><span class="p">)</span> + <span class="n">base_dict</span><span class="p">[</span><span class="n">upd_key</span><span class="p">]</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">upd_val</span><span class="p">)</span> + + <span class="k">else</span><span class="p">:</span> + <span class="c1"># if base_dict[upd_key] doesn't exists, set base_dict[key] from a deepcopy of the</span> + <span class="c1"># list in upd_val.</span> + <span class="n">base_dict</span><span class="p">[</span><span class="n">upd_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">deepcopy</span><span class="p">(</span><span class="n">upd_val</span><span class="p">)</span> + + <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">upd_val</span><span class="p">,</span> <span class="nb">set</span><span class="p">):</span> + + <span class="k">if</span> <span class="n">upd_key</span> <span class="ow">in</span> <span class="n">base_dict</span><span class="p">:</span> + <span class="c1"># if base_dict[upd_key] exists, base_dict[up_key] 
is updated by the set in upd_val</span> + <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">base_dict</span><span class="p">[</span><span class="n">upd_key</span><span class="p">],</span> <span class="nb">set</span><span class="p">):</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"type mismatch </span><span class="si">{</span><span class="s1">'.'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">names</span><span class="p">)</span><span class="si">}</span><span class="s2">: is not a set type in base_dict"</span><span class="p">)</span> + <span class="n">base_dict</span><span class="p">[</span><span class="n">upd_key</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">upd_val</span><span class="o">.</span><span class="n">copy</span><span class="p">())</span> + + <span class="k">else</span><span class="p">:</span> + <span class="c1"># if base_dict[upd_key] doesn't exists, set base_dict[upd_key] from a copy of the</span> + <span class="c1"># set in upd_val</span> + <span class="n">base_dict</span><span class="p">[</span><span class="n">upd_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">upd_val</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> + + <span class="k">else</span><span class="p">:</span> + <span class="c1"># for any other type of upd_val replace or add base_dict[upd_key] by a copy</span> + <span class="c1"># of upd_val</span> + <span class="n">base_dict</span><span class="p">[</span><span class="n">upd_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">upd_val</span><span class="p">)</span> 
+</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" 
value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/botdetection/ip_lists.html b/_modules/searx/botdetection/ip_lists.html new file mode 100644 index 000000000..052196a59 --- /dev/null +++ b/_modules/searx/botdetection/ip_lists.html @@ -0,0 +1,194 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.botdetection.ip_lists — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.botdetection.ip_lists</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + 
<div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.botdetection.ip_lists</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">""".. _botdetection.ip_lists:</span> + +<span class="sd">Method ``ip_lists``</span> +<span class="sd">-------------------</span> + +<span class="sd">The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and</span> +<span class="sd">:py:obj:`pass-lists <pass_ip>`.</span> + +<span class="sd">.. code:: toml</span> + +<span class="sd"> [botdetection.ip_lists]</span> + +<span class="sd"> pass_ip = [</span> +<span class="sd"> '167.235.158.251', # IPv4 of check.searx.space</span> +<span class="sd"> '192.168.0.0/16', # IPv4 private network</span> +<span class="sd"> 'fe80::/10' # IPv6 linklocal</span> +<span class="sd"> ]</span> +<span class="sd"> block_ip = [</span> +<span class="sd"> '93.184.216.34', # IPv4 of example.org</span> +<span class="sd"> '257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class</span> +<span class="sd"> ]</span> + +<span class="sd">"""</span> +<span class="c1"># pylint: disable=unused-argument</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Tuple</span> +<span class="kn">from</span> <span class="nn">ipaddress</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">ip_network</span><span class="p">,</span> + <span class="n">IPv4Address</span><span class="p">,</span> + <span class="n">IPv6Address</span><span class="p">,</span> +<span class="p">)</span> + +<span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">config</span> +<span class="kn">from</span> <span class="nn">._helpers</span> <span class="kn">import</span> 
<span class="n">logger</span> + +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'ip_limit'</span><span class="p">)</span> + +<span class="n">SEARXNG_ORG</span> <span class="o">=</span> <span class="p">[</span> + <span class="c1"># https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195</span> + <span class="s1">'167.235.158.251'</span><span class="p">,</span> <span class="c1"># IPv4 check.searx.space</span> + <span class="s1">'2a01:04f8:1c1c:8fc2::/64'</span><span class="p">,</span> <span class="c1"># IPv6 check.searx.space</span> +<span class="p">]</span> +<span class="sd">"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""</span> + + +<div class="viewcode-block" id="pass_ip"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.ip_lists.pass_ip">[docs]</a> +<span class="k">def</span> <span class="nf">pass_ip</span><span class="p">(</span><span class="n">real_ip</span><span class="p">:</span> <span class="n">IPv4Address</span> <span class="o">|</span> <span class="n">IPv6Address</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> <span class="n">config</span><span class="o">.</span><span class="n">Config</span><span class="p">)</span> <span class="o">-></span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Checks if the IP on the subnet is in one of the members of the</span> +<span class="sd"> ``botdetection.ip_lists.pass_ip`` list.</span> +<span class="sd"> """</span> + + <span class="k">if</span> <span class="n">cfg</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'botdetection.ip_lists.pass_searxng_org'</span><span 
class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> + <span class="k">for</span> <span class="n">net</span> <span class="ow">in</span> <span class="n">SEARXNG_ORG</span><span class="p">:</span> + <span class="n">net</span> <span class="o">=</span> <span class="n">ip_network</span><span class="p">(</span><span class="n">net</span><span class="p">,</span> <span class="n">strict</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> + <span class="k">if</span> <span class="n">real_ip</span><span class="o">.</span><span class="n">version</span> <span class="o">==</span> <span class="n">net</span><span class="o">.</span><span class="n">version</span> <span class="ow">and</span> <span class="n">real_ip</span> <span class="ow">in</span> <span class="n">net</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">True</span><span class="p">,</span> <span class="sa">f</span><span class="s2">"IP matches </span><span class="si">{</span><span class="n">net</span><span class="o">.</span><span class="n">compressed</span><span class="si">}</span><span class="s2"> in SEARXNG_ORG list."</span> + <span class="k">return</span> <span class="n">ip_is_subnet_of_member_in_list</span><span class="p">(</span><span class="n">real_ip</span><span class="p">,</span> <span class="s1">'botdetection.ip_lists.pass_ip'</span><span class="p">,</span> <span class="n">cfg</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="block_ip"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.ip_lists.block_ip">[docs]</a> +<span class="k">def</span> <span class="nf">block_ip</span><span class="p">(</span><span class="n">real_ip</span><span class="p">:</span> <span class="n">IPv4Address</span> <span class="o">|</span> <span class="n">IPv6Address</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> 
<span class="n">config</span><span class="o">.</span><span class="n">Config</span><span class="p">)</span> <span class="o">-></span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Checks if the IP on the subnet is in one of the members of the</span> +<span class="sd"> ``botdetection.ip_lists.block_ip`` list.</span> +<span class="sd"> """</span> + + <span class="n">block</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="n">ip_is_subnet_of_member_in_list</span><span class="p">(</span><span class="n">real_ip</span><span class="p">,</span> <span class="s1">'botdetection.ip_lists.block_ip'</span><span class="p">,</span> <span class="n">cfg</span><span class="p">)</span> + <span class="k">if</span> <span class="n">block</span><span class="p">:</span> + <span class="n">msg</span> <span class="o">+=</span> <span class="s2">" To remove IP from list, please contact the maintainer of the service."</span> + <span class="k">return</span> <span class="n">block</span><span class="p">,</span> <span class="n">msg</span></div> + + + +<span class="k">def</span> <span class="nf">ip_is_subnet_of_member_in_list</span><span class="p">(</span> + <span class="n">real_ip</span><span class="p">:</span> <span class="n">IPv4Address</span> <span class="o">|</span> <span class="n">IPv6Address</span><span class="p">,</span> <span class="n">list_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> <span class="n">config</span><span class="o">.</span><span class="n">Config</span> +<span class="p">)</span> <span class="o">-></span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span> + + <span class="k">for</span> 
<span class="n">net</span> <span class="ow">in</span> <span class="n">cfg</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">list_name</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="p">[]):</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">net</span> <span class="o">=</span> <span class="n">ip_network</span><span class="p">(</span><span class="n">net</span><span class="p">,</span> <span class="n">strict</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> + <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"invalid IP </span><span class="si">%s</span><span class="s2"> in </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">net</span><span class="p">,</span> <span class="n">list_name</span><span class="p">)</span> + <span class="k">continue</span> + <span class="k">if</span> <span class="n">real_ip</span><span class="o">.</span><span class="n">version</span> <span class="o">==</span> <span class="n">net</span><span class="o">.</span><span class="n">version</span> <span class="ow">and</span> <span class="n">real_ip</span> <span class="ow">in</span> <span class="n">net</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">True</span><span class="p">,</span> <span class="sa">f</span><span class="s2">"IP matches </span><span class="si">{</span><span class="n">net</span><span class="o">.</span><span class="n">compressed</span><span class="si">}</span><span class="s2"> in </span><span class="si">{</span><span class="n">list_name</span><span class="si">}</span><span class="s2">."</span> + <span class="k">return</span> <span class="kc">False</span><span class="p">,</span> <span 
class="sa">f</span><span class="s2">"IP is not a member of an item in the f</span><span class="si">{</span><span class="n">list_name</span><span class="si">}</span><span class="s2"> list"</span> +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" 
action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/botdetection/link_token.html b/_modules/searx/botdetection/link_token.html new file mode 100644 index 000000000..38bc4cde5 --- /dev/null +++ b/_modules/searx/botdetection/link_token.html @@ -0,0 +1,270 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.botdetection.link_token — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.botdetection.link_token</a></li> + </ul> + </div> + + <div class="document"> + <div 
class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.botdetection.link_token</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""</span> +<span class="sd">Method ``link_token``</span> +<span class="sd">---------------------</span> + +<span class="sd">The ``link_token`` method evaluates a request as :py:obj:`suspicious</span> +<span class="sd"><is_suspicious>` if the URL ``/client<token>.css`` is not requested by the</span> +<span class="sd">client. By adding a random component (the token) in the URL, a bot can not send</span> +<span class="sd">a ping by request a static URL.</span> + +<span class="sd">.. note::</span> + +<span class="sd"> This method requires a redis DB and needs a HTTP X-Forwarded-For_ header.</span> + +<span class="sd">To get in use of this method a flask URL route needs to be added:</span> + +<span class="sd">.. code:: python</span> + +<span class="sd"> @app.route('/client<token>.css', methods=['GET', 'POST'])</span> +<span class="sd"> def client_token(token=None):</span> +<span class="sd"> link_token.ping(request, token)</span> +<span class="sd"> return Response('', mimetype='text/css')</span> + +<span class="sd">And in the HTML template from flask a stylesheet link is needed (the value of</span> +<span class="sd">``link_token`` comes from :py:obj:`get_token`):</span> + +<span class="sd">.. code:: html</span> + +<span class="sd"> <link rel="stylesheet"</span> +<span class="sd"> href="{{ url_for('client_token', token=link_token) }}"</span> +<span class="sd"> type="text/css" ></span> + +<span class="sd">.. 
_X-Forwarded-For:</span> +<span class="sd"> https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For</span> + +<span class="sd">"""</span> +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> +<span class="kn">from</span> <span class="nn">ipaddress</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">IPv4Network</span><span class="p">,</span> + <span class="n">IPv6Network</span><span class="p">,</span> + <span class="n">ip_address</span><span class="p">,</span> +<span class="p">)</span> + +<span class="kn">import</span> <span class="nn">string</span> +<span class="kn">import</span> <span class="nn">random</span> +<span class="kn">import</span> <span class="nn">flask</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">logger</span> +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">redisdb</span> +<span class="kn">from</span> <span class="nn">searx.redislib</span> <span class="kn">import</span> <span class="n">secret_hash</span> + +<span class="kn">from</span> <span class="nn">._helpers</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">get_network</span><span class="p">,</span> + <span class="n">get_real_ip</span><span class="p">,</span> +<span class="p">)</span> + +<span class="n">TOKEN_LIVE_TIME</span> <span class="o">=</span> <span class="mi">600</span> +<span class="sd">"""Lifetime (sec) of limiter's CSS token."""</span> + +<span class="n">PING_LIVE_TIME</span> <span class="o">=</span> <span class="mi">3600</span> +<span class="sd">"""Lifetime (sec) of the ping-key from a client (request)"""</span> + +<span class="n">PING_KEY</span> <span class="o">=</span> <span class="s1">'SearXNG_limiter.ping'</span> +<span class="sd">"""Prefix of all ping-keys generated by :py:obj:`get_ping_key`"""</span> 
+ +<span class="n">TOKEN_KEY</span> <span class="o">=</span> <span class="s1">'SearXNG_limiter.token'</span> +<span class="sd">"""Key for which the current token is stored in the DB"""</span> + +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'botdetection.link_token'</span><span class="p">)</span> + + +<div class="viewcode-block" id="is_suspicious"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.link_token.is_suspicious">[docs]</a> +<span class="k">def</span> <span class="nf">is_suspicious</span><span class="p">(</span><span class="n">network</span><span class="p">:</span> <span class="n">IPv4Network</span> <span class="o">|</span> <span class="n">IPv6Network</span><span class="p">,</span> <span class="n">request</span><span class="p">:</span> <span class="n">flask</span><span class="o">.</span><span class="n">Request</span><span class="p">,</span> <span class="n">renew</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Checks whether a valid ping is exists for this (client) network, if not</span> +<span class="sd"> this request is rated as *suspicious*. 
If a valid ping exists and argument</span> +<span class="sd"> ``renew`` is ``True`` the expire time of this ping is reset to</span> +<span class="sd"> :py:obj:`PING_LIVE_TIME`.</span> + +<span class="sd"> """</span> + <span class="n">redis_client</span> <span class="o">=</span> <span class="n">redisdb</span><span class="o">.</span><span class="n">client</span><span class="p">()</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">redis_client</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">False</span> + + <span class="n">ping_key</span> <span class="o">=</span> <span class="n">get_ping_key</span><span class="p">(</span><span class="n">network</span><span class="p">,</span> <span class="n">request</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">redis_client</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ping_key</span><span class="p">):</span> + <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"missing ping (IP: </span><span class="si">%s</span><span class="s2">) / request: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">network</span><span class="o">.</span><span class="n">compressed</span><span class="p">,</span> <span class="n">ping_key</span><span class="p">)</span> + <span class="k">return</span> <span class="kc">True</span> + + <span class="k">if</span> <span class="n">renew</span><span class="p">:</span> + <span class="n">redis_client</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">ping_key</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">ex</span><span class="o">=</span><span class="n">PING_LIVE_TIME</span><span class="p">)</span> + + <span class="n">logger</span><span 
class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"found ping for (client) network </span><span class="si">%s</span><span class="s2"> -> </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">network</span><span class="o">.</span><span class="n">compressed</span><span class="p">,</span> <span class="n">ping_key</span><span class="p">)</span> + <span class="k">return</span> <span class="kc">False</span></div> + + + +<div class="viewcode-block" id="ping"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.link_token.ping">[docs]</a> +<span class="k">def</span> <span class="nf">ping</span><span class="p">(</span><span class="n">request</span><span class="p">:</span> <span class="n">flask</span><span class="o">.</span><span class="n">Request</span><span class="p">,</span> <span class="n">token</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""This function is called by a request to URL ``/client<token>.css``. 
If</span> +<span class="sd"> ``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.</span> +<span class="sd"> The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.</span> + +<span class="sd"> """</span> + <span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">redis_client</span><span class="p">,</span> <span class="n">cfg</span> <span class="c1"># pylint: disable=import-outside-toplevel, cyclic-import</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">redis_client</span><span class="p">:</span> + <span class="k">return</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">token_is_valid</span><span class="p">(</span><span class="n">token</span><span class="p">):</span> + <span class="k">return</span> + + <span class="n">real_ip</span> <span class="o">=</span> <span class="n">ip_address</span><span class="p">(</span><span class="n">get_real_ip</span><span class="p">(</span><span class="n">request</span><span class="p">))</span> + <span class="n">network</span> <span class="o">=</span> <span class="n">get_network</span><span class="p">(</span><span class="n">real_ip</span><span class="p">,</span> <span class="n">cfg</span><span class="p">)</span> + + <span class="n">ping_key</span> <span class="o">=</span> <span class="n">get_ping_key</span><span class="p">(</span><span class="n">network</span><span class="p">,</span> <span class="n">request</span><span class="p">)</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"store ping_key for (client) network </span><span class="si">%s</span><span class="s2"> (IP </span><span class="si">%s</span><span class="s2">) -> </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">network</span><span class="o">.</span><span class="n">compressed</span><span class="p">,</span> <span 
class="n">real_ip</span><span class="p">,</span> <span class="n">ping_key</span><span class="p">)</span> + <span class="n">redis_client</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">ping_key</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">ex</span><span class="o">=</span><span class="n">PING_LIVE_TIME</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="get_ping_key"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.link_token.get_ping_key">[docs]</a> +<span class="k">def</span> <span class="nf">get_ping_key</span><span class="p">(</span><span class="n">network</span><span class="p">:</span> <span class="n">IPv4Network</span> <span class="o">|</span> <span class="n">IPv6Network</span><span class="p">,</span> <span class="n">request</span><span class="p">:</span> <span class="n">flask</span><span class="o">.</span><span class="n">Request</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Generates a hashed key that fits (more or less) to a *WEB-browser</span> +<span class="sd"> session* in a network."""</span> + <span class="k">return</span> <span class="p">(</span> + <span class="n">PING_KEY</span> + <span class="o">+</span> <span class="s2">"["</span> + <span class="o">+</span> <span class="n">secret_hash</span><span class="p">(</span> + <span class="n">network</span><span class="o">.</span><span class="n">compressed</span> <span class="o">+</span> <span class="n">request</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Accept-Language'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> <span class="o">+</span> <span class="n">request</span><span class="o">.</span><span 
class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'User-Agent'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> + <span class="p">)</span> + <span class="o">+</span> <span class="s2">"]"</span> + <span class="p">)</span></div> + + + +<span class="k">def</span> <span class="nf">token_is_valid</span><span class="p">(</span><span class="n">token</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> + <span class="n">valid</span> <span class="o">=</span> <span class="n">token</span> <span class="o">==</span> <span class="n">get_token</span><span class="p">()</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"token is valid --> </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">valid</span><span class="p">)</span> + <span class="k">return</span> <span class="n">valid</span> + + +<div class="viewcode-block" id="get_token"> +<a class="viewcode-back" href="../../../src/searx.botdetection.html#searx.botdetection.link_token.get_token">[docs]</a> +<span class="k">def</span> <span class="nf">get_token</span><span class="p">()</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns current token. 
If there is no currently active token a new token</span> +<span class="sd"> is generated randomly and stored in the redis DB.</span> + +<span class="sd"> - :py:obj:`TOKEN_LIVE_TIME`</span> +<span class="sd"> - :py:obj:`TOKEN_KEY`</span> + +<span class="sd"> """</span> + <span class="n">redis_client</span> <span class="o">=</span> <span class="n">redisdb</span><span class="o">.</span><span class="n">client</span><span class="p">()</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">redis_client</span><span class="p">:</span> + <span class="c1"># This function is also called when limiter is inactive / no redis DB</span> + <span class="c1"># (see render function in webapp.py)</span> + <span class="k">return</span> <span class="s1">'12345678'</span> + <span class="n">token</span> <span class="o">=</span> <span class="n">redis_client</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">TOKEN_KEY</span><span class="p">)</span> + <span class="k">if</span> <span class="n">token</span><span class="p">:</span> + <span class="n">token</span> <span class="o">=</span> <span class="n">token</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'UTF-8'</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">token</span> <span class="o">=</span> <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">string</span><span class="o">.</span><span class="n">ascii_lowercase</span> <span class="o">+</span> <span class="n">string</span><span class="o">.</span><span class="n">digits</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span 
class="mi">16</span><span class="p">))</span> + <span class="n">redis_client</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">TOKEN_KEY</span><span class="p">,</span> <span class="n">token</span><span class="p">,</span> <span class="n">ex</span><span class="o">=</span><span class="n">TOKEN_LIVE_TIME</span><span class="p">)</span> + <span class="k">return</span> <span class="n">token</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a 
href="../../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/enginelib.html b/_modules/searx/enginelib.html new file mode 100644 index 000000000..e72dd533d --- /dev/null +++ b/_modules/searx/enginelib.html @@ -0,0 +1,251 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.enginelib — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.enginelib</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for 
searx.enginelib</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Implementations of the framework for the SearXNG engines.</span> + +<span class="sd">.. hint::</span> + +<span class="sd"> The long term goal is to modularize all implementations of the engine</span> +<span class="sd"> framework here in this Python package. ToDo:</span> + +<span class="sd"> - move implementations of the :ref:`searx.engines loader` to a new module in</span> +<span class="sd"> the :py:obj:`searx.enginelib` namespace.</span> + +<span class="sd">"""</span> + + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span><span class="p">,</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">TYPE_CHECKING</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">from</span> <span class="nn">searx.enginelib</span> <span class="kn">import</span> <span class="n">traits</span> + + +<div class="viewcode-block" id="Engine"> +<a class="viewcode-back" href="../../dev/engines/enginelib.html#searx.enginelib.Engine">[docs]</a> +<span class="k">class</span> <span class="nc">Engine</span><span class="p">:</span> <span class="c1"># pylint: disable=too-few-public-methods</span> +<span class="w"> </span><span class="sd">"""Class of engine instances build from YAML settings.</span> + +<span class="sd"> Further documentation see :ref:`general engine configuration`.</span> + +<span class="sd"> .. 
hint::</span> + +<span class="sd"> This class is currently never initialized and only used for type hinting.</span> +<span class="sd"> """</span> + + <span class="c1"># Common options in the engine module</span> + + <span class="n">engine_type</span><span class="p">:</span> <span class="nb">str</span> +<span class="w"> </span><span class="sd">"""Type of the engine (:ref:`searx.search.processors`)"""</span> + + <span class="n">paging</span><span class="p">:</span> <span class="nb">bool</span> +<span class="w"> </span><span class="sd">"""Engine supports multiple pages."""</span> + + <span class="n">time_range_support</span><span class="p">:</span> <span class="nb">bool</span> +<span class="w"> </span><span class="sd">"""Engine supports search time range."""</span> + + <span class="n">safesearch</span><span class="p">:</span> <span class="nb">bool</span> +<span class="w"> </span><span class="sd">"""Engine supports SafeSearch"""</span> + + <span class="n">language_support</span><span class="p">:</span> <span class="nb">bool</span> +<span class="w"> </span><span class="sd">"""Engine supports languages (locales) search."""</span> + + <span class="n">language</span><span class="p">:</span> <span class="nb">str</span> +<span class="w"> </span><span class="sd">"""For an engine, when there is ``language: ...`` in the YAML settings the engine</span> +<span class="sd"> does support only this one language:</span> + +<span class="sd"> .. code:: yaml</span> + +<span class="sd"> - name: google french</span> +<span class="sd"> engine: google</span> +<span class="sd"> language: fr</span> +<span class="sd"> """</span> + + <span class="n">region</span><span class="p">:</span> <span class="nb">str</span> +<span class="w"> </span><span class="sd">"""For an engine, when there is ``region: ...`` in the YAML settings the engine</span> +<span class="sd"> does support only this one region::</span> + +<span class="sd"> .. 
code:: yaml</span> + +<span class="sd"> - name: google belgium</span> +<span class="sd"> engine: google</span> +<span class="sd"> region: fr-BE</span> +<span class="sd"> """</span> + + <span class="n">fetch_traits</span><span class="p">:</span> <span class="n">Callable</span> +<span class="w"> </span><span class="sd">"""Function to fetch engine's traits from origin."""</span> + + <span class="n">traits</span><span class="p">:</span> <span class="n">traits</span><span class="o">.</span><span class="n">EngineTraits</span> +<span class="w"> </span><span class="sd">"""Traits of the engine."""</span> + + <span class="c1"># settings.yml</span> + + <span class="n">categories</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> +<span class="w"> </span><span class="sd">"""Specifies to which :ref:`engine categories` the engine should be added."""</span> + + <span class="n">name</span><span class="p">:</span> <span class="nb">str</span> +<span class="w"> </span><span class="sd">"""Name that will be used across SearXNG to define this engine. 
In settings, on</span> +<span class="sd"> the result page .."""</span> + + <span class="n">engine</span><span class="p">:</span> <span class="nb">str</span> +<span class="w"> </span><span class="sd">"""Name of the python file used to handle requests and responses to and from</span> +<span class="sd"> this search engine (file name from :origin:`searx/engines` without</span> +<span class="sd"> ``.py``)."""</span> + + <span class="n">enable_http</span><span class="p">:</span> <span class="nb">bool</span> +<span class="w"> </span><span class="sd">"""Enable HTTP (by default only HTTPS is enabled)."""</span> + + <span class="n">shortcut</span><span class="p">:</span> <span class="nb">str</span> +<span class="w"> </span><span class="sd">"""Code used to execute bang requests (``!foo``)"""</span> + + <span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> +<span class="w"> </span><span class="sd">"""Specific timeout for search-engine."""</span> + + <span class="n">display_error_messages</span><span class="p">:</span> <span class="nb">bool</span> +<span class="w"> </span><span class="sd">"""Display error messages on the web UI."""</span> + + <span class="n">proxies</span><span class="p">:</span> <span class="nb">dict</span> +<span class="w"> </span><span class="sd">"""Set proxies for a specific engine (YAML):</span> + +<span class="sd"> .. code:: yaml</span> + +<span class="sd"> proxies :</span> +<span class="sd"> http: socks5://proxy:port</span> +<span class="sd"> https: socks5://proxy:port</span> +<span class="sd"> """</span> + + <span class="n">disabled</span><span class="p">:</span> <span class="nb">bool</span> +<span class="w"> </span><span class="sd">"""To disable by default the engine, but not deleting it. 
It will allow the</span> +<span class="sd"> user to manually activate it in the settings."""</span> + + <span class="n">inactive</span><span class="p">:</span> <span class="nb">bool</span> +<span class="w"> </span><span class="sd">"""Remove the engine from the settings (*disabled & removed*)."""</span> + + <span class="n">about</span><span class="p">:</span> <span class="nb">dict</span> +<span class="w"> </span><span class="sd">"""Additional fields describing the engine.</span> + +<span class="sd"> .. code:: yaml</span> + +<span class="sd"> about:</span> +<span class="sd"> website: https://example.com</span> +<span class="sd"> wikidata_id: Q306656</span> +<span class="sd"> official_api_documentation: https://example.com/api-doc</span> +<span class="sd"> use_official_api: true</span> +<span class="sd"> require_api_key: true</span> +<span class="sd"> results: HTML</span> +<span class="sd"> """</span> + + <span class="n">using_tor_proxy</span><span class="p">:</span> <span class="nb">bool</span> +<span class="w"> </span><span class="sd">"""Using tor proxy (``true``) or not (``false``) for this engine."""</span> + + <span class="n">send_accept_language_header</span><span class="p">:</span> <span class="nb">bool</span> +<span class="w"> </span><span class="sd">"""When this option is activated, the language (locale) that is selected by</span> +<span class="sd"> the user is used to build and send a ``Accept-Language`` header in the</span> +<span class="sd"> request to the origin search engine."""</span> + + <span class="n">tokens</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> +<span class="w"> </span><span class="sd">"""A list of secret tokens to make this engine *private*, more details see</span> +<span class="sd"> :ref:`private engines`."""</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div 
class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div 
class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/enginelib/traits.html b/_modules/searx/enginelib/traits.html new file mode 100644 index 000000000..c99f0a331 --- /dev/null +++ b/_modules/searx/enginelib/traits.html @@ -0,0 +1,397 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.enginelib.traits — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../enginelib.html" accesskey="U">searx.enginelib</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.enginelib.traits</a></li> + </ul> + 
</div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.enginelib.traits</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Engine's traits are fetched from the origin engines and stored in a JSON file</span> +<span class="sd">in the *data folder*. Most often traits are languages and region codes and</span> +<span class="sd">their mapping from SearXNG's representation to the representation in the origin</span> +<span class="sd">search engine. For new traits new properties can be added to the class</span> +<span class="sd">:py:class:`EngineTraits`.</span> + +<span class="sd">To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be</span> +<span class="sd">used.</span> +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> +<span class="kn">import</span> <span class="nn">json</span> +<span class="kn">import</span> <span class="nn">dataclasses</span> +<span class="kn">import</span> <span class="nn">types</span> +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Literal</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">TYPE_CHECKING</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">locales</span> +<span class="kn">from</span> <span class="nn">searx.data</span> <span class="kn">import</span> <span class="n">data_dir</span><span class="p">,</span> <span 
class="n">ENGINE_TRAITS</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">Engine</span> + + +<div class="viewcode-block" id="EngineTraitsEncoder"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraitsEncoder">[docs]</a> +<span class="k">class</span> <span class="nc">EngineTraitsEncoder</span><span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">JSONEncoder</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Encodes :class:`EngineTraits` to a serializable object, see</span> +<span class="sd"> :class:`json.JSONEncoder`."""</span> + +<div class="viewcode-block" id="EngineTraitsEncoder.default"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraitsEncoder.default">[docs]</a> + <span class="k">def</span> <span class="nf">default</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">o</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Return dictionary of a :class:`EngineTraits` object."""</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">o</span><span class="p">,</span> <span class="n">EngineTraits</span><span class="p">):</span> + <span class="k">return</span> <span class="n">o</span><span class="o">.</span><span class="vm">__dict__</span> + <span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">default</span><span class="p">(</span><span class="n">o</span><span class="p">)</span></div> +</div> + + + +<div class="viewcode-block" id="EngineTraits"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraits">[docs]</a> 
+<span class="nd">@dataclasses</span><span class="o">.</span><span class="n">dataclass</span> +<span class="k">class</span> <span class="nc">EngineTraits</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""The class is intended to be instantiated for each engine."""</span> + + <span class="n">regions</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataclasses</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="n">default_factory</span><span class="o">=</span><span class="nb">dict</span><span class="p">)</span> +<span class="w"> </span><span class="sd">"""Maps SearXNG's internal representation of a region to the one of the engine.</span> + +<span class="sd"> SearXNG's internal representation can be parsed by babel and the value is</span> +<span class="sd"> send to the engine:</span> + +<span class="sd"> .. 
code:: python</span> + +<span class="sd"> regions ={</span> +<span class="sd"> 'fr-BE' : <engine's region name>,</span> +<span class="sd"> }</span> + +<span class="sd"> for key, engine_region in regions.items():</span> +<span class="sd"> searxng_region = babel.Locale.parse(key, sep='-')</span> +<span class="sd"> ...</span> +<span class="sd"> """</span> + + <span class="n">languages</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataclasses</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="n">default_factory</span><span class="o">=</span><span class="nb">dict</span><span class="p">)</span> +<span class="w"> </span><span class="sd">"""Maps SearXNG's internal representation of a language to the one of the engine.</span> + +<span class="sd"> SearXNG's internal representation can be parsed by babel and the value is</span> +<span class="sd"> sent to the engine:</span> + +<span class="sd"> .. 
code:: python</span> + +<span class="sd"> languages = {</span> +<span class="sd"> 'ca' : <engine's language name>,</span> +<span class="sd"> }</span> + +<span class="sd"> for key, engine_lang in languages.items():</span> +<span class="sd"> searxng_lang = babel.Locale.parse(key)</span> +<span class="sd"> ...</span> +<span class="sd"> """</span> + + <span class="n">all_locale</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> +<span class="w"> </span><span class="sd">"""To which locale value SearXNG's ``all`` language is mapped (shown as "Default</span> +<span class="sd"> language").</span> +<span class="sd"> """</span> + + <span class="n">data_type</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="s1">'traits_v1'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'traits_v1'</span> +<span class="w"> </span><span class="sd">"""Data type, default is 'traits_v1'.</span> +<span class="sd"> """</span> + + <span class="n">custom</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Dict</span><span class="p">],</span> <span class="n">Iterable</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="n">dataclasses</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="n">default_factory</span><span class="o">=</span><span class="nb">dict</span><span class="p">)</span> +<span class="w"> </span><span class="sd">"""A place to store engine's custom traits, not related to the SearXNG core.</span> +<span class="sd"> 
"""</span> + +<div class="viewcode-block" id="EngineTraits.get_language"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraits.get_language">[docs]</a> + <span class="k">def</span> <span class="nf">get_language</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">searxng_locale</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Return engine's language string that *best fits* to SearXNG's locale.</span> + +<span class="sd"> :param searxng_locale: SearXNG's internal representation of locale</span> +<span class="sd"> selected by the user.</span> + +<span class="sd"> :param default: engine's default language</span> + +<span class="sd"> The *best fits* rules are implemented in</span> +<span class="sd"> :py:obj:`searx.locales.get_engine_locale`. 
Except for the special value ``all``</span> +<span class="sd"> which is determined from :py:obj:`EngineTraits.all_locale`.</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="n">searxng_locale</span> <span class="o">==</span> <span class="s1">'all'</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">all_locale</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">all_locale</span> + <span class="k">return</span> <span class="n">locales</span><span class="o">.</span><span class="n">get_engine_locale</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">languages</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="n">default</span><span class="p">)</span></div> + + +<div class="viewcode-block" id="EngineTraits.get_region"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraits.get_region">[docs]</a> + <span class="k">def</span> <span class="nf">get_region</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">searxng_locale</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Return engine's region string that best fits to SearXNG's locale.</span> + +<span class="sd"> :param searxng_locale: SearXNG's internal representation of locale</span> +<span class="sd"> selected by the user.</span> + +<span class="sd"> :param default: engine's default region</span> + +<span class="sd"> The *best fits* rules are implemented 
in</span> +<span class="sd"> :py:obj:`searx.locales.get_engine_locale`. Except for the special value ``all``</span> +<span class="sd"> which is determined from :py:obj:`EngineTraits.all_locale`.</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="n">searxng_locale</span> <span class="o">==</span> <span class="s1">'all'</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">all_locale</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">all_locale</span> + <span class="k">return</span> <span class="n">locales</span><span class="o">.</span><span class="n">get_engine_locale</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">regions</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="n">default</span><span class="p">)</span></div> + + +<div class="viewcode-block" id="EngineTraits.is_locale_supported"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraits.is_locale_supported">[docs]</a> + <span class="k">def</span> <span class="nf">is_locale_supported</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">searxng_locale</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""A *locale* (SearXNG's internal representation) is considered to be</span> +<span class="sd"> supported by the engine if the *region* or the *language* is supported</span> +<span class="sd"> by the engine.</span> + +<span class="sd"> For verification the functions 
:py:func:`EngineTraits.get_region` and</span> +<span class="sd"> :py:func:`EngineTraits.get_language` are used.</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">data_type</span> <span class="o">==</span> <span class="s1">'traits_v1'</span><span class="p">:</span> + <span class="k">return</span> <span class="nb">bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">)</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">))</span> + + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'engine traits of type </span><span class="si">%s</span><span class="s1"> is unknown'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">data_type</span><span class="p">)</span></div> + + +<div class="viewcode-block" id="EngineTraits.copy"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraits.copy">[docs]</a> + <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Create a copy of the dataclass object."""</span> + <span class="k">return</span> <span class="n">EngineTraits</span><span class="p">(</span><span class="o">**</span><span class="n">dataclasses</span><span class="o">.</span><span class="n">asdict</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div> + + +<div class="viewcode-block" id="EngineTraits.fetch_traits"> +<a class="viewcode-back" 
href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraits.fetch_traits">[docs]</a> + <span class="nd">@classmethod</span> + <span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">engine</span><span class="p">:</span> <span class="n">Engine</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="s1">'EngineTraits'</span><span class="p">,</span> <span class="kc">None</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch</span> +<span class="sd"> and set properties from the origin engine in the object ``engine_traits``. If</span> +<span class="sd"> the function does not exist, ``None`` is returned.</span> +<span class="sd"> """</span> + + <span class="n">fetch_traits</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'fetch_traits'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="n">engine_traits</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="k">if</span> <span class="n">fetch_traits</span><span class="p">:</span> + <span class="n">engine_traits</span> <span class="o">=</span> <span class="bp">cls</span><span class="p">()</span> + <span class="n">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">)</span> + <span class="k">return</span> <span class="n">engine_traits</span></div> + + +<div class="viewcode-block" id="EngineTraits.set_traits"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraits.set_traits">[docs]</a> + <span class="k">def</span> <span class="nf">set_traits</span><span class="p">(</span><span 
class="bp">self</span><span class="p">,</span> <span class="n">engine</span><span class="p">:</span> <span class="n">Engine</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Set traits from self object in a :py:obj:`.Engine` namespace.</span> + +<span class="sd"> :param engine: engine instance build by :py:func:`searx.engines.load_engine`</span> +<span class="sd"> """</span> + + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">data_type</span> <span class="o">==</span> <span class="s1">'traits_v1'</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_set_traits_v1</span><span class="p">(</span><span class="n">engine</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'engine traits of type </span><span class="si">%s</span><span class="s1"> is unknown'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">data_type</span><span class="p">)</span></div> + + + <span class="k">def</span> <span class="nf">_set_traits_v1</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">engine</span><span class="p">:</span> <span class="n">Engine</span><span class="p">):</span> + <span class="c1"># For an engine, when there is `language: ...` in the YAML settings the engine</span> + <span class="c1"># does support only this one language (region)::</span> + <span class="c1">#</span> + <span class="c1"># - name: google italian</span> + <span class="c1"># engine: google</span> + <span class="c1"># language: it</span> + <span class="c1"># region: it-IT # type: ignore</span> + + <span class="n">traits</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> + + <span 
class="n">_msg</span> <span class="o">=</span> <span class="s2">"settings.yml - engine: '</span><span class="si">%s</span><span class="s2">' / </span><span class="si">%s</span><span class="s2">: '</span><span class="si">%s</span><span class="s2">' not supported"</span> + + <span class="n">languages</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">languages</span> + <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'language'</span><span class="p">):</span> + <span class="k">if</span> <span class="n">engine</span><span class="o">.</span><span class="n">language</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">languages</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">_msg</span> <span class="o">%</span> <span class="p">(</span><span class="n">engine</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">'language'</span><span class="p">,</span> <span class="n">engine</span><span class="o">.</span><span class="n">language</span><span class="p">))</span> + <span class="n">traits</span><span class="o">.</span><span class="n">languages</span> <span class="o">=</span> <span class="p">{</span><span class="n">engine</span><span class="o">.</span><span class="n">language</span><span class="p">:</span> <span class="n">languages</span><span class="p">[</span><span class="n">engine</span><span class="o">.</span><span class="n">language</span><span class="p">]}</span> + + <span class="n">regions</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">regions</span> + <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span 
class="s1">'region'</span><span class="p">):</span> + <span class="k">if</span> <span class="n">engine</span><span class="o">.</span><span class="n">region</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">regions</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">_msg</span> <span class="o">%</span> <span class="p">(</span><span class="n">engine</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">'region'</span><span class="p">,</span> <span class="n">engine</span><span class="o">.</span><span class="n">region</span><span class="p">))</span> + <span class="n">traits</span><span class="o">.</span><span class="n">regions</span> <span class="o">=</span> <span class="p">{</span><span class="n">engine</span><span class="o">.</span><span class="n">region</span><span class="p">:</span> <span class="n">regions</span><span class="p">[</span><span class="n">engine</span><span class="o">.</span><span class="n">region</span><span class="p">]}</span> + + <span class="n">engine</span><span class="o">.</span><span class="n">language_support</span> <span class="o">=</span> <span class="nb">bool</span><span class="p">(</span><span class="n">traits</span><span class="o">.</span><span class="n">languages</span> <span class="ow">or</span> <span class="n">traits</span><span class="o">.</span><span class="n">regions</span><span class="p">)</span> + + <span class="c1"># set the copied & modified traits in engine's namespace</span> + <span class="n">engine</span><span class="o">.</span><span class="n">traits</span> <span class="o">=</span> <span class="n">traits</span></div> + + + +<div class="viewcode-block" id="EngineTraitsMap"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraitsMap">[docs]</a> +<span class="k">class</span> <span class="nc">EngineTraitsMap</span><span 
class="p">(</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">EngineTraits</span><span class="p">]):</span> +<span class="w"> </span><span class="sd">"""A python dictionary to map :class:`EngineTraits` by engine name."""</span> + + <span class="n">ENGINE_TRAITS_FILE</span> <span class="o">=</span> <span class="p">(</span><span class="n">data_dir</span> <span class="o">/</span> <span class="s1">'engine_traits.json'</span><span class="p">)</span><span class="o">.</span><span class="n">resolve</span><span class="p">()</span> +<span class="w"> </span><span class="sd">"""File with persistence of the :py:obj:`EngineTraitsMap`."""</span> + +<div class="viewcode-block" id="EngineTraitsMap.save_data"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraitsMap.save_data">[docs]</a> + <span class="k">def</span> <span class="nf">save_data</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Store EngineTraitsMap in in file :py:obj:`self.ENGINE_TRAITS_FILE`"""</span> + <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ENGINE_TRAITS_FILE</span><span class="p">,</span> <span class="s1">'w'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span> + <span class="n">json</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">sort_keys</span><span class="o">=</span><span 
class="kc">True</span><span class="p">,</span> <span class="bp">cls</span><span class="o">=</span><span class="n">EngineTraitsEncoder</span><span class="p">)</span></div> + + +<div class="viewcode-block" id="EngineTraitsMap.from_data"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraitsMap.from_data">[docs]</a> + <span class="nd">@classmethod</span> + <span class="k">def</span> <span class="nf">from_data</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span> <span class="o">-></span> <span class="s1">'EngineTraitsMap'</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`"""</span> + <span class="n">obj</span> <span class="o">=</span> <span class="bp">cls</span><span class="p">()</span> + <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">ENGINE_TRAITS</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="n">obj</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">EngineTraits</span><span class="p">(</span><span class="o">**</span><span class="n">v</span><span class="p">)</span> + <span class="k">return</span> <span class="n">obj</span></div> + + + <span class="nd">@classmethod</span> + <span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">log</span><span class="p">:</span> <span class="n">Callable</span><span class="p">)</span> <span class="o">-></span> <span class="s1">'EngineTraitsMap'</span><span class="p">:</span> + <span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">engines</span> <span class="c1"># pylint: disable=cyclic-import, 
import-outside-toplevel</span> + + <span class="n">names</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">engines</span><span class="o">.</span><span class="n">engines</span><span class="p">)</span> + <span class="n">names</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span> + <span class="n">obj</span> <span class="o">=</span> <span class="bp">cls</span><span class="p">()</span> + + <span class="k">for</span> <span class="n">engine_name</span> <span class="ow">in</span> <span class="n">names</span><span class="p">:</span> + <span class="n">engine</span> <span class="o">=</span> <span class="n">engines</span><span class="o">.</span><span class="n">engines</span><span class="p">[</span><span class="n">engine_name</span><span class="p">]</span> + + <span class="n">traits</span> <span class="o">=</span> <span class="n">EngineTraits</span><span class="o">.</span><span class="n">fetch_traits</span><span class="p">(</span><span class="n">engine</span><span class="p">)</span> + <span class="k">if</span> <span class="n">traits</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">log</span><span class="p">(</span><span class="s2">"</span><span class="si">%-20s</span><span class="s2">: SearXNG languages --> </span><span class="si">%s</span><span class="s2"> "</span> <span class="o">%</span> <span class="p">(</span><span class="n">engine_name</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">traits</span><span class="o">.</span><span class="n">languages</span><span class="p">)))</span> + <span class="n">log</span><span class="p">(</span><span class="s2">"</span><span class="si">%-20s</span><span class="s2">: SearXNG regions --> </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span 
class="n">engine_name</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">traits</span><span class="o">.</span><span class="n">regions</span><span class="p">)))</span> + <span class="n">obj</span><span class="p">[</span><span class="n">engine_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">traits</span> + + <span class="k">return</span> <span class="n">obj</span> + +<div class="viewcode-block" id="EngineTraitsMap.set_traits"> +<a class="viewcode-back" href="../../../dev/engines/enginelib.html#searx.enginelib.traits.EngineTraitsMap.set_traits">[docs]</a> + <span class="k">def</span> <span class="nf">set_traits</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">engine</span><span class="p">:</span> <span class="n">Engine</span> <span class="o">|</span> <span class="n">types</span><span class="o">.</span><span class="n">ModuleType</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Set traits in a :py:obj:`Engine` namespace.</span> + +<span class="sd"> :param engine: engine instance build by :py:func:`searx.engines.load_engine`</span> +<span class="sd"> """</span> + + <span class="n">engine_traits</span> <span class="o">=</span> <span class="n">EngineTraits</span><span class="p">(</span><span class="n">data_type</span><span class="o">=</span><span class="s1">'traits_v1'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">engine</span><span class="o">.</span><span class="n">name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span> + <span class="n">engine_traits</span> <span class="o">=</span> <span class="bp">self</span><span class="p">[</span><span class="n">engine</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> + + <span class="k">elif</span> <span class="n">engine</span><span 
class="o">.</span><span class="n">engine</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span> + <span class="c1"># The key of the dictionary traits_map is the *engine name*</span> + <span class="c1"># configured in settings.xml. When multiple engines are configured</span> + <span class="c1"># in settings.yml to use the same origin engine (python module)</span> + <span class="c1"># these additional engines can use the languages from the origin</span> + <span class="c1"># engine. For this use the configured ``engine: ...`` from</span> + <span class="c1"># settings.yml</span> + <span class="n">engine_traits</span> <span class="o">=</span> <span class="bp">self</span><span class="p">[</span><span class="n">engine</span><span class="o">.</span><span class="n">engine</span><span class="p">]</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">set_traits</span><span class="p">(</span><span class="n">engine</span><span class="p">)</span></div> +</div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li 
class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../enginelib.html">searx.enginelib</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines.html b/_modules/searx/engines.html new file mode 100644 index 000000000..ac968f72a --- /dev/null +++ b/_modules/searx/engines.html @@ -0,0 +1,369 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines</h1><div 
class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Load and initialize the ``engines``, see :py:func:`load_engines` and register</span> +<span class="sd">:py:obj:`engine_shortcuts`.</span> + +<span class="sd">usage::</span> + +<span class="sd"> load_engines( settings['engines'] )</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="kn">import</span> <span class="nn">sys</span> +<span class="kn">import</span> <span class="nn">copy</span> +<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">realpath</span><span class="p">,</span> <span class="n">dirname</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span><span class="p">,</span> <span class="n">Dict</span> +<span class="kn">import</span> <span class="nn">types</span> +<span class="kn">import</span> <span class="nn">inspect</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">logger</span><span class="p">,</span> <span class="n">settings</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">load_module</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">from</span> <span class="nn">searx.enginelib</span> <span class="kn">import</span> <span class="n">Engine</span> + +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'engines'</span><span class="p">)</span> +<span class="n">ENGINE_DIR</span> <span class="o">=</span> <span class="n">dirname</span><span 
class="p">(</span><span class="n">realpath</span><span class="p">(</span><span class="vm">__file__</span><span class="p">))</span> +<span class="n">ENGINE_DEFAULT_ARGS</span> <span class="o">=</span> <span class="p">{</span> + <span class="c1"># Common options in the engine module</span> + <span class="s2">"engine_type"</span><span class="p">:</span> <span class="s2">"online"</span><span class="p">,</span> + <span class="s2">"paging"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"time_range_support"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"safesearch"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="c1"># settings.yml</span> + <span class="s2">"categories"</span><span class="p">:</span> <span class="p">[</span><span class="s2">"general"</span><span class="p">],</span> + <span class="s2">"enable_http"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"shortcut"</span><span class="p">:</span> <span class="s2">"-"</span><span class="p">,</span> + <span class="s2">"timeout"</span><span class="p">:</span> <span class="n">settings</span><span class="p">[</span><span class="s2">"outgoing"</span><span class="p">][</span><span class="s2">"request_timeout"</span><span class="p">],</span> + <span class="s2">"display_error_messages"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"disabled"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"inactive"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"about"</span><span class="p">:</span> <span class="p">{},</span> + <span class="s2">"using_tor_proxy"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span 
class="s2">"send_accept_language_header"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"tokens"</span><span class="p">:</span> <span class="p">[],</span> + <span class="s2">"max_page"</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> +<span class="p">}</span> +<span class="c1"># set automatically when an engine does not have any tab category</span> +<span class="n">DEFAULT_CATEGORY</span> <span class="o">=</span> <span class="s1">'other'</span> + + +<span class="c1"># Defaults for the namespace of an engine module, see :py:func:`load_engine`</span> + +<span class="n">categories</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'general'</span><span class="p">:</span> <span class="p">[]}</span> +<span class="n">engines</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Engine</span> <span class="o">|</span> <span class="n">types</span><span class="o">.</span><span class="n">ModuleType</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> +<span class="n">engine_shortcuts</span> <span class="o">=</span> <span class="p">{}</span> +<span class="sd">"""Simple map of registered *shortcuts* to name of the engine (or ``None``).</span> + +<span class="sd">::</span> + +<span class="sd"> engine_shortcuts[engine.shortcut] = engine.name</span> + +<span class="sd">:meta hide-value:</span> +<span class="sd">"""</span> + + +<span class="k">def</span> <span class="nf">check_engine_module</span><span class="p">(</span><span class="n">module</span><span class="p">:</span> <span class="n">types</span><span class="o">.</span><span class="n">ModuleType</span><span class="p">):</span> + <span class="c1"># probe unintentional name collisions / for example name collisions caused</span> + <span class="c1"># by import statements in the engine module 
..</span> + + <span class="c1"># network: https://github.com/searxng/searxng/issues/762#issuecomment-1605323861</span> + <span class="n">obj</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">module</span><span class="p">,</span> <span class="s1">'network'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">obj</span> <span class="ow">and</span> <span class="n">inspect</span><span class="o">.</span><span class="n">ismodule</span><span class="p">(</span><span class="n">obj</span><span class="p">):</span> + <span class="n">msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'type of </span><span class="si">{</span><span class="n">module</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">.network is a module (</span><span class="si">{</span><span class="n">obj</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">), expected a string'</span> + <span class="c1"># logger.error(msg)</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span> + + +<div class="viewcode-block" id="load_engine"> +<a class="viewcode-back" href="../../dev/engines/engines.html#searx.engines.load_engine">[docs]</a> +<span class="k">def</span> <span class="nf">load_engine</span><span class="p">(</span><span class="n">engine_data</span><span class="p">:</span> <span class="nb">dict</span><span class="p">)</span> <span class="o">-></span> <span class="n">Engine</span> <span class="o">|</span> <span class="n">types</span><span class="o">.</span><span class="n">ModuleType</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Load engine from ``engine_data``.</span> + +<span class="sd"> 
:param dict engine_data: Attributes from YAML ``settings:engines/<engine>``</span> +<span class="sd"> :return: initialized namespace of the ``<engine>``.</span> + +<span class="sd"> 1. create a namespace and load module of the ``<engine>``</span> +<span class="sd"> 2. update namespace with the defaults from :py:obj:`ENGINE_DEFAULT_ARGS`</span> +<span class="sd"> 3. update namespace with values from ``engine_data``</span> + +<span class="sd"> If engine *is active*, return namespace of the engine, otherwise return</span> +<span class="sd"> ``None``.</span> + +<span class="sd"> This function also returns ``None`` if initialization of the namespace fails</span> +<span class="sd"> for one of the following reasons:</span> + +<span class="sd"> - engine name contains underscore</span> +<span class="sd"> - engine name is not lowercase</span> +<span class="sd"> - required attribute is not set :py:func:`is_missing_required_attributes`</span> + +<span class="sd"> """</span> + <span class="c1"># pylint: disable=too-many-return-statements</span> + + <span class="n">engine_name</span> <span class="o">=</span> <span class="n">engine_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'name'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">engine_name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">'An engine does not have a "name" field'</span><span class="p">)</span> + <span class="k">return</span> <span class="kc">None</span> + <span class="k">if</span> <span class="s1">'_'</span> <span class="ow">in</span> <span class="n">engine_name</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">'Engine name contains underscore: "</span><span 
class="si">{}</span><span class="s1">"'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">engine_name</span><span class="p">))</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="k">if</span> <span class="n">engine_name</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">!=</span> <span class="n">engine_name</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s1">'Engine name is not lowercase: "</span><span class="si">{}</span><span class="s1">", converting to lowercase'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">engine_name</span><span class="p">))</span> + <span class="n">engine_name</span> <span class="o">=</span> <span class="n">engine_name</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> + <span class="n">engine_data</span><span class="p">[</span><span class="s1">'name'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_name</span> + + <span class="c1"># load_module</span> + <span class="n">module_name</span> <span class="o">=</span> <span class="n">engine_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'engine'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">module_name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">'The "engine" field is missing for the engine named "</span><span class="si">{}</span><span class="s1">"'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">engine_name</span><span class="p">))</span> + <span 
class="k">return</span> <span class="kc">None</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">engine</span> <span class="o">=</span> <span class="n">load_module</span><span class="p">(</span><span class="n">module_name</span> <span class="o">+</span> <span class="s1">'.py'</span><span class="p">,</span> <span class="n">ENGINE_DIR</span><span class="p">)</span> + <span class="k">except</span> <span class="p">(</span><span class="ne">SyntaxError</span><span class="p">,</span> <span class="ne">KeyboardInterrupt</span><span class="p">,</span> <span class="ne">SystemExit</span><span class="p">,</span> <span class="ne">SystemError</span><span class="p">,</span> <span class="ne">ImportError</span><span class="p">,</span> <span class="ne">RuntimeError</span><span class="p">):</span> + <span class="n">logger</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="s1">'Fatal exception in engine "</span><span class="si">{}</span><span class="s1">"'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">module_name</span><span class="p">))</span> + <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> + <span class="k">except</span> <span class="ne">BaseException</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="s1">'Cannot load engine "</span><span class="si">{}</span><span class="s1">"'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">module_name</span><span class="p">))</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">check_engine_module</span><span class="p">(</span><span class="n">engine</span><span class="p">)</span> + <span class="n">update_engine_attributes</span><span 
class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="n">engine_data</span><span class="p">)</span> + <span class="n">update_attributes_for_tor</span><span class="p">(</span><span class="n">engine</span><span class="p">)</span> + + <span class="c1"># avoid cyclic imports</span> + <span class="c1"># pylint: disable=import-outside-toplevel</span> + <span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraitsMap</span> + + <span class="n">trait_map</span> <span class="o">=</span> <span class="n">EngineTraitsMap</span><span class="o">.</span><span class="n">from_data</span><span class="p">()</span> + <span class="n">trait_map</span><span class="o">.</span><span class="n">set_traits</span><span class="p">(</span><span class="n">engine</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">is_engine_active</span><span class="p">(</span><span class="n">engine</span><span class="p">):</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="k">if</span> <span class="n">is_missing_required_attributes</span><span class="p">(</span><span class="n">engine</span><span class="p">):</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">set_loggers</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="n">engine_name</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="nb">any</span><span class="p">(</span><span class="n">cat</span> <span class="ow">in</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'categories_as_tabs'</span><span class="p">]</span> <span class="k">for</span> <span class="n">cat</span> <span class="ow">in</span> <span class="n">engine</span><span class="o">.</span><span class="n">categories</span><span class="p">):</span> + <span 
class="n">engine</span><span class="o">.</span><span class="n">categories</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">DEFAULT_CATEGORY</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">engine</span></div> + + + +<span class="k">def</span> <span class="nf">set_loggers</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="n">engine_name</span><span class="p">):</span> + <span class="c1"># set the logger for engine</span> + <span class="n">engine</span><span class="o">.</span><span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="n">engine_name</span><span class="p">)</span> + <span class="c1"># the engine may have load some other engines</span> + <span class="c1"># may sure the logger is initialized</span> + <span class="c1"># use sys.modules.copy() to avoid "RuntimeError: dictionary changed size during iteration"</span> + <span class="c1"># see https://github.com/python/cpython/issues/89516</span> + <span class="c1"># and https://docs.python.org/3.10/library/sys.html#sys.modules</span> + <span class="n">modules</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">modules</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> + <span class="k">for</span> <span class="n">module_name</span><span class="p">,</span> <span class="n">module</span> <span class="ow">in</span> <span class="n">modules</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="k">if</span> <span class="p">(</span> + <span class="n">module_name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"searx.engines"</span><span class="p">)</span> + <span class="ow">and</span> <span 
class="n">module_name</span> <span class="o">!=</span> <span class="s2">"searx.engines.__init__"</span> + <span class="ow">and</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">module</span><span class="p">,</span> <span class="s2">"logger"</span><span class="p">)</span> + <span class="p">):</span> + <span class="n">module_engine_name</span> <span class="o">=</span> <span class="n">module_name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"."</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> + <span class="n">module</span><span class="o">.</span><span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="n">module_engine_name</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + +<span class="k">def</span> <span class="nf">update_engine_attributes</span><span class="p">(</span><span class="n">engine</span><span class="p">:</span> <span class="n">Engine</span> <span class="o">|</span> <span class="n">types</span><span class="o">.</span><span class="n">ModuleType</span><span class="p">,</span> <span class="n">engine_data</span><span class="p">):</span> + <span class="c1"># set engine attributes from engine_data</span> + <span class="k">for</span> <span class="n">param_name</span><span class="p">,</span> <span class="n">param_value</span> <span class="ow">in</span> <span class="n">engine_data</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="k">if</span> <span class="n">param_name</span> <span class="o">==</span> <span class="s1">'categories'</span><span class="p">:</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">param_value</span><span class="p">,</span> <span 
class="nb">str</span><span class="p">):</span> + <span class="n">param_value</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">str</span><span class="o">.</span><span class="n">strip</span><span class="p">,</span> <span class="n">param_value</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">','</span><span class="p">)))</span> + <span class="n">engine</span><span class="o">.</span><span class="n">categories</span> <span class="o">=</span> <span class="n">param_value</span> <span class="c1"># type: ignore</span> + <span class="k">elif</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'about'</span><span class="p">)</span> <span class="ow">and</span> <span class="n">param_name</span> <span class="o">==</span> <span class="s1">'about'</span><span class="p">:</span> + <span class="n">engine</span><span class="o">.</span><span class="n">about</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">engine</span><span class="o">.</span><span class="n">about</span><span class="p">,</span> <span class="o">**</span><span class="n">engine_data</span><span class="p">[</span><span class="s1">'about'</span><span class="p">]}</span> <span class="c1"># type: ignore</span> + <span class="k">else</span><span class="p">:</span> + <span class="nb">setattr</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="n">param_name</span><span class="p">,</span> <span class="n">param_value</span><span class="p">)</span> + + <span class="c1"># set default attributes</span> + <span class="k">for</span> <span class="n">arg_name</span><span class="p">,</span> <span class="n">arg_value</span> <span class="ow">in</span> <span class="n">ENGINE_DEFAULT_ARGS</span><span 
class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="n">arg_name</span><span class="p">):</span> + <span class="nb">setattr</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="n">arg_name</span><span class="p">,</span> <span class="n">copy</span><span class="o">.</span><span class="n">deepcopy</span><span class="p">(</span><span class="n">arg_value</span><span class="p">))</span> + + +<span class="k">def</span> <span class="nf">update_attributes_for_tor</span><span class="p">(</span><span class="n">engine</span><span class="p">:</span> <span class="n">Engine</span> <span class="o">|</span> <span class="n">types</span><span class="o">.</span><span class="n">ModuleType</span><span class="p">):</span> + <span class="k">if</span> <span class="n">using_tor_proxy</span><span class="p">(</span><span class="n">engine</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'onion_url'</span><span class="p">):</span> + <span class="n">engine</span><span class="o">.</span><span class="n">search_url</span> <span class="o">=</span> <span class="n">engine</span><span class="o">.</span><span class="n">onion_url</span> <span class="o">+</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'search_path'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">engine</span><span class="o">.</span><span class="n">timeout</span> <span class="o">+=</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'outgoing'</span><span 
class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'extra_proxy_timeout'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + +<div class="viewcode-block" id="is_missing_required_attributes"> +<a class="viewcode-back" href="../../dev/engines/engines.html#searx.engines.is_missing_required_attributes">[docs]</a> +<span class="k">def</span> <span class="nf">is_missing_required_attributes</span><span class="p">(</span><span class="n">engine</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""An attribute is required when its name doesn't start with ``_`` (underline).</span> +<span class="sd"> Required attributes must not be ``None``.</span> + +<span class="sd"> """</span> + <span class="n">missing</span> <span class="o">=</span> <span class="kc">False</span> + <span class="k">for</span> <span class="n">engine_attr</span> <span class="ow">in</span> <span class="nb">dir</span><span class="p">(</span><span class="n">engine</span><span class="p">):</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">engine_attr</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="n">engine_attr</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">'Missing engine config attribute: "</span><span class="si">{0}</span><span class="s1">.</span><span class="si">{1}</span><span class="s1">"'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">engine</span><span 
class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">engine_attr</span><span class="p">))</span> + <span class="n">missing</span> <span class="o">=</span> <span class="kc">True</span> + <span class="k">return</span> <span class="n">missing</span></div> + + + +<div class="viewcode-block" id="using_tor_proxy"> +<a class="viewcode-back" href="../../dev/engines/engines.html#searx.engines.using_tor_proxy">[docs]</a> +<span class="k">def</span> <span class="nf">using_tor_proxy</span><span class="p">(</span><span class="n">engine</span><span class="p">:</span> <span class="n">Engine</span> <span class="o">|</span> <span class="n">types</span><span class="o">.</span><span class="n">ModuleType</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Return True if the engine configuration declares to use Tor."""</span> + <span class="k">return</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'outgoing'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'using_tor_proxy'</span><span class="p">)</span> <span class="ow">or</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'using_tor_proxy'</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span></div> + + + +<span class="k">def</span> <span class="nf">is_engine_active</span><span class="p">(</span><span class="n">engine</span><span class="p">:</span> <span class="n">Engine</span> <span class="o">|</span> <span class="n">types</span><span class="o">.</span><span class="n">ModuleType</span><span class="p">):</span> + <span class="c1"># check if engine is inactive</span> + <span class="k">if</span> <span class="n">engine</span><span class="o">.</span><span class="n">inactive</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span> + <span 
class="k">return</span> <span class="kc">False</span> + + <span class="c1"># exclude onion engines if not using tor</span> + <span class="k">if</span> <span class="s1">'onions'</span> <span class="ow">in</span> <span class="n">engine</span><span class="o">.</span><span class="n">categories</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">using_tor_proxy</span><span class="p">(</span><span class="n">engine</span><span class="p">):</span> + <span class="k">return</span> <span class="kc">False</span> + + <span class="k">return</span> <span class="kc">True</span> + + +<span class="k">def</span> <span class="nf">register_engine</span><span class="p">(</span><span class="n">engine</span><span class="p">:</span> <span class="n">Engine</span> <span class="o">|</span> <span class="n">types</span><span class="o">.</span><span class="n">ModuleType</span><span class="p">):</span> + <span class="k">if</span> <span class="n">engine</span><span class="o">.</span><span class="n">name</span> <span class="ow">in</span> <span class="n">engines</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">'Engine config error: ambiguous name: </span><span class="si">{0}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">engine</span><span class="o">.</span><span class="n">name</span><span class="p">))</span> + <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> + <span class="n">engines</span><span class="p">[</span><span class="n">engine</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine</span> + + <span class="k">if</span> <span class="n">engine</span><span class="o">.</span><span class="n">shortcut</span> <span 
class="ow">in</span> <span class="n">engine_shortcuts</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">'Engine config error: ambiguous shortcut: </span><span class="si">{0}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">engine</span><span class="o">.</span><span class="n">shortcut</span><span class="p">))</span> + <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> + <span class="n">engine_shortcuts</span><span class="p">[</span><span class="n">engine</span><span class="o">.</span><span class="n">shortcut</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine</span><span class="o">.</span><span class="n">name</span> + + <span class="k">for</span> <span class="n">category_name</span> <span class="ow">in</span> <span class="n">engine</span><span class="o">.</span><span class="n">categories</span><span class="p">:</span> + <span class="n">categories</span><span class="o">.</span><span class="n">setdefault</span><span class="p">(</span><span class="n">category_name</span><span class="p">,</span> <span class="p">[])</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">engine</span><span class="p">)</span> + + +<div class="viewcode-block" id="load_engines"> +<a class="viewcode-back" href="../../dev/engines/engines.html#searx.engines.load_engines">[docs]</a> +<span class="k">def</span> <span class="nf">load_engines</span><span class="p">(</span><span class="n">engine_list</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""usage: ``engine_list = settings['engines']``"""</span> + <span class="n">engines</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span> + <span 
class="n">engine_shortcuts</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span> + <span class="n">categories</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span> + <span class="n">categories</span><span class="p">[</span><span class="s1">'general'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">engine_data</span> <span class="ow">in</span> <span class="n">engine_list</span><span class="p">:</span> + <span class="n">engine</span> <span class="o">=</span> <span class="n">load_engine</span><span class="p">(</span><span class="n">engine_data</span><span class="p">)</span> + <span class="k">if</span> <span class="n">engine</span><span class="p">:</span> + <span class="n">register_engine</span><span class="p">(</span><span class="n">engine</span><span class="p">)</span> + <span class="k">return</span> <span class="n">engines</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li 
class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/annas_archive.html b/_modules/searx/engines/annas_archive.html new file mode 100644 index 000000000..6ed8b7c84 --- /dev/null +++ b/_modules/searx/engines/annas_archive.html @@ -0,0 +1,316 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.annas_archive — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a 
href="">searx.engines.annas_archive</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.annas_archive</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""`Anna's Archive`_ is a free non-profit online shadow library metasearch</span> +<span class="sd">engine providing access to a variety of book resources (also via IPFS), created</span> +<span class="sd">by a team of anonymous archivists (AnnaArchivist_).</span> + +<span class="sd">.. _Anna's Archive: https://annas-archive.org/</span> +<span class="sd">.. _AnnaArchivist: https://annas-software.org/AnnaArchivist/annas-archive</span> + +<span class="sd">Configuration</span> +<span class="sd">=============</span> + +<span class="sd">The engine has the following additional settings:</span> + +<span class="sd">- :py:obj:`aa_content`</span> +<span class="sd">- :py:obj:`aa_ext`</span> +<span class="sd">- :py:obj:`aa_sort`</span> + +<span class="sd">With this options a SearXNG maintainer is able to configure **additional**</span> +<span class="sd">engines for specific searches in Anna's Archive. For example a engine to search</span> +<span class="sd">for *newest* articles and journals (PDF) / by shortcut ``!aaa <search-term>``.</span> + +<span class="sd">.. 
code:: yaml</span> + +<span class="sd"> - name: annas articles</span> +<span class="sd"> engine: annas_archive</span> +<span class="sd"> shortcut: aaa</span> +<span class="sd"> aa_content: 'magazine'</span> +<span class="sd"> aa_ext: 'pdf'</span> +<span class="sd"> aa_sort: 'newest'</span> + +<span class="sd">Implementations</span> +<span class="sd">===============</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Optional</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">eval_xpath_getindex</span><span class="p">,</span> <span class="n">eval_xpath_list</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> +<span class="kn">from</span> <span class="nn">searx.data</span> <span class="kn">import</span> <span class="n">ENGINE_TRAITS</span> + +<span class="c1"># about</span> +<span class="n">about</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s2">"https://annas-archive.org/"</span><span class="p">,</span> + <span 
class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s2">"Q115288326"</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s2">"HTML"</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"files"</span><span class="p">]</span> +<span class="n">paging</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span> + +<span class="c1"># search-url</span> +<span class="n">base_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"https://annas-archive.org"</span> +<span class="n">aa_content</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span> +<span class="sd">"""Anan's search form field **Content** / possible values::</span> + +<span class="sd"> book_fiction, book_unknown, book_nonfiction,</span> +<span class="sd"> book_comic, magazine, standards_document</span> + +<span class="sd">To not filter use an empty string (default).</span> +<span class="sd">"""</span> +<span class="n">aa_sort</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">''</span> +<span class="sd">"""Sort Anna's results, possible values::</span> + +<span class="sd"> newest, oldest, 
largest, smallest</span> + +<span class="sd">To sort by *most relevant* use an empty string (default)."""</span> + +<span class="n">aa_ext</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">''</span> +<span class="sd">"""Filter Anna's results by a file ending. Common filters for example are</span> +<span class="sd">``pdf`` and ``epub``.</span> + +<span class="sd">.. note::</span> + +<span class="sd"> Anna's Archive is a beta release: Filter results by file extension does not</span> +<span class="sd"> really work on Anna's Archive.</span> + +<span class="sd">"""</span> + + +<div class="viewcode-block" id="init"> +<a class="viewcode-back" href="../../../dev/engines/online/annas_archive.html#searx.engines.annas_archive.init">[docs]</a> +<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> <span class="c1"># pylint: disable=unused-argument</span> +<span class="w"> </span><span class="sd">"""Check of engine's settings."""</span> + <span class="n">traits</span> <span class="o">=</span> <span class="n">EngineTraits</span><span class="p">(</span><span class="o">**</span><span class="n">ENGINE_TRAITS</span><span class="p">[</span><span class="s1">'annas archive'</span><span class="p">])</span> + + <span class="k">if</span> <span class="n">aa_content</span> <span class="ow">and</span> <span class="n">aa_content</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'content'</span><span class="p">]:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'invalid setting content: </span><span class="si">{</span><span class="n">aa_content</span><span class="si">}</span><span 
class="s1">'</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">aa_sort</span> <span class="ow">and</span> <span class="n">aa_sort</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'sort'</span><span class="p">]:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'invalid setting sort: </span><span class="si">{</span><span class="n">aa_sort</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">aa_ext</span> <span class="ow">and</span> <span class="n">aa_ext</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ext'</span><span class="p">]:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'invalid setting ext: </span><span class="si">{</span><span class="n">aa_ext</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span></div> + + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span> + <span class="n">lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span 
class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">"language"</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">args</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'lang'</span><span class="p">:</span> <span class="n">lang</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">aa_content</span><span class="p">,</span> + <span class="s1">'ext'</span><span class="p">:</span> <span class="n">aa_ext</span><span class="p">,</span> + <span class="s1">'sort'</span><span class="p">:</span> <span class="n">aa_sort</span><span class="p">,</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'page'</span><span class="p">:</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">],</span> + <span class="p">}</span> + <span class="c1"># filter out None and empty values</span> + <span class="n">filtered_args</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">((</span><span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">args</span><span class="o">.</span><span class="n">items</span><span class="p">()</span> <span class="k">if</span> <span class="n">v</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">base_url</span><span class="si">}</span><span class="s2">/search?</span><span 
class="si">{</span><span class="n">urlencode</span><span class="p">(</span><span class="n">filtered_args</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span> + <span class="k">return</span> <span class="n">params</span> + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]:</span> + <span class="n">results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//main//div[contains(@class, "h-[125]")]/a'</span><span class="p">):</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">_get_result</span><span class="p">(</span><span class="n">item</span><span class="p">))</span> + + <span class="c1"># The rendering of the WEB page is very strange; except the first position</span> + <span class="c1"># all other positions of Anna's result page are 
enclosed in SGML comments.</span> + <span class="c1"># These comments are *uncommented* by some JS code, see query of class</span> + <span class="c1"># '.js-scroll-hidden' in Anna's HTML template:</span> + <span class="c1"># https://annas-software.org/AnnaArchivist/annas-archive/-/blob/main/allthethings/templates/macros/md5_list.html</span> + + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//main//div[contains(@class, "js-scroll-hidden")]'</span><span class="p">):</span> + <span class="n">item</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">item</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'./comment()'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">_get_result</span><span class="p">(</span><span class="n">item</span><span class="p">))</span> + + <span class="k">return</span> <span class="n">results</span> + + +<span class="k">def</span> <span class="nf">_get_result</span><span class="p">(</span><span class="n">item</span><span class="p">):</span> + <span class="k">return</span> <span class="p">{</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'paper.html'</span><span class="p">,</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">base_url</span> <span class="o">+</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span 
class="s1">'./@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)),</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//h3/text()[1]'</span><span class="p">)),</span> + <span class="s1">'publisher'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "text-sm")]'</span><span class="p">)),</span> + <span class="s1">'authors'</span><span class="p">:</span> <span class="p">[</span><span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "italic")]'</span><span class="p">))],</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "text-xs")]'</span><span class="p">)),</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//img/@src'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">),</span> <span class="n">allow_none</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> + <span class="p">}</span> + + +<div 
class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/annas_archive.html#searx.engines.annas_archive.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch languages and other search arguments from Anna's search form."""</span> + <span class="c1"># pylint: disable=import-outside-toplevel</span> + + <span class="kn">import</span> <span class="nn">babel</span> + <span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span> + <span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">''</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'content'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ext'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'sort'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/search'</span><span 
class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Response from Anna's search page is not OK."</span><span class="p">)</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + <span class="c1"># supported language codes</span> + + <span class="n">lang_map</span> <span class="o">=</span> <span class="p">{}</span> + <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//form//input[@name='lang']"</span><span class="p">):</span> + <span class="n">eng_lang</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span> + <span class="k">if</span> <span class="n">eng_lang</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">''</span><span class="p">,</span> <span class="s1">'_empty'</span><span class="p">,</span> <span class="s1">'nl-BE'</span><span class="p">,</span> <span class="s1">'und'</span><span class="p">)</span> <span class="ow">or</span> <span class="n">eng_lang</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'anti__'</span><span class="p">):</span> + <span class="k">continue</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">locale</span> <span 
class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">lang_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_lang</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">),</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="c1"># silently ignore unknown languages</span> + <span class="c1"># print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))</span> + <span class="k">continue</span> + <span class="n">sxng_lang</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)</span> + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_lang</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_lang</span><span 
class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_lang</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span> + + <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//form//input[@name='content']"</span><span class="p">):</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"anti__"</span><span class="p">):</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'content'</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">))</span> + + <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//form//input[@name='ext']"</span><span class="p">):</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"anti__"</span><span class="p">):</span> + <span class="n">engine_traits</span><span 
class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ext'</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">))</span> + + <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//form//select[@name='sort']//option"</span><span class="p">):</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'sort'</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">))</span> + + <span class="c1"># for better diff; sort the persistence of these traits</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'content'</span><span class="p">]</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ext'</span><span class="p">]</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'sort'</span><span class="p">]</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div 
class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> 
+<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/archlinux.html b/_modules/searx/engines/archlinux.html new file mode 100644 index 000000000..2d3abc075 --- /dev/null +++ b/_modules/searx/engines/archlinux.html @@ -0,0 +1,262 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.archlinux — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.archlinux</a></li> + </ul> + 
</div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.archlinux</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""</span> +<span class="sd">Arch Linux Wiki</span> +<span class="sd">~~~~~~~~~~~~~~~</span> + +<span class="sd">This implementation does not use a official API: Mediawiki provides API, but</span> +<span class="sd">Arch Wiki blocks access to it.</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span><span class="p">,</span> <span class="n">urljoin</span><span class="p">,</span> <span class="n">urlparse</span> +<span class="kn">import</span> <span class="nn">lxml</span> +<span class="kn">import</span> <span class="nn">babel</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">,</span> <span class="n">eval_xpath_getindex</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> +<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span 
class="n">EngineTraits</span> + + +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://wiki.archlinux.org/'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q101445877'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'it'</span><span class="p">,</span> <span class="s1">'software wikis'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">main_wiki</span> <span class="o">=</span> <span class="s1">'wiki.archlinux.org'</span> + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + + <span class="n">sxng_lang</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">netloc</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> 
<span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="n">main_wiki</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">title</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'title'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="s1">'Special:Search'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://'</span> <span class="o">+</span> <span class="n">netloc</span> <span class="o">+</span> <span class="s1">'/index.php?'</span> + <span class="n">offset</span> <span class="o">=</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">20</span> + + <span class="k">if</span> <span class="n">netloc</span> <span class="o">==</span> <span class="n">main_wiki</span><span class="p">:</span> + <span class="n">eng_lang</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="s1">'English'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">query</span> <span 
class="o">+=</span> <span class="s1">' ('</span> <span class="o">+</span> <span class="n">eng_lang</span> <span class="o">+</span> <span class="s1">')'</span> + <span class="k">elif</span> <span class="n">netloc</span> <span class="o">==</span> <span class="s1">'wiki.archlinuxcn.org'</span><span class="p">:</span> + <span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://'</span> <span class="o">+</span> <span class="n">netloc</span> <span class="o">+</span> <span class="s1">'/wzh/index.php?'</span> + + <span class="n">args</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'search'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'limit'</span><span class="p">:</span> <span class="mi">20</span><span class="p">,</span> + <span class="s1">'offset'</span><span class="p">:</span> <span class="n">offset</span><span class="p">,</span> + <span class="s1">'profile'</span><span class="p">:</span> <span class="s1">'default'</span><span class="p">,</span> + <span class="p">}</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">args</span><span class="p">)</span> + <span class="k">return</span> <span class="n">params</span> + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span 
class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + <span class="c1"># get the base URL for the language in which request was made</span> + <span class="n">sxng_lang</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">netloc</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="n">main_wiki</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://'</span> <span class="o">+</span> <span class="n">netloc</span> <span class="o">+</span> <span class="s1">'/index.php?'</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//ul[@class="mw-search-results"]/li'</span><span class="p">):</span> + <span class="n">link</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="mw-search-result-heading"]/a'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> + <span 
class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="searchresult"]'</span><span class="p">))</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">urljoin</span><span class="p">(</span><span class="n">base_url</span><span class="p">,</span> <span class="n">link</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">)),</span> <span class="c1"># type: ignore</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">link</span><span class="p">),</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span> + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/archlinux.html#searx.engines.archlinux.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch languages from Archlinux-Wiki. The location of the Wiki address of a</span> +<span class="sd"> language is mapped in a :py:obj:`custom field</span> +<span class="sd"> <searx.enginelib.traits.EngineTraits.custom>` (``wiki_netloc``). 
Depending</span> +<span class="sd"> on the location, the ``title`` argument in the request is translated.</span> + +<span class="sd"> .. code:: python</span> + +<span class="sd"> "custom": {</span> +<span class="sd"> "wiki_netloc": {</span> +<span class="sd"> "de": "wiki.archlinux.de",</span> +<span class="sd"> # ...</span> +<span class="sd"> "zh": "wiki.archlinuxcn.org"</span> +<span class="sd"> }</span> +<span class="sd"> "title": {</span> +<span class="sd"> "de": "Spezial:Suche",</span> +<span class="sd"> # ...</span> +<span class="sd"> "zh": "Special:\u641c\u7d22"</span> +<span class="sd"> },</span> +<span class="sd"> },</span> + +<span class="sd"> """</span> + <span class="c1"># pylint: disable=import-outside-toplevel</span> + <span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'title'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="n">title_map</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'de'</span><span class="p">:</span> <span class="s1">'Spezial:Suche'</span><span class="p">,</span> + <span class="s1">'fa'</span><span class="p">:</span> <span class="s1">'ویژه:جستجو'</span><span class="p">,</span> + <span class="s1">'ja'</span><span class="p">:</span> <span class="s1">'特別:検索'</span><span class="p">,</span> + <span class="s1">'zh'</span><span class="p">:</span> <span class="s1">'Special:搜索'</span><span class="p">,</span> + <span class="p">}</span> + + <span 
class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://wiki.archlinux.org/'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from wiki.archlinux.org is not OK."</span><span class="p">)</span> + + <span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//a[@class='interlanguage-link-target']"</span><span class="p">):</span> + + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">a</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'lang'</span><span class="p">),</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">))</span> + <span class="c1"># zh_Hans --> zh</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">sxng_tag</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="mi">0</span><span 
class="p">]</span> + + <span class="n">netloc</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">a</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">))</span><span class="o">.</span><span class="n">netloc</span> + <span class="k">if</span> <span class="n">netloc</span> <span class="o">!=</span> <span class="s1">'wiki.archlinux.org'</span><span class="p">:</span> + <span class="n">title</span> <span class="o">=</span> <span class="n">title_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">title</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: title tag from </span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">) is unknown"</span> <span class="o">%</span> <span class="p">(</span><span class="n">netloc</span><span class="p">,</span> <span class="n">sxng_tag</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">][</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">netloc</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'title'</span><span class="p">][</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">title</span> <span class="c1"># type: ignore</span> + + <span class="n">eng_tag</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span 
class="n">eval_xpath_list</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="s2">".//span"</span><span class="p">))</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> <span class="c1"># type: ignore</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="s1">'en'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'English'</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a 
href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/bing.html b/_modules/searx/engines/bing.html new file mode 100644 index 000000000..6066605a8 --- /dev/null +++ b/_modules/searx/engines/bing.html @@ -0,0 +1,391 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.bing — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.bing</a></li> + </ul> + </div> + + <div 
class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.bing</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This is the implementation of the Bing-WEB engine. Some of this</span> +<span class="sd">implementations are shared by other engines:</span> + +<span class="sd">- :ref:`bing images engine`</span> +<span class="sd">- :ref:`bing news engine`</span> +<span class="sd">- :ref:`bing videos engine`</span> + +<span class="sd">On the `preference page`_ Bing offers a lot of languages an regions (see section</span> +<span class="sd">LANGUAGE and COUNTRY/REGION). The Language is the language of the UI, we need</span> +<span class="sd">in SearXNG to get the translations of data such as *"published last week"*.</span> + +<span class="sd">There is a description of the official search-APIs_, unfortunately this is not</span> +<span class="sd">the API we can use or that bing itself would use. You can look up some things</span> +<span class="sd">in the API to get a better picture of bing, but the value specifications like</span> +<span class="sd">the market codes are usually outdated or at least no longer used by bing itself.</span> + +<span class="sd">The market codes have been harmonized and are identical for web, video and</span> +<span class="sd">images. The news area has also been harmonized with the other categories. Only</span> +<span class="sd">political adjustments still seem to be made -- for example, there is no news</span> +<span class="sd">category for the Chinese market.</span> + +<span class="sd">.. _preference page: https://www.bing.com/account/general</span> +<span class="sd">.. 
_search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/</span> + +<span class="sd">"""</span> +<span class="c1"># pylint: disable=too-many-branches, invalid-name</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">import</span> <span class="nn">base64</span> +<span class="kn">import</span> <span class="nn">re</span> +<span class="kn">import</span> <span class="nn">time</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">parse_qs</span><span class="p">,</span> <span class="n">urlencode</span><span class="p">,</span> <span class="n">urlparse</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> +<span class="kn">import</span> <span class="nn">babel</span> +<span class="kn">import</span> <span class="nn">babel.languages</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">,</span> <span class="n">eval_xpath_getindex</span> +<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span><span class="p">,</span> <span class="n">region_tag</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">()</span> + +<span 
class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.bing.com'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q182496'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">max_page</span> <span class="o">=</span> <span class="mi">200</span> +<span class="sd">"""200 pages maximum (``&first=1991``)"""</span> + +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> +<span class="sd">"""Bing results are always SFW. 
To get NSFW links from bing some age</span> +<span class="sd">verification by a cookie is needed / thats not possible in SearXNG.</span> +<span class="sd">"""</span> + +<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://www.bing.com/search'</span> +<span class="sd">"""Bing (Web) search URL"""</span> + + +<span class="k">def</span> <span class="nf">_page_offset</span><span class="p">(</span><span class="n">pageno</span><span class="p">):</span> + <span class="k">return</span> <span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">pageno</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span> <span class="o">+</span> <span class="mi">1</span> + + +<span class="k">def</span> <span class="nf">set_bing_cookies</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">engine_language</span><span class="p">,</span> <span class="n">engine_region</span><span class="p">):</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'_EDGE_CD'</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'m=</span><span class="si">{</span><span class="n">engine_region</span><span class="si">}</span><span class="s1">&u=</span><span class="si">{</span><span class="n">engine_language</span><span class="si">}</span><span class="s1">'</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'_EDGE_S'</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'mkt=</span><span class="si">{</span><span class="n">engine_region</span><span class="si">}</span><span class="s1">&ui=</span><span class="si">{</span><span 
class="n">engine_language</span><span class="si">}</span><span class="s1">'</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"bing cookies: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">])</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Assemble a Bing-Web request."""</span> + + <span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">engine_language</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">set_bing_cookies</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">engine_language</span><span class="p">,</span> <span class="n">engine_region</span><span class="p">)</span> + + <span class="n">page</span> <span class="o">=</span> <span 
class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> + <span class="n">query_params</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="c1"># if arg 'pq' is missed, sometimes on page 4 we get results from page 1,</span> + <span class="c1"># don't ask why it is only sometimes / its M$ and they have never been</span> + <span class="c1"># deterministic ;)</span> + <span class="s1">'pq'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="p">}</span> + + <span class="c1"># To get correct page, arg first and this arg FORM is needed, the value PERE</span> + <span class="c1"># is on page 2, on page 3 its PERE1 and on page 4 its PERE2 .. and so forth.</span> + <span class="c1"># The 'first' arg should never send on page 1.</span> + + <span class="k">if</span> <span class="n">page</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="n">query_params</span><span class="p">[</span><span class="s1">'first'</span><span class="p">]</span> <span class="o">=</span> <span class="n">_page_offset</span><span class="p">(</span><span class="n">page</span><span class="p">)</span> <span class="c1"># see also arg FORM</span> + <span class="k">if</span> <span class="n">page</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span> + <span class="n">query_params</span><span class="p">[</span><span class="s1">'FORM'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'PERE'</span> + <span class="k">elif</span> <span class="n">page</span> <span class="o">></span> <span class="mi">2</span><span class="p">:</span> + <span class="n">query_params</span><span class="p">[</span><span 
class="s1">'FORM'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'PERE</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">page</span> <span class="o">-</span> <span class="mi">2</span><span class="p">)</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">base_url</span><span class="si">}</span><span class="s1">?</span><span class="si">{</span><span class="n">urlencode</span><span class="p">(</span><span class="n">query_params</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span> + + <span class="k">if</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'time_range'</span><span class="p">):</span> + <span class="n">unix_day</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span> <span class="o">/</span> <span class="mi">86400</span><span class="p">)</span> + <span class="n">time_ranges</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'day'</span><span class="p">:</span> <span class="s1">'1'</span><span class="p">,</span> <span class="s1">'week'</span><span class="p">:</span> <span class="s1">'2'</span><span class="p">,</span> <span class="s1">'month'</span><span class="p">:</span> <span class="s1">'3'</span><span class="p">,</span> <span class="s1">'year'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'5_</span><span class="si">{</span><span class="n">unix_day</span><span class="o">-</span><span class="mi">365</span><span class="si">}</span><span class="s1">_</span><span class="si">{</span><span 
class="n">unix_day</span><span class="si">}</span><span class="s1">'</span><span class="p">}</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="sa">f</span><span class="s1">'&filters=ex1:"ez</span><span class="si">{</span><span class="n">time_ranges</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s2">"time_range"</span><span class="p">]]</span><span class="si">}</span><span class="s1">"'</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + <span class="c1"># pylint: disable=too-many-locals</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">result_len</span> <span class="o">=</span> <span class="mi">0</span> + + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="c1"># parse results again if nothing is found yet</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//ol[@id="b_results"]/li[contains(@class, "b_algo")]'</span><span class="p">):</span> + + <span class="n">link</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//h2/a'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span 
class="k">if</span> <span class="n">link</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">continue</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">link</span><span class="o">.</span><span class="n">attrib</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">)</span> + <span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">link</span><span class="p">)</span> + + <span class="n">content</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//p'</span><span class="p">)</span> + <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">content</span><span class="p">:</span> + <span class="c1"># Make sure that the element is free of:</span> + <span class="c1"># <span class="algoSlug_icon" # data-priority="2">Web</span></span> + <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">p</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//span[@class="algoSlug_icon"]'</span><span class="p">):</span> + <span class="n">e</span><span class="o">.</span><span class="n">getparent</span><span class="p">()</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">content</span><span class="p">)</span> + + <span class="c1"># get the real URL</span> + <span class="k">if</span> <span class="n">url</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span 
class="s1">'https://www.bing.com/ck/a?'</span><span class="p">):</span> + <span class="c1"># get the first value of u parameter</span> + <span class="n">url_query</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">url</span><span class="p">)</span><span class="o">.</span><span class="n">query</span> + <span class="n">parsed_url_query</span> <span class="o">=</span> <span class="n">parse_qs</span><span class="p">(</span><span class="n">url_query</span><span class="p">)</span> + <span class="n">param_u</span> <span class="o">=</span> <span class="n">parsed_url_query</span><span class="p">[</span><span class="s2">"u"</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> + <span class="c1"># remove "a1" in front</span> + <span class="n">encoded_url</span> <span class="o">=</span> <span class="n">param_u</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span> + <span class="c1"># add padding</span> + <span class="n">encoded_url</span> <span class="o">=</span> <span class="n">encoded_url</span> <span class="o">+</span> <span class="s1">'='</span> <span class="o">*</span> <span class="p">(</span><span class="o">-</span><span class="nb">len</span><span class="p">(</span><span class="n">encoded_url</span><span class="p">)</span> <span class="o">%</span> <span class="mi">4</span><span class="p">)</span> + <span class="c1"># decode base64 encoded URL</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">base64</span><span class="o">.</span><span class="n">urlsafe_b64decode</span><span class="p">(</span><span class="n">encoded_url</span><span class="p">)</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span> + + <span class="c1"># append result</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span 
class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">})</span> + + <span class="c1"># get number_of_results</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">result_len_container</span> <span class="o">=</span> <span class="s2">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//span[@class="sb_count"]//text()'</span><span class="p">))</span> + <span class="k">if</span> <span class="s2">"-"</span> <span class="ow">in</span> <span class="n">result_len_container</span><span class="p">:</span> + + <span class="c1"># Remove the part "from-to" for paginated request ...</span> + <span class="n">result_len_container</span> <span class="o">=</span> <span class="n">result_len_container</span><span class="p">[</span><span class="n">result_len_container</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s2">"-"</span><span class="p">)</span> <span class="o">*</span> <span class="mi">2</span> <span class="o">+</span> <span class="mi">2</span> <span class="p">:]</span> + + <span class="n">result_len_container</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s1">'[^0-9]'</span><span class="p">,</span> <span class="s1">''</span><span class="p">,</span> <span class="n">result_len_container</span><span class="p">)</span> + + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">result_len_container</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> + <span 
class="n">result_len</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">result_len_container</span><span class="p">)</span> + + <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'result error :</span><span class="se">\n</span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">result_len</span> <span class="ow">and</span> <span class="n">_page_offset</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"pageno"</span><span class="p">,</span> <span class="mi">0</span><span class="p">))</span> <span class="o">></span> <span class="n">result_len</span><span class="p">:</span> + <span class="c1"># Avoid reading more results than available.</span> + <span class="c1"># For example, if there is 100 results from some search and we try to get results from 120 to 130,</span> + <span class="c1"># Bing will send back the results from 0 to 10 and no error.</span> + <span class="c1"># If we compare results count with the first parameter of the request we can avoid this "invalid" results.</span> + <span class="k">return</span> <span class="p">[]</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'number_of_results'</span><span class="p">:</span> <span class="n">result_len</span><span class="p">})</span> + <span class="k">return</span> <span class="n">results</span> + + +<div class="viewcode-block" id="fetch_traits"> 
+<a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch languages and regions from Bing-Web."""</span> + <span class="c1"># pylint: disable=import-outside-toplevel</span> + + <span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span> + <span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">gen_useragent</span> + + <span class="n">headers</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"User-Agent"</span><span class="p">:</span> <span class="n">gen_useragent</span><span class="p">(),</span> + <span class="s2">"Accept"</span><span class="p">:</span> <span class="s2">"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"</span><span class="p">,</span> + <span class="s2">"Accept-Language"</span><span class="p">:</span> <span class="s2">"en-US;q=0.5,en;q=0.3"</span><span class="p">,</span> + <span class="s2">"Accept-Encoding"</span><span class="p">:</span> <span class="s2">"gzip, deflate, br"</span><span class="p">,</span> + <span class="s2">"DNT"</span><span class="p">:</span> <span class="s2">"1"</span><span class="p">,</span> + <span class="s2">"Connection"</span><span class="p">:</span> <span class="s2">"keep-alive"</span><span class="p">,</span> + <span class="s2">"Upgrade-Insecure-Requests"</span><span class="p">:</span> <span class="s2">"1"</span><span class="p">,</span> + <span class="s2">"Sec-GPC"</span><span class="p">:</span> <span class="s2">"1"</span><span class="p">,</span> + <span 
class="s2">"Cache-Control"</span><span class="p">:</span> <span class="s2">"max-age=0"</span><span class="p">,</span> + <span class="p">}</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s2">"https://www.bing.com/account/general"</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from bing is not OK."</span><span class="p">)</span> + + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + <span class="c1"># languages</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="s1">'zh'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'zh-hans'</span> + + <span class="n">map_lang</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'prs'</span><span class="p">:</span> <span class="s1">'fa-AF'</span><span class="p">,</span> <span class="s1">'en'</span><span class="p">:</span> <span class="s1">'en-us'</span><span class="p">}</span> + <span class="n">bing_ui_lang_map</span> <span class="o">=</span> <span class="p">{</span> + <span class="c1"># HINT: this list probably needs to be supplemented</span> + <span class="s1">'en'</span><span class="p">:</span> <span class="s1">'us'</span><span class="p">,</span> <span class="c1"># en --> en-us</span> + <span 
class="s1">'da'</span><span class="p">:</span> <span class="s1">'dk'</span><span class="p">,</span> <span class="c1"># da --> da-dk</span> + <span class="p">}</span> + + <span class="k">for</span> <span class="n">href</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@id="language-section"]//li/a/@href'</span><span class="p">):</span> + <span class="n">eng_lang</span> <span class="o">=</span> <span class="n">parse_qs</span><span class="p">(</span><span class="n">urlparse</span><span class="p">(</span><span class="n">href</span><span class="p">)</span><span class="o">.</span><span class="n">query</span><span class="p">)[</span><span class="s1">'setlang'</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> + <span class="n">babel_lang</span> <span class="o">=</span> <span class="n">map_lang</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_lang</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">)</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">babel_lang</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">'_'</span><span class="p">)))</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: language (</span><span class="si">%s</span><span class="s2">) is unknown by 
babel"</span> <span class="o">%</span> <span class="p">(</span><span class="n">babel_lang</span><span class="p">))</span> + <span class="k">continue</span> + <span class="c1"># Language (e.g. 'en' or 'de') from https://www.bing.com/account/general</span> + <span class="c1"># is converted by bing to 'en-us' or 'de-de'. But only if there is not</span> + <span class="c1"># already a '-' delemitter in the language. For instance 'pt-PT' --></span> + <span class="c1"># 'pt-pt' and 'pt-br' --> 'pt-br'</span> + <span class="n">bing_ui_lang</span> <span class="o">=</span> <span class="n">eng_lang</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> + <span class="k">if</span> <span class="s1">'-'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">bing_ui_lang</span><span class="p">:</span> + <span class="n">bing_ui_lang</span> <span class="o">=</span> <span class="n">bing_ui_lang</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">bing_ui_lang_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">bing_ui_lang</span><span class="p">,</span> <span class="n">bing_ui_lang</span><span class="p">)</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">bing_ui_lang</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"CONFLICT: babel </span><span class="si">{</span><span class="n">sxng_tag</span><span class="si">}</span><span class="s2"> --> 
</span><span class="si">{</span><span class="n">conflict</span><span class="si">}</span><span class="s2">, </span><span class="si">{</span><span class="n">bing_ui_lang</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">bing_ui_lang</span> + + <span class="c1"># regions (aka "market codes")</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="s1">'zh-CN'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'zh-cn'</span> + + <span class="n">map_market_codes</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'zh-hk'</span><span class="p">:</span> <span class="s1">'en-hk'</span><span class="p">,</span> <span class="c1"># not sure why, but at M$ this is the market code for Hongkong</span> + <span class="p">}</span> + <span class="k">for</span> <span class="n">href</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@id="region-section"]//li/a/@href'</span><span class="p">):</span> + <span class="n">cc_tag</span> <span class="o">=</span> <span class="n">parse_qs</span><span class="p">(</span><span class="n">urlparse</span><span class="p">(</span><span class="n">href</span><span class="p">)</span><span class="o">.</span><span class="n">query</span><span class="p">)[</span><span class="s1">'cc'</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> + <span class="k">if</span> <span class="n">cc_tag</span> <span class="o">==</span> <span class="s1">'clear'</span><span class="p">:</span> + <span class="n">engine_traits</span><span 
class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="n">cc_tag</span> + <span class="k">continue</span> + + <span class="c1"># add market codes from official languages of the country ..</span> + <span class="k">for</span> <span class="n">lang_tag</span> <span class="ow">in</span> <span class="n">babel</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get_official_languages</span><span class="p">(</span><span class="n">cc_tag</span><span class="p">,</span> <span class="n">de_facto</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> + <span class="k">if</span> <span class="n">lang_tag</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span> + <span class="c1"># print("ignore lang: %s <-- %s" % (cc_tag, lang_tag))</span> + <span class="k">continue</span> + <span class="n">lang_tag</span> <span class="o">=</span> <span class="n">lang_tag</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> <span class="c1"># zh_Hant --> zh</span> + <span class="n">market_code</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">lang_tag</span><span class="si">}</span><span class="s2">-</span><span class="si">{</span><span class="n">cc_tag</span><span class="si">}</span><span class="s2">"</span> <span class="c1"># zh-tw</span> + + <span class="n">market_code</span> <span class="o">=</span> <span class="n">map_market_codes</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">market_code</span><span class="p">,</span> <span class="n">market_code</span><span 
class="p">)</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="s1">'</span><span class="si">%s</span><span class="s1">_</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">lang_tag</span><span class="p">,</span> <span class="n">cc_tag</span><span class="o">.</span><span class="n">upper</span><span class="p">())))</span> + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">market_code</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">market_code</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">market_code</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> 
+ <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> 
+</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/bing_images.html b/_modules/searx/engines/bing_images.html new file mode 100644 index 000000000..1a7591f77 --- /dev/null +++ b/_modules/searx/engines/bing_images.html @@ -0,0 +1,223 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.bing_images — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.bing_images</a></li> + 
</ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.bing_images</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Bing-Images: description see :py:obj:`searx.engines.bing`.</span> +<span class="sd">"""</span> +<span class="c1"># pylint: disable=invalid-name</span> + + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">import</span> <span class="nn">json</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> + +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> + +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> +<span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="n">set_bing_cookies</span> +<span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span> + + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">()</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> 
<span class="s1">'https://www.bing.com/images'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q182496'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'images'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> + +<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://www.bing.com/images/async'</span> +<span class="sd">"""Bing (Images) search URL"""</span> + +<span class="n">time_map</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'day'</span><span class="p">:</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span><span class="p">,</span> + <span class="s1">'week'</span><span class="p">:</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">7</span><span class="p">,</span> + <span class="s1">'month'</span><span class="p">:</span> <span 
class="mi">60</span> <span class="o">*</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">31</span><span class="p">,</span> + <span class="s1">'year'</span><span class="p">:</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">365</span><span class="p">,</span> +<span class="p">}</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_images.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Assemble a Bing-Image request."""</span> + + <span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">engine_language</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">set_bing_cookies</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">engine_language</span><span class="p">,</span> <span class="n">engine_region</span><span class="p">)</span> + + <span class="c1"># build URL query</span> + <span class="c1"># - example: 
https://www.bing.com/images/async?q=foo&async=content&first=1&count=35</span> + <span class="n">query_params</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'async'</span><span class="p">:</span> <span class="s1">'1'</span><span class="p">,</span> + <span class="c1"># to simplify the page count lets use the default of 35 images per page</span> + <span class="s1">'first'</span><span class="p">:</span> <span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">35</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> + <span class="s1">'count'</span><span class="p">:</span> <span class="mi">35</span><span class="p">,</span> + <span class="p">}</span> + + <span class="c1"># time range</span> + <span class="c1"># - example: one year (525600 minutes) 'qft=+filterui:age-lt525600'</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]:</span> + <span class="n">query_params</span><span class="p">[</span><span class="s1">'qft'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'filterui:age-lt</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">time_map</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span 
class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">query_params</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_images.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Get response from Bing-Images"""</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//ul[contains(@class, "dgControl_list")]/li'</span><span class="p">):</span> + + <span class="n">metadata</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//a[@class="iusc"]/@m'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">metadata</span><span class="p">:</span> + <span class="k">continue</span> + + <span class="n">metadata</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span 
class="s1">'.//a[@class="iusc"]/@m'</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span> + <span class="n">title</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="infnmpt"]//a/text()'</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> + <span class="n">img_format</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="imgpt"]/div/span/text()'</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">" · "</span><span class="p">)</span> + <span class="n">source</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()'</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'images.html'</span><span class="p">,</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">'purl'</span><span class="p">],</span> + <span 
class="s1">'thumbnail_src'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">'turl'</span><span class="p">],</span> + <span class="s1">'img_src'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">'murl'</span><span class="p">],</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'desc'</span><span class="p">),</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'source'</span><span class="p">:</span> <span class="n">source</span><span class="p">,</span> + <span class="s1">'resolution'</span><span class="p">:</span> <span class="n">img_format</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> + <span class="s1">'img_format'</span><span class="p">:</span> <span class="n">img_format</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">img_format</span><span class="p">)</span> <span class="o">>=</span> <span class="mi">2</span> <span class="k">else</span> <span class="kc">None</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="k">return</span> <span class="n">results</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference 
internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/bing_news.html b/_modules/searx/engines/bing_news.html new file mode 100644 index 000000000..bcd42b0eb --- /dev/null +++ b/_modules/searx/engines/bing_news.html @@ -0,0 +1,277 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.bing_news — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.bing_news</a></li> + </ul> + 
</div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.bing_news</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Bing-News: description see :py:obj:`searx.engines.bing`.</span> + +<span class="sd">.. hint::</span> + +<span class="sd"> Bing News is *different* in some ways!</span> + +<span class="sd">"""</span> + +<span class="c1"># pylint: disable=invalid-name</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> + +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">,</span> <span class="n">eval_xpath_getindex</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> +<span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="n">set_bing_cookies</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + + +<span class="c1"># about</span> +<span 
class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.bing.com/news'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q2878637'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'RSS'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'news'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="sd">"""If you go through the pages and there are actually no new results for another</span> +<span class="sd">page, then bing returns the results from the last page again."""</span> + +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">time_map</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'day'</span><span class="p">:</span> <span class="s1">'interval="4"'</span><span class="p">,</span> + <span class="s1">'week'</span><span class="p">:</span> <span class="s1">'interval="7"'</span><span class="p">,</span> + <span class="s1">'month'</span><span class="p">:</span> <span class="s1">'interval="9"'</span><span class="p">,</span> +<span class="p">}</span> +<span class="sd">"""A string '4' means *last hour*. 
We use *last hour* for ``day`` here since the</span> +<span class="sd">difference of *last day* and *last week* in the result list is just marginal.</span> +<span class="sd">Bing does not have news range ``year`` / we use ``month`` instead."""</span> + +<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://www.bing.com/news/infinitescrollajax'</span> +<span class="sd">"""Bing (News) search URL"""</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_news.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Assemble a Bing-News request."""</span> + + <span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">engine_language</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">set_bing_cookies</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">engine_language</span><span class="p">,</span> <span class="n">engine_region</span><span class="p">)</span> + + <span class="c1"># build URL query</span> + 
<span class="c1">#</span> + <span class="c1"># example: https://www.bing.com/news/infinitescrollajax?q=london&first=1</span> + + <span class="n">page</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span> <span class="o">-</span> <span class="mi">1</span> + <span class="n">query_params</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'InfiniteScroll'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> + <span class="c1"># to simplify the page count let's use the default of 10 results per page</span> + <span class="s1">'first'</span><span class="p">:</span> <span class="n">page</span> <span class="o">*</span> <span class="mi">10</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> + <span class="s1">'SFX'</span><span class="p">:</span> <span class="n">page</span><span class="p">,</span> + <span class="s1">'form'</span><span class="p">:</span> <span class="s1">'PTFTNR'</span><span class="p">,</span> + <span class="s1">'setlang'</span><span class="p">:</span> <span class="n">engine_region</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">],</span> + <span class="s1">'cc'</span><span class="p">:</span> <span class="n">engine_region</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">],</span> + <span class="p">}</span> + + <span class="k">if</span> <span 
class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]:</span> + <span class="n">query_params</span><span class="p">[</span><span class="s1">'qft'</span><span class="p">]</span> <span class="o">=</span> <span class="n">time_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">],</span> <span class="s1">'interval="9"'</span><span class="p">)</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">query_params</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_news.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Get response from Bing-News"""</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">:</span> + <span class="k">return</span> <span class="n">results</span> + + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span 
class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">newsitem</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class, "newsitem")]'</span><span class="p">):</span> + + <span class="n">link</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">newsitem</span><span class="p">,</span> <span class="s1">'.//a[@class="title"]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">link</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">continue</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">link</span><span class="o">.</span><span class="n">attrib</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">)</span> + <span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">link</span><span class="p">)</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">newsitem</span><span class="p">,</span> <span class="s1">'.//div[@class="snippet"]'</span><span class="p">))</span> + + <span class="n">metadata</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">source</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">newsitem</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "source")]'</span><span class="p">,</span> <span 
class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">source</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="p">(</span> + <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">source</span><span class="p">,</span> <span class="s1">'.//span[@aria-label]/@aria-label'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span> + <span class="c1"># eval_xpath_getindex(source, './/a', 0, None),</span> + <span class="c1"># eval_xpath_getindex(source, './div/span', 3, None),</span> + <span class="n">link</span><span class="o">.</span><span class="n">attrib</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'data-author'</span><span class="p">),</span> + <span class="p">):</span> + <span class="k">if</span> <span class="n">item</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">t</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> + <span class="k">if</span> <span class="n">t</span> <span class="ow">and</span> <span class="n">t</span><span class="o">.</span><span class="n">strip</span><span class="p">():</span> + <span class="n">metadata</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">t</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span> + <span class="n">metadata</span> <span class="o">=</span> <span class="s1">' | '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span 
class="n">metadata</span><span class="p">)</span> + + <span class="n">thumbnail</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">imagelink</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">newsitem</span><span class="p">,</span> <span class="s1">'.//a[@class="imagelink"]//img'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">imagelink</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">imagelink</span><span class="o">.</span><span class="n">attrib</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'src'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">thumbnail</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"https://www.bing.com"</span><span class="p">):</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="s1">'https://www.bing.com/'</span> <span class="o">+</span> <span class="n">thumbnail</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span> + <span class="s1">'metadata'</span><span 
class="p">:</span> <span class="n">metadata</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span></div> + + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_news.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch languages and regions from Bing-News."""</span> + <span class="c1"># pylint: disable=import-outside-toplevel</span> + + <span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="k">as</span> <span class="n">_f</span> + + <span class="n">_f</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">)</span> + + <span class="c1"># fix market codes not known by bing news:</span> + + <span class="c1"># In bing the market code 'zh-cn' exists, but there is no 'news' category in</span> + <span class="c1"># bing for this market. Alternatively we use the market code from Hong</span> + <span class="c1"># Kong. 
Even if this is not correct, it is better than having no hits at</span> + <span class="c1"># all, or sending false queries to bing that could raise the suspicion of a</span> + <span class="c1"># bot.</span> + + <span class="c1"># HINT: 'en-hk' is the region code it does not indicate the language en!!</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="s1">'zh-CN'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'en-hk'</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + 
</ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/bing_videos.html b/_modules/searx/engines/bing_videos.html new file mode 100644 index 000000000..41c5e4326 --- /dev/null +++ b/_modules/searx/engines/bing_videos.html @@ -0,0 +1,212 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.bing_videos — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.bing_videos</a></li> + 
</ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.bing_videos</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="c1"># pylint: disable=invalid-name</span> +<span class="sd">"""Bing-Videos: description see :py:obj:`searx.engines.bing`.</span> +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">import</span> <span class="nn">json</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> + +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> + +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> +<span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="n">set_bing_cookies</span> +<span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span> +<span class="kn">from</span> <span class="nn">searx.engines.bing_images</span> <span class="kn">import</span> <span class="n">time_map</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + + +<span class="n">about</span> <span class="o">=</span> <span 
class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.bing.com/videos'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q4914152'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'videos'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> + +<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://www.bing.com/videos/asyncv2'</span> +<span class="sd">"""Bing (Videos) async search URL."""</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_videos.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Assemble a Bing-Video 
request."""</span> + + <span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">engine_language</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">set_bing_cookies</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">engine_language</span><span class="p">,</span> <span class="n">engine_region</span><span class="p">)</span> + + <span class="c1"># build URL query</span> + <span class="c1">#</span> + <span class="c1"># example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35</span> + + <span class="n">query_params</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'async'</span><span class="p">:</span> <span class="s1">'content'</span><span class="p">,</span> + <span class="c1"># to simplify the page count lets use the default of 35 images per page</span> + <span class="s1">'first'</span><span class="p">:</span> <span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span 
class="mi">1</span><span class="p">))</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">35</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> + <span class="s1">'count'</span><span class="p">:</span> <span class="mi">35</span><span class="p">,</span> + <span class="p">}</span> + + <span class="c1"># time range</span> + <span class="c1">#</span> + <span class="c1"># example: one week (10080 minutes) '&qft= filterui:videoage-lt10080' '&form=VRFLTR'</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]:</span> + <span class="n">query_params</span><span class="p">[</span><span class="s1">'form'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'VRFLTR'</span> + <span class="n">query_params</span><span class="p">[</span><span class="s1">'qft'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">' filterui:videoage-lt</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">time_map</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">query_params</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_videos.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span 
class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Get response from Bing-Video"""</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//div[@class="dg_u"]//div[contains(@id, "mc_vtvc_video")]'</span><span class="p">):</span> + <span class="n">metadata</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="vrhdata"]/@vrhm'</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span> + <span class="n">info</span> <span class="o">=</span> <span class="s1">' - '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="mc_vtvc_meta_block"]//span/text()'</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> + <span class="n">content</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{0}</span><span class="s1"> - </span><span class="si">{1}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">metadata</span><span class="p">[</span><span class="s1">'du'</span><span 
class="p">],</span> <span class="n">info</span><span class="p">)</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[contains(@class, "mc_vtvc_th")]//img/@src'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">'murl'</span><span class="p">],</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'vt'</span><span class="p">,</span> <span class="s1">''</span><span class="p">),</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'videos.html'</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li 
class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/brave.html b/_modules/searx/engines/brave.html new file mode 100644 index 000000000..8c7a72c0d --- /dev/null +++ b/_modules/searx/engines/brave.html @@ -0,0 +1,580 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.brave — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.brave</a></li> + </ul> + </div> + + <div 
class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.brave</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Brave supports the categories listed in :py:obj:`brave_category` (General,</span> +<span class="sd">news, videos, images). The support of :py:obj:`paging` and :py:obj:`time range</span> +<span class="sd"><time_range_support>` is limited (see remarks).</span> + +<span class="sd">Configured ``brave`` engines:</span> + +<span class="sd">.. code:: yaml</span> + +<span class="sd"> - name: brave</span> +<span class="sd"> engine: brave</span> +<span class="sd"> ...</span> +<span class="sd"> brave_category: search</span> +<span class="sd"> time_range_support: true</span> +<span class="sd"> paging: true</span> + +<span class="sd"> - name: brave.images</span> +<span class="sd"> engine: brave</span> +<span class="sd"> ...</span> +<span class="sd"> brave_category: images</span> + +<span class="sd"> - name: brave.videos</span> +<span class="sd"> engine: brave</span> +<span class="sd"> ...</span> +<span class="sd"> brave_category: videos</span> + +<span class="sd"> - name: brave.news</span> +<span class="sd"> engine: brave</span> +<span class="sd"> ...</span> +<span class="sd"> brave_category: news</span> + +<span class="sd"> - name: brave.goggles</span> +<span class="sd"> brave_category: goggles</span> +<span class="sd"> time_range_support: true</span> +<span class="sd"> paging: true</span> +<span class="sd"> ...</span> +<span class="sd"> brave_category: goggles</span> + + +<span class="sd">.. _brave regions:</span> + +<span class="sd">Brave regions</span> +<span class="sd">=============</span> + +<span class="sd">Brave uses two-digit tags for the regions like ``ca`` while SearXNG deals with</span> +<span class="sd">locales. 
To get a mapping, all *officiat de-facto* languages of the Brave</span> +<span class="sd">region are mapped to regions in SearXNG (see :py:obj:`babel</span> +<span class="sd"><babel.languages.get_official_languages>`):</span> + +<span class="sd">.. code:: python</span> + +<span class="sd"> "regions": {</span> +<span class="sd"> ..</span> +<span class="sd"> "en-CA": "ca",</span> +<span class="sd"> "fr-CA": "ca",</span> +<span class="sd"> ..</span> +<span class="sd"> }</span> + + +<span class="sd">.. note::</span> + +<span class="sd"> The language (aka region) support of Brave's index is limited to very basic</span> +<span class="sd"> languages. The search results for languages like Chinese or Arabic are of</span> +<span class="sd"> low quality.</span> + + +<span class="sd">.. _brave googles:</span> + +<span class="sd">Brave Goggles</span> +<span class="sd">=============</span> + +<span class="sd">.. _list of Goggles: https://search.brave.com/goggles/discover</span> +<span class="sd">.. _Goggles Whitepaper: https://brave.com/static-assets/files/goggles.pdf</span> +<span class="sd">.. _Goggles Quickstart: https://github.com/brave/goggles-quickstart</span> + +<span class="sd">Goggles allow you to choose, alter, or extend the ranking of Brave Search</span> +<span class="sd">results (`Goggles Whitepaper`_). Goggles are openly developed by the community</span> +<span class="sd">of Brave Search users.</span> + +<span class="sd">Select from the `list of Goggles`_ people have published, or create your own</span> +<span class="sd">(`Goggles Quickstart`_).</span> + + +<span class="sd">.. _brave languages:</span> + +<span class="sd">Brave languages</span> +<span class="sd">===============</span> + +<span class="sd">Brave's language support is limited to the UI (menus, area local notations,</span> +<span class="sd">etc). Brave's index only seems to support a locale, but it does not seem to</span> +<span class="sd">support any languages in its index. 
The choice of available languages is very</span> +<span class="sd">small (and its not clear to me where the difference in UI is when switching</span> +<span class="sd">from en-us to en-ca or en-gb).</span> + +<span class="sd">In the :py:obj:`EngineTraits object <searx.enginelib.traits.EngineTraits>` the</span> +<span class="sd">UI languages are stored in a custom field named ``ui_lang``:</span> + +<span class="sd">.. code:: python</span> + +<span class="sd"> "custom": {</span> +<span class="sd"> "ui_lang": {</span> +<span class="sd"> "ca": "ca",</span> +<span class="sd"> "de-DE": "de-de",</span> +<span class="sd"> "en-CA": "en-ca",</span> +<span class="sd"> "en-GB": "en-gb",</span> +<span class="sd"> "en-US": "en-us",</span> +<span class="sd"> "es": "es",</span> +<span class="sd"> "fr-CA": "fr-ca",</span> +<span class="sd"> "fr-FR": "fr-fr",</span> +<span class="sd"> "ja-JP": "ja-jp",</span> +<span class="sd"> "pt-BR": "pt-br",</span> +<span class="sd"> "sq-AL": "sq-al"</span> +<span class="sd"> }</span> +<span class="sd"> },</span> + +<span class="sd">Implementations</span> +<span class="sd">===============</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Any</span><span class="p">,</span> <span class="n">TYPE_CHECKING</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">urlencode</span><span class="p">,</span> + <span class="n">urlparse</span><span class="p">,</span> +<span class="p">)</span> + +<span class="kn">from</span> <span class="nn">dateutil</span> <span class="kn">import</span> <span class="n">parser</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">locales</span> +<span 
class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">extract_text</span><span class="p">,</span> + <span class="n">extr</span><span class="p">,</span> + <span class="n">eval_xpath</span><span class="p">,</span> + <span class="n">eval_xpath_list</span><span class="p">,</span> + <span class="n">eval_xpath_getindex</span><span class="p">,</span> + <span class="n">js_variable_to_python</span><span class="p">,</span> + <span class="n">get_embeded_stream_url</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://search.brave.com/'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q22906900'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> + +<span 
class="n">base_url</span> <span class="o">=</span> <span class="s2">"https://search.brave.com/"</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[]</span> +<span class="n">brave_category</span> <span class="o">=</span> <span class="s1">'search'</span> +<span class="n">Goggles</span> <span class="o">=</span> <span class="n">Any</span> +<span class="sd">"""Brave supports common web-search, videos, images, news, and goggles search.</span> + +<span class="sd">- ``search``: Common WEB search</span> +<span class="sd">- ``videos``: search for videos</span> +<span class="sd">- ``images``: search for images</span> +<span class="sd">- ``news``: search for news</span> +<span class="sd">- ``goggles``: Common WEB search with custom rules</span> +<span class="sd">"""</span> + +<span class="n">brave_spellcheck</span> <span class="o">=</span> <span class="kc">False</span> +<span class="sd">"""Brave supports some kind of spell checking. When activated, Brave tries to</span> +<span class="sd">fix typos, e.g. it searches for ``food`` when the user queries for ``fooh``. In</span> +<span class="sd">the UI of Brave the user gets warned about this, since we can not warn the user</span> +<span class="sd">in SearXNG, the spellchecking is disabled by default.</span> +<span class="sd">"""</span> + +<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">False</span> +<span class="sd">"""Brave only supports paging in :py:obj:`brave_category` ``search`` (UI</span> +<span class="sd">category All) and in the goggles category."""</span> +<span class="n">max_page</span> <span class="o">=</span> <span class="mi">10</span> +<span class="sd">"""Tested 9 pages maximum (``&offset=8``), to be save max is set to 10. 
Trying</span> +<span class="sd">to do more won't return any result and you will most likely be flagged as a bot.</span> +<span class="sd">"""</span> + +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch_map</span> <span class="o">=</span> <span class="p">{</span><span class="mi">2</span><span class="p">:</span> <span class="s1">'strict'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'moderate'</span><span class="p">,</span> <span class="mi">0</span><span class="p">:</span> <span class="s1">'off'</span><span class="p">}</span> <span class="c1"># cookie: safesearch=off</span> + +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">False</span> +<span class="sd">"""Brave only supports time-range in :py:obj:`brave_category` ``search`` (UI</span> +<span class="sd">category All) and in the goggles category."""</span> + +<span class="n">time_range_map</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'day'</span><span class="p">:</span> <span class="s1">'pd'</span><span class="p">,</span> + <span class="s1">'week'</span><span class="p">:</span> <span class="s1">'pw'</span><span class="p">,</span> + <span class="s1">'month'</span><span class="p">:</span> <span class="s1">'pm'</span><span class="p">,</span> + <span class="s1">'year'</span><span class="p">:</span> <span class="s1">'py'</span><span class="p">,</span> +<span class="p">}</span> + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + + <span class="c1"># Don't accept br encoding / see https://github.com/searxng/searxng/pull/1787</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Accept-Encoding'</span><span 
class="p">]</span> <span class="o">=</span> <span class="s1">'gzip, deflate'</span> + + <span class="n">args</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="p">}</span> + <span class="k">if</span> <span class="n">brave_spellcheck</span><span class="p">:</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'spellcheck'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span> + + <span class="k">if</span> <span class="n">brave_category</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'search'</span><span class="p">,</span> <span class="s1">'goggles'</span><span class="p">):</span> + <span class="k">if</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">:</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'offset'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span> + <span class="k">if</span> <span class="n">time_range_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]):</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'tf'</span><span class="p">]</span> <span class="o">=</span> <span class="n">time_range_map</span><span class="o">.</span><span class="n">get</span><span 
class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">])</span> + + <span class="k">if</span> <span class="n">brave_category</span> <span class="o">==</span> <span class="s1">'goggles'</span><span class="p">:</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'goggles_id'</span><span class="p">]</span> <span class="o">=</span> <span class="n">Goggles</span> + + <span class="n">params</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">base_url</span><span class="si">}{</span><span class="n">brave_category</span><span class="si">}</span><span class="s2">?</span><span class="si">{</span><span class="n">urlencode</span><span class="p">(</span><span class="n">args</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span> + + <span class="c1"># set properties in the cookies</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'safesearch'</span><span class="p">]</span> <span class="o">=</span> <span class="n">safesearch_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">],</span> <span class="s1">'off'</span><span class="p">)</span> + <span class="c1"># the useLocation is IP based, we use cookie 'country' for the region</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'useLocation'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'0'</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span 
class="s1">'summarizer'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'0'</span> + + <span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'all'</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'country'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_region</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="c1"># type: ignore</span> + + <span class="n">ui_lang</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">get_engine_locale</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ui_lang"</span><span class="p">],</span> <span class="s1">'en-us'</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'ui_lang'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ui_lang</span> + + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"cookies </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span 
class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">])</span> + + +<span class="k">def</span> <span class="nf">_extract_published_date</span><span class="p">(</span><span class="n">published_date_raw</span><span class="p">):</span> + <span class="k">if</span> <span class="n">published_date_raw</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="k">try</span><span class="p">:</span> + <span class="k">return</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">published_date_raw</span><span class="p">)</span> + <span class="k">except</span> <span class="n">parser</span><span class="o">.</span><span class="n">ParserError</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + + <span class="k">if</span> <span class="n">brave_category</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'search'</span><span class="p">,</span> <span class="s1">'goggles'</span><span class="p">):</span> + <span class="k">return</span> <span class="n">_parse_search</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> + + <span class="n">datastr</span> <span class="o">=</span> <span class="n">extr</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">,</span> <span class="s2">"const data = "</span><span class="p">,</span> <span class="s2">";</span><span class="se">\n</span><span class="s2">"</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> + + <span class="n">json_data</span> <span class="o">=</span> <span 
class="n">js_variable_to_python</span><span class="p">(</span><span class="n">datastr</span><span class="p">)</span> + <span class="n">json_resp</span> <span class="o">=</span> <span class="n">json_data</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'body'</span><span class="p">][</span><span class="s1">'response'</span><span class="p">]</span> + + <span class="k">if</span> <span class="n">brave_category</span> <span class="o">==</span> <span class="s1">'news'</span><span class="p">:</span> + <span class="k">return</span> <span class="n">_parse_news</span><span class="p">(</span><span class="n">json_resp</span><span class="p">[</span><span class="s1">'news'</span><span class="p">])</span> + + <span class="k">if</span> <span class="n">brave_category</span> <span class="o">==</span> <span class="s1">'images'</span><span class="p">:</span> + <span class="k">return</span> <span class="n">_parse_images</span><span class="p">(</span><span class="n">json_resp</span><span class="p">)</span> + <span class="k">if</span> <span class="n">brave_category</span> <span class="o">==</span> <span class="s1">'videos'</span><span class="p">:</span> + <span class="k">return</span> <span class="n">_parse_videos</span><span class="p">(</span><span class="n">json_resp</span><span class="p">)</span> + + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Unsupported brave category: </span><span class="si">{</span><span class="n">brave_category</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + + +<span class="k">def</span> <span class="nf">_parse_search</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + + <span class="n">result_list</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">dom</span> <span class="o">=</span> 
<span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="n">answer_tag</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@class="answer"]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">answer_tag</span><span class="p">:</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@id="featured_snippet"]/a[@class="result-header"]/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> + <span class="n">result_list</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'answer'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">answer_tag</span><span class="p">),</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">})</span> + + <span class="c1"># xpath_results = '//div[contains(@class, "snippet fdb") and @data-type="web"]'</span> + <span class="n">xpath_results</span> <span class="o">=</span> <span class="s1">'//div[contains(@class, "snippet ")]'</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span 
class="n">xpath_results</span><span class="p">):</span> + + <span class="n">url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//a[contains(@class, "h")]/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> + <span class="n">title_tag</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span> + <span class="n">result</span><span class="p">,</span> <span class="s1">'.//a[contains(@class, "h")]//div[contains(@class, "title")]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span> + <span class="p">)</span> + <span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">title_tag</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">url</span><span class="p">)</span><span class="o">.</span><span class="n">netloc</span><span class="p">:</span> <span class="c1"># partial url likely means it's an ad</span> + <span class="k">continue</span> + + <span class="n">content_tag</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "snippet-description")]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span> + <span class="n">pub_date_raw</span> <span class="o">=</span> <span 
class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'substring-before(.//div[contains(@class, "snippet-description")], "-")'</span><span class="p">)</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//img[contains(@class, "thumb")]/@src'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span> + + <span class="n">item</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">title_tag</span><span class="p">),</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">content_tag</span><span class="p">),</span> + <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">_extract_published_date</span><span class="p">(</span><span class="n">pub_date_raw</span><span class="p">),</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span> + <span class="p">}</span> + + <span class="n">video_tag</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span> + <span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "video-snippet") and @data-macro="video"]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span> + <span class="p">)</span> + 
<span class="k">if</span> <span class="n">video_tag</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + + <span class="c1"># In my tests a video tag in the WEB search was most often not a</span> + <span class="c1"># video, except the ones from youtube ..</span> + + <span class="n">iframe_src</span> <span class="o">=</span> <span class="n">get_embeded_stream_url</span><span class="p">(</span><span class="n">url</span><span class="p">)</span> + <span class="k">if</span> <span class="n">iframe_src</span><span class="p">:</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'iframe_src'</span><span class="p">]</span> <span class="o">=</span> <span class="n">iframe_src</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'template'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'videos.html'</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">video_tag</span><span class="p">,</span> <span class="s1">'.//img/@src'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span> + <span class="n">pub_date_raw</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span> + <span class="n">eval_xpath</span><span class="p">(</span><span class="n">video_tag</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "snippet-attributes")]/div/text()'</span><span class="p">)</span> + <span class="p">)</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'publishedDate'</span><span class="p">]</span> <span class="o">=</span> <span class="n">_extract_published_date</span><span 
class="p">(</span><span class="n">pub_date_raw</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">video_tag</span><span class="p">,</span> <span class="s1">'.//img/@src'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span> + + <span class="n">result_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">result_list</span> + + +<span class="k">def</span> <span class="nf">_parse_news</span><span class="p">(</span><span class="n">json_resp</span><span class="p">):</span> + <span class="n">result_list</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_resp</span><span class="p">[</span><span class="s2">"results"</span><span class="p">]:</span> + <span class="n">item</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'url'</span><span class="p">],</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'title'</span><span class="p">],</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'description'</span><span class="p">],</span> + <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">_extract_published_date</span><span 
class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s1">'age'</span><span class="p">]),</span> + <span class="p">}</span> + <span class="k">if</span> <span class="n">result</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">][</span><span class="s1">'src'</span><span class="p">]</span> + <span class="n">result_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">result_list</span> + + +<span class="k">def</span> <span class="nf">_parse_images</span><span class="p">(</span><span class="n">json_resp</span><span class="p">):</span> + <span class="n">result_list</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_resp</span><span class="p">[</span><span class="s2">"results"</span><span class="p">]:</span> + <span class="n">item</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'url'</span><span class="p">],</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'title'</span><span class="p">],</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'description'</span><span class="p">],</span> + <span 
class="s1">'template'</span><span class="p">:</span> <span class="s1">'images.html'</span><span class="p">,</span> + <span class="s1">'resolution'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'properties'</span><span class="p">][</span><span class="s1">'format'</span><span class="p">],</span> + <span class="s1">'source'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'source'</span><span class="p">],</span> + <span class="s1">'img_src'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'properties'</span><span class="p">][</span><span class="s1">'url'</span><span class="p">],</span> + <span class="s1">'thumbnail_src'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">][</span><span class="s1">'src'</span><span class="p">],</span> + <span class="p">}</span> + <span class="n">result_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">result_list</span> + + +<span class="k">def</span> <span class="nf">_parse_videos</span><span class="p">(</span><span class="n">json_resp</span><span class="p">):</span> + <span class="n">result_list</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_resp</span><span class="p">[</span><span class="s2">"results"</span><span class="p">]:</span> + + <span class="n">url</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> + <span class="n">item</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span 
class="n">url</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'title'</span><span class="p">],</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'description'</span><span class="p">],</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'videos.html'</span><span class="p">,</span> + <span class="s1">'length'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'video'</span><span class="p">][</span><span class="s1">'duration'</span><span class="p">],</span> + <span class="s1">'duration'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'video'</span><span class="p">][</span><span class="s1">'duration'</span><span class="p">],</span> + <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">_extract_published_date</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s1">'age'</span><span class="p">]),</span> + <span class="p">}</span> + + <span class="k">if</span> <span class="n">result</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">][</span><span class="s1">'src'</span><span class="p">]</span> + + <span class="n">iframe_src</span> <span class="o">=</span> <span class="n">get_embeded_stream_url</span><span class="p">(</span><span class="n">url</span><span class="p">)</span> + <span 
class="k">if</span> <span class="n">iframe_src</span><span class="p">:</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'iframe_src'</span><span class="p">]</span> <span class="o">=</span> <span class="n">iframe_src</span> + + <span class="n">result_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">result_list</span> + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/brave.html#searx.engines.brave.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch :ref:`languages <brave languages>` and :ref:`regions <brave</span> +<span class="sd"> regions>` from Brave."""</span> + + <span class="c1"># pylint: disable=import-outside-toplevel, too-many-branches</span> + + <span class="kn">import</span> <span class="nn">babel.languages</span> + <span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">region_tag</span><span class="p">,</span> <span class="n">language_tag</span> + <span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ui_lang"</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="n">headers</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'Accept-Encoding'</span><span class="p">:</span> <span class="s1">'gzip, 
deflate'</span><span class="p">,</span> + <span class="p">}</span> + <span class="n">lang_map</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'no'</span><span class="p">:</span> <span class="s1">'nb'</span><span class="p">}</span> <span class="c1"># norway</span> + + <span class="c1"># languages (UI)</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://search.brave.com/settings'</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from Brave is not OK."</span><span class="p">)</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + <span class="k">for</span> <span class="n">option</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//section//option[@value="en-us"]/../option'</span><span class="p">):</span> + + <span class="n">ui_lang</span> <span class="o">=</span> <span class="n">option</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'value'</span><span class="p">)</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">l</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span 
class="n">parse</span><span class="p">(</span><span class="n">ui_lang</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">l</span><span class="o">.</span><span class="n">territory</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">ui_lang</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">))</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">ui_lang</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">))</span> + + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: can't determine babel locale of Brave's (UI) language </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">ui_lang</span><span class="p">)</span> + <span class="k">continue</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ui_lang"</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span 
class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">ui_lang</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">ui_lang</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ui_lang"</span><span class="p">][</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">ui_lang</span> + + <span class="c1"># search regions of brave</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://cdn.search.brave.com/serp/v2/_app/immutable/chunks/parameters.734c106a.js'</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from Brave is not OK."</span><span class="p">)</span> + + <span class="n">country_js</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span 
class="p">[</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="s2">"options:{all"</span><span class="p">)</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="s1">'options:'</span><span class="p">)</span> <span class="p">:]</span> <span class="c1"># type: ignore</span> + <span class="n">country_js</span> <span class="o">=</span> <span class="n">country_js</span><span class="p">[:</span> <span class="n">country_js</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="s2">"},k={default"</span><span class="p">)]</span> + <span class="n">country_tags</span> <span class="o">=</span> <span class="n">js_variable_to_python</span><span class="p">(</span><span class="n">country_js</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">country_tags</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="k">if</span> <span class="n">k</span> <span class="o">==</span> <span class="s1">'all'</span><span class="p">:</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">'all'</span> + <span class="k">continue</span> + <span class="n">country_tag</span> <span class="o">=</span> <span class="n">v</span><span class="p">[</span><span class="s1">'value'</span><span class="p">]</span> + + <span class="c1"># add official languages of the country ..</span> + <span class="k">for</span> <span class="n">lang_tag</span> <span class="ow">in</span> <span class="n">babel</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get_official_languages</span><span class="p">(</span><span 
class="n">country_tag</span><span class="p">,</span> <span class="n">de_facto</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> + <span class="n">lang_tag</span> <span class="o">=</span> <span class="n">lang_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">lang_tag</span><span class="p">,</span> <span class="n">lang_tag</span><span class="p">)</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="s1">'</span><span class="si">%s</span><span class="s1">_</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">lang_tag</span><span class="p">,</span> <span class="n">country_tag</span><span class="o">.</span><span class="n">upper</span><span class="p">())))</span> + <span class="c1"># print("%-20s: %s <-- %s" % (v['label'], country_tag, sxng_tag))</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">country_tag</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span 
class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">country_tag</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">country_tag</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a 
href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/command.html b/_modules/searx/engines/command.html new file mode 100644 index 000000000..5f0081b5e --- /dev/null +++ b/_modules/searx/engines/command.html @@ -0,0 +1,355 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.command — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.command</a></li> + </ul> + </div> + + 
<div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.command</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""With *command engines* administrators can run engines to integrate arbitrary</span> +<span class="sd">shell commands.</span> + +<span class="sd">.. attention::</span> + +<span class="sd"> When creating and enabling a ``command`` engine on a public instance, you</span> +<span class="sd"> must be careful to avoid leaking private data.</span> + +<span class="sd">The easiest solution is to limit the access by setting ``tokens`` as described</span> +<span class="sd">in section :ref:`private engines`. The engine base is flexible. Only your</span> +<span class="sd">imagination can limit the power of this engine (and maybe security concerns).</span> + +<span class="sd">Configuration</span> +<span class="sd">=============</span> + +<span class="sd">The following options are available:</span> + +<span class="sd">``command``:</span> +<span class="sd"> A comma separated list of the elements of the command. A special token</span> +<span class="sd"> ``{{QUERY}}`` tells where to put the search terms of the user. Example:</span> + +<span class="sd"> .. code:: yaml</span> + +<span class="sd"> ['ls', '-l', '-h', '{{QUERY}}']</span> + +<span class="sd">``delimiter``:</span> +<span class="sd"> A mapping containing a delimiter ``char`` and the *titles* of each element in</span> +<span class="sd"> ``keys``.</span> + +<span class="sd">``parse_regex``:</span> +<span class="sd"> A dict containing the regular expressions for each result key.</span> + +<span class="sd">``query_type``:</span> + +<span class="sd"> The expected type of user search terms. 
Possible values: ``path`` and</span> +<span class="sd"> ``enum``.</span> + +<span class="sd"> ``path``:</span> +<span class="sd"> Checks if the user provided path is inside the working directory. If not,</span> +<span class="sd"> the query is not executed.</span> + +<span class="sd"> ``enum``:</span> +<span class="sd"> Is a list of allowed search terms. If the user submits something which is</span> +<span class="sd"> not included in the list, the query returns an error.</span> + +<span class="sd">``query_enum``:</span> +<span class="sd"> A list containing allowed search terms if ``query_type`` is set to ``enum``.</span> + +<span class="sd">``working_dir``:</span> +<span class="sd"> The directory where the command has to be executed. Default: ``./``.</span> + +<span class="sd">``result_separator``:</span> +<span class="sd"> The character that separates results. Default: ``\\n``.</span> + +<span class="sd">Example</span> +<span class="sd">=======</span> + +<span class="sd">The example engine below can be used to find files with a specific name in the</span> +<span class="sd">configured working directory:</span> + +<span class="sd">.. 
code:: yaml</span> + +<span class="sd"> - name: find</span> +<span class="sd"> engine: command</span> +<span class="sd"> command: ['find', '.', '-name', '{{QUERY}}']</span> +<span class="sd"> query_type: path</span> +<span class="sd"> shortcut: fnd</span> +<span class="sd"> delimiter:</span> +<span class="sd"> chars: ' '</span> +<span class="sd"> keys: ['line']</span> + +<span class="sd">Implementations</span> +<span class="sd">===============</span> +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">re</span> +<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">expanduser</span><span class="p">,</span> <span class="n">isabs</span><span class="p">,</span> <span class="n">realpath</span><span class="p">,</span> <span class="n">commonprefix</span> +<span class="kn">from</span> <span class="nn">shlex</span> <span class="kn">import</span> <span class="n">split</span> <span class="k">as</span> <span class="n">shlex_split</span> +<span class="kn">from</span> <span class="nn">subprocess</span> <span class="kn">import</span> <span class="n">Popen</span><span class="p">,</span> <span class="n">PIPE</span> +<span class="kn">from</span> <span class="nn">threading</span> <span class="kn">import</span> <span class="n">Thread</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">logger</span> + + +<span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'offline'</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">command</span> <span class="o">=</span> <span class="p">[]</span> +<span class="n">delimiter</span> <span class="o">=</span> <span class="p">{}</span> +<span class="n">parse_regex</span> <span class="o">=</span> <span class="p">{}</span> +<span class="n">query_type</span> <span class="o">=</span> <span class="s1">''</span> +<span 
class="n">query_enum</span> <span class="o">=</span> <span class="p">[]</span> +<span class="n">environment_variables</span> <span class="o">=</span> <span class="p">{}</span> +<span class="n">working_dir</span> <span class="o">=</span> <span class="n">realpath</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)</span> +<span class="n">result_separator</span> <span class="o">=</span> <span class="s1">'</span><span class="se">\n</span><span class="s1">'</span> +<span class="n">result_template</span> <span class="o">=</span> <span class="s1">'key-value.html'</span> +<span class="n">timeout</span> <span class="o">=</span> <span class="mf">4.0</span> + +<span class="n">_command_logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'command'</span><span class="p">)</span> +<span class="n">_compiled_parse_regex</span> <span class="o">=</span> <span class="p">{}</span> + + +<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">):</span> + <span class="n">check_parsing_options</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">)</span> + + <span class="k">if</span> <span class="s1">'command'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'engine command : missing configuration key: command'</span><span class="p">)</span> + + <span class="k">global</span> <span class="n">command</span><span class="p">,</span> <span class="n">working_dir</span><span class="p">,</span> <span class="n">delimiter</span><span class="p">,</span> <span class="n">parse_regex</span><span class="p">,</span> <span class="n">environment_variables</span> <span class="c1"># pylint: 
disable=global-statement</span> + + <span class="n">command</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'command'</span><span class="p">]</span> + + <span class="k">if</span> <span class="s1">'working_dir'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span> + <span class="n">working_dir</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'working_dir'</span><span class="p">]</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">isabs</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">[</span><span class="s1">'working_dir'</span><span class="p">]):</span> + <span class="n">working_dir</span> <span class="o">=</span> <span class="n">realpath</span><span class="p">(</span><span class="n">working_dir</span><span class="p">)</span> + + <span class="k">if</span> <span class="s1">'parse_regex'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span> + <span class="n">parse_regex</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'parse_regex'</span><span class="p">]</span> + <span class="k">for</span> <span class="n">result_key</span><span class="p">,</span> <span class="n">regex</span> <span class="ow">in</span> <span class="n">parse_regex</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="n">_compiled_parse_regex</span><span class="p">[</span><span class="n">result_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="n">regex</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="n">re</span><span class="o">.</span><span 
class="n">MULTILINE</span><span class="p">)</span> + <span class="k">if</span> <span class="s1">'delimiter'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span> + <span class="n">delimiter</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'delimiter'</span><span class="p">]</span> + + <span class="k">if</span> <span class="s1">'environment_variables'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span> + <span class="n">environment_variables</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'environment_variables'</span><span class="p">]</span> + + +<span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + <span class="n">cmd</span> <span class="o">=</span> <span class="n">_get_command_to_run</span><span class="p">(</span><span class="n">query</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">cmd</span><span class="p">:</span> + <span class="k">return</span> <span class="p">[]</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">reader_thread</span> <span class="o">=</span> <span class="n">Thread</span><span class="p">(</span><span class="n">target</span><span class="o">=</span><span class="n">_get_results_from_process</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="p">(</span><span class="n">results</span><span class="p">,</span> <span class="n">cmd</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]))</span> + <span class="n">reader_thread</span><span class="o">.</span><span 
class="n">start</span><span class="p">()</span> + <span class="n">reader_thread</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span> + + +<span class="k">def</span> <span class="nf">_get_command_to_run</span><span class="p">(</span><span class="n">query</span><span class="p">):</span> + <span class="n">params</span> <span class="o">=</span> <span class="n">shlex_split</span><span class="p">(</span><span class="n">query</span><span class="p">)</span> + <span class="n">__check_query_params</span><span class="p">(</span><span class="n">params</span><span class="p">)</span> + + <span class="n">cmd</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">command</span><span class="p">:</span> + <span class="k">if</span> <span class="n">c</span> <span class="o">==</span> <span class="s1">'{{QUERY}}'</span><span class="p">:</span> + <span class="n">cmd</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">params</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">cmd</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">c</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">cmd</span> + + +<span class="k">def</span> <span class="nf">_get_results_from_process</span><span class="p">(</span><span class="n">results</span><span class="p">,</span> <span class="n">cmd</span><span class="p">,</span> <span class="n">pageno</span><span class="p">):</span> + <span class="n">leftover</span> <span class="o">=</span> <span class="s1">''</span> + <span class="n">count</span> <span class="o">=</span> <span class="mi">0</span> + 
<span class="n">start</span><span class="p">,</span> <span class="n">end</span> <span class="o">=</span> <span class="n">__get_results_limits</span><span class="p">(</span><span class="n">pageno</span><span class="p">)</span> + <span class="k">with</span> <span class="n">Popen</span><span class="p">(</span><span class="n">cmd</span><span class="p">,</span> <span class="n">stdout</span><span class="o">=</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">env</span><span class="o">=</span><span class="n">environment_variables</span><span class="p">)</span> <span class="k">as</span> <span class="n">process</span><span class="p">:</span> + <span class="n">line</span> <span class="o">=</span> <span class="n">process</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">readline</span><span class="p">()</span> + <span class="k">while</span> <span class="n">line</span><span class="p">:</span> + <span class="n">buf</span> <span class="o">=</span> <span class="n">leftover</span> <span class="o">+</span> <span class="n">line</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">)</span> + <span class="n">raw_results</span> <span class="o">=</span> <span class="n">buf</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">result_separator</span><span class="p">)</span> + <span class="k">if</span> <span class="n">raw_results</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]:</span> + <span class="n">leftover</span> <span class="o">=</span> <span class="n">raw_results</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> + <span class="n">raw_results</span> <span class="o">=</span> <span 
class="n">raw_results</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> + + <span class="k">for</span> <span class="n">raw_result</span> <span class="ow">in</span> <span class="n">raw_results</span><span class="p">:</span> + <span class="n">result</span> <span class="o">=</span> <span class="n">__parse_single_result</span><span class="p">(</span><span class="n">raw_result</span><span class="p">)</span> + <span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">_command_logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'skipped result:'</span><span class="p">,</span> <span class="n">raw_result</span><span class="p">)</span> + <span class="k">continue</span> + + <span class="k">if</span> <span class="n">start</span> <span class="o"><=</span> <span class="n">count</span> <span class="ow">and</span> <span class="n">count</span> <span class="o"><=</span> <span class="n">end</span><span class="p">:</span> <span class="c1"># pylint: disable=chained-comparison</span> + <span class="n">result</span><span class="p">[</span><span class="s1">'template'</span><span class="p">]</span> <span class="o">=</span> <span class="n">result_template</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">result</span><span class="p">)</span> + + <span class="n">count</span> <span class="o">+=</span> <span class="mi">1</span> + <span class="k">if</span> <span class="n">end</span> <span class="o"><</span> <span class="n">count</span><span class="p">:</span> + <span class="k">return</span> <span class="n">results</span> + + <span class="n">line</span> <span class="o">=</span> <span class="n">process</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span 
class="n">readline</span><span class="p">()</span> + + <span class="n">return_code</span> <span class="o">=</span> <span class="n">process</span><span class="o">.</span><span class="n">wait</span><span class="p">(</span><span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">)</span> + <span class="k">if</span> <span class="n">return_code</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s1">'non-zero return code when running command'</span><span class="p">,</span> <span class="n">cmd</span><span class="p">,</span> <span class="n">return_code</span><span class="p">)</span> + <span class="k">return</span> <span class="kc">None</span> + + +<span class="k">def</span> <span class="nf">__get_results_limits</span><span class="p">(</span><span class="n">pageno</span><span class="p">):</span> + <span class="n">start</span> <span class="o">=</span> <span class="p">(</span><span class="n">pageno</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span> + <span class="n">end</span> <span class="o">=</span> <span class="n">start</span> <span class="o">+</span> <span class="mi">9</span> + <span class="k">return</span> <span class="n">start</span><span class="p">,</span> <span class="n">end</span> + + +<span class="k">def</span> <span class="nf">__check_query_params</span><span class="p">(</span><span class="n">params</span><span class="p">):</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">query_type</span><span class="p">:</span> + <span class="k">return</span> + + <span class="k">if</span> <span class="n">query_type</span> <span class="o">==</span> <span class="s1">'path'</span><span class="p">:</span> + <span class="n">query_path</span> <span class="o">=</span> <span 
class="n">params</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> + <span class="n">query_path</span> <span class="o">=</span> <span class="n">expanduser</span><span class="p">(</span><span class="n">query_path</span><span class="p">)</span> + <span class="k">if</span> <span class="n">commonprefix</span><span class="p">([</span><span class="n">realpath</span><span class="p">(</span><span class="n">query_path</span><span class="p">),</span> <span class="n">working_dir</span><span class="p">])</span> <span class="o">!=</span> <span class="n">working_dir</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'requested path is outside of configured working directory'</span><span class="p">)</span> + <span class="k">elif</span> <span class="n">query_type</span> <span class="o">==</span> <span class="s1">'enum'</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">query_enum</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> + <span class="k">for</span> <span class="n">param</span> <span class="ow">in</span> <span class="n">params</span><span class="p">:</span> + <span class="k">if</span> <span class="n">param</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">query_enum</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'submitted query params is not allowed'</span><span class="p">,</span> <span class="n">param</span><span class="p">,</span> <span class="s1">'allowed params:'</span><span class="p">,</span> <span class="n">query_enum</span><span class="p">)</span> + + +<div class="viewcode-block" id="check_parsing_options"> +<a class="viewcode-back" 
href="../../../dev/engines/offline/command-line-engines.html#searx.engines.command.check_parsing_options">[docs]</a> +<span class="k">def</span> <span class="nf">check_parsing_options</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Checks if delimiter based parsing or regex parsing is configured correctly"""</span> + + <span class="k">if</span> <span class="s1">'delimiter'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine_settings</span> <span class="ow">and</span> <span class="s1">'parse_regex'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'failed to init settings for parsing lines: missing delimiter or parse_regex'</span><span class="p">)</span> + <span class="k">if</span> <span class="s1">'delimiter'</span> <span class="ow">in</span> <span class="n">engine_settings</span> <span class="ow">and</span> <span class="s1">'parse_regex'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'failed to init settings for parsing lines: too many settings'</span><span class="p">)</span> + + <span class="k">if</span> <span class="s1">'delimiter'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span> + <span class="k">if</span> <span class="s1">'chars'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'delimiter'</span><span class="p">]</span> <span class="ow">or</span> <span class="s1">'keys'</span> <span class="ow">not</span> <span class="ow">in</span> <span 
class="n">engine_settings</span><span class="p">[</span><span class="s1">'delimiter'</span><span class="p">]:</span> + <span class="k">raise</span> <span class="ne">ValueError</span></div> + + + +<span class="k">def</span> <span class="nf">__parse_single_result</span><span class="p">(</span><span class="n">raw_result</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Parses command line output based on configuration"""</span> + + <span class="n">result</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="k">if</span> <span class="n">delimiter</span><span class="p">:</span> + <span class="n">elements</span> <span class="o">=</span> <span class="n">raw_result</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">delimiter</span><span class="p">[</span><span class="s1">'chars'</span><span class="p">],</span> <span class="n">maxsplit</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">delimiter</span><span class="p">[</span><span class="s1">'keys'</span><span class="p">])</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">elements</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">len</span><span class="p">(</span><span class="n">delimiter</span><span class="p">[</span><span class="s1">'keys'</span><span class="p">]):</span> + <span class="k">return</span> <span class="p">{}</span> + <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">elements</span><span class="p">)):</span> <span class="c1"># pylint: disable=consider-using-enumerate</span> + <span class="n">result</span><span class="p">[</span><span class="n">delimiter</span><span 
class="p">[</span><span class="s1">'keys'</span><span class="p">][</span><span class="n">i</span><span class="p">]]</span> <span class="o">=</span> <span class="n">elements</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> + + <span class="k">if</span> <span class="n">parse_regex</span><span class="p">:</span> + <span class="k">for</span> <span class="n">result_key</span><span class="p">,</span> <span class="n">regex</span> <span class="ow">in</span> <span class="n">_compiled_parse_regex</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="n">found</span> <span class="o">=</span> <span class="n">regex</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">raw_result</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">found</span><span class="p">:</span> + <span class="k">return</span> <span class="p">{}</span> + <span class="n">result</span><span class="p">[</span><span class="n">result_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">raw_result</span><span class="p">[</span><span class="n">found</span><span class="o">.</span><span class="n">start</span><span class="p">()</span> <span class="p">:</span> <span class="n">found</span><span class="o">.</span><span class="n">end</span><span class="p">()]</span> + + <span class="k">return</span> <span class="n">result</span> +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" 
href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/dailymotion.html b/_modules/searx/engines/dailymotion.html new file mode 100644 index 000000000..fd568fd04 --- /dev/null +++ b/_modules/searx/engines/dailymotion.html @@ -0,0 +1,362 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.dailymotion — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.dailymotion</a></li> + 
</ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.dailymotion</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""</span> +<span class="sd">Dailymotion (Videos)</span> +<span class="sd">~~~~~~~~~~~~~~~~~~~~</span> + +<span class="sd">.. _REST GET: https://developers.dailymotion.com/tools/</span> +<span class="sd">.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters</span> +<span class="sd">.. _Video filters API: https://developers.dailymotion.com/api/#video-filters</span> +<span class="sd">.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> + +<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">import</span> <span class="nn">time</span> +<span class="kn">import</span> <span class="nn">babel</span> + +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span><span class="p">,</span> <span class="n">raise_for_httperror</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">html_to_text</span> +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineAPIException</span> +<span 
class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">region_tag</span><span class="p">,</span> <span class="n">language_tag</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.dailymotion.com'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q769222'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://www.dailymotion.com/developer'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'videos'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">number_of_results</span> <span class="o">=</span> <span 
class="mi">10</span> + +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">time_delta_dict</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"day"</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span> + <span class="s2">"week"</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">7</span><span class="p">),</span> + <span class="s2">"month"</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">31</span><span class="p">),</span> + <span class="s2">"year"</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">365</span><span class="p">),</span> +<span class="p">}</span> + +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch_params</span> <span class="o">=</span> <span class="p">{</span> + <span class="mi">2</span><span class="p">:</span> <span class="p">{</span><span class="s1">'is_created_for_kids'</span><span class="p">:</span> <span class="s1">'true'</span><span class="p">},</span> + <span class="mi">1</span><span class="p">:</span> <span class="p">{</span><span class="s1">'is_created_for_kids'</span><span class="p">:</span> <span class="s1">'true'</span><span class="p">},</span> + <span class="mi">0</span><span class="p">:</span> <span class="p">{},</span> +<span class="p">}</span> +<span class="sd">"""True if this video is "Created for Kids" / intends to target an audience</span> +<span class="sd">under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )</span> +<span 
class="sd">"""</span> + +<span class="n">family_filter_map</span> <span class="o">=</span> <span class="p">{</span> + <span class="mi">2</span><span class="p">:</span> <span class="s1">'true'</span><span class="p">,</span> + <span class="mi">1</span><span class="p">:</span> <span class="s1">'true'</span><span class="p">,</span> + <span class="mi">0</span><span class="p">:</span> <span class="s1">'false'</span><span class="p">,</span> +<span class="p">}</span> +<span class="sd">"""By default, the family filter is turned on. Setting this parameter to</span> +<span class="sd">``false`` will stop filtering-out explicit content from searches and global</span> +<span class="sd">contexts (``family_filter`` in `Global API Parameters`_ ).</span> +<span class="sd">"""</span> + +<span class="n">result_fields</span> <span class="o">=</span> <span class="p">[</span> + <span class="s1">'allow_embed'</span><span class="p">,</span> + <span class="s1">'description'</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">,</span> + <span class="s1">'created_time'</span><span class="p">,</span> + <span class="s1">'duration'</span><span class="p">,</span> + <span class="s1">'url'</span><span class="p">,</span> + <span class="s1">'thumbnail_360_url'</span><span class="p">,</span> + <span class="s1">'id'</span><span class="p">,</span> +<span class="p">]</span> +<span class="sd">"""`Fields selection`_, by default, a few fields are returned. 
To request more</span> +<span class="sd">specific fields, the ``fields`` parameter is used with the list of fields</span> +<span class="sd">SearXNG needs in the response to build a video result list.</span> +<span class="sd">"""</span> + +<span class="n">search_url</span> <span class="o">=</span> <span class="s1">'https://api.dailymotion.com/videos?'</span> +<span class="sd">"""URL to retrieve a list of videos.</span> + +<span class="sd">- `REST GET`_</span> +<span class="sd">- `Global API Parameters`_</span> +<span class="sd">- `Video filters API`_</span> +<span class="sd">"""</span> + +<span class="n">iframe_src</span> <span class="o">=</span> <span class="s2">"https://www.dailymotion.com/embed/video/</span><span class="si">{video_id}</span><span class="s2">"</span> +<span class="sd">"""URL template to embed video in SearXNG's result list."""</span> + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">query</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">False</span> + + <span class="n">eng_region</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en_US'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">eng_lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en'</span><span 
class="p">)</span> + + <span class="n">args</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'search'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'family_filter'</span><span class="p">:</span> <span class="n">family_filter_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">],</span> <span class="s1">'false'</span><span class="p">),</span> + <span class="s1">'thumbnail_ratio'</span><span class="p">:</span> <span class="s1">'original'</span><span class="p">,</span> <span class="c1"># original|widescreen|square</span> + <span class="c1"># https://developers.dailymotion.com/api/#video-filters</span> + <span class="s1">'languages'</span><span class="p">:</span> <span class="n">eng_lang</span><span class="p">,</span> + <span class="s1">'page'</span><span class="p">:</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">],</span> + <span class="s1">'password_protected'</span><span class="p">:</span> <span class="s1">'false'</span><span class="p">,</span> + <span class="s1">'private'</span><span class="p">:</span> <span class="s1">'false'</span><span class="p">,</span> + <span class="s1">'sort'</span><span class="p">:</span> <span class="s1">'relevance'</span><span class="p">,</span> + <span class="s1">'limit'</span><span class="p">:</span> <span class="n">number_of_results</span><span class="p">,</span> + <span class="s1">'fields'</span><span class="p">:</span> <span class="s1">','</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result_fields</span><span class="p">),</span> + <span class="p">}</span> + + <span class="n">args</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">safesearch_params</span><span 
class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">],</span> <span class="p">{}))</span> + + <span class="c1"># Don't add localization and country arguments if the user does select a</span> + <span class="c1"># language (:de, :en, ..)</span> + + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">))</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="c1"># https://developers.dailymotion.com/api/#global-parameters</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'localization'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_region</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'country'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_region</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span> + <span class="c1"># Insufficient rights for the `ams_country' parameter of route `GET /videos'</span> + <span class="c1"># 'ams_country': eng_region.split('_')[1],</span> + + <span class="n">time_delta</span> <span class="o">=</span> <span class="n">time_delta_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">"time_range"</span><span class="p">])</span> + <span class="k">if</span> <span class="n">time_delta</span><span class="p">:</span> + <span class="n">created_after</span> <span class="o">=</span> <span 
class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">time_delta</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'created_after'</span><span class="p">]</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">timestamp</span><span class="p">(</span><span class="n">created_after</span><span class="p">)</span> + + <span class="n">query_str</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">args</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_url</span> <span class="o">+</span> <span class="n">query_str</span> + + <span class="k">return</span> <span class="n">params</span> + + +<span class="c1"># get response from search-request</span> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="n">search_res</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + + <span class="c1"># check for an API error</span> + <span class="k">if</span> <span class="s1">'error'</span> <span class="ow">in</span> <span class="n">search_res</span><span class="p">:</span> + <span class="k">raise</span> <span class="n">SearxEngineAPIException</span><span class="p">(</span><span class="n">search_res</span><span class="p">[</span><span class="s1">'error'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'message'</span><span class="p">))</span> + + <span class="n">raise_for_httperror</span><span 
class="p">(</span><span class="n">resp</span><span class="p">)</span> + + <span class="c1"># parse results</span> + <span class="k">for</span> <span class="n">res</span> <span class="ow">in</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'list'</span><span class="p">,</span> <span class="p">[]):</span> + + <span class="n">title</span> <span class="o">=</span> <span class="n">res</span><span class="p">[</span><span class="s1">'title'</span><span class="p">]</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">res</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> + + <span class="n">content</span> <span class="o">=</span> <span class="n">html_to_text</span><span class="p">(</span><span class="n">res</span><span class="p">[</span><span class="s1">'description'</span><span class="p">])</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">content</span><span class="p">)</span> <span class="o">></span> <span class="mi">300</span><span class="p">:</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">content</span><span class="p">[:</span><span class="mi">300</span><span class="p">]</span> <span class="o">+</span> <span class="s1">'...'</span> + + <span class="n">publishedDate</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="n">res</span><span class="p">[</span><span class="s1">'created_time'</span><span class="p">],</span> <span class="kc">None</span><span class="p">)</span> + + <span class="n">length</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">gmtime</span><span class="p">(</span><span class="n">res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span 
class="s1">'duration'</span><span class="p">))</span> + <span class="k">if</span> <span class="n">length</span><span class="o">.</span><span class="n">tm_hour</span><span class="p">:</span> + <span class="n">length</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s2">"%H:%M:%S"</span><span class="p">,</span> <span class="n">length</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">length</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s2">"%M:%S"</span><span class="p">,</span> <span class="n">length</span><span class="p">)</span> + + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">res</span><span class="p">[</span><span class="s1">'thumbnail_360_url'</span><span class="p">]</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">thumbnail</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"http://"</span><span class="p">,</span> <span class="s2">"https://"</span><span class="p">)</span> + + <span class="n">item</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'videos.html'</span><span class="p">,</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> + <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">publishedDate</span><span class="p">,</span> + <span class="s1">'length'</span><span class="p">:</span> <span 
class="n">length</span><span class="p">,</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span> + <span class="p">}</span> + + <span class="c1"># HINT: no mater what the value is, without API token videos can't shown</span> + <span class="c1"># embedded</span> + <span class="k">if</span> <span class="n">res</span><span class="p">[</span><span class="s1">'allow_embed'</span><span class="p">]:</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'iframe_src'</span><span class="p">]</span> <span class="o">=</span> <span class="n">iframe_src</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">video_id</span><span class="o">=</span><span class="n">res</span><span class="p">[</span><span class="s1">'id'</span><span class="p">])</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> + + <span class="c1"># return results</span> + <span class="k">return</span> <span class="n">results</span> + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/dailymotion.html#searx.engines.dailymotion.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch locales & languages from dailymotion.</span> + +<span class="sd"> Locales fetched from `api/locales <https://api.dailymotion.com/locales>`_.</span> +<span class="sd"> There are duplications in the locale codes returned from Dailymotion which</span> +<span class="sd"> can be ignored::</span> + +<span class="sd"> en_EN --> en_GB, en_US</span> +<span class="sd"> ar_AA --> ar_EG, ar_AE, ar_SA</span> + 
+<span class="sd"> The language list `api/languages <https://api.dailymotion.com/languages>`_</span> +<span class="sd"> contains over 7000 *languages* codes (see PR1071_). We use only those</span> +<span class="sd"> language codes that are used in the locales.</span> + +<span class="sd"> .. _PR1071: https://github.com/searxng/searxng/pull/1071</span> + +<span class="sd"> """</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://api.dailymotion.com/locales'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from dailymotion/locales is not OK."</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()[</span><span class="s1">'list'</span><span class="p">]:</span> <span class="c1"># type: ignore</span> + <span class="n">eng_tag</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s1">'locale'</span><span class="p">]</span> + <span class="k">if</span> <span class="n">eng_tag</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'en_EN'</span><span class="p">,</span> <span class="s1">'ar_AA'</span><span class="p">):</span> + <span class="k">continue</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span 
class="n">eng_tag</span><span class="p">))</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: item unknown --> </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">item</span><span class="p">)</span> + <span class="k">continue</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> + + <span class="n">locale_lang_list</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="mi">0</span><span 
class="p">]</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">values</span><span class="p">()]</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://api.dailymotion.com/languages'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from dailymotion/languages is not OK."</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()[</span><span class="s1">'list'</span><span class="p">]:</span> <span class="c1"># type: ignore</span> + <span class="n">eng_tag</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s1">'code'</span><span class="p">]</span> + <span class="k">if</span> <span class="n">eng_tag</span> <span class="ow">in</span> <span class="n">locale_lang_list</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">))</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span></div> + +</pre></div> 
+ + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" 
autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/demo_offline.html b/_modules/searx/engines/demo_offline.html new file mode 100644 index 000000000..ea7887410 --- /dev/null +++ b/_modules/searx/engines/demo_offline.html @@ -0,0 +1,186 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.demo_offline — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.demo_offline</a></li> 
+ </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.demo_offline</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Within this module we implement a *demo offline engine*. Do not look to</span> +<span class="sd">close to the implementation, its just a simple example. To get in use of this</span> +<span class="sd">*demo* engine add the following entry to your engines list in ``settings.yml``:</span> + +<span class="sd">.. code:: yaml</span> + +<span class="sd"> - name: my offline engine</span> +<span class="sd"> engine: demo_offline</span> +<span class="sd"> shortcut: demo</span> +<span class="sd"> disabled: false</span> + +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">json</span> + +<span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'offline'</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">]</span> +<span class="n">disabled</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">timeout</span> <span class="o">=</span> <span class="mf">2.0</span> + +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span 
class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># if there is a need for globals, use a leading underline</span> +<span class="n">_my_offline_engine</span> <span class="o">=</span> <span class="kc">None</span> + + +<div class="viewcode-block" id="init"> +<a class="viewcode-back" href="../../../dev/engines/demo/demo_offline.html#searx.engines.demo_offline.init">[docs]</a> +<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Initialization of the (offline) engine. The origin of this demo engine is a</span> +<span class="sd"> simple json string which is loaded in this example while the engine is</span> +<span class="sd"> initialized.</span> + +<span class="sd"> """</span> + <span class="k">global</span> <span class="n">_my_offline_engine</span> <span class="c1"># pylint: disable=global-statement</span> + + <span class="n">_my_offline_engine</span> <span class="o">=</span> <span class="p">(</span> + <span class="s1">'[ {"value": "</span><span class="si">%s</span><span class="s1">"}'</span> + <span class="s1">', {"value":"first item"}'</span> + <span class="s1">', {"value":"second item"}'</span> + <span class="s1">', {"value":"third item"}'</span> + <span class="s1">']'</span> <span class="o">%</span> <span class="n">engine_settings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'name'</span><span class="p">)</span> + <span class="p">)</span></div> + + + +<div class="viewcode-block" id="search"> +<a class="viewcode-back" href="../../../dev/engines/demo/demo_offline.html#searx.engines.demo_offline.search">[docs]</a> +<span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">request_params</span><span 
class="p">):</span> +<span class="w"> </span><span class="sd">"""Query (offline) engine and return results. Assemble the list of results from</span> +<span class="sd"> your local engine. In this demo engine we ignore the 'query' term, usually</span> +<span class="sd"> you would pass the 'query' term to your local engine to filter out the</span> +<span class="sd"> results.</span> + +<span class="sd"> """</span> + <span class="n">ret_val</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="n">result_list</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">_my_offline_engine</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">result_list</span><span class="p">:</span> + <span class="n">entry</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'language'</span><span class="p">:</span> <span class="n">request_params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> + <span class="s1">'value'</span><span class="p">:</span> <span class="n">row</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">),</span> + <span class="c1"># choose a result template or comment out to use the *default*</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'key-value.html'</span><span class="p">,</span> + <span class="p">}</span> + <span class="n">ret_val</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">entry</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">ret_val</span></div> + +</pre></div> + + <div class="clearer"></div> + </div>
+ </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input 
type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/demo_online.html b/_modules/searx/engines/demo_online.html new file mode 100644 index 000000000..d20af1107 --- /dev/null +++ b/_modules/searx/engines/demo_online.html @@ -0,0 +1,215 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.demo_online — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.demo_online</a></li> + 
</ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.demo_online</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Within this module we implement a *demo online engine*. Do not look too</span> +<span class="sd">close to the implementation, it's just a simple example which queries `The Art</span> +<span class="sd">Institute of Chicago <https://www.artic.edu>`_</span> + +<span class="sd">To make use of this *demo* engine add the following entry to your engines</span> +<span class="sd">list in ``settings.yml``:</span> + +<span class="sd">.. code:: yaml</span> + +<span class="sd"> - name: my online engine</span> +<span class="sd"> engine: demo_online</span> +<span class="sd"> shortcut: demo</span> +<span class="sd"> disabled: false</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">loads</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> + +<span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'online'</span> +<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">]</span> +<span class="n">disabled</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">timeout</span> <span class="o">=</span> <span class="mf">2.0</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'images'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span
class="n">page_size</span> <span class="o">=</span> <span class="mi">20</span> + +<span class="n">search_api</span> <span class="o">=</span> <span class="s1">'https://api.artic.edu/api/v1/artworks/search?'</span> +<span class="n">image_api</span> <span class="o">=</span> <span class="s1">'https://www.artic.edu/iiif/2/'</span> + +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.artic.edu'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q239303'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'http://api.artic.edu/docs/'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + + +<span class="c1"># if there is a need for globals, use a leading underline</span> +<span class="n">_my_online_engine</span> <span class="o">=</span> <span class="kc">None</span> + + +<div class="viewcode-block" id="init"> +<a class="viewcode-back" href="../../../dev/engines/demo/demo_online.html#searx.engines.demo_online.init">[docs]</a> +<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Initialization of the (online) engine. 
If no initialization is needed, drop</span> +<span class="sd"> this init function.</span> + +<span class="sd"> """</span> + <span class="k">global</span> <span class="n">_my_online_engine</span> <span class="c1"># pylint: disable=global-statement</span> + <span class="n">_my_online_engine</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'name'</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/demo/demo_online.html#searx.engines.demo_online.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Build up the ``params`` for the online request. In this example we build a</span> +<span class="sd"> URL to fetch images from `artic.edu <https://artic.edu>`__</span> + +<span class="sd"> """</span> + <span class="n">args</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'page'</span><span class="p">:</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">],</span> + <span class="s1">'fields'</span><span class="p">:</span> <span class="s1">'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles'</span><span class="p">,</span> + <span class="s1">'limit'</span><span class="p">:</span> <span class="n">page_size</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span 
class="o">=</span> <span class="n">search_api</span> <span class="o">+</span> <span class="n">args</span> + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/demo/demo_online.html#searx.engines.demo_online.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Parse out the result items from the response. In this example we parse the</span> +<span class="sd"> response from `api.artic.edu <https://artic.edu>`__ and filter out all</span> +<span class="sd"> images.</span> + +<span class="sd"> """</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">json_data</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_data</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]:</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">result</span><span class="p">[</span><span class="s1">'image_id'</span><span class="p">]:</span> + <span class="k">continue</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="s1">'https://artic.edu/artworks/</span><span class="si">%(id)s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">result</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span 
class="s1">'title'</span><span class="p">]</span> <span class="o">+</span> <span class="s2">" (</span><span class="si">%(date_display)s</span><span class="s2">) // </span><span class="si">%(artist_display)s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">result</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="s2">"</span><span class="si">%(medium_display)s</span><span class="s2"> // </span><span class="si">%(dimensions)s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">result</span><span class="p">,</span> + <span class="s1">'author'</span><span class="p">:</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s1">'artist_titles'</span><span class="p">]),</span> + <span class="s1">'img_src'</span><span class="p">:</span> <span class="n">image_api</span> <span class="o">+</span> <span class="s1">'/</span><span class="si">%(image_id)s</span><span class="s1">/full/843,/0/default.jpg'</span> <span class="o">%</span> <span class="n">result</span><span class="p">,</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'images.html'</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User 
information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/duckduckgo.html b/_modules/searx/engines/duckduckgo.html new file mode 100644 index 000000000..0b7f711dd --- /dev/null +++ b/_modules/searx/engines/duckduckgo.html @@ -0,0 +1,632 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.duckduckgo — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.duckduckgo</a></li> + </ul> + 
</div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.duckduckgo</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""</span> +<span class="sd">DuckDuckGo WEB</span> +<span class="sd">~~~~~~~~~~~~~~</span> +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">import</span> <span class="nn">re</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span><span class="p">,</span> <span class="n">quote_plus</span> +<span class="kn">import</span> <span class="nn">json</span> +<span class="kn">import</span> <span class="nn">babel</span> +<span class="kn">import</span> <span class="nn">lxml.html</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">locales</span><span class="p">,</span> + <span class="n">redislib</span><span class="p">,</span> + <span class="n">external_bang</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">eval_xpath</span><span class="p">,</span> + <span class="n">extr</span><span class="p">,</span> + <span class="n">extract_text</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span> +<span 
class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">redisdb</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineCaptchaException</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://lite.duckduckgo.com/lite/'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q12805'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span> +<span class="sd">"""DuckDuckGo-Lite tries to guess user's preferred language from the HTTP</span> +<span class="sd">``Accept-Language``. 
Optionally the user can select a region filter (but not a</span> +<span class="sd">language).</span> +<span class="sd">"""</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> <span class="c1"># user can't select but the results are filtered</span> + +<span class="n">url</span> <span class="o">=</span> <span class="s2">"https://html.duckduckgo.com/html"</span> + +<span class="n">time_range_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'day'</span><span class="p">:</span> <span class="s1">'d'</span><span class="p">,</span> <span class="s1">'week'</span><span class="p">:</span> <span class="s1">'w'</span><span class="p">,</span> <span class="s1">'month'</span><span class="p">:</span> <span class="s1">'m'</span><span class="p">,</span> <span class="s1">'year'</span><span class="p">:</span> <span class="s1">'y'</span><span class="p">}</span> +<span class="n">form_data</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'v'</span><span class="p">:</span> <span class="s1">'l'</span><span class="p">,</span> <span class="s1">'api'</span><span class="p">:</span> <span class="s1">'d.js'</span><span class="p">,</span> <span class="s1">'o'</span><span class="p">:</span> <span class="s1">'json'</span><span class="p">}</span> +<span class="n">__CACHE</span> <span class="o">=</span> <span class="p">[]</span> + + +<span class="k">def</span> <span class="nf">_cache_key</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span
class="nb">str</span><span class="p">,</span> <span class="n">region</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span> + <span class="k">return</span> <span class="s1">'SearXNG_ddg_web_vqd'</span> <span class="o">+</span> <span class="n">redislib</span><span class="o">.</span><span class="n">secret_hash</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">query</span><span class="si">}</span><span class="s2">//</span><span class="si">{</span><span class="n">region</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + + +<div class="viewcode-block" id="cache_vqd"> +<a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo.cache_vqd">[docs]</a> +<span class="k">def</span> <span class="nf">cache_vqd</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">region</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Caches a ``vqd`` value from a query."""</span> + <span class="n">c</span> <span class="o">=</span> <span class="n">redisdb</span><span class="o">.</span><span class="n">client</span><span class="p">()</span> + <span class="k">if</span> <span class="n">c</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"VALKEY cache vqd value: </span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">)"</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">region</span><span class="p">)</span> + <span class="n">c</span><span 
class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">_cache_key</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">region</span><span class="p">),</span> <span class="n">value</span><span class="p">,</span> <span class="n">ex</span><span class="o">=</span><span class="mi">600</span><span class="p">)</span> + + <span class="k">else</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"MEM cache vqd value: </span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">)"</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">region</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">__CACHE</span><span class="p">)</span> <span class="o">></span> <span class="mi">100</span><span class="p">:</span> <span class="c1"># cache vqd from last 100 queries</span> + <span class="n">__CACHE</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> + <span class="n">__CACHE</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">_cache_key</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">region</span><span class="p">),</span> <span class="n">value</span><span class="p">))</span></div> + + + +<div class="viewcode-block" id="get_vqd"> +<a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo.get_vqd">[docs]</a> +<span class="k">def</span> <span class="nf">get_vqd</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">region</span><span 
class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">force_request</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns the ``vqd`` that fits to the *query*.</span> + +<span class="sd"> :param query: The query term</span> +<span class="sd"> :param region: DDG's region code</span> +<span class="sd"> :param force_request: force a request to get a vqd value from DDG</span> + +<span class="sd"> TL;DR; the ``vqd`` value is needed to pass DDG's bot protection and is used</span> +<span class="sd"> by all request to DDG:</span> + +<span class="sd"> - DuckDuckGo Lite: ``https://lite.duckduckgo.com/lite`` (POST form data)</span> +<span class="sd"> - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``</span> +<span class="sd"> - DuckDuckGo Images: ``https://duckduckgo.com/i.js??q=...&vqd=...``</span> +<span class="sd"> - DuckDuckGo Videos: ``https://duckduckgo.com/v.js??q=...&vqd=...``</span> +<span class="sd"> - DuckDuckGo News: ``https://duckduckgo.com/news.js??q=...&vqd=...``</span> + +<span class="sd"> DDG's bot detection is sensitive to the ``vqd`` value. For some search terms</span> +<span class="sd"> (such as extremely long search terms that are often sent by bots), no ``vqd``</span> +<span class="sd"> value can be determined.</span> + +<span class="sd"> If SearXNG cannot determine a ``vqd`` value, then no request should go out</span> +<span class="sd"> to DDG.</span> + +<span class="sd"> .. attention::</span> + +<span class="sd"> A request with a wrong ``vqd`` value leads to DDG temporarily putting</span> +<span class="sd"> SearXNG's IP on a block list.</span> + +<span class="sd"> Requests from IPs in this block list run into timeouts. 
Not sure, but it</span> +<span class="sd"> seems the block list is a sliding window: to get my IP rid from the bot list</span> +<span class="sd"> I had to cool down my IP for 1h (send no requests from that IP to DDG).</span> +<span class="sd"> """</span> + <span class="n">key</span> <span class="o">=</span> <span class="n">_cache_key</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">region</span><span class="p">)</span> + + <span class="n">c</span> <span class="o">=</span> <span class="n">redisdb</span><span class="o">.</span><span class="n">client</span><span class="p">()</span> + <span class="k">if</span> <span class="n">c</span><span class="p">:</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">c</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> + <span class="k">if</span> <span class="n">value</span> <span class="ow">or</span> <span class="n">value</span> <span class="o">==</span> <span class="sa">b</span><span class="s1">''</span><span class="p">:</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">value</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"re-use CACHED vqd value: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> + <span class="k">return</span> <span class="n">value</span> + + <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">__CACHE</span><span class="p">:</span> + <span class="k">if</span> <span class="n">k</span> <span 
class="o">==</span> <span class="n">key</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"MEM re-use CACHED vqd value: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> + <span class="k">return</span> <span class="n">value</span> + + <span class="k">if</span> <span class="n">force_request</span><span class="p">:</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="sa">f</span><span class="s1">'https://duckduckgo.com/?q=</span><span class="si">{</span><span class="n">quote_plus</span><span class="p">(</span><span class="n">query</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">200</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">extr</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">,</span> <span class="s1">'vqd="'</span><span class="p">,</span> <span class="s1">'"'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="k">if</span> <span class="n">value</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"vqd value from DDG request: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> + <span class="n">cache_vqd</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span 
class="n">region</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> + <span class="k">return</span> <span class="n">value</span> + + <span class="k">return</span> <span class="kc">None</span></div> + + + +<div class="viewcode-block" id="get_ddg_lang"> +<a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo.get_ddg_lang">[docs]</a> +<span class="k">def</span> <span class="nf">get_ddg_lang</span><span class="p">(</span><span class="n">eng_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">'en_US'</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Get DuckDuckGo's language identifier from SearXNG's locale.</span> + +<span class="sd"> DuckDuckGo defines its languages by region codes (see</span> +<span class="sd"> :py:obj:`fetch_traits`).</span> + +<span class="sd"> To get region and language of a DDG service use:</span> + +<span class="sd"> .. code: python</span> + +<span class="sd"> eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)</span> +<span class="sd"> eng_lang = get_ddg_lang(traits, params['searxng_locale'])</span> + +<span class="sd"> It might confuse, but the ``l`` value of the cookie is what SearXNG calls</span> +<span class="sd"> the *region*:</span> + +<span class="sd"> .. code:: python</span> + +<span class="sd"> # !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}</span> +<span class="sd"> params['cookies']['ad'] = eng_lang</span> +<span class="sd"> params['cookies']['ah'] = eng_region</span> +<span class="sd"> params['cookies']['l'] = eng_region</span> + +<span class="sd"> .. 
hint::</span> + +<span class="sd"> `DDG-lite <https://lite.duckduckgo.com/lite>`__ and the *no Javascript*</span> +<span class="sd"> page https://html.duckduckgo.com/html do not offer a language selection</span> +<span class="sd"> to the user, only a region can be selected by the user (``eng_region``</span> +<span class="sd"> from the example above). DDG-lite and *no Javascript* store the selected</span> +<span class="sd"> region in a cookie::</span> + +<span class="sd"> params['cookies']['kl'] = eng_region # 'ar-es'</span> + +<span class="sd"> """</span> + <span class="k">return</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'lang_region'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span> <span class="c1"># type: ignore</span> + <span class="n">sxng_locale</span><span class="p">,</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">default</span><span class="p">)</span> + <span class="p">)</span></div> + + + +<span class="n">ddg_reg_map</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'tw-tzh'</span><span class="p">:</span> <span class="s1">'zh_TW'</span><span class="p">,</span> + <span class="s1">'hk-tzh'</span><span class="p">:</span> <span class="s1">'zh_HK'</span><span class="p">,</span> + <span class="s1">'ct-ca'</span><span class="p">:</span> <span class="s1">'skip'</span><span class="p">,</span> <span class="c1"># ct-ca and es-ca both map to ca_ES</span> + <span class="s1">'es-ca'</span><span class="p">:</span> <span class="s1">'ca_ES'</span><span class="p">,</span> + <span class="s1">'id-en'</span><span class="p">:</span> <span class="s1">'id_ID'</span><span class="p">,</span> + <span class="s1">'no-no'</span><span class="p">:</span> <span 
class="s1">'nb_NO'</span><span class="p">,</span> + <span class="s1">'jp-jp'</span><span class="p">:</span> <span class="s1">'ja_JP'</span><span class="p">,</span> + <span class="s1">'kr-kr'</span><span class="p">:</span> <span class="s1">'ko_KR'</span><span class="p">,</span> + <span class="s1">'xa-ar'</span><span class="p">:</span> <span class="s1">'ar_SA'</span><span class="p">,</span> + <span class="s1">'sl-sl'</span><span class="p">:</span> <span class="s1">'sl_SI'</span><span class="p">,</span> + <span class="s1">'th-en'</span><span class="p">:</span> <span class="s1">'th_TH'</span><span class="p">,</span> + <span class="s1">'vn-en'</span><span class="p">:</span> <span class="s1">'vi_VN'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="n">ddg_lang_map</span> <span class="o">=</span> <span class="p">{</span> + <span class="c1"># use ar --> ar_EG (Egypt's arabic)</span> + <span class="s2">"ar_DZ"</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s2">"ar_JO"</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s2">"ar_SA"</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="c1"># use bn --> bn_BD</span> + <span class="s1">'bn_IN'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="c1"># use de --> de_DE</span> + <span class="s1">'de_CH'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="c1"># use en --> en_US,</span> + <span class="s1">'en_AU'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'en_CA'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'en_GB'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span 
class="p">,</span> + <span class="c1"># Esperanto</span> + <span class="s1">'eo_XX'</span><span class="p">:</span> <span class="s1">'eo'</span><span class="p">,</span> + <span class="c1"># use es --> es_ES,</span> + <span class="s1">'es_AR'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'es_CL'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'es_CO'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'es_CR'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'es_EC'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'es_MX'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'es_PE'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'es_UY'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'es_VE'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="c1"># use fr --> rf_FR</span> + <span class="s1">'fr_CA'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'fr_CH'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="s1">'fr_BE'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="c1"># use nl --> nl_NL</span> + <span class="s1">'nl_BE'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span> + <span class="c1"># use pt --> pt_PT</span> + <span class="s1">'pt_BR'</span><span class="p">:</span> <span 
class="s1">'lang_region'</span><span class="p">,</span> + <span class="c1"># skip these languages</span> + <span class="s1">'od_IN'</span><span class="p">:</span> <span class="s1">'skip'</span><span class="p">,</span> + <span class="s1">'io_XX'</span><span class="p">:</span> <span class="s1">'skip'</span><span class="p">,</span> + <span class="s1">'tokipona_XX'</span><span class="p">:</span> <span class="s1">'skip'</span><span class="p">,</span> +<span class="p">}</span> + + +<span class="k">def</span> <span class="nf">quote_ddg_bangs</span><span class="p">(</span><span class="n">query</span><span class="p">):</span> + <span class="c1"># quote ddg bangs</span> + <span class="n">query_parts</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="c1"># for val in re.split(r'(\s+)', query):</span> + <span class="k">for</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">re</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="sa">r</span><span class="s1">'(\s+)'</span><span class="p">,</span> <span class="n">query</span><span class="p">):</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">val</span><span class="o">.</span><span class="n">strip</span><span class="p">():</span> + <span class="k">continue</span> + <span class="k">if</span> <span class="n">val</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'!'</span><span class="p">)</span> <span class="ow">and</span> <span class="n">external_bang</span><span class="o">.</span><span class="n">get_node</span><span class="p">(</span><span class="n">external_bang</span><span class="o">.</span><span class="n">EXTERNAL_BANGS</span><span class="p">,</span> <span class="n">val</span><span class="p">[</span><span class="mi">1</span><span class="p">:]):</span> + <span class="n">val</span> <span class="o">=</span> <span class="sa">f</span><span 
class="s2">"'</span><span class="si">{</span><span class="n">val</span><span class="si">}</span><span class="s2">'"</span> + <span class="n">query_parts</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">val</span><span class="p">)</span> + <span class="k">return</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">query_parts</span><span class="p">)</span> + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + + <span class="n">query</span> <span class="o">=</span> <span class="n">quote_ddg_bangs</span><span class="p">(</span><span class="n">query</span><span class="p">)</span> + + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">query</span><span class="p">)</span> <span class="o">>=</span> <span class="mi">500</span><span class="p">:</span> + <span class="c1"># DDG does not accept queries with more than 499 chars</span> + <span class="n">params</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">return</span> + + <span class="c1"># Advanced search syntax ends in CAPTCHA</span> + <span class="c1"># https://duckduckgo.com/duckduckgo-help-pages/results/syntax/</span> + <span class="n">query</span> <span class="o">=</span> <span class="s2">" "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span> + <span class="p">[</span> + <span class="n">x</span><span class="o">.</span><span class="n">removeprefix</span><span class="p">(</span><span class="s2">"site:"</span><span class="p">)</span><span class="o">.</span><span class="n">removeprefix</span><span class="p">(</span><span class="s2">"intitle:"</span><span class="p">)</span><span 
class="o">.</span><span class="n">removeprefix</span><span class="p">(</span><span class="s2">"inurl:"</span><span class="p">)</span><span class="o">.</span><span class="n">removeprefix</span><span class="p">(</span><span class="s2">"filetype:"</span><span class="p">)</span> + <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">query</span><span class="o">.</span><span class="n">split</span><span class="p">()</span> + <span class="p">]</span> + <span class="p">)</span> + <span class="n">eng_region</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="k">if</span> <span class="n">eng_region</span> <span class="o">==</span> <span class="s2">"wt-wt"</span><span class="p">:</span> + <span class="c1"># https://html.duckduckgo.com/html sets an empty value for "all".</span> + <span class="n">eng_region</span> <span class="o">=</span> <span class="s2">""</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'kl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_region</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'kl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_region</span> + + <span class="c1"># eng_lang = get_ddg_lang(traits, params['searxng_locale'])</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span 
class="o">=</span> <span class="n">url</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'method'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'POST'</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'q'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query</span> + + <span class="c1"># The API is not documented, so we do some reverse engineering and emulate</span> + <span class="c1"># what https://html.duckduckgo.com/html does when you press "next Page" link</span> + <span class="c1"># again and again ..</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Content-Type'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'application/x-www-form-urlencoded'</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Sec-Fetch-Dest'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"document"</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Sec-Fetch-Mode'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"navigate"</span> <span class="c1"># at least this one is used by ddg's bot detection</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Sec-Fetch-Site'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"same-origin"</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Sec-Fetch-User'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"?1"</span> + + <span class="c1"># Form 
of the initial search page does have empty values in the form</span> + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'b'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">""</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'df'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">''</span> + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'df'</span><span class="p">]</span> <span class="o">=</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'df'</span><span class="p">]</span> <span class="o">=</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span> + + <span class="c1"># second page does have an offset of 20</span> + <span class="n">offset</span> 
<span class="o">=</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">20</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'s'</span><span class="p">]</span> <span class="o">=</span> <span class="n">offset</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'dc'</span><span class="p">]</span> <span class="o">=</span> <span class="n">offset</span> <span class="o">+</span> <span class="mi">1</span> + + <span class="k">elif</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">></span> <span class="mi">2</span><span class="p">:</span> + + <span class="c1"># third and following pages do have an offset of 20 + n*50</span> + <span class="n">offset</span> <span class="o">=</span> <span class="mi">20</span> <span class="o">+</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">2</span><span class="p">)</span> <span class="o">*</span> <span class="mi">50</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'s'</span><span class="p">]</span> <span class="o">=</span> <span class="n">offset</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'dc'</span><span class="p">]</span> <span class="o">=</span> <span class="n">offset</span> <span class="o">+</span> <span class="mi">1</span> + + <span class="k">if</span> <span class="n">params</span><span 
class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + + <span class="c1"># initial page does not have these additional data in the input form</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'o'</span><span class="p">]</span> <span class="o">=</span> <span class="n">form_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'o'</span><span class="p">,</span> <span class="s1">'json'</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'api'</span><span class="p">]</span> <span class="o">=</span> <span class="n">form_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'api'</span><span class="p">,</span> <span class="s1">'d.js'</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'nextParams'</span><span class="p">]</span> <span class="o">=</span> <span class="n">form_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'nextParams'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'v'</span><span class="p">]</span> <span class="o">=</span> <span class="n">form_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'v'</span><span class="p">,</span> <span class="s1">'l'</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span 
class="s1">'Referer'</span><span class="p">]</span> <span class="o">=</span> <span class="n">url</span> + + <span class="n">vqd</span> <span class="o">=</span> <span class="n">get_vqd</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">eng_region</span><span class="p">,</span> <span class="n">force_request</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> + + <span class="c1"># Certain conditions must be met in order to call up one of the</span> + <span class="c1"># following pages ...</span> + + <span class="k">if</span> <span class="n">vqd</span><span class="p">:</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'vqd'</span><span class="p">]</span> <span class="o">=</span> <span class="n">vqd</span> <span class="c1"># follow up pages / requests needs a vqd argument</span> + <span class="k">else</span><span class="p">:</span> + <span class="c1"># Don't try to call follow up pages without a vqd value. DDG</span> + <span class="c1"># recognizes this as a request from a bot. 
This lowers the</span> + <span class="c1"># reputation of the SearXNG IP and DDG starts to activate CAPTCHAs.</span> + <span class="n">params</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">return</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">]</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"zh"</span><span class="p">):</span> + <span class="c1"># Some locales (at least China) do not have a "next page" button and ddg</span> + <span class="c1"># will return a HTTP/2 403 Forbidden for a request of such a page.</span> + <span class="n">params</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">return</span> + + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"param data: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">])</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"param cookies: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">])</span> + + +<div class="viewcode-block" id="is_ddg_captcha"> +<a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo.is_ddg_captcha">[docs]</a> +<span class="k">def</span> <span class="nf">is_ddg_captcha</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span> +<span class="w"> 
</span><span class="sd">"""In case of CAPTCHA ddg response its own *not a Robot* dialog and is not</span> +<span class="sd"> redirected to a CAPTCHA page."""</span> + + <span class="k">return</span> <span class="nb">bool</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//form[@id='challenge-form']"</span><span class="p">))</span></div> + + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">303</span><span class="p">:</span> + <span class="k">return</span> <span class="p">[]</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">doc</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">is_ddg_captcha</span><span class="p">(</span><span class="n">doc</span><span class="p">):</span> + <span class="c1"># set suspend time to zero is OK --> ddg does not block the IP</span> + <span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">(</span><span class="n">suspended_time</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">message</span><span class="o">=</span><span class="sa">f</span><span class="s2">"CAPTCHA (</span><span class="si">{</span><span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]</span><span 
class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'kl'</span><span class="p">)</span><span class="si">}</span><span class="s2">)"</span><span class="p">)</span> + + <span class="n">form</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">doc</span><span class="p">,</span> <span class="s1">'//input[@name="vqd"]/..'</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">form</span><span class="p">):</span> + <span class="c1"># some locales (at least China) does not have a "next page" button</span> + <span class="n">form</span> <span class="o">=</span> <span class="n">form</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">form_vqd</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">form</span><span class="p">,</span> <span class="s1">'//input[@name="vqd"]/@value'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + + <span class="n">cache_vqd</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'q'</span><span class="p">],</span> <span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'kl'</span><span class="p">],</span> <span class="n">form_vqd</span><span class="p">)</span> + + <span class="c1"># just select "web-result" and ignore results of class "result--ad result--ad--small"</span> + <span class="k">for</span> <span class="n">div_result</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">doc</span><span class="p">,</span> <span 
class="s1">'//div[@id="links"]/div[contains(@class, "web-result")]'</span><span class="p">):</span> + + <span class="n">item</span> <span class="o">=</span> <span class="p">{}</span> + <span class="n">title</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">div_result</span><span class="p">,</span> <span class="s1">'.//h2/a'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">title</span><span class="p">:</span> + <span class="c1"># this is the "No results." item in the result list</span> + <span class="k">continue</span> + <span class="n">item</span><span class="p">[</span><span class="s2">"title"</span><span class="p">]</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">title</span><span class="p">)</span> + <span class="n">item</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">div_result</span><span class="p">,</span> <span class="s1">'.//h2/a/@href'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">item</span><span class="p">[</span><span class="s2">"content"</span><span class="p">]</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">div_result</span><span class="p">,</span> <span class="s1">'.//a[contains(@class, "result__snippet")]'</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> + + <span class="n">zero_click_info_xpath</span> <span class="o">=</span> <span class="s1">'//div[@id="zero_click_abstract"]'</span> + <span 
class="n">zero_click</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">doc</span><span class="p">,</span> <span class="n">zero_click_info_xpath</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="c1"># type: ignore</span> + + <span class="k">if</span> <span class="n">zero_click</span> <span class="ow">and</span> <span class="p">(</span> + <span class="s2">"Your IP address is"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">zero_click</span> + <span class="ow">and</span> <span class="s2">"Your user agent:"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">zero_click</span> + <span class="ow">and</span> <span class="s2">"URL Decoded:"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">zero_click</span> + <span class="p">):</span> + <span class="n">current_query</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s2">"data"</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"q"</span><span class="p">)</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'answer'</span><span class="p">:</span> <span class="n">zero_click</span><span class="p">,</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="s2">"https://duckduckgo.com/?"</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s2">"q"</span><span class="p">:</span> <span class="n">current_query</span><span class="p">}),</span> + <span class="p">}</span> + <span class="p">)</span> + + <span 
class="k">return</span> <span class="n">results</span> + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch languages & regions from DuckDuckGo.</span> + +<span class="sd"> SearXNG's ``all`` locale maps DuckDuckGo's "Alle regions" (``wt-wt``).</span> +<span class="sd"> DuckDuckGo's language "Browsers preferred language" (``wt_WT``) makes no</span> +<span class="sd"> sense in a SearXNG request since SearXNG's ``all`` will not add a</span> +<span class="sd"> ``Accept-Language`` HTTP header. The value in ``engine_traits.all_locale``</span> +<span class="sd"> is ``wt-wt`` (the region).</span> + +<span class="sd"> Beside regions DuckDuckGo also defines its languages by region codes. 
By</span> +<span class="sd"> example these are the english languages in DuckDuckGo:</span> + +<span class="sd"> - en_US</span> +<span class="sd"> - en_AU</span> +<span class="sd"> - en_CA</span> +<span class="sd"> - en_GB</span> + +<span class="sd"> The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from</span> +<span class="sd"> SearXNG's locale.</span> + +<span class="sd"> """</span> + <span class="c1"># pylint: disable=too-many-branches, too-many-statements, disable=import-outside-toplevel</span> + <span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">js_variable_to_python</span> + + <span class="c1"># fetch regions</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">'wt-wt'</span> + + <span class="c1"># updated from u661.js to u.7669f071a13a7daa57cb / should be updated automatically?</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://duckduckgo.com/dist/util/u.7669f071a13a7daa57cb.js'</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from DuckDuckGo is not OK."</span><span class="p">)</span> + + <span class="n">js_code</span> <span class="o">=</span> <span class="n">extr</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">,</span> <span class="s1">'regions:'</span><span class="p">,</span> <span class="s1">',snippetLengths'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + <span class="n">regions</span> <span class="o">=</span> <span 
class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">js_code</span><span class="p">)</span> + <span class="k">for</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">regions</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + + <span class="k">if</span> <span class="n">eng_tag</span> <span class="o">==</span> <span class="s1">'wt-wt'</span><span class="p">:</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">'wt-wt'</span> + <span class="k">continue</span> + + <span class="n">region</span> <span class="o">=</span> <span class="n">ddg_reg_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">region</span> <span class="o">==</span> <span class="s1">'skip'</span><span class="p">:</span> + <span class="k">continue</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">region</span><span class="p">:</span> + <span class="n">eng_territory</span><span class="p">,</span> <span class="n">eng_lang</span> <span class="o">=</span> <span class="n">eng_tag</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)</span> + <span class="n">region</span> <span class="o">=</span> <span class="n">eng_lang</span> <span class="o">+</span> <span class="s1">'_'</span> <span class="o">+</span> <span class="n">eng_territory</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span> + + <span class="k">try</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span 
class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">region</span><span class="p">))</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: </span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">) -> </span><span class="si">%s</span><span class="s2"> is unknown by babel"</span> <span class="o">%</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="n">region</span><span class="p">))</span> + <span class="k">continue</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span 
class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> + + <span class="c1"># fetch languages</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'lang_region'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="n">js_code</span> <span class="o">=</span> <span class="n">extr</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">,</span> <span class="s1">'languages:'</span><span class="p">,</span> <span class="s1">',regions'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + <span class="n">languages</span> <span class="o">=</span> <span class="n">js_variable_to_python</span><span class="p">(</span><span class="n">js_code</span><span class="p">)</span> + <span class="k">for</span> <span class="n">eng_lang</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">languages</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + + <span class="k">if</span> <span class="n">eng_lang</span> <span class="o">==</span> <span class="s1">'wt_WT'</span><span class="p">:</span> + <span class="k">continue</span> + + <span class="n">babel_tag</span> <span class="o">=</span> <span class="n">ddg_lang_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_lang</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">)</span> + <span class="k">if</span> <span class="n">babel_tag</span> <span class="o">==</span> <span class="s1">'skip'</span><span class="p">:</span> + <span class="k">continue</span> + + <span class="k">try</span><span class="p">:</span> + + <span class="k">if</span> <span 
class="n">babel_tag</span> <span class="o">==</span> <span class="s1">'lang_region'</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">eng_lang</span><span class="p">))</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'lang_region'</span><span class="p">][</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span> + <span class="k">continue</span> + + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">babel_tag</span><span class="p">))</span> + + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: language </span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">) is unknown by babel"</span> <span class="o">%</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">))</span> + <span class="k">continue</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span 
class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_lang</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a 
class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/duckduckgo_definitions.html b/_modules/searx/engines/duckduckgo_definitions.html new file mode 100644 index 000000000..cbbd68853 --- /dev/null +++ b/_modules/searx/engines/duckduckgo_definitions.html @@ -0,0 +1,371 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.duckduckgo_definitions — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a 
href="">searx.engines.duckduckgo_definitions</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.duckduckgo_definitions</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""</span> +<span class="sd">DuckDuckGo Instant Answer API</span> +<span class="sd">~~~~~~~~~~~~~~~~~~~~~~~~~~~~~</span> + +<span class="sd">The `DDG-API <https://duckduckgo.com/api>`__ is no longer documented but from</span> +<span class="sd">reverse engineering we can see that some services (e.g. instant answers) still</span> +<span class="sd">in use from the DDG search engine.</span> + +<span class="sd">As far we can say the *instant answers* API does not support languages, or at</span> +<span class="sd">least we could not find out how language support should work. It seems that</span> +<span class="sd">most of the features are based on English terms.</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span><span class="p">,</span> <span class="n">urlparse</span><span class="p">,</span> <span class="n">urljoin</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> + +<span class="kn">from</span> <span class="nn">searx.data</span> <span class="kn">import</span> <span class="n">WIKIDATA_UNITS</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">html_to_text</span><span class="p">,</span> <span class="n">get_string_replaces_function</span> +<span 
class="kn">from</span> <span class="nn">searx.external_urls</span> <span class="kn">import</span> <span class="n">get_external_url</span><span class="p">,</span> <span class="n">get_earth_coordinates_url</span><span class="p">,</span> <span class="n">area_to_osm_zoom</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://duckduckgo.com/'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q12805'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://duckduckgo.com/api'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span> + +<span class="n">URL</span> <span class="o">=</span> <span class="s1">'https://api.duckduckgo.com/'</span> <span class="o">+</span> <span class="s1">'?</span><span class="si">{query}</span><span class="s1">&format=json&pretty=0&no_redirect=1&d=1'</span> + +<span class="n">WIKIDATA_PREFIX</span> <span class="o">=</span> <span class="p">[</span><span 
class="s1">'http://www.wikidata.org/entity/'</span><span class="p">,</span> <span class="s1">'https://www.wikidata.org/entity/'</span><span class="p">]</span> + +<span class="n">replace_http_by_https</span> <span class="o">=</span> <span class="n">get_string_replaces_function</span><span class="p">({</span><span class="s1">'http:'</span><span class="p">:</span> <span class="s1">'https:'</span><span class="p">})</span> + + +<div class="viewcode-block" id="is_broken_text"> +<a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo_definitions.is_broken_text">[docs]</a> +<span class="k">def</span> <span class="nf">is_broken_text</span><span class="p">(</span><span class="n">text</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""duckduckgo may return something like ``<a href="xxxx">http://somewhere Related website<a/>``</span> + +<span class="sd"> The href URL is broken, the "Related website" may contains some HTML.</span> + +<span class="sd"> The best solution seems to ignore these results.</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="n">text</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'http'</span><span class="p">)</span> <span class="ow">and</span> <span class="s1">' '</span> <span class="ow">in</span> <span class="n">text</span></div> + + + +<span class="k">def</span> <span class="nf">result_to_text</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">htmlResult</span><span class="p">):</span> + <span class="c1"># TODO : remove result ending with "Meaning" or "Category" # pylint: disable=fixme</span> + <span class="n">result</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span 
class="n">htmlResult</span><span class="p">)</span> + <span class="n">a</span> <span class="o">=</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//a'</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="o">>=</span> <span class="mi">1</span><span class="p">:</span> + <span class="n">result</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">a</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">result</span> <span class="o">=</span> <span class="n">text</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">is_broken_text</span><span class="p">(</span><span class="n">result</span><span class="p">):</span> + <span class="k">return</span> <span class="n">result</span> + <span class="k">return</span> <span class="kc">None</span> + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">URL</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">urlencode</span><span class="p">({</span><span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">}))</span> + <span class="k">return</span> <span class="n">params</span> + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + <span class="c1"># pylint: 
disable=too-many-locals, too-many-branches, too-many-statements</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="n">search_res</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + + <span class="c1"># search_res.get('Entity') possible values (not exhaustive) :</span> + <span class="c1"># * continent / country / department / location / waterfall</span> + <span class="c1"># * actor / musician / artist</span> + <span class="c1"># * book / performing art / film / television / media franchise / concert tour / playwright</span> + <span class="c1"># * prepared food</span> + <span class="c1"># * website / software / os / programming language / file format / software engineer</span> + <span class="c1"># * company</span> + + <span class="n">content</span> <span class="o">=</span> <span class="s1">''</span> + <span class="n">heading</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Heading'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> + <span class="n">attributes</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">urls</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">infobox_id</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">relatedTopics</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="c1"># add answer if there is one</span> + <span class="n">answer</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Answer'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> + <span class="k">if</span> <span class="n">answer</span><span class="p">:</span> + <span 
class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'AnswerType="</span><span class="si">%s</span><span class="s1">" Answer="</span><span class="si">%s</span><span class="s1">"'</span><span class="p">,</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'AnswerType'</span><span class="p">),</span> <span class="n">answer</span><span class="p">)</span> + <span class="k">if</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'AnswerType'</span><span class="p">)</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'calc'</span><span class="p">,</span> <span class="s1">'ip'</span><span class="p">]:</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'answer'</span><span class="p">:</span> <span class="n">html_to_text</span><span class="p">(</span><span class="n">answer</span><span class="p">),</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'AbstractURL'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)})</span> + + <span class="c1"># add infobox</span> + <span class="k">if</span> <span class="s1">'Definition'</span> <span class="ow">in</span> <span class="n">search_res</span><span class="p">:</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">content</span> <span class="o">+</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Definition'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> + + <span class="k">if</span> <span 
class="s1">'Abstract'</span> <span class="ow">in</span> <span class="n">search_res</span><span class="p">:</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">content</span> <span class="o">+</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Abstract'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> + + <span class="c1"># image</span> + <span class="n">image</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Image'</span><span class="p">)</span> + <span class="n">image</span> <span class="o">=</span> <span class="kc">None</span> <span class="k">if</span> <span class="n">image</span> <span class="o">==</span> <span class="s1">''</span> <span class="k">else</span> <span class="n">image</span> + <span class="k">if</span> <span class="n">image</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">image</span><span class="p">)</span><span class="o">.</span><span class="n">netloc</span> <span class="o">==</span> <span class="s1">''</span><span class="p">:</span> + <span class="n">image</span> <span class="o">=</span> <span class="n">urljoin</span><span class="p">(</span><span class="s1">'https://duckduckgo.com'</span><span class="p">,</span> <span class="n">image</span><span class="p">)</span> + + <span class="c1"># urls</span> + <span class="c1"># Official website, Wikipedia page</span> + <span class="k">for</span> <span class="n">ddg_result</span> <span class="ow">in</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Results'</span><span class="p">,</span> <span class="p">[]):</span> + <span 
class="n">firstURL</span> <span class="o">=</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'FirstURL'</span><span class="p">)</span> + <span class="n">text</span> <span class="o">=</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Text'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">firstURL</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">text</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">text</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">firstURL</span><span class="p">})</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">heading</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">firstURL</span><span class="p">})</span> + + <span class="c1"># related topics</span> + <span class="k">for</span> <span class="n">ddg_result</span> <span class="ow">in</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'RelatedTopics'</span><span class="p">,</span> <span class="p">[]):</span> + <span class="k">if</span> <span class="s1">'FirstURL'</span> <span class="ow">in</span> <span class="n">ddg_result</span><span class="p">:</span> + <span class="n">firstURL</span> <span class="o">=</span> <span class="n">ddg_result</span><span 
class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'FirstURL'</span><span class="p">)</span> + <span class="n">text</span> <span class="o">=</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Text'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">is_broken_text</span><span class="p">(</span><span class="n">text</span><span class="p">):</span> + <span class="n">suggestion</span> <span class="o">=</span> <span class="n">result_to_text</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Result'</span><span class="p">))</span> + <span class="k">if</span> <span class="n">suggestion</span> <span class="o">!=</span> <span class="n">heading</span> <span class="ow">and</span> <span class="n">suggestion</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span class="p">:</span> <span class="n">suggestion</span><span class="p">})</span> + <span class="k">elif</span> <span class="s1">'Topics'</span> <span class="ow">in</span> <span class="n">ddg_result</span><span class="p">:</span> + <span class="n">suggestions</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">relatedTopics</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'name'</span><span class="p">:</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Name'</span><span class="p">,</span> <span class="s1">''</span><span class="p">),</span> <span 
class="s1">'suggestions'</span><span class="p">:</span> <span class="n">suggestions</span><span class="p">})</span> + <span class="k">for</span> <span class="n">topic_result</span> <span class="ow">in</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Topics'</span><span class="p">,</span> <span class="p">[]):</span> + <span class="n">suggestion</span> <span class="o">=</span> <span class="n">result_to_text</span><span class="p">(</span><span class="n">topic_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Text'</span><span class="p">),</span> <span class="n">topic_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Result'</span><span class="p">))</span> + <span class="k">if</span> <span class="n">suggestion</span> <span class="o">!=</span> <span class="n">heading</span> <span class="ow">and</span> <span class="n">suggestion</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">suggestions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)</span> + + <span class="c1"># abstract</span> + <span class="n">abstractURL</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'AbstractURL'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> + <span class="k">if</span> <span class="n">abstractURL</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">:</span> + <span class="c1"># add as result ? 
problem always in english</span> + <span class="n">infobox_id</span> <span class="o">=</span> <span class="n">abstractURL</span> + <span class="n">urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'AbstractSource'</span><span class="p">),</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">abstractURL</span><span class="p">,</span> <span class="s1">'official'</span><span class="p">:</span> <span class="kc">True</span><span class="p">})</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">abstractURL</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">heading</span><span class="p">})</span> + + <span class="c1"># definition</span> + <span class="n">definitionURL</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'DefinitionURL'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> + <span class="k">if</span> <span class="n">definitionURL</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">:</span> + <span class="c1"># add as result ? as answer ? 
problem always in english</span> + <span class="n">infobox_id</span> <span class="o">=</span> <span class="n">definitionURL</span> + <span class="n">urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'DefinitionSource'</span><span class="p">),</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">definitionURL</span><span class="p">})</span> + + <span class="c1"># to merge with wikidata's infobox</span> + <span class="k">if</span> <span class="n">infobox_id</span><span class="p">:</span> + <span class="n">infobox_id</span> <span class="o">=</span> <span class="n">replace_http_by_https</span><span class="p">(</span><span class="n">infobox_id</span><span class="p">)</span> + + <span class="c1"># attributes</span> + <span class="c1"># some will be converted to urls</span> + <span class="k">if</span> <span class="s1">'Infobox'</span> <span class="ow">in</span> <span class="n">search_res</span><span class="p">:</span> + <span class="n">infobox</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Infobox'</span><span class="p">)</span> + <span class="k">if</span> <span class="s1">'content'</span> <span class="ow">in</span> <span class="n">infobox</span><span class="p">:</span> + <span class="n">osm_zoom</span> <span class="o">=</span> <span class="mi">17</span> + <span class="n">coordinates</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">for</span> <span class="n">info</span> <span class="ow">in</span> <span class="n">infobox</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'content'</span><span class="p">):</span> + <span class="n">data_type</span> <span 
class="o">=</span> <span class="n">info</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'data_type'</span><span class="p">)</span> + <span class="n">data_label</span> <span class="o">=</span> <span class="n">info</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'label'</span><span class="p">)</span> + <span class="n">data_value</span> <span class="o">=</span> <span class="n">info</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'value'</span><span class="p">)</span> + + <span class="c1"># Workaround: ddg may return a double quote</span> + <span class="k">if</span> <span class="n">data_value</span> <span class="o">==</span> <span class="s1">'""'</span><span class="p">:</span> + <span class="k">continue</span> + + <span class="c1"># Is it an external URL ?</span> + <span class="c1"># * imdb_id / facebook_profile / youtube_channel / youtube_video / twitter_profile</span> + <span class="c1"># * instagram_profile / rotten_tomatoes / spotify_artist_id / itunes_artist_id / soundcloud_id</span> + <span class="c1"># * netflix_id</span> + <span class="n">external_url</span> <span class="o">=</span> <span class="n">get_external_url</span><span class="p">(</span><span class="n">data_type</span><span class="p">,</span> <span class="n">data_value</span><span class="p">)</span> + <span class="k">if</span> <span class="n">external_url</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">data_label</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">external_url</span><span class="p">})</span> + <span class="k">elif</span> <span class="n">data_type</span> 
<span class="ow">in</span> <span class="p">[</span><span class="s1">'instance'</span><span class="p">,</span> <span class="s1">'wiki_maps_trigger'</span><span class="p">,</span> <span class="s1">'google_play_artist_id'</span><span class="p">]:</span> + <span class="c1"># ignore instance: Wikidata value from "Instance Of" (Qxxxx)</span> + <span class="c1"># ignore wiki_maps_trigger: reference to a javascript</span> + <span class="c1"># ignore google_play_artist_id: service shutdown</span> + <span class="k">pass</span> + <span class="k">elif</span> <span class="n">data_type</span> <span class="o">==</span> <span class="s1">'string'</span> <span class="ow">and</span> <span class="n">data_label</span> <span class="o">==</span> <span class="s1">'Website'</span><span class="p">:</span> + <span class="c1"># There is already an URL for the website</span> + <span class="k">pass</span> + <span class="k">elif</span> <span class="n">data_type</span> <span class="o">==</span> <span class="s1">'area'</span><span class="p">:</span> + <span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'label'</span><span class="p">:</span> <span class="n">data_label</span><span class="p">,</span> <span class="s1">'value'</span><span class="p">:</span> <span class="n">area_to_str</span><span class="p">(</span><span class="n">data_value</span><span class="p">),</span> <span class="s1">'entity'</span><span class="p">:</span> <span class="s1">'P2046'</span><span class="p">})</span> + <span class="n">osm_zoom</span> <span class="o">=</span> <span class="n">area_to_osm_zoom</span><span class="p">(</span><span class="n">data_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'amount'</span><span class="p">))</span> + <span class="k">elif</span> <span class="n">data_type</span> <span class="o">==</span> <span class="s1">'coordinates'</span><span class="p">:</span> + <span 
class="k">if</span> <span class="n">data_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'globe'</span><span class="p">)</span> <span class="o">==</span> <span class="s1">'http://www.wikidata.org/entity/Q2'</span><span class="p">:</span> + <span class="c1"># coordinate on Earth</span> + <span class="c1"># get the zoom information from the area</span> + <span class="n">coordinates</span> <span class="o">=</span> <span class="n">info</span> + <span class="k">else</span><span class="p">:</span> + <span class="c1"># coordinate NOT on Earth</span> + <span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'label'</span><span class="p">:</span> <span class="n">data_label</span><span class="p">,</span> <span class="s1">'value'</span><span class="p">:</span> <span class="n">data_value</span><span class="p">,</span> <span class="s1">'entity'</span><span class="p">:</span> <span class="s1">'P625'</span><span class="p">})</span> + <span class="k">elif</span> <span class="n">data_type</span> <span class="o">==</span> <span class="s1">'string'</span><span class="p">:</span> + <span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'label'</span><span class="p">:</span> <span class="n">data_label</span><span class="p">,</span> <span class="s1">'value'</span><span class="p">:</span> <span class="n">data_value</span><span class="p">})</span> + + <span class="k">if</span> <span class="n">coordinates</span><span class="p">:</span> + <span class="n">data_label</span> <span class="o">=</span> <span class="n">coordinates</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'label'</span><span class="p">)</span> + <span class="n">data_value</span> <span class="o">=</span> <span class="n">coordinates</span><span class="o">.</span><span 
class="n">get</span><span class="p">(</span><span class="s1">'value'</span><span class="p">)</span> + <span class="n">latitude</span> <span class="o">=</span> <span class="n">data_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'latitude'</span><span class="p">)</span> + <span class="n">longitude</span> <span class="o">=</span> <span class="n">data_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'longitude'</span><span class="p">)</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">get_earth_coordinates_url</span><span class="p">(</span><span class="n">latitude</span><span class="p">,</span> <span class="n">longitude</span><span class="p">,</span> <span class="n">osm_zoom</span><span class="p">)</span> + <span class="n">urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="s1">'OpenStreetMap'</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'entity'</span><span class="p">:</span> <span class="s1">'P625'</span><span class="p">})</span> + + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">heading</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> + <span class="c1"># TODO get infobox.meta.value where .label='article_title' # pylint: disable=fixme</span> + <span class="k">if</span> <span class="n">image</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">attributes</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span 
class="n">urls</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">relatedTopics</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">content</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">urls</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'url'</span><span class="p">],</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">heading</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">})</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'infobox'</span><span class="p">:</span> <span class="n">heading</span><span class="p">,</span> + <span class="s1">'id'</span><span class="p">:</span> <span class="n">infobox_id</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> + <span class="s1">'img_src'</span><span class="p">:</span> <span class="n">image</span><span class="p">,</span> + <span class="s1">'attributes'</span><span class="p">:</span> <span class="n">attributes</span><span class="p">,</span> + <span class="s1">'urls'</span><span class="p">:</span> <span class="n">urls</span><span class="p">,</span> + <span class="s1">'relatedTopics'</span><span class="p">:</span> <span 
class="n">relatedTopics</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span> + + +<span class="k">def</span> <span class="nf">unit_to_str</span><span class="p">(</span><span class="n">unit</span><span class="p">):</span> + <span class="k">for</span> <span class="n">prefix</span> <span class="ow">in</span> <span class="n">WIKIDATA_PREFIX</span><span class="p">:</span> + <span class="k">if</span> <span class="n">unit</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="n">prefix</span><span class="p">):</span> + <span class="n">wikidata_entity</span> <span class="o">=</span> <span class="n">unit</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">prefix</span><span class="p">)</span> <span class="p">:]</span> + <span class="n">real_unit</span> <span class="o">=</span> <span class="n">WIKIDATA_UNITS</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">wikidata_entity</span><span class="p">)</span> + <span class="k">if</span> <span class="n">real_unit</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">unit</span> + <span class="k">return</span> <span class="n">real_unit</span><span class="p">[</span><span class="s1">'symbol'</span><span class="p">]</span> + <span class="k">return</span> <span class="n">unit</span> + + +<div class="viewcode-block" id="area_to_str"> +<a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo_definitions.area_to_str">[docs]</a> +<span class="k">def</span> <span class="nf">area_to_str</span><span class="p">(</span><span class="n">area</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': 
'+20.99'}``"""</span> + <span class="n">unit</span> <span class="o">=</span> <span class="n">unit_to_str</span><span class="p">(</span><span class="n">area</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'unit'</span><span class="p">))</span> + <span class="k">if</span> <span class="n">unit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">amount</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">area</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'amount'</span><span class="p">))</span> + <span class="k">return</span> <span class="s1">'</span><span class="si">{}</span><span class="s1"> </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">amount</span><span class="p">,</span> <span class="n">unit</span><span class="p">)</span> + <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> + <span class="k">pass</span> + <span class="k">return</span> <span class="s1">'</span><span class="si">{}</span><span class="s1"> </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">area</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'amount'</span><span class="p">,</span> <span class="s1">''</span><span class="p">),</span> <span class="n">area</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'unit'</span><span class="p">,</span> <span class="s1">''</span><span class="p">))</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span 
id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" 
/> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/google.html b/_modules/searx/engines/google.html new file mode 100644 index 000000000..58c3cb740 --- /dev/null +++ b/_modules/searx/engines/google.html @@ -0,0 +1,618 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.google — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.google</a></li> + </ul> + </div> + + <div 
class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.google</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This is the implementation of the Google WEB engine. Some of this</span> +<span class="sd">implementations (manly the :py:obj:`get_google_info`) are shared by other</span> +<span class="sd">engines:</span> + +<span class="sd">- :ref:`google images engine`</span> +<span class="sd">- :ref:`google news engine`</span> +<span class="sd">- :ref:`google videos engine`</span> +<span class="sd">- :ref:`google scholar engine`</span> +<span class="sd">- :ref:`google autocomplete`</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> + +<span class="kn">import</span> <span class="nn">re</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> +<span class="kn">import</span> <span class="nn">babel</span> +<span class="kn">import</span> <span class="nn">babel.core</span> +<span class="kn">import</span> <span class="nn">babel.languages</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">,</span> <span class="n">eval_xpath_getindex</span> +<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span><span class="p">,</span> <span class="n">region_tag</span><span class="p">,</span> <span 
class="n">get_official_locales</span> +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span> +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineCaptchaException</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.google.com'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q9366'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developers.google.com/custom-search/'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span 
class="s1">'general'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">max_page</span> <span class="o">=</span> <span class="mi">50</span> +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> + +<span class="n">time_range_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'day'</span><span class="p">:</span> <span class="s1">'d'</span><span class="p">,</span> <span class="s1">'week'</span><span class="p">:</span> <span class="s1">'w'</span><span class="p">,</span> <span class="s1">'month'</span><span class="p">:</span> <span class="s1">'m'</span><span class="p">,</span> <span class="s1">'year'</span><span class="p">:</span> <span class="s1">'y'</span><span class="p">}</span> + +<span class="c1"># Filter results. 
0: None, 1: Moderate, 2: Strict</span> +<span class="n">filter_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s1">'off'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'medium'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">'high'</span><span class="p">}</span> + +<span class="c1"># specific xpath variables</span> +<span class="c1"># ------------------------</span> + +<span class="c1"># Suggestions are links placed in a *card-section*, we extract only the text</span> +<span class="c1"># from the links not the links itself.</span> +<span class="n">suggestion_xpath</span> <span class="o">=</span> <span class="s1">'//div[contains(@class, "EIaa9b")]//a'</span> + +<span class="c1"># UI_ASYNC = 'use_ac:true,_fmt:html' # returns a HTTP 500 when user search for</span> +<span class="c1"># # celebrities like '!google natasha allegri'</span> +<span class="c1"># # or '!google chris evans'</span> +<span class="n">UI_ASYNC</span> <span class="o">=</span> <span class="s1">'use_ac:true,_fmt:prog'</span> +<span class="sd">"""Format of the response from UI's async request."""</span> + + +<div class="viewcode-block" id="get_google_info"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google.get_google_info">[docs]</a> +<span class="k">def</span> <span class="nf">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">eng_traits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Composing various (language) properties for the google engines (:ref:`google</span> +<span class="sd"> API`).</span> + +<span class="sd"> This function is called by the various google engines (:ref:`google web</span> +<span class="sd"> engine`, :ref:`google images engine`, :ref:`google news engine` and</span> 
+<span class="sd"> :ref:`google videos engine`).</span> + +<span class="sd"> :param dict param: Request parameters of the engine. At least</span> +<span class="sd"> a ``searxng_locale`` key should be in the dictionary.</span> + +<span class="sd"> :param eng_traits: Engine's traits fetched from google preferences</span> +<span class="sd"> (:py:obj:`searx.enginelib.traits.EngineTraits`)</span> + +<span class="sd"> :rtype: dict</span> +<span class="sd"> :returns:</span> +<span class="sd"> Py-Dictionary with the key/value pairs:</span> + +<span class="sd"> language:</span> +<span class="sd"> The language code that is used by google (e.g. ``lang_en`` or</span> +<span class="sd"> ``lang_zh-TW``)</span> + +<span class="sd"> country:</span> +<span class="sd"> The country code that is used by google (e.g. ``US`` or ``TW``)</span> + +<span class="sd"> locale:</span> +<span class="sd"> A instance of :py:obj:`babel.core.Locale` build from the</span> +<span class="sd"> ``searxng_locale`` value.</span> + +<span class="sd"> subdomain:</span> +<span class="sd"> Google subdomain :py:obj:`google_domains` that fits to the country</span> +<span class="sd"> code.</span> + +<span class="sd"> params:</span> +<span class="sd"> Py-Dictionary with additional request arguments (can be passed to</span> +<span class="sd"> :py:func:`urllib.parse.urlencode`).</span> + +<span class="sd"> - ``hl`` parameter: specifies the interface language of user interface.</span> +<span class="sd"> - ``lr`` parameter: restricts search results to documents written in</span> +<span class="sd"> a particular language.</span> +<span class="sd"> - ``cr`` parameter: restricts search results to documents</span> +<span class="sd"> originating in a particular country.</span> +<span class="sd"> - ``ie`` parameter: sets the character encoding scheme that should</span> +<span class="sd"> be used to interpret the query string ('utf8').</span> +<span class="sd"> - ``oe`` parameter: sets the character encoding scheme that 
should</span> +<span class="sd"> be used to decode the XML result ('utf8').</span> + +<span class="sd"> headers:</span> +<span class="sd"> Py-Dictionary with additional HTTP headers (can be passed to</span> +<span class="sd"> request's headers)</span> + +<span class="sd"> - ``Accept: '*/*``</span> + +<span class="sd"> """</span> + + <span class="n">ret_val</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'language'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s1">'country'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s1">'subdomain'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s1">'params'</span><span class="p">:</span> <span class="p">{},</span> + <span class="s1">'headers'</span><span class="p">:</span> <span class="p">{},</span> + <span class="s1">'cookies'</span><span class="p">:</span> <span class="p">{},</span> + <span class="s1">'locale'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="p">}</span> + + <span class="n">sxng_locale</span> <span class="o">=</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'searxng_locale'</span><span class="p">,</span> <span class="s1">'all'</span><span class="p">)</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">core</span><span 
class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="n">locale</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="n">eng_lang</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="s1">'lang_en'</span><span class="p">)</span> + <span class="n">lang_code</span> <span class="o">=</span> <span class="n">eng_lang</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="c1"># lang_zh-TW --> zh-TW / lang_en --> en</span> + <span class="n">country</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> + + <span class="c1"># Test zh_hans & zh_hant --> in the topmost links in the result list of list</span> + <span class="c1"># TW and HK you should a find wiktionary.org zh_hant link. 
In the result</span> + <span class="c1"># list of zh-CN should not be no hant link instead you should find</span> + <span class="c1"># zh.m.wikipedia.org/zh somewhere in the top.</span> + + <span class="c1"># '!go 日 :zh-TW' --> https://zh.m.wiktionary.org/zh-hant/%E6%97%A5</span> + <span class="c1"># '!go 日 :zh-CN' --> https://zh.m.wikipedia.org/zh/%E6%97%A5</span> + + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span> + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'country'</span><span class="p">]</span> <span class="o">=</span> <span class="n">country</span> + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'locale'</span><span class="p">]</span> <span class="o">=</span> <span class="n">locale</span> + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'supported_domains'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">country</span><span class="o">.</span><span class="n">upper</span><span class="p">(),</span> <span class="s1">'www.google.com'</span><span class="p">)</span> + + <span class="c1"># hl parameter:</span> + <span class="c1"># The hl parameter specifies the interface language (host language) of</span> + <span class="c1"># your user interface. 
To improve the performance and the quality of your</span> + <span class="c1"># search results, you are strongly encouraged to set this parameter</span> + <span class="c1"># explicitly.</span> + <span class="c1"># https://developers.google.com/custom-search/docs/xml_results#hlsp</span> + <span class="c1"># The Interface Language:</span> + <span class="c1"># https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages</span> + + <span class="c1"># https://github.com/searxng/searxng/issues/2515#issuecomment-1607150817</span> + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">lang_code</span><span class="si">}</span><span class="s1">-</span><span class="si">{</span><span class="n">country</span><span class="si">}</span><span class="s1">'</span> + + <span class="c1"># lr parameter:</span> + <span class="c1"># The lr (language restrict) parameter restricts search results to</span> + <span class="c1"># documents written in a particular language.</span> + <span class="c1"># https://developers.google.com/custom-search/docs/xml_results#lrsp</span> + <span class="c1"># Language Collection Values:</span> + <span class="c1"># https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections</span> + <span class="c1">#</span> + <span class="c1"># To select 'all' languages an empty 'lr' value is used.</span> + <span class="c1">#</span> + <span class="c1"># Different to other google services, Google Scholar supports to select more</span> + <span class="c1"># than one language. 
The languages are separated by a pipe '|' (logical OR).</span> + <span class="c1"># By example: &lr=lang_zh-TW%7Clang_de selects articles written in</span> + <span class="c1"># traditional chinese OR german language.</span> + + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'lr'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span> + <span class="k">if</span> <span class="n">sxng_locale</span> <span class="o">==</span> <span class="s1">'all'</span><span class="p">:</span> + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'lr'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">''</span> + + <span class="c1"># cr parameter:</span> + <span class="c1"># The cr parameter restricts search results to documents originating in a</span> + <span class="c1"># particular country.</span> + <span class="c1"># https://developers.google.com/custom-search/docs/xml_results#crsp</span> + + <span class="c1"># specify a region (country) only if a region is given in the selected</span> + <span class="c1"># locale --> https://github.com/searxng/searxng/issues/2672</span> + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'cr'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">''</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sxng_locale</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">))</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'cr'</span><span class="p">]</span> <span 
class="o">=</span> <span class="s1">'country'</span> <span class="o">+</span> <span class="n">country</span> + + <span class="c1"># gl parameter: (mandatory by Google News)</span> + <span class="c1"># The gl parameter value is a two-letter country code. For WebSearch</span> + <span class="c1"># results, the gl parameter boosts search results whose country of origin</span> + <span class="c1"># matches the parameter value. See the Country Codes section for a list of</span> + <span class="c1"># valid values.</span> + <span class="c1"># Specifying a gl parameter value in WebSearch requests should improve the</span> + <span class="c1"># relevance of results. This is particularly true for international</span> + <span class="c1"># customers and, even more specifically, for customers in English-speaking</span> + <span class="c1"># countries other than the United States.</span> + <span class="c1"># https://developers.google.com/custom-search/docs/xml_results#glsp</span> + + <span class="c1"># https://github.com/searxng/searxng/issues/2515#issuecomment-1606294635</span> + <span class="c1"># ret_val['params']['gl'] = country</span> + + <span class="c1"># ie parameter:</span> + <span class="c1"># The ie parameter sets the character encoding scheme that should be used</span> + <span class="c1"># to interpret the query string. The default ie value is latin1.</span> + <span class="c1"># https://developers.google.com/custom-search/docs/xml_results#iesp</span> + + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'ie'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'utf8'</span> + + <span class="c1"># oe parameter:</span> + <span class="c1"># The oe parameter sets the character encoding scheme that should be used</span> + <span class="c1"># to decode the XML result. 
The default oe value is latin1.</span> + <span class="c1"># https://developers.google.com/custom-search/docs/xml_results#oesp</span> + + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'oe'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'utf8'</span> + + <span class="c1"># num parameter:</span> + <span class="c1"># The num parameter identifies the number of search results to return.</span> + <span class="c1"># The default num value is 10, and the maximum value is 20. If you request</span> + <span class="c1"># more than 20 results, only 20 results will be returned.</span> + <span class="c1"># https://developers.google.com/custom-search/docs/xml_results#numsp</span> + + <span class="c1"># HINT: seems to have no effect (tested in google WEB & Images)</span> + <span class="c1"># ret_val['params']['num'] = 20</span> + + <span class="c1"># HTTP headers</span> + + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Accept'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'*/*'</span> + + <span class="c1"># Cookies</span> + + <span class="c1"># - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746</span> + <span class="c1"># - https://github.com/searxng/searxng/issues/1555</span> + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'CONSENT'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"YES+"</span> + + <span class="k">return</span> <span class="n">ret_val</span></div> + + + +<span class="k">def</span> <span class="nf">detect_google_sorry</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">url</span><span 
class="o">.</span><span class="n">host</span> <span class="o">==</span> <span class="s1">'sorry.google.com'</span> <span class="ow">or</span> <span class="n">resp</span><span class="o">.</span><span class="n">url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/sorry'</span><span class="p">):</span> + <span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">()</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Google search request"""</span> + <span class="c1"># pylint: disable=line-too-long</span> + <span class="n">offset</span> <span class="o">=</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span> + <span class="n">google_info</span> <span class="o">=</span> <span class="n">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">traits</span><span class="p">)</span> + + <span class="c1"># https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium</span> + <span class="n">query_url</span> <span class="o">=</span> <span class="p">(</span> + <span class="s1">'https://'</span> + <span class="o">+</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> + <span class="o">+</span> <span class="s1">'/search'</span> + 
<span class="o">+</span> <span class="s2">"?"</span> + <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="o">**</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">],</span> + <span class="s1">'filter'</span><span class="p">:</span> <span class="s1">'0'</span><span class="p">,</span> + <span class="s1">'start'</span><span class="p">:</span> <span class="n">offset</span><span class="p">,</span> + <span class="c1"># 'vet': '12ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0QxK8CegQIARAC..i',</span> + <span class="c1"># 'ved': '2ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0Q_skCegQIARAG',</span> + <span class="c1"># 'cs' : 1,</span> + <span class="c1"># 'sa': 'N',</span> + <span class="c1"># 'yv': 3,</span> + <span class="c1"># 'prmd': 'vin',</span> + <span class="c1"># 'ei': 'GASaY6TxOcy_xc8PtYeY6AE',</span> + <span class="c1"># 'sa': 'N',</span> + <span class="c1"># 'sstk': 'AcOHfVkD7sWCSAheZi-0tx_09XDO55gTWY0JNq3_V26cNN-c8lfD45aZYPI8s_Bqp8s57AHz5pxchDtAGCA_cikAWSjy9kw3kgg'</span> + <span class="c1"># formally known as use_mobile_ui</span> + <span class="s1">'asearch'</span><span class="p">:</span> <span class="s1">'arc'</span><span class="p">,</span> + <span class="s1">'async'</span><span class="p">:</span> <span class="n">UI_ASYNC</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="p">)</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span> + <span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span 
class="s1">'tbs'</span><span class="p">:</span> <span class="s1">'qdr:'</span> <span class="o">+</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]})</span> + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]:</span> + <span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'safe'</span><span class="p">:</span> <span class="n">filter_mapping</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]]})</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query_url</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span> + <span class="k">return</span> <span class="n">params</span></div> + + + +<span class="c1"># =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA</span> +<span class="c1"># ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;</span> +<span class="n">RE_DATA_IMAGE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span 
class="sa">r</span><span class="s1">'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);'</span><span class="p">)</span> + + +<span class="k">def</span> <span class="nf">_parse_data_images</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span> + <span class="n">data_image_map</span> <span class="o">=</span> <span class="p">{}</span> + <span class="k">for</span> <span class="n">img_id</span><span class="p">,</span> <span class="n">data_image</span> <span class="ow">in</span> <span class="n">RE_DATA_IMAGE</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="n">dom</span><span class="o">.</span><span class="n">text_content</span><span class="p">()):</span> + <span class="n">end_pos</span> <span class="o">=</span> <span class="n">data_image</span><span class="o">.</span><span class="n">rfind</span><span class="p">(</span><span class="s1">'='</span><span class="p">)</span> + <span class="k">if</span> <span class="n">end_pos</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> + <span class="n">data_image</span> <span class="o">=</span> <span class="n">data_image</span><span class="p">[:</span> <span class="n">end_pos</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span> + <span class="n">data_image_map</span><span class="p">[</span><span class="n">img_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">data_image</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'data:image objects --> </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="nb">list</span><span class="p">(</span><span class="n">data_image_map</span><span class="o">.</span><span class="n">keys</span><span class="p">()))</span> + <span class="k">return</span> <span class="n">data_image_map</span> + + +<div class="viewcode-block" id="response"> 
+<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Get response from google's search request"""</span> + <span class="c1"># pylint: disable=too-many-branches, too-many-statements</span> + <span class="n">detect_google_sorry</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="c1"># convert the text to dom</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + <span class="n">data_image_map</span> <span class="o">=</span> <span class="n">_parse_data_images</span><span class="p">(</span><span class="n">dom</span><span class="p">)</span> + + <span class="c1"># results --> answer</span> + <span class="n">answer_list</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class, "LGOjhe")]'</span><span class="p">)</span> + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">answer_list</span><span class="p">:</span> + <span class="k">for</span> <span class="n">bubble</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[@class="nnFGuf"]'</span><span class="p">):</span> + <span class="n">bubble</span><span class="o">.</span><span class="n">drop_tree</span><span class="p">()</span> + <span 
class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'answer'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">item</span><span class="p">),</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'../..//a/@href'</span><span class="p">)</span> <span class="o">+</span> <span class="p">[</span><span class="kc">None</span><span class="p">])[</span><span class="mi">0</span><span class="p">],</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="c1"># parse results</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'.//div[contains(@jscontroller, "SC7lYd")]'</span><span class="p">):</span> + <span class="c1"># pylint: disable=too-many-nested-blocks</span> + + <span class="k">try</span><span class="p">:</span> + <span class="n">title_tag</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//a/h3[1]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">title_tag</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="c1"># this not one of the common google results *section*</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'ignoring item from the 
result_xpath list: missing title'</span><span class="p">)</span> + <span class="k">continue</span> + <span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">title_tag</span><span class="p">)</span> + + <span class="n">url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//a[h3]/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'ignoring item from the result_xpath list: missing url of title "</span><span class="si">%s</span><span class="s1">"'</span><span class="p">,</span> <span class="n">title</span><span class="p">)</span> + <span class="k">continue</span> + + <span class="n">content_nodes</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[contains(@data-sncf, "1")]'</span><span class="p">)</span> + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">content_nodes</span><span class="p">:</span> + <span class="k">for</span> <span class="n">script</span> <span class="ow">in</span> <span class="n">item</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s2">".//script"</span><span class="p">):</span> + <span class="n">script</span><span class="o">.</span><span class="n">getparent</span><span class="p">()</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">script</span><span 
class="p">)</span> + + <span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">content_nodes</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">content</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'ignoring item from the result_xpath list: missing content of title "</span><span class="si">%s</span><span class="s1">"'</span><span class="p">,</span> <span class="n">title</span><span class="p">)</span> + <span class="k">continue</span> + + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">content_nodes</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//img/@src'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">thumbnail</span><span class="p">:</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">thumbnail</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + <span class="k">if</span> <span class="n">thumbnail</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'data:image'</span><span class="p">):</span> + <span class="n">img_id</span> <span class="o">=</span> <span class="n">content_nodes</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//img/@id'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">img_id</span><span class="p">:</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">data_image_map</span><span class="o">.</span><span class="n">get</span><span 
class="p">(</span><span class="n">img_id</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">})</span> + + <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="n">exc_info</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="k">continue</span> + + <span class="c1"># parse suggestion</span> + <span class="k">for</span> <span class="n">suggestion</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">suggestion_xpath</span><span class="p">):</span> + <span class="c1"># append suggestion</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)})</span> + + <span class="c1"># return 
results</span> + <span class="k">return</span> <span class="n">results</span></div> + + + +<span class="c1"># get supported languages from their site</span> + + +<span class="n">skip_countries</span> <span class="o">=</span> <span class="p">[</span> + <span class="c1"># official language of google-country not in google-languages</span> + <span class="s1">'AL'</span><span class="p">,</span> <span class="c1"># Albanien (sq)</span> + <span class="s1">'AZ'</span><span class="p">,</span> <span class="c1"># Aserbaidschan (az)</span> + <span class="s1">'BD'</span><span class="p">,</span> <span class="c1"># Bangladesch (bn)</span> + <span class="s1">'BN'</span><span class="p">,</span> <span class="c1"># Brunei Darussalam (ms)</span> + <span class="s1">'BT'</span><span class="p">,</span> <span class="c1"># Bhutan (dz)</span> + <span class="s1">'ET'</span><span class="p">,</span> <span class="c1"># Äthiopien (am)</span> + <span class="s1">'GE'</span><span class="p">,</span> <span class="c1"># Georgien (ka, os)</span> + <span class="s1">'GL'</span><span class="p">,</span> <span class="c1"># Grönland (kl)</span> + <span class="s1">'KH'</span><span class="p">,</span> <span class="c1"># Kambodscha (km)</span> + <span class="s1">'LA'</span><span class="p">,</span> <span class="c1"># Laos (lo)</span> + <span class="s1">'LK'</span><span class="p">,</span> <span class="c1"># Sri Lanka (si, ta)</span> + <span class="s1">'ME'</span><span class="p">,</span> <span class="c1"># Montenegro (sr)</span> + <span class="s1">'MK'</span><span class="p">,</span> <span class="c1"># Nordmazedonien (mk, sq)</span> + <span class="s1">'MM'</span><span class="p">,</span> <span class="c1"># Myanmar (my)</span> + <span class="s1">'MN'</span><span class="p">,</span> <span class="c1"># Mongolei (mn)</span> + <span class="s1">'MV'</span><span class="p">,</span> <span class="c1"># Malediven (dv) // dv_MV is unknown by babel</span> + <span class="s1">'MY'</span><span class="p">,</span> <span class="c1"># 
Malaysia (ms)</span> + <span class="s1">'NP'</span><span class="p">,</span> <span class="c1"># Nepal (ne)</span> + <span class="s1">'TJ'</span><span class="p">,</span> <span class="c1"># Tadschikistan (tg)</span> + <span class="s1">'TM'</span><span class="p">,</span> <span class="c1"># Turkmenistan (tk)</span> + <span class="s1">'UZ'</span><span class="p">,</span> <span class="c1"># Usbekistan (uz)</span> +<span class="p">]</span> + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">,</span> <span class="n">add_domains</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch languages from Google."""</span> + <span class="c1"># pylint: disable=import-outside-toplevel, too-many-branches</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'supported_domains'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://www.google.com/preferences'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Response from Google's preferences is not OK."</span><span class="p">)</span> + + <span class="n">dom</span> <span 
class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'<?xml version="1.0" encoding="UTF-8"?>'</span><span class="p">,</span> <span class="s1">''</span><span class="p">))</span> + + <span class="c1"># supported language codes</span> + + <span class="n">lang_map</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'no'</span><span class="p">:</span> <span class="s1">'nb'</span><span class="p">}</span> + <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//select[@name='hl']/option"</span><span class="p">):</span> + <span class="n">eng_lang</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">lang_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_lang</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">),</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"INFO: google UI language 
</span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">) is unknown by babel"</span> <span class="o">%</span> <span class="p">(</span><span class="n">eng_lang</span><span class="p">,</span> <span class="n">x</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"("</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">()))</span> + <span class="k">continue</span> + <span class="n">sxng_lang</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_lang</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_lang</span><span class="p">]</span> <span class="o">=</span> <span 
class="s1">'lang_'</span> <span class="o">+</span> <span class="n">eng_lang</span> + + <span class="c1"># alias languages</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="s1">'zh'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'lang_zh-CN'</span> + + <span class="c1"># supported region codes</span> + + <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//select[@name='gl']/option"</span><span class="p">):</span> + <span class="n">eng_country</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">eng_country</span> <span class="ow">in</span> <span class="n">skip_countries</span><span class="p">:</span> + <span class="k">continue</span> + <span class="k">if</span> <span class="n">eng_country</span> <span class="o">==</span> <span class="s1">'ZZ'</span><span class="p">:</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">'ZZ'</span> + <span class="k">continue</span> + + <span class="n">sxng_locales</span> <span class="o">=</span> <span class="n">get_official_locales</span><span class="p">(</span><span class="n">eng_country</span><span class="p">,</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">keys</span><span class="p">(),</span> <span class="n">regional</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">sxng_locales</span><span 
class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: can't map from google country </span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">) to a babel region."</span> <span class="o">%</span> <span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'data-name'</span><span class="p">),</span> <span class="n">eng_country</span><span class="p">))</span> + <span class="k">continue</span> + + <span class="k">for</span> <span class="n">sxng_locale</span> <span class="ow">in</span> <span class="n">sxng_locales</span><span class="p">:</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">region_tag</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">)]</span> <span class="o">=</span> <span class="n">eng_country</span> + + <span class="c1"># alias regions</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="s1">'zh-CN'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'HK'</span> + + <span class="c1"># supported domains</span> + + <span class="k">if</span> <span class="n">add_domains</span><span class="p">:</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://www.google.com/supported_domains'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Response from https://www.google.com/supported_domains is not OK."</span><span 
class="p">)</span> + + <span class="k">for</span> <span class="n">domain</span> <span class="ow">in</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">split</span><span class="p">():</span> <span class="c1"># type: ignore</span> + <span class="n">domain</span> <span class="o">=</span> <span class="n">domain</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">domain</span> <span class="ow">or</span> <span class="n">domain</span> <span class="ow">in</span> <span class="p">[</span> + <span class="s1">'.google.com'</span><span class="p">,</span> + <span class="p">]:</span> + <span class="k">continue</span> + <span class="n">region</span> <span class="o">=</span> <span class="n">domain</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'supported_domains'</span><span class="p">][</span><span class="n">region</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'www'</span> <span class="o">+</span> <span class="n">domain</span> <span class="c1"># type: ignore</span> + <span class="k">if</span> <span class="n">region</span> <span class="o">==</span> <span class="s1">'HK'</span><span class="p">:</span> + <span class="c1"># There is no google.cn, we use .com.hk for zh-CN</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'supported_domains'</span><span class="p">][</span><span class="s1">'CN'</span><span class="p">]</span> <span 
class="o">=</span> <span class="s1">'www'</span> <span class="o">+</span> <span class="n">domain</span> <span class="c1"># type: ignore</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form 
class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/google_images.html b/_modules/searx/engines/google_images.html new file mode 100644 index 000000000..23f72365a --- /dev/null +++ b/_modules/searx/engines/google_images.html @@ -0,0 +1,241 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.google_images — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a 
href="">searx.engines.google_images</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.google_images</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This is the implementation of the Google Images engine using the internal</span> +<span class="sd">Google API used by the Google Go Android app.</span> + +<span class="sd">This internal API offer results in</span> + +<span class="sd">- JSON (``_fmt:json``)</span> +<span class="sd">- Protobuf_ (``_fmt:pb``)</span> +<span class="sd">- Protobuf_ compressed? (``_fmt:pc``)</span> +<span class="sd">- HTML (``_fmt:html``)</span> +<span class="sd">- Protobuf_ encoded in JSON (``_fmt:jspb``).</span> + +<span class="sd">.. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers</span> +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">loads</span> + +<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span> +<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">get_google_info</span><span class="p">,</span> + <span class="n">time_range_dict</span><span class="p">,</span> + <span class="n">detect_google_sorry</span><span class="p">,</span> +<span class="p">)</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span 
class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + <span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + <span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://images.google.com'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q521550'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developers.google.com/custom-search'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'images'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">max_page</span> <span class="o">=</span> <span class="mi">50</span> +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch</span> <span class="o">=</span> <span 
class="kc">True</span> +<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span> + +<span class="n">filter_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s1">'images'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'active'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">'active'</span><span class="p">}</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_images.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Google-Image search request"""</span> + + <span class="n">google_info</span> <span class="o">=</span> <span class="n">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">traits</span><span class="p">)</span> + + <span class="n">query_url</span> <span class="o">=</span> <span class="p">(</span> + <span class="s1">'https://'</span> + <span class="o">+</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> + <span class="o">+</span> <span class="s1">'/search'</span> + <span class="o">+</span> <span class="s1">'?'</span> + <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> <span class="s1">'tbm'</span><span class="p">:</span> <span class="s2">"isch"</span><span class="p">,</span> <span class="o">**</span><span class="n">google_info</span><span class="p">[</span><span 
class="s1">'params'</span><span class="p">],</span> <span class="s1">'asearch'</span><span class="p">:</span> <span class="s1">'isch'</span><span class="p">})</span> + <span class="c1"># don't urlencode this because wildly different AND bad results</span> + <span class="c1"># pagination uses Zero-based numbering</span> + <span class="o">+</span> <span class="sa">f</span><span class="s1">'&async=_fmt:json,p:1,ijn:</span><span class="si">{</span><span class="n">params</span><span class="p">[</span><span class="s2">"pageno"</span><span class="p">]</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mi">1</span><span class="si">}</span><span class="s1">'</span> + <span class="p">)</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span> + <span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'tbs'</span><span class="p">:</span> <span class="s1">'qdr:'</span> <span class="o">+</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]})</span> + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]:</span> + <span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'safe'</span><span class="p">:</span> <span class="n">filter_mapping</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]]})</span> + <span 
class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query_url</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span> + <span class="c1"># this ua will allow getting ~50 results instead of 10. #1641</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'User-Agent'</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span> + <span class="s1">'NSTN/3.60.474802233.release Dalvik/2.1.0 (Linux; U; Android 12;'</span> <span class="sa">f</span><span class="s1">' </span><span class="si">{</span><span class="n">google_info</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"country"</span><span class="p">,</span><span class="w"> </span><span class="s2">"US"</span><span class="p">)</span><span class="si">}</span><span class="s1">) gzip'</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_images.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Get response from google's search request"""</span> 
+ <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="n">detect_google_sorry</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> + + <span class="n">json_start</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'{"ischj":'</span><span class="p">)</span> + <span class="n">json_data</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">[</span><span class="n">json_start</span><span class="p">:])</span> + + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">json_data</span><span class="p">[</span><span class="s2">"ischj"</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"metadata"</span><span class="p">,</span> <span class="p">[]):</span> + <span class="n">result_item</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="s2">"result"</span><span class="p">][</span><span class="s2">"referrer_url"</span><span class="p">],</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="s2">"result"</span><span class="p">][</span><span class="s2">"page_title"</span><span class="p">],</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="s2">"text_in_grid"</span><span class="p">][</span><span class="s2">"snippet"</span><span class="p">],</span> + <span class="s1">'source'</span><span class="p">:</span> <span class="n">item</span><span 
class="p">[</span><span class="s2">"result"</span><span class="p">][</span><span class="s2">"site_title"</span><span class="p">],</span> + <span class="s1">'resolution'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">item</span><span class="p">[</span><span class="s2">"original_image"</span><span class="p">][</span><span class="s2">"width"</span><span class="p">]</span><span class="si">}</span><span class="s1"> x </span><span class="si">{</span><span class="n">item</span><span class="p">[</span><span class="s2">"original_image"</span><span class="p">][</span><span class="s2">"height"</span><span class="p">]</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> + <span class="s1">'img_src'</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="s2">"original_image"</span><span class="p">][</span><span class="s2">"url"</span><span class="p">],</span> + <span class="s1">'thumbnail_src'</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="s2">"thumbnail"</span><span class="p">][</span><span class="s2">"url"</span><span class="p">],</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'images.html'</span><span class="p">,</span> + <span class="p">}</span> + + <span class="n">author</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"result"</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'iptc'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'creator'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">author</span><span class="p">:</span> + <span class="n">result_item</span><span class="p">[</span><span 
class="s1">'author'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">author</span><span class="p">)</span> + + <span class="n">copyright_notice</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"result"</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'iptc'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'copyright_notice'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">copyright_notice</span><span class="p">:</span> + <span class="n">result_item</span><span class="p">[</span><span class="s1">'source'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">' | '</span> <span class="o">+</span> <span class="n">copyright_notice</span> + + <span class="n">freshness_date</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"result"</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"freshness_date"</span><span class="p">)</span> + <span class="k">if</span> <span class="n">freshness_date</span><span class="p">:</span> + <span class="n">result_item</span><span class="p">[</span><span class="s1">'source'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">' | '</span> <span class="o">+</span> <span class="n">freshness_date</span> + + <span class="n">file_size</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'gsa'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span 
class="s1">'file_size'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">file_size</span><span class="p">:</span> + <span class="n">result_item</span><span class="p">[</span><span class="s1">'source'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">' (</span><span class="si">%s</span><span class="s1">)'</span> <span class="o">%</span> <span class="n">file_size</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">result_item</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a 
href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/google_news.html b/_modules/searx/engines/google_news.html new file mode 100644 index 000000000..a65796d74 --- /dev/null +++ b/_modules/searx/engines/google_news.html @@ -0,0 +1,418 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.google_news — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.google_news</a></li> + 
</ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.google_news</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This is the implementation of the Google News engine.</span> + +<span class="sd">Google News has a different region handling compared to Google WEB.</span> + +<span class="sd">- the ``ceid`` argument has to be set (:py:obj:`ceid_list`)</span> +<span class="sd">- the hl_ argument has to be set correctly (and different to Google WEB)</span> +<span class="sd">- the gl_ argument is mandatory</span> + +<span class="sd">If one of these arguments is not set correctly, the request is redirected to</span> +<span class="sd">the CONSENT dialog::</span> + +<span class="sd"> https://consent.google.com/m?continue=</span> + +<span class="sd">The google news API ignores some parameters from the common :ref:`google API`:</span> + +<span class="sd">- num_ : the number of search results is ignored / there is no paging; all</span> +<span class="sd"> results for a query term are in the first response.</span> +<span class="sd">- save_ : is ignored / Google-News results are always *SafeSearch*</span> + +<span class="sd">.. _hl: https://developers.google.com/custom-search/docs/xml_results#hlsp</span> +<span class="sd">.. _gl: https://developers.google.com/custom-search/docs/xml_results#glsp</span> +<span class="sd">.. _num: https://developers.google.com/custom-search/docs/xml_results#numsp</span> +<span class="sd">.. 
_save: https://developers.google.com/custom-search/docs/xml_results#safesp</span> +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">import</span> <span class="nn">base64</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> +<span class="kn">import</span> <span class="nn">babel</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">locales</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">eval_xpath</span><span class="p">,</span> + <span class="n">eval_xpath_list</span><span class="p">,</span> + <span class="n">eval_xpath_getindex</span><span class="p">,</span> + <span class="n">extract_text</span><span class="p">,</span> +<span class="p">)</span> + +<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="k">as</span> <span class="n">_fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span> +<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">get_google_info</span><span class="p">,</span> + <span class="n">detect_google_sorry</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> 
+ + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://news.google.com'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q12020'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developers.google.com/custom-search'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'news'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">False</span> +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">False</span> + +<span class="c1"># Google-News results are always *SafeSearch*. 
Option 'safesearch' is set to</span> +<span class="c1"># False here, otherwise checker will report safesearch-errors::</span> +<span class="c1">#</span> +<span class="c1"># safesearch : results are identical for safesearch=0 and safesearch=2</span> +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> +<span class="c1"># send_accept_language_header = True</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_news.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Google-News search request"""</span> + + <span class="n">sxng_locale</span> <span class="o">=</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'searxng_locale'</span><span class="p">,</span> <span class="s1">'en-US'</span><span class="p">)</span> + <span class="n">ceid</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">get_engine_locale</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ceid'</span><span class="p">],</span> <span class="n">default</span><span class="o">=</span><span class="s1">'US:en'</span><span class="p">)</span> + <span class="n">google_info</span> <span class="o">=</span> <span class="n">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">traits</span><span class="p">)</span> + <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> <span 
class="o">=</span> <span class="s1">'news.google.com'</span> <span class="c1"># google news has only one domain</span> + + <span class="n">ceid_region</span><span class="p">,</span> <span class="n">ceid_lang</span> <span class="o">=</span> <span class="n">ceid</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">':'</span><span class="p">)</span> + <span class="n">ceid_lang</span><span class="p">,</span> <span class="n">ceid_suffix</span> <span class="o">=</span> <span class="p">(</span> + <span class="n">ceid_lang</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)</span> + <span class="o">+</span> <span class="p">[</span> + <span class="kc">None</span><span class="p">,</span> + <span class="p">]</span> + <span class="p">)[:</span><span class="mi">2</span><span class="p">]</span> + + <span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_lang</span> + + <span class="k">if</span> <span class="n">ceid_suffix</span> <span class="ow">and</span> <span class="n">ceid_suffix</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'Hans'</span><span class="p">,</span> <span class="s1">'Hant'</span><span class="p">]:</span> + + <span class="k">if</span> <span class="n">ceid_region</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">==</span> <span class="n">ceid_lang</span><span class="p">:</span> + <span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_lang</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span 
class="n">ceid_region</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_lang</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">ceid_suffix</span> + + <span class="k">elif</span> <span class="n">ceid_region</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">!=</span> <span class="n">ceid_lang</span><span class="p">:</span> + + <span class="k">if</span> <span class="n">ceid_region</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'AT'</span><span class="p">,</span> <span class="s1">'BE'</span><span class="p">,</span> <span class="s1">'CH'</span><span class="p">,</span> <span class="s1">'IL'</span><span class="p">,</span> <span class="s1">'SA'</span><span class="p">,</span> <span class="s1">'IN'</span><span class="p">,</span> <span class="s1">'BD'</span><span class="p">,</span> <span class="s1">'PT'</span><span class="p">]:</span> + <span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_lang</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_lang</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">ceid_region</span> + + <span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'lr'</span><span class="p">]</span> <span 
class="o">=</span> <span class="s1">'lang_'</span> <span class="o">+</span> <span class="n">ceid_lang</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'gl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_region</span> + + <span class="n">query_url</span> <span class="o">=</span> <span class="p">(</span> + <span class="s1">'https://'</span> + <span class="o">+</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> + <span class="o">+</span> <span class="s2">"/search?"</span> + <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="o">**</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">],</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="c1"># ceid includes a ':' character which must not be urlencoded</span> + <span class="o">+</span> <span class="p">(</span><span class="s1">'&ceid=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">ceid</span><span class="p">)</span> + <span class="p">)</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query_url</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'cookies'</span><span 
class="p">]</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span> + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_news.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Get response from google's search request"""</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">detect_google_sorry</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> + + <span class="c1"># convert the text to dom</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@class="xrnccd"]'</span><span class="p">):</span> + + <span class="c1"># The first <a> tag in the <article> contains the link to the article</span> + <span class="c1"># The href attribute of the <a> tag is a google internal link, we have</span> + <span class="c1"># to decode</span> + + <span class="n">href</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span 
class="p">,</span> <span class="s1">'./article/a/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> + <span class="n">href</span> <span class="o">=</span> <span class="n">href</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'?'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">href</span> <span class="o">=</span> <span class="n">href</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> + <span class="n">href</span> <span class="o">=</span> <span class="n">base64</span><span class="o">.</span><span class="n">urlsafe_b64decode</span><span class="p">(</span><span class="n">href</span> <span class="o">+</span> <span class="s1">'===='</span><span class="p">)</span> + <span class="n">href</span> <span class="o">=</span> <span class="n">href</span><span class="p">[</span><span class="n">href</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="sa">b</span><span class="s1">'http'</span><span class="p">)</span> <span class="p">:]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="sa">b</span><span class="s1">'</span><span class="se">\xd2</span><span class="s1">'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">href</span> <span class="o">=</span> <span class="n">href</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span> + + <span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'./article/h3[1]'</span><span class="p">))</span> + + <span 
class="c1"># The pub_date is mostly a string like 'yesterday', not a real</span> + <span class="c1"># timezone date or time. Therefore we can't use publishedDate.</span> + <span class="n">pub_date</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'./article//time'</span><span class="p">))</span> + <span class="n">pub_origin</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'./article//a[@data-n-tid]'</span><span class="p">))</span> + + <span class="n">content</span> <span class="o">=</span> <span class="s1">' / '</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span><span class="n">pub_origin</span><span class="p">,</span> <span class="n">pub_date</span><span class="p">]</span> <span class="k">if</span> <span class="n">x</span><span class="p">])</span> + + <span class="c1"># The image URL is located in a preceding sibling <img> tag, e.g.:</span> + <span class="c1"># "https://lh3.googleusercontent.com/DjhQh7DMszk.....z=-p-h100-w100"</span> + <span class="c1"># These URLs are long but not personalized (double checked via Tor).</span> + + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'preceding-sibling::a/figure/img/@src'</span><span class="p">))</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + 
<span class="s1">'url'</span><span class="p">:</span> <span class="n">href</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="c1"># return results</span> + <span class="k">return</span> <span class="n">results</span></div> + + + +<span class="n">ceid_list</span> <span class="o">=</span> <span class="p">[</span> + <span class="s1">'AE:ar'</span><span class="p">,</span> + <span class="s1">'AR:es-419'</span><span class="p">,</span> + <span class="s1">'AT:de'</span><span class="p">,</span> + <span class="s1">'AU:en'</span><span class="p">,</span> + <span class="s1">'BD:bn'</span><span class="p">,</span> + <span class="s1">'BE:fr'</span><span class="p">,</span> + <span class="s1">'BE:nl'</span><span class="p">,</span> + <span class="s1">'BG:bg'</span><span class="p">,</span> + <span class="s1">'BR:pt-419'</span><span class="p">,</span> + <span class="s1">'BW:en'</span><span class="p">,</span> + <span class="s1">'CA:en'</span><span class="p">,</span> + <span class="s1">'CA:fr'</span><span class="p">,</span> + <span class="s1">'CH:de'</span><span class="p">,</span> + <span class="s1">'CH:fr'</span><span class="p">,</span> + <span class="s1">'CL:es-419'</span><span class="p">,</span> + <span class="s1">'CN:zh-Hans'</span><span class="p">,</span> + <span class="s1">'CO:es-419'</span><span class="p">,</span> + <span class="s1">'CU:es-419'</span><span class="p">,</span> + <span class="s1">'CZ:cs'</span><span class="p">,</span> + <span class="s1">'DE:de'</span><span class="p">,</span> + <span class="s1">'EG:ar'</span><span class="p">,</span> + <span class="s1">'ES:es'</span><span 
class="p">,</span> + <span class="s1">'ET:en'</span><span class="p">,</span> + <span class="s1">'FR:fr'</span><span class="p">,</span> + <span class="s1">'GB:en'</span><span class="p">,</span> + <span class="s1">'GH:en'</span><span class="p">,</span> + <span class="s1">'GR:el'</span><span class="p">,</span> + <span class="s1">'HK:zh-Hant'</span><span class="p">,</span> + <span class="s1">'HU:hu'</span><span class="p">,</span> + <span class="s1">'ID:en'</span><span class="p">,</span> + <span class="s1">'ID:id'</span><span class="p">,</span> + <span class="s1">'IE:en'</span><span class="p">,</span> + <span class="s1">'IL:en'</span><span class="p">,</span> + <span class="s1">'IL:he'</span><span class="p">,</span> + <span class="s1">'IN:bn'</span><span class="p">,</span> + <span class="s1">'IN:en'</span><span class="p">,</span> + <span class="s1">'IN:hi'</span><span class="p">,</span> + <span class="s1">'IN:ml'</span><span class="p">,</span> + <span class="s1">'IN:mr'</span><span class="p">,</span> + <span class="s1">'IN:ta'</span><span class="p">,</span> + <span class="s1">'IN:te'</span><span class="p">,</span> + <span class="s1">'IT:it'</span><span class="p">,</span> + <span class="s1">'JP:ja'</span><span class="p">,</span> + <span class="s1">'KE:en'</span><span class="p">,</span> + <span class="s1">'KR:ko'</span><span class="p">,</span> + <span class="s1">'LB:ar'</span><span class="p">,</span> + <span class="s1">'LT:lt'</span><span class="p">,</span> + <span class="s1">'LV:en'</span><span class="p">,</span> + <span class="s1">'LV:lv'</span><span class="p">,</span> + <span class="s1">'MA:fr'</span><span class="p">,</span> + <span class="s1">'MX:es-419'</span><span class="p">,</span> + <span class="s1">'MY:en'</span><span class="p">,</span> + <span class="s1">'NA:en'</span><span class="p">,</span> + <span class="s1">'NG:en'</span><span class="p">,</span> + <span class="s1">'NL:nl'</span><span class="p">,</span> + <span class="s1">'NO:no'</span><span class="p">,</span> 
+ <span class="s1">'NZ:en'</span><span class="p">,</span> + <span class="s1">'PE:es-419'</span><span class="p">,</span> + <span class="s1">'PH:en'</span><span class="p">,</span> + <span class="s1">'PK:en'</span><span class="p">,</span> + <span class="s1">'PL:pl'</span><span class="p">,</span> + <span class="s1">'PT:pt-150'</span><span class="p">,</span> + <span class="s1">'RO:ro'</span><span class="p">,</span> + <span class="s1">'RS:sr'</span><span class="p">,</span> + <span class="s1">'RU:ru'</span><span class="p">,</span> + <span class="s1">'SA:ar'</span><span class="p">,</span> + <span class="s1">'SE:sv'</span><span class="p">,</span> + <span class="s1">'SG:en'</span><span class="p">,</span> + <span class="s1">'SI:sl'</span><span class="p">,</span> + <span class="s1">'SK:sk'</span><span class="p">,</span> + <span class="s1">'SN:fr'</span><span class="p">,</span> + <span class="s1">'TH:th'</span><span class="p">,</span> + <span class="s1">'TR:tr'</span><span class="p">,</span> + <span class="s1">'TW:zh-Hant'</span><span class="p">,</span> + <span class="s1">'TZ:en'</span><span class="p">,</span> + <span class="s1">'UA:ru'</span><span class="p">,</span> + <span class="s1">'UA:uk'</span><span class="p">,</span> + <span class="s1">'UG:en'</span><span class="p">,</span> + <span class="s1">'US:en'</span><span class="p">,</span> + <span class="s1">'US:es-419'</span><span class="p">,</span> + <span class="s1">'VE:es-419'</span><span class="p">,</span> + <span class="s1">'VN:vi'</span><span class="p">,</span> + <span class="s1">'ZA:en'</span><span class="p">,</span> + <span class="s1">'ZW:en'</span><span class="p">,</span> +<span class="p">]</span> +<span class="sd">"""List of region/language combinations supported by Google News. 
Values of the</span> +<span class="sd">``ceid`` argument of the Google News REST API."""</span> + + +<span class="n">_skip_values</span> <span class="o">=</span> <span class="p">[</span> + <span class="s1">'ET:en'</span><span class="p">,</span> <span class="c1"># english (ethiopia)</span> + <span class="s1">'ID:en'</span><span class="p">,</span> <span class="c1"># english (indonesia)</span> + <span class="s1">'LV:en'</span><span class="p">,</span> <span class="c1"># english (latvia)</span> +<span class="p">]</span> + +<span class="n">_ceid_locale_map</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'NO:no'</span><span class="p">:</span> <span class="s1">'nb-NO'</span><span class="p">}</span> + + +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> + <span class="n">_fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">,</span> <span class="n">add_domains</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ceid'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="k">for</span> <span class="n">ceid</span> <span class="ow">in</span> <span class="n">ceid_list</span><span class="p">:</span> + <span class="k">if</span> <span class="n">ceid</span> <span class="ow">in</span> <span class="n">_skip_values</span><span class="p">:</span> + <span class="k">continue</span> + + <span class="n">region</span><span class="p">,</span> <span class="n">lang</span> <span class="o">=</span> <span class="n">ceid</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">':'</span><span class="p">)</span> + <span 
class="n">x</span> <span class="o">=</span> <span class="n">lang</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="k">if</span> <span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'Hant'</span><span class="p">,</span> <span class="s1">'Hans'</span><span class="p">]:</span> + <span class="n">lang</span> <span class="o">=</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + + <span class="n">sxng_locale</span> <span class="o">=</span> <span class="n">_ceid_locale_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ceid</span><span class="p">,</span> <span class="n">lang</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">region</span><span class="p">)</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: </span><span class="si">%s</span><span class="s2"> -> </span><span 
class="si">%s</span><span class="s2"> is unknown by babel"</span> <span class="o">%</span> <span class="p">(</span><span class="n">ceid</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">))</span> + <span class="k">continue</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ceid'</span><span class="p">][</span><span class="n">locales</span><span class="o">.</span><span class="n">region_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)]</span> <span class="o">=</span> <span class="n">ceid</span> +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a 
href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/google_scholar.html b/_modules/searx/engines/google_scholar.html new file mode 100644 index 000000000..702725784 --- /dev/null +++ b/_modules/searx/engines/google_scholar.html @@ -0,0 +1,340 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.google_scholar — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a 
href="">searx.engines.google_scholar</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.google_scholar</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This is the implementation of the Google Scholar engine.</span> + +<span class="sd">Compared to other Google services the Scholar engine has a simple GET REST-API</span> +<span class="sd">and there does not exists `async` API. Even though the API slightly vintage we</span> +<span class="sd">can make use of the :ref:`google API` to assemble the arguments of the GET</span> +<span class="sd">request.</span> +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Optional</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">eval_xpath</span><span class="p">,</span> + <span class="n">eval_xpath_getindex</span><span class="p">,</span> + <span class="n">eval_xpath_list</span><span class="p">,</span> + <span class="n">extract_text</span><span class="p">,</span> +<span class="p">)</span> + +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span 
class="n">SearxEngineCaptchaException</span> + +<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span> +<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">get_google_info</span><span class="p">,</span> + <span class="n">time_range_dict</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://scholar.google.com'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q494817'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developers.google.com/custom-search'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> 
+ +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'science'</span><span class="p">,</span> <span class="s1">'scientific publications'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">max_page</span> <span class="o">=</span> <span class="mi">50</span> +<span class="n">language_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">False</span> +<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span> + + +<div class="viewcode-block" id="time_range_args"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_scholar.time_range_args">[docs]</a> +<span class="k">def</span> <span class="nf">time_range_args</span><span class="p">(</span><span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns a dictionary with a time range arguments based on</span> +<span class="sd"> ``params['time_range']``.</span> + +<span class="sd"> Google Scholar supports a detailed search by year. Searching by *last</span> +<span class="sd"> month* or *last week* (as offered by SearXNG) is uncommon for scientific</span> +<span class="sd"> publications and is not supported by Google Scholar.</span> + +<span class="sd"> To limit the result list when the users selects a range, all the SearXNG</span> +<span class="sd"> ranges (*day*, *week*, *month*, *year*) are mapped to *year*. If no range</span> +<span class="sd"> is set an empty dictionary of arguments is returned. Example; when</span> +<span class="sd"> user selects a time range (current year minus one in 2022):</span> + +<span class="sd"> .. 
code:: python</span> + +<span class="sd"> { 'as_ylo' : 2021 }</span> + +<span class="sd"> """</span> + <span class="n">ret_val</span> <span class="o">=</span> <span class="p">{}</span> + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span> + <span class="n">ret_val</span><span class="p">[</span><span class="s1">'as_ylo'</span><span class="p">]</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">year</span> <span class="o">-</span> <span class="mi">1</span> + <span class="k">return</span> <span class="n">ret_val</span></div> + + + +<div class="viewcode-block" id="detect_google_captcha"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_scholar.detect_google_captcha">[docs]</a> +<span class="k">def</span> <span class="nf">detect_google_captcha</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""In case of CAPTCHA Google Scholar open its own *not a Robot* dialog and is</span> +<span class="sd"> not redirected to ``sorry.google.com``.</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//form[@id='gs_captcha_f']"</span><span class="p">):</span> + <span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">()</span></div> + + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_scholar.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span 
class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Google-Scholar search request"""</span> + + <span class="n">google_info</span> <span class="o">=</span> <span class="n">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">traits</span><span class="p">)</span> + <span class="c1"># subdomain is: scholar.google.xy</span> + <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"www."</span><span class="p">,</span> <span class="s2">"scholar."</span><span class="p">)</span> + + <span class="n">args</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="o">**</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">],</span> + <span class="s1">'start'</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span><span class="p">,</span> + <span class="s1">'as_sdt'</span><span class="p">:</span> <span class="s1">'2007'</span><span class="p">,</span> <span class="c1"># include patents / to disable set '0,5'</span> + <span class="s1">'as_vis'</span><span class="p">:</span> <span class="s1">'0'</span><span class="p">,</span> <span class="c1"># include citations / to disable set '1'</span> + <span class="p">}</span> + <span 
class="n">args</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">time_range_args</span><span class="p">(</span><span class="n">params</span><span class="p">))</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'https://'</span> <span class="o">+</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> <span class="o">+</span> <span class="s1">'/scholar?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">args</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span> + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="parse_gs_a"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_scholar.parse_gs_a">[docs]</a> +<span class="k">def</span> <span class="nf">parse_gs_a</span><span class="p">(</span><span class="n">text</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]):</span> +<span class="w"> </span><span class="sd">"""Parse the text written in green.</span> + +<span class="sd"> Possible formats:</span> +<span class="sd"> * "{authors} - {journal}, {year} - {publisher}"</span> +<span class="sd"> * 
"{authors} - {year} - {publisher}"</span> +<span class="sd"> * "{authors} - {publisher}"</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="n">text</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">text</span> <span class="o">==</span> <span class="s2">""</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span> + + <span class="n">s_text</span> <span class="o">=</span> <span class="n">text</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">' - '</span><span class="p">)</span> + <span class="n">authors</span> <span class="o">=</span> <span class="n">s_text</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">', '</span><span class="p">)</span> + <span class="n">publisher</span> <span class="o">=</span> <span class="n">s_text</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">s_text</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">3</span><span class="p">:</span> + <span class="k">return</span> <span class="n">authors</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="n">publisher</span><span class="p">,</span> <span class="kc">None</span> + + <span class="c1"># the format is "{authors} - {journal}, {year} - {publisher}" or "{authors} - {year} - {publisher}"</span> + <span class="c1"># get journal and year</span> + <span class="n">journal_year</span> <span class="o">=</span> <span class="n">s_text</span><span 
class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">', '</span><span class="p">)</span> + <span class="c1"># journal is optional and may contains some coma</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">journal_year</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="n">journal</span> <span class="o">=</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">journal_year</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span> + <span class="k">if</span> <span class="n">journal</span> <span class="o">==</span> <span class="s1">'…'</span><span class="p">:</span> + <span class="n">journal</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">journal</span> <span class="o">=</span> <span class="kc">None</span> + <span class="c1"># year</span> + <span class="n">year</span> <span class="o">=</span> <span class="n">journal_year</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">publishedDate</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">year</span><span class="o">.</span><span class="n">strip</span><span class="p">(),</span> <span class="s1">'%Y'</span><span class="p">)</span> + <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> + <span class="n">publishedDate</span> <span class="o">=</span> <span class="kc">None</span> + <span 
class="k">return</span> <span class="n">authors</span><span class="p">,</span> <span class="n">journal</span><span class="p">,</span> <span class="n">publisher</span><span class="p">,</span> <span class="n">publishedDate</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_scholar.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> <span class="c1"># pylint: disable=too-many-locals</span> +<span class="w"> </span><span class="sd">"""Parse response from Google Scholar"""</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="c1"># convert the text to dom</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + <span class="n">detect_google_captcha</span><span class="p">(</span><span class="n">dom</span><span class="p">)</span> + + <span class="c1"># parse results</span> + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@data-rp]'</span><span class="p">):</span> + + <span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//h3[1]//a'</span><span class="p">))</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">title</span><span class="p">:</span> + <span class="c1"># this is a [ZITATION] block</span> + <span 
class="k">continue</span> + + <span class="n">pub_type</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//span[@class="gs_ctg2"]'</span><span class="p">))</span> + <span class="k">if</span> <span class="n">pub_type</span><span class="p">:</span> + <span class="n">pub_type</span> <span class="o">=</span> <span class="n">pub_type</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> + + <span class="n">url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//h3[1]//a/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="gs_rs"]'</span><span class="p">))</span> + <span class="n">authors</span><span class="p">,</span> <span class="n">journal</span><span class="p">,</span> <span class="n">publisher</span><span class="p">,</span> <span class="n">publishedDate</span> <span class="o">=</span> <span class="n">parse_gs_a</span><span class="p">(</span> + <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="gs_a"]'</span><span class="p">))</span> + <span class="p">)</span> + <span class="k">if</span> <span class="n">publisher</span> <span class="ow">in</span> <span 
class="n">url</span><span class="p">:</span> + <span class="n">publisher</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="c1"># cited by</span> + <span class="n">comments</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]'</span><span class="p">))</span> + + <span class="c1"># link to the html or pdf document</span> + <span class="n">html_url</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">pdf_url</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">doc_url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="gs_or_ggsm"]/a/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> + <span class="n">doc_type</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//span[@class="gs_ctg2"]'</span><span class="p">))</span> + <span class="k">if</span> <span class="n">doc_type</span> <span class="o">==</span> <span class="s2">"[PDF]"</span><span class="p">:</span> + <span class="n">pdf_url</span> <span class="o">=</span> <span class="n">doc_url</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">html_url</span> <span class="o">=</span> <span class="n">doc_url</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + 
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'paper.html'</span><span class="p">,</span> + <span class="s1">'type'</span><span class="p">:</span> <span class="n">pub_type</span><span class="p">,</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'authors'</span><span class="p">:</span> <span class="n">authors</span><span class="p">,</span> + <span class="s1">'publisher'</span><span class="p">:</span> <span class="n">publisher</span><span class="p">,</span> + <span class="s1">'journal'</span><span class="p">:</span> <span class="n">journal</span><span class="p">,</span> + <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">publishedDate</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> + <span class="s1">'comments'</span><span class="p">:</span> <span class="n">comments</span><span class="p">,</span> + <span class="s1">'html_url'</span><span class="p">:</span> <span class="n">html_url</span><span class="p">,</span> + <span class="s1">'pdf_url'</span><span class="p">:</span> <span class="n">pdf_url</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="c1"># parse suggestion</span> + <span class="k">for</span> <span class="n">suggestion</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class, "gs_qsuggest_wrap")]//li//a'</span><span class="p">):</span> + <span class="c1"># append suggestion</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span 
class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)})</span> + + <span class="k">for</span> <span class="n">correction</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@class="gs_r gs_pda"]/a'</span><span class="p">):</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'correction'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">correction</span><span class="p">)})</span> + + <span class="k">return</span> <span class="n">results</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a 
href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/google_videos.html b/_modules/searx/engines/google_videos.html new file mode 100644 index 000000000..3c0874847 --- /dev/null +++ b/_modules/searx/engines/google_videos.html @@ -0,0 +1,253 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.google_videos — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a 
href="">searx.engines.google_videos</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.google_videos</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This is the implementation of the Google Videos engine.</span> + +<span class="sd">.. admonition:: Content-Security-Policy (CSP)</span> + +<span class="sd"> This engine needs to allow images from the `data URLs`_ (prefixed with the</span> +<span class="sd"> ``data:`` scheme)::</span> + +<span class="sd"> Header set Content-Security-Policy "img-src 'self' data: ;"</span> + +<span class="sd">.. _data URLs:</span> +<span class="sd"> https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">eval_xpath</span><span class="p">,</span> + <span class="n">eval_xpath_list</span><span class="p">,</span> + <span class="n">eval_xpath_getindex</span><span class="p">,</span> + <span class="n">extract_text</span><span class="p">,</span> +<span class="p">)</span> + +<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span> +<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span 
class="kn">import</span> <span class="p">(</span> + <span class="n">get_google_info</span><span class="p">,</span> + <span class="n">time_range_dict</span><span class="p">,</span> + <span class="n">filter_mapping</span><span class="p">,</span> + <span class="n">suggestion_xpath</span><span class="p">,</span> + <span class="n">detect_google_sorry</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">get_embeded_stream_url</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.google.com'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q219885'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developers.google.com/custom-search'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span 
class="p">}</span> + +<span class="c1"># engine dependent config</span> + +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'videos'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">max_page</span> <span class="o">=</span> <span class="mi">50</span> +<span class="n">language_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_videos.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Google-Video search request"""</span> + + <span class="n">google_info</span> <span class="o">=</span> <span class="n">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">traits</span><span class="p">)</span> + + <span class="n">query_url</span> <span class="o">=</span> <span class="p">(</span> + <span class="s1">'https://'</span> + <span class="o">+</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> + <span class="o">+</span> <span class="s1">'/search'</span> + <span class="o">+</span> <span class="s2">"?"</span> + <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + 
<span class="s1">'tbm'</span><span class="p">:</span> <span class="s2">"vid"</span><span class="p">,</span> + <span class="s1">'start'</span><span class="p">:</span> <span class="mi">10</span> <span class="o">*</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">],</span> + <span class="o">**</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">],</span> + <span class="s1">'asearch'</span><span class="p">:</span> <span class="s1">'arc'</span><span class="p">,</span> + <span class="s1">'async'</span><span class="p">:</span> <span class="s1">'use_ac:true,_fmt:html'</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="p">)</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span> + <span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'tbs'</span><span class="p">:</span> <span class="s1">'qdr:'</span> <span class="o">+</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]})</span> + <span class="k">if</span> <span class="s1">'safesearch'</span> <span class="ow">in</span> <span class="n">params</span><span class="p">:</span> + <span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'safe'</span><span class="p">:</span> <span class="n">filter_mapping</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span 
class="s1">'safesearch'</span><span class="p">]]})</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query_url</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span> + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_videos.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Get response from google's search request"""</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="n">detect_google_sorry</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> + + <span class="c1"># convert the text to dom</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="c1"># parse results</span> + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span 
class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class, "g ")]'</span><span class="p">):</span> + + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//img/@src'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">thumbnail</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">continue</span> + + <span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//a/h3[1]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">))</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//a/h3[1]/../@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> + + <span class="n">c_node</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "ITZIwc")]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">c_node</span><span class="p">)</span> + <span class="n">pub_info</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span 
class="p">,</span> <span class="s1">'.//div[contains(@class, "gqF9jc")]'</span><span class="p">))</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> + <span class="s1">'author'</span><span class="p">:</span> <span class="n">pub_info</span><span class="p">,</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span> + <span class="s1">'iframe_src'</span><span class="p">:</span> <span class="n">get_embeded_stream_url</span><span class="p">(</span><span class="n">url</span><span class="p">),</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'videos.html'</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="c1"># parse suggestion</span> + <span class="k">for</span> <span class="n">suggestion</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">suggestion_xpath</span><span class="p">):</span> + <span class="c1"># append suggestion</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)})</span> + + <span class="k">return</span> <span class="n">results</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span 
id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" 
/> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/mrs.html b/_modules/searx/engines/mrs.html new file mode 100644 index 000000000..43d015b51 --- /dev/null +++ b/_modules/searx/engines/mrs.html @@ -0,0 +1,181 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.mrs — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.mrs</a></li> + </ul> + </div> + + <div class="document"> 
+ <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.mrs</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Matrix Rooms Search - a fully-featured, standalone, matrix rooms search service.</span> + +<span class="sd">Configuration</span> +<span class="sd">=============</span> + +<span class="sd">The engine has the following mandatory settings:</span> + +<span class="sd">- :py:obj:`base_url`</span> + +<span class="sd">.. code:: yaml</span> + +<span class="sd"> - name: MRS</span> +<span class="sd"> engine: mrs</span> +<span class="sd"> base_url: https://mrs-host</span> +<span class="sd"> ...</span> + +<span class="sd">Implementation</span> +<span class="sd">==============</span> +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">quote_plus</span> + +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://matrixrooms.info'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://gitlab.com/etke.cc/mrs/api/-/blob/main/openapi.yml?ref_type=heads'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> +<span class="n">paging</span> <span class="o">=</span> <span 
class="kc">True</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'social media'</span><span class="p">]</span> + +<span class="n">base_url</span> <span class="o">=</span> <span class="s2">""</span> +<span class="n">matrix_url</span> <span class="o">=</span> <span class="s2">"https://matrix.to"</span> +<span class="n">page_size</span> <span class="o">=</span> <span class="mi">20</span> + + +<div class="viewcode-block" id="init"> +<a class="viewcode-back" href="../../../dev/engines/online/mrs.html#searx.engines.mrs.init">[docs]</a> +<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">):</span> <span class="c1"># pylint: disable=unused-argument</span> +<span class="w"> </span><span class="sd">"""The ``base_url`` must be set in the configuration, if ``base_url`` is not</span> +<span class="sd"> set, a :py:obj:`ValueError` is raised during initialization.</span> + +<span class="sd"> """</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">base_url</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'engine MRS, base_url is unset'</span><span class="p">)</span></div> + + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">base_url</span><span class="si">}</span><span class="s2">/search/</span><span class="si">{</span><span class="n">quote_plus</span><span class="p">(</span><span class="n">query</span><span class="p">)</span><span class="si">}</span><span 
class="s2">/</span><span class="si">{</span><span class="n">page_size</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">page_size</span><span class="si">}</span><span class="s2">"</span> + <span class="k">return</span> <span class="n">params</span> + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">():</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">matrix_url</span> <span class="o">+</span> <span class="s1">'/#/'</span> <span class="o">+</span> <span class="n">result</span><span class="p">[</span><span class="s1">'alias'</span><span class="p">],</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'name'</span><span class="p">],</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'topic'</span><span class="p">]</span> + <span class="o">+</span> <span class="sa">f</span><span class="s2">" // </span><span class="si">{</span><span class="n">result</span><span class="p">[</span><span class="s1">'members'</span><span class="p">]</span><span class="si">}</span><span class="s2"> members"</span> + <span class="o">+</span> 
<span class="sa">f</span><span class="s2">" // </span><span class="si">{</span><span class="n">result</span><span class="p">[</span><span class="s1">'alias'</span><span class="p">]</span><span class="si">}</span><span class="s2">"</span> + <span class="o">+</span> <span class="sa">f</span><span class="s2">" // </span><span class="si">{</span><span class="n">result</span><span class="p">[</span><span class="s1">'server'</span><span class="p">]</span><span class="si">}</span><span class="s2">"</span><span class="p">,</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'avatar_url'</span><span class="p">],</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span> +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + 
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/mullvad_leta.html b/_modules/searx/engines/mullvad_leta.html new file mode 100644 index 000000000..dcf108481 --- /dev/null +++ b/_modules/searx/engines/mullvad_leta.html @@ -0,0 +1,327 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.mullvad_leta — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.mullvad_leta</a></li> 
+ </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.mullvad_leta</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> + +<span class="sd">"""This is the implementation of the Mullvad-Leta meta-search engine.</span> + +<span class="sd">This engine **REQUIRES** that searxng operate within a Mullvad VPN</span> + +<span class="sd">If using docker, consider using gluetun for easily connecting to the Mullvad</span> + +<span class="sd">- https://github.com/qdm12/gluetun</span> + +<span class="sd">Otherwise, follow instructions provided by Mullvad for enabling the VPN on Linux</span> + +<span class="sd">- https://mullvad.net/en/help/install-mullvad-app-linux</span> + +<span class="sd">.. hint::</span> + +<span class="sd"> The :py:obj:`EngineTraits` is empty by default. Maintainers have to run</span> +<span class="sd"> ``make data.traits`` (in the Mullvad VPN / :py:obj:`fetch_traits`) and rebase</span> +<span class="sd"> the modified JSON file ``searx/data/engine_traits.json`` on every single</span> +<span class="sd"> update of SearXNG!</span> +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">from</span> <span class="nn">httpx</span> <span class="kn">import</span> <span class="n">Response</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> +<span class="kn">from</span> <span class="nn">searx.locales</span> <span 
class="kn">import</span> <span class="n">region_tag</span><span class="p">,</span> <span class="n">get_official_locales</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span> +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineResponseException</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">()</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="n">use_cache</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span> <span class="c1"># non-cache use only has 100 searches per day!</span> + +<span class="n">leta_engine</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">'google'</span> + +<span class="n">search_url</span> <span class="o">=</span> <span class="s2">"https://leta.mullvad.net"</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="n">search_url</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q47008412'</span><span class="p">,</span> <span class="c1"># the Mullvad id - not leta, but related</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://leta.mullvad.net/faq'</span><span 
class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">max_page</span> <span class="o">=</span> <span class="mi">50</span> +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">time_range_dict</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"day"</span><span class="p">:</span> <span class="s2">"d1"</span><span class="p">,</span> + <span class="s2">"week"</span><span class="p">:</span> <span class="s2">"w1"</span><span class="p">,</span> + <span class="s2">"month"</span><span class="p">:</span> <span class="s2">"m1"</span><span class="p">,</span> + <span class="s2">"year"</span><span class="p">:</span> <span class="s2">"y1"</span><span class="p">,</span> +<span class="p">}</span> + +<span class="n">available_leta_engines</span> <span class="o">=</span> <span class="p">[</span> + <span class="s1">'google'</span><span class="p">,</span> <span class="c1"># first will be default if provided engine is invalid</span> + <span class="s1">'brave'</span><span class="p">,</span> +<span class="p">]</span> + + +<div class="viewcode-block" id="is_vpn_connected"> +<a class="viewcode-back" href="../../../dev/engines/online/mullvad_leta.html#searx.engines.mullvad_leta.is_vpn_connected">[docs]</a> +<span 
class="k">def</span> <span class="nf">is_vpn_connected</span><span class="p">(</span><span class="n">dom</span><span class="p">:</span> <span class="n">html</span><span class="o">.</span><span class="n">HtmlElement</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns true if the VPN is connected, False otherwise"""</span> + <span class="n">connected_text</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//main/div/p[1]'</span><span class="p">))</span> + <span class="k">return</span> <span class="n">connected_text</span> <span class="o">!=</span> <span class="s1">'You are not connected to Mullvad VPN.'</span></div> + + + +<div class="viewcode-block" id="assign_headers"> +<a class="viewcode-back" href="../../../dev/engines/online/mullvad_leta.html#searx.engines.mullvad_leta.assign_headers">[docs]</a> +<span class="k">def</span> <span class="nf">assign_headers</span><span class="p">(</span><span class="n">headers</span><span class="p">:</span> <span class="nb">dict</span><span class="p">)</span> <span class="o">-></span> <span class="nb">dict</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Assigns the headers to make a request to Mullvad Leta"""</span> + <span class="n">headers</span><span class="p">[</span><span class="s1">'Accept'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"</span> + <span class="n">headers</span><span class="p">[</span><span class="s1">'Content-Type'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"application/x-www-form-urlencoded"</span> + <span class="n">headers</span><span class="p">[</span><span 
class="s1">'Host'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"leta.mullvad.net"</span> + <span class="n">headers</span><span class="p">[</span><span class="s1">'Origin'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"https://leta.mullvad.net"</span> + <span class="k">return</span> <span class="n">headers</span></div> + + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span> + <span class="n">country</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'searxng_locale'</span><span class="p">,</span> <span class="s1">'all'</span><span class="p">),</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + <span class="n">result_engine</span> <span class="o">=</span> <span class="n">leta_engine</span> + <span class="k">if</span> <span class="n">leta_engine</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">available_leta_engines</span><span class="p">:</span> + <span class="n">result_engine</span> <span class="o">=</span> <span class="n">available_leta_engines</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span> + <span class="s1">'Configured engine "</span><span class="si">%s</span><span class="s1">" not one of the available engines </span><span class="si">%s</span><span class="s1">, defaulting to 
"</span><span class="si">%s</span><span class="s1">"'</span><span class="p">,</span> + <span class="n">leta_engine</span><span class="p">,</span> + <span class="n">available_leta_engines</span><span class="p">,</span> + <span class="n">result_engine</span><span class="p">,</span> + <span class="p">)</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_url</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'method'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'POST'</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"q"</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s2">"gl"</span><span class="p">:</span> <span class="n">country</span> <span class="k">if</span> <span class="n">country</span> <span class="ow">is</span> <span class="nb">str</span> <span class="k">else</span> <span class="s1">''</span><span class="p">,</span> + <span class="s1">'engine'</span><span class="p">:</span> <span class="n">result_engine</span><span class="p">,</span> + <span class="p">}</span> + <span class="c1"># pylint: disable=undefined-variable</span> + <span class="k">if</span> <span class="n">use_cache</span><span class="p">:</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'oc'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"on"</span> + <span class="c1"># pylint: enable=undefined-variable</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span> + 
<span class="n">params</span><span class="p">[</span><span class="s1">'dateRestrict'</span><span class="p">]</span> <span class="o">=</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'dateRestrict'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">''</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="c1"># Page 1 is n/a, Page 2 is 11, page 3 is 21, ...</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'start'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="nb">str</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">),</span> <span class="s2">"1"</span><span class="p">])</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="n">assign_headers</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span> + <span 
class="k">return</span> <span class="n">params</span> + + +<span class="k">def</span> <span class="nf">extract_result</span><span class="p">(</span><span class="n">dom_result</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">html</span><span class="o">.</span><span class="n">HtmlElement</span><span class="p">]):</span> + <span class="c1"># Infoboxes sometimes appear in the beginning and will have a length of 0</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">dom_result</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span> + <span class="p">[</span><span class="n">a_elem</span><span class="p">,</span> <span class="n">h3_elem</span><span class="p">,</span> <span class="n">p_elem</span><span class="p">]</span> <span class="o">=</span> <span class="n">dom_result</span> + <span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">dom_result</span><span class="p">)</span> <span class="o">==</span> <span class="mi">4</span><span class="p">:</span> + <span class="p">[</span><span class="n">_</span><span class="p">,</span> <span class="n">a_elem</span><span class="p">,</span> <span class="n">h3_elem</span><span class="p">,</span> <span class="n">p_elem</span><span class="p">]</span> <span class="o">=</span> <span class="n">dom_result</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="k">return</span> <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">a_elem</span><span class="o">.</span><span class="n">text</span><span class="p">),</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">h3_elem</span><span 
class="p">),</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">p_elem</span><span class="p">),</span> + <span class="p">}</span> + + +<span class="k">def</span> <span class="nf">extract_results</span><span class="p">(</span><span class="n">search_results</span><span class="p">:</span> <span class="n">html</span><span class="o">.</span><span class="n">HtmlElement</span><span class="p">):</span> + <span class="k">for</span> <span class="n">search_result</span> <span class="ow">in</span> <span class="n">search_results</span><span class="p">:</span> + <span class="n">dom_result</span> <span class="o">=</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">search_result</span><span class="p">,</span> <span class="s1">'div/div/*'</span><span class="p">)</span> + <span class="n">result</span> <span class="o">=</span> <span class="n">extract_result</span><span class="p">(</span><span class="n">dom_result</span><span class="p">)</span> + <span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">yield</span> <span class="n">result</span> + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online/mullvad_leta.html#searx.engines.mullvad_leta.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">:</span> <span class="n">Response</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Checks if connected to Mullvad VPN, then extracts the search results from</span> +<span class="sd"> the DOM resp: requests response object"""</span> + + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span 
class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">is_vpn_connected</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span> + <span class="k">raise</span> <span class="n">SearxEngineResponseException</span><span class="p">(</span><span class="s1">'Not connected to Mullvad VPN'</span><span class="p">)</span> + <span class="n">search_results</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="o">.</span><span class="n">body</span><span class="p">,</span> <span class="s1">'//main/div[2]/div'</span><span class="p">)</span> + <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="n">extract_results</span><span class="p">(</span><span class="n">search_results</span><span class="p">))</span></div> + + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/mullvad_leta.html#searx.engines.mullvad_leta.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch languages and regions from Mullvad-Leta</span> + +<span class="sd"> .. warning::</span> + +<span class="sd"> Fetching the engine traits also requires a Mullvad VPN connection. 
If</span> +<span class="sd"> not connected, then an error message will print and no traits will be</span> +<span class="sd"> updated.</span> +<span class="sd"> """</span> + <span class="c1"># pylint: disable=import-outside-toplevel</span> + <span class="c1"># see https://github.com/searxng/searxng/issues/762</span> + <span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">post</span> <span class="k">as</span> <span class="n">http_post</span> + + <span class="c1"># pylint: enable=import-outside-toplevel</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">http_post</span><span class="p">(</span><span class="n">search_url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">assign_headers</span><span class="p">({}))</span> + <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">resp</span><span class="p">,</span> <span class="n">Response</span><span class="p">):</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: failed to get response from mullvad-leta. Are you connected to the VPN?"</span><span class="p">)</span> + <span class="k">return</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from mullvad-leta is not OK. 
Are you connected to the VPN?"</span><span class="p">)</span> + <span class="k">return</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">is_vpn_connected</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span> + <span class="nb">print</span><span class="p">(</span><span class="s1">'ERROR: Not connected to Mullvad VPN'</span><span class="p">)</span> + <span class="k">return</span> + <span class="c1"># supported region codes</span> + <span class="n">options</span> <span class="o">=</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="o">.</span><span class="n">body</span><span class="p">,</span> <span class="s1">'//main/div/form/div[2]/div/select[1]/option'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">options</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="n">options</span><span class="p">)</span> <span class="o"><=</span> <span class="mi">0</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s1">'ERROR: could not find any results. 
Are you connected to the VPN?'</span><span class="p">)</span> + <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">options</span><span class="p">:</span> + <span class="n">eng_country</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span> + + <span class="n">sxng_locales</span> <span class="o">=</span> <span class="n">get_official_locales</span><span class="p">(</span><span class="n">eng_country</span><span class="p">,</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">keys</span><span class="p">(),</span> <span class="n">regional</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">sxng_locales</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span> + <span class="s2">"ERROR: can't map from Mullvad-Leta country </span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">) to a babel region."</span> + <span class="o">%</span> <span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'data-name'</span><span class="p">),</span> <span class="n">eng_country</span><span class="p">)</span> + <span class="p">)</span> + <span class="k">continue</span> + + <span class="k">for</span> <span class="n">sxng_locale</span> <span class="ow">in</span> <span class="n">sxng_locales</span><span class="p">:</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">region_tag</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">)]</span> <span class="o">=</span> <span 
class="n">eng_country</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" 
aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/odysee.html b/_modules/searx/engines/odysee.html new file mode 100644 index 000000000..6476ad007 --- /dev/null +++ b/_modules/searx/engines/odysee.html @@ -0,0 +1,255 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.odysee — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.odysee</a></li> + </ul> + </div> + + <div 
class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.odysee</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Odysee_ is a decentralized video hosting platform.</span> + +<span class="sd">.. _Odysee: https://github.com/OdyseeTeam/odysee-frontend</span> +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">time</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span> + +<span class="kn">import</span> <span class="nn">babel</span> + +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> +<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="c1"># Engine metadata</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s2">"https://odysee.com/"</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s2">"Q102046570"</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span 
class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s2">"JSON"</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># Engine configuration</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">results_per_page</span> <span class="o">=</span> <span class="mi">20</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'videos'</span><span class="p">]</span> + +<span class="c1"># Search URL (Note: lighthouse.lbry.com/search works too, and may be faster at times)</span> +<span class="n">base_url</span> <span class="o">=</span> <span class="s2">"https://lighthouse.odysee.tv/search"</span> + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + <span class="n">time_range_dict</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"day"</span><span class="p">:</span> <span class="s2">"today"</span><span class="p">,</span> + <span class="s2">"week"</span><span class="p">:</span> <span class="s2">"thisweek"</span><span class="p">,</span> + <span class="s2">"month"</span><span class="p">:</span> <span class="s2">"thismonth"</span><span class="p">,</span> + <span class="s2">"year"</span><span class="p">:</span> <span class="s2">"thisyear"</span><span class="p">,</span> + <span class="p">}</span> + + <span class="n">start_index</span> <span class="o">=</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">"pageno"</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span 
class="p">)</span> <span class="o">*</span> <span class="n">results_per_page</span> + <span class="n">query_params</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"s"</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s2">"size"</span><span class="p">:</span> <span class="n">results_per_page</span><span class="p">,</span> + <span class="s2">"from"</span><span class="p">:</span> <span class="n">start_index</span><span class="p">,</span> + <span class="s2">"include"</span><span class="p">:</span> <span class="s2">"channel,thumbnail_url,title,description,duration,release_time"</span><span class="p">,</span> + <span class="s2">"mediaType"</span><span class="p">:</span> <span class="s2">"video"</span><span class="p">,</span> + <span class="p">}</span> + + <span class="n">lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">lang</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">query_params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">lang</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span> + <span class="n">query_params</span><span class="p">[</span><span class="s1">'time_filter'</span><span class="p">]</span> <span class="o">=</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span 
class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span> + + <span class="n">params</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">base_url</span><span class="si">}</span><span class="s2">?</span><span class="si">{</span><span class="n">urlencode</span><span class="p">(</span><span class="n">query_params</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span> + <span class="k">return</span> <span class="n">params</span> + + +<span class="c1"># Format the video duration</span> +<span class="k">def</span> <span class="nf">format_duration</span><span class="p">(</span><span class="n">duration</span><span class="p">):</span> + <span class="n">seconds</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">duration</span><span class="p">)</span> + <span class="n">length</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">gmtime</span><span class="p">(</span><span class="n">seconds</span><span class="p">)</span> + <span class="k">if</span> <span class="n">length</span><span class="o">.</span><span class="n">tm_hour</span><span class="p">:</span> + <span class="k">return</span> <span class="n">time</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s2">"%H:%M:%S"</span><span class="p">,</span> <span class="n">length</span><span class="p">)</span> + <span class="k">return</span> <span class="n">time</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s2">"%M:%S"</span><span class="p">,</span> <span class="n">length</span><span class="p">)</span> + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + <span 
class="n">data</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span> + <span class="n">name</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"name"</span><span class="p">]</span> + <span class="n">claim_id</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"claimId"</span><span class="p">]</span> + <span class="n">title</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"title"</span><span class="p">]</span> + <span class="n">thumbnail_url</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"thumbnail_url"</span><span class="p">]</span> + <span class="n">description</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"description"</span><span class="p">]</span> <span class="ow">or</span> <span class="s2">""</span> + <span class="n">channel</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"channel"</span><span class="p">]</span> + <span class="n">release_time</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"release_time"</span><span class="p">]</span> + <span class="n">duration</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"duration"</span><span class="p">]</span> + + <span class="n">release_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span 
class="n">release_time</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"T"</span><span class="p">)[</span><span class="mi">0</span><span class="p">],</span> <span class="s2">"%Y-%m-</span><span class="si">%d</span><span class="s2">"</span><span class="p">)</span> + <span class="n">formatted_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">utcfromtimestamp</span><span class="p">(</span><span class="n">release_date</span><span class="o">.</span><span class="n">timestamp</span><span class="p">())</span> + + <span class="n">url</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"https://odysee.com/</span><span class="si">{</span><span class="n">name</span><span class="si">}</span><span class="s2">:</span><span class="si">{</span><span class="n">claim_id</span><span class="si">}</span><span class="s2">"</span> + <span class="n">iframe_url</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"https://odysee.com/$/embed/</span><span class="si">{</span><span class="n">name</span><span class="si">}</span><span class="s2">:</span><span class="si">{</span><span class="n">claim_id</span><span class="si">}</span><span class="s2">"</span> + <span class="n">odysee_thumbnail</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"https://thumbnails.odycdn.com/optimize/s:390:0/quality:85/plain/</span><span class="si">{</span><span class="n">thumbnail_url</span><span class="si">}</span><span class="s2">"</span> + <span class="n">formatted_duration</span> <span class="o">=</span> <span class="n">format_duration</span><span class="p">(</span><span class="n">duration</span><span class="p">)</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s2">"title"</span><span class="p">:</span> <span 
class="n">title</span><span class="p">,</span> + <span class="s2">"url"</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> + <span class="s2">"content"</span><span class="p">:</span> <span class="n">description</span><span class="p">,</span> + <span class="s2">"author"</span><span class="p">:</span> <span class="n">channel</span><span class="p">,</span> + <span class="s2">"publishedDate"</span><span class="p">:</span> <span class="n">formatted_date</span><span class="p">,</span> + <span class="s2">"length"</span><span class="p">:</span> <span class="n">formatted_duration</span><span class="p">,</span> + <span class="s2">"thumbnail"</span><span class="p">:</span> <span class="n">odysee_thumbnail</span><span class="p">,</span> + <span class="s2">"iframe_src"</span><span class="p">:</span> <span class="n">iframe_url</span><span class="p">,</span> + <span class="s2">"template"</span><span class="p">:</span> <span class="s2">"videos.html"</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span> + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/odysee.html#searx.engines.odysee.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""</span> +<span class="sd"> Fetch languages from Odysee's source code.</span> +<span class="sd"> """</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span> + <span class="s1">'https://raw.githubusercontent.com/OdyseeTeam/odysee-frontend/master/ui/constants/supported_browser_languages.js'</span><span class="p">,</span> <span class="c1"># pylint: disable=line-too-long</span> + <span 
class="n">timeout</span><span class="o">=</span><span class="mi">60</span><span class="p">,</span> + <span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: can't determine languages from Odysee"</span><span class="p">)</span> + <span class="k">return</span> + + <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"</span><span class="se">\n</span><span class="s2">"</span><span class="p">)[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">4</span><span class="p">]:</span> + <span class="n">lang_tag</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">": "</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"'"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span> + + <span class="k">try</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">lang_tag</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s2">"-"</span><span class="p">))</span> + <span class="k">except</span> <span 
class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: </span><span class="si">%s</span><span class="s2"> is unknown by babel"</span> <span class="o">%</span> <span class="n">lang_tag</span><span class="p">)</span> + <span class="k">continue</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">lang_tag</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">lang_tag</span><span class="p">))</span> + <span class="k">continue</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">lang_tag</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + 
</a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/peertube.html b/_modules/searx/engines/peertube.html new file mode 100644 index 000000000..e45e5ea0a --- /dev/null +++ b/_modules/searx/engines/peertube.html @@ -0,0 +1,303 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.peertube — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.peertube</a></li> + </ul> + </div> + 
+ <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.peertube</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share</span> +<span class="sd">(more or less) the same REST API and the schema of the JSON result is identical.</span> + +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">re</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span> +<span class="kn">from</span> <span class="nn">dateutil.parser</span> <span class="kn">import</span> <span class="n">parse</span> +<span class="kn">from</span> <span class="nn">dateutil.relativedelta</span> <span class="kn">import</span> <span class="n">relativedelta</span> + +<span class="kn">import</span> <span class="nn">babel</span> + +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span> +<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">html_to_text</span><span class="p">,</span> <span class="n">humanize_number</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="n">about</span> <span 
class="o">=</span> <span class="p">{</span> + <span class="c1"># pylint: disable=line-too-long</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://joinpeertube.org'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q50938515'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"videos"</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">base_url</span> <span class="o">=</span> <span class="s2">"https://peer.tube"</span> +<span class="sd">"""Base URL of the Peertube instance. 
A list of instances is available at:</span> + +<span class="sd">- https://instances.joinpeertube.org/instances</span> +<span class="sd">"""</span> + +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">time_range_table</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'day'</span><span class="p">:</span> <span class="n">relativedelta</span><span class="p">(),</span> + <span class="s1">'week'</span><span class="p">:</span> <span class="n">relativedelta</span><span class="p">(</span><span class="n">weeks</span><span class="o">=-</span><span class="mi">1</span><span class="p">),</span> + <span class="s1">'month'</span><span class="p">:</span> <span class="n">relativedelta</span><span class="p">(</span><span class="n">months</span><span class="o">=-</span><span class="mi">1</span><span class="p">),</span> + <span class="s1">'year'</span><span class="p">:</span> <span class="n">relativedelta</span><span class="p">(</span><span class="n">years</span><span class="o">=-</span><span class="mi">1</span><span class="p">),</span> +<span class="p">}</span> + +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch_table</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s1">'both'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'false'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">'false'</span><span class="p">}</span> + + +<span class="k">def</span> <span class="nf">minute_to_hm</span><span class="p">(</span><span class="n">minute</span><span class="p">):</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">minute</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> + <span 
class="k">return</span> <span class="s2">"</span><span class="si">%d</span><span class="s2">:</span><span class="si">%02d</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="nb">divmod</span><span class="p">(</span><span class="n">minute</span><span class="p">,</span> <span class="mi">60</span><span class="p">))</span> + <span class="k">return</span> <span class="kc">None</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/peertube.html#searx.engines.peertube.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Assemble request for the Peertube API"""</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">query</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">False</span> + + <span class="c1"># eng_region = traits.get_region(params['searxng_locale'], 'en_US')</span> + <span class="n">eng_lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="kc">None</span><span class="p">)</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span> + <span class="n">base_url</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">"/"</span><span class="p">)</span> + <span class="o">+</span> <span class="s2">"/api/v1/search/videos?"</span> + <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span> + <span class="p">{</span> + 
<span class="s1">'search'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'searchTarget'</span><span class="p">:</span> <span class="s1">'search-index'</span><span class="p">,</span> <span class="c1"># Vidiversum</span> + <span class="s1">'resultType'</span><span class="p">:</span> <span class="s1">'videos'</span><span class="p">,</span> + <span class="s1">'start'</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span><span class="p">,</span> + <span class="s1">'count'</span><span class="p">:</span> <span class="mi">10</span><span class="p">,</span> + <span class="c1"># -createdAt: sort by date ascending / createdAt: date descending</span> + <span class="s1">'sort'</span><span class="p">:</span> <span class="s1">'-match'</span><span class="p">,</span> <span class="c1"># sort by *match descending*</span> + <span class="s1">'nsfw'</span><span class="p">:</span> <span class="n">safesearch_table</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]],</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="p">)</span> + + <span class="k">if</span> <span class="n">eng_lang</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">'&languageOneOf[]='</span> <span class="o">+</span> <span class="n">eng_lang</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span 
class="s1">'&boostLanguages[]='</span> <span class="o">+</span> <span class="n">eng_lang</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_table</span><span class="p">:</span> + <span class="n">time</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">date</span><span class="p">()</span> <span class="o">+</span> <span class="n">time_range_table</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">'&startDate='</span> <span class="o">+</span> <span class="n">time</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + <span class="k">return</span> <span class="n">video_response</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> + + +<div class="viewcode-block" id="video_response"> +<a class="viewcode-back" href="../../../dev/engines/online/peertube.html#searx.engines.peertube.video_response">[docs]</a> +<span class="k">def</span> <span class="nf">video_response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Parse video response from SepiaSearch and Peertube instances."""</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="n">json_data</span> 
<span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + + <span class="k">if</span> <span class="s1">'data'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">json_data</span><span class="p">:</span> + <span class="k">return</span> <span class="p">[]</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_data</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]:</span> + <span class="n">metadata</span> <span class="o">=</span> <span class="p">[</span> + <span class="n">x</span> + <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span> + <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'channel'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'displayName'</span><span class="p">),</span> + <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'channel'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'name'</span><span class="p">)</span> <span class="o">+</span> <span class="s1">'@'</span> <span class="o">+</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'channel'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'host'</span><span class="p">),</span> + <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span 
class="n">get</span><span class="p">(</span><span class="s1">'tags'</span><span class="p">,</span> <span class="p">[])),</span> + <span class="p">]</span> + <span class="k">if</span> <span class="n">x</span> + <span class="p">]</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'url'</span><span class="p">],</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'name'</span><span class="p">],</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">html_to_text</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'description'</span><span class="p">)</span> <span class="ow">or</span> <span class="s1">''</span><span class="p">),</span> + <span class="s1">'author'</span><span class="p">:</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'account'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'displayName'</span><span class="p">),</span> + <span class="s1">'length'</span><span class="p">:</span> <span class="n">minute_to_hm</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'duration'</span><span class="p">)),</span> + <span class="s1">'views'</span><span class="p">:</span> <span class="n">humanize_number</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s1">'views'</span><span class="p">]),</span> + <span 
class="s1">'template'</span><span class="p">:</span> <span class="s1">'videos.html'</span><span class="p">,</span> + <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">parse</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s1">'publishedAt'</span><span class="p">]),</span> + <span class="s1">'iframe_src'</span><span class="p">:</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'embedUrl'</span><span class="p">),</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'thumbnailUrl'</span><span class="p">)</span> <span class="ow">or</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'previewUrl'</span><span class="p">),</span> + <span class="s1">'metadata'</span><span class="p">:</span> <span class="s1">' | '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">metadata</span><span class="p">),</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span></div> + + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/peertube.html#searx.engines.peertube.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch languages from peertube's search-index source code.</span> + +<span class="sd"> See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_</span> + +<span class="sd"> .. 
_8ed5c729 - Refactor and redesign client:</span> +<span class="sd"> https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729</span> +<span class="sd"> .. _videoLanguages:</span> +<span class="sd"> https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291</span> +<span class="sd"> """</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span> + <span class="s1">'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue'</span><span class="p">,</span> + <span class="c1"># the response from search-index repository is very slow</span> + <span class="n">timeout</span><span class="o">=</span><span class="mi">60</span><span class="p">,</span> + <span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from peertube is not OK."</span><span class="p">)</span> + <span class="k">return</span> + + <span class="n">js_lang</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="sa">r</span><span class="s2">"videoLanguages \(\)[^\n]+(.*?)\]"</span><span class="p">,</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">DOTALL</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">js_lang</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: can't determine languages from peertube"</span><span class="p">)</span> + 
<span class="k">return</span> + + <span class="k">for</span> <span class="n">lang</span> <span class="ow">in</span> <span class="n">re</span><span class="o">.</span><span class="n">finditer</span><span class="p">(</span><span class="sa">r</span><span class="s2">"\{ id: '([a-z]+)', label:"</span><span class="p">,</span> <span class="n">js_lang</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)):</span> + <span class="n">eng_tag</span> <span class="o">=</span> <span class="n">lang</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> + <span class="k">if</span> <span class="n">eng_tag</span> <span class="o">==</span> <span class="s1">'oc'</span><span class="p">:</span> + <span class="c1"># Occitanis not known by babel, its closest relative is Catalan</span> + <span class="c1"># but 'ca' is already in the list of engine_traits.languages --></span> + <span class="c1"># 'oc' will be ignored.</span> + <span class="k">continue</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">))</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: </span><span class="si">%s</span><span class="s2"> is unknown by babel"</span> <span class="o">%</span> <span class="n">eng_tag</span><span class="p">)</span> + <span class="k">continue</span> + + <span class="n">conflict</span> <span class="o">=</span> <span 
class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="s1">'zh_Hans'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'zh'</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="s1">'zh_Hant'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'zh'</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + 
</a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/qwant.html b/_modules/searx/engines/qwant.html new file mode 100644 index 000000000..9b3694391 --- /dev/null +++ b/_modules/searx/engines/qwant.html @@ -0,0 +1,471 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.qwant — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.qwant</a></li> + </ul> + </div> + + <div 
class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.qwant</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This engine uses the Qwant API (https://api.qwant.com/v3) to implement Qwant</span> +<span class="sd">-Web, -News, -Images and -Videos. The API is undocumented but can be reverse</span> +<span class="sd">engineered by reading the network log of https://www.qwant.com/ queries.</span> + +<span class="sd">For Qwant's *web-search* two alternatives are implemented:</span> + +<span class="sd">- ``web``: uses the :py:obj:`api_url` which returns a JSON structure</span> +<span class="sd">- ``web-lite``: uses the :py:obj:`web_lite_url` which returns a HTML page</span> + + +<span class="sd">Configuration</span> +<span class="sd">=============</span> + +<span class="sd">The engine has the following additional settings:</span> + +<span class="sd">- :py:obj:`qwant_categ`</span> + +<span class="sd">This implementation is used by different qwant engines in the :ref:`settings.yml</span> +<span class="sd"><settings engine>`:</span> + +<span class="sd">.. 
code:: yaml</span> + +<span class="sd"> - name: qwant</span> +<span class="sd"> qwant_categ: web-lite # alternatively use 'web'</span> +<span class="sd"> ...</span> +<span class="sd"> - name: qwant news</span> +<span class="sd"> qwant_categ: news</span> +<span class="sd"> ...</span> +<span class="sd"> - name: qwant images</span> +<span class="sd"> qwant_categ: images</span> +<span class="sd"> ...</span> +<span class="sd"> - name: qwant videos</span> +<span class="sd"> qwant_categ: videos</span> +<span class="sd"> ...</span> + +<span class="sd">Implementations</span> +<span class="sd">===============</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">datetime</span><span class="p">,</span> + <span class="n">timedelta</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">loads</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">from</span> <span class="nn">flask_babel</span> <span class="kn">import</span> <span class="n">gettext</span> +<span class="kn">import</span> <span class="nn">babel</span> +<span class="kn">import</span> <span class="nn">lxml</span> + +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">SearxEngineAPIException</span><span class="p">,</span> + <span class="n">SearxEngineTooManyRequestsException</span><span class="p">,</span> + <span class="n">SearxEngineCaptchaException</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">raise_for_httperror</span> +<span class="kn">from</span> <span 
class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">eval_xpath</span><span class="p">,</span> + <span class="n">eval_xpath_list</span><span class="p">,</span> + <span class="n">extract_text</span><span class="p">,</span> + <span class="n">get_embeded_stream_url</span><span class="p">,</span> +<span class="p">)</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.qwant.com/'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q14657870'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">max_page</span> <span class="o">=</span> <span class="mi">5</span> +<span class="sd">"""5 pages maximum (``&p=5``): Trying to do more just results in an improper</span> +<span class="sd">redirect"""</span> + +<span class="n">qwant_categ</span> <span 
class="o">=</span> <span class="kc">None</span> +<span class="sd">"""One of ``web-lite`` (or ``web``), ``news``, ``images`` or ``videos``"""</span> + +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> +<span class="c1"># safe_search_map = {0: '&safesearch=0', 1: '&safesearch=1', 2: '&safesearch=2'}</span> + +<span class="c1"># fmt: off</span> +<span class="n">qwant_news_locales</span> <span class="o">=</span> <span class="p">[</span> + <span class="s1">'ca_ad'</span><span class="p">,</span> <span class="s1">'ca_es'</span><span class="p">,</span> <span class="s1">'ca_fr'</span><span class="p">,</span> <span class="s1">'co_fr'</span><span class="p">,</span> <span class="s1">'de_at'</span><span class="p">,</span> <span class="s1">'de_ch'</span><span class="p">,</span> <span class="s1">'de_de'</span><span class="p">,</span> <span class="s1">'en_au'</span><span class="p">,</span> + <span class="s1">'en_ca'</span><span class="p">,</span> <span class="s1">'en_gb'</span><span class="p">,</span> <span class="s1">'en_ie'</span><span class="p">,</span> <span class="s1">'en_my'</span><span class="p">,</span> <span class="s1">'en_nz'</span><span class="p">,</span> <span class="s1">'en_us'</span><span class="p">,</span> <span class="s1">'es_ad'</span><span class="p">,</span> <span class="s1">'es_ar'</span><span class="p">,</span> + <span class="s1">'es_cl'</span><span class="p">,</span> <span class="s1">'es_co'</span><span class="p">,</span> <span class="s1">'es_es'</span><span class="p">,</span> <span class="s1">'es_mx'</span><span class="p">,</span> <span class="s1">'es_pe'</span><span class="p">,</span> <span class="s1">'eu_es'</span><span class="p">,</span> <span class="s1">'eu_fr'</span><span class="p">,</span> <span class="s1">'fc_ca'</span><span class="p">,</span> + <span class="s1">'fr_ad'</span><span class="p">,</span> <span class="s1">'fr_be'</span><span class="p">,</span> <span class="s1">'fr_ca'</span><span class="p">,</span> 
<span class="s1">'fr_ch'</span><span class="p">,</span> <span class="s1">'fr_fr'</span><span class="p">,</span> <span class="s1">'it_ch'</span><span class="p">,</span> <span class="s1">'it_it'</span><span class="p">,</span> <span class="s1">'nl_be'</span><span class="p">,</span> + <span class="s1">'nl_nl'</span><span class="p">,</span> <span class="s1">'pt_ad'</span><span class="p">,</span> <span class="s1">'pt_pt'</span><span class="p">,</span> +<span class="p">]</span> +<span class="c1"># fmt: on</span> + +<span class="c1"># search-url</span> + +<span class="n">api_url</span> <span class="o">=</span> <span class="s1">'https://api.qwant.com/v3/search/'</span> +<span class="sd">"""URL of Qwant's API (JSON)"""</span> + +<span class="n">web_lite_url</span> <span class="o">=</span> <span class="s1">'https://lite.qwant.com/'</span> +<span class="sd">"""URL of Qwant-Lite (HTML)"""</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/qwant.html#searx.engines.qwant.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Qwant search request"""</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">query</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">q_locale</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">"searxng_locale"</span><span class="p">],</span> <span class="n">default</span><span class="o">=</span><span class="s1">'en_US'</span><span class="p">)</span> + + <span class="n">url</span> <span class="o">=</span> <span class="n">api_url</span> <span 
class="o">+</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">qwant_categ</span><span class="si">}</span><span class="s1">?'</span> + <span class="n">args</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">}</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'raise_for_httperror'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span> + + <span class="k">if</span> <span class="n">qwant_categ</span> <span class="o">==</span> <span class="s1">'web-lite'</span><span class="p">:</span> + + <span class="n">url</span> <span class="o">=</span> <span class="n">web_lite_url</span> <span class="o">+</span> <span class="s1">'?'</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'locale'</span><span class="p">]</span> <span class="o">=</span> <span class="n">q_locale</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'l'</span><span class="p">]</span> <span class="o">=</span> <span class="n">q_locale</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'s'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'p'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> + + <span class="n">params</span><span class="p">[</span><span 
class="s1">'raise_for_httperror'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span> + + <span class="k">elif</span> <span class="n">qwant_categ</span> <span class="o">==</span> <span class="s1">'images'</span><span class="p">:</span> + + <span class="n">args</span><span class="p">[</span><span class="s1">'locale'</span><span class="p">]</span> <span class="o">=</span> <span class="n">q_locale</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'count'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">50</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'offset'</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">args</span><span class="p">[</span><span class="s1">'count'</span><span class="p">]</span> + + <span class="k">else</span><span class="p">:</span> <span class="c1"># web, news, videos</span> + + <span class="n">args</span><span class="p">[</span><span class="s1">'locale'</span><span class="p">]</span> <span class="o">=</span> <span class="n">q_locale</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'count'</span><span class="p">]</span> <span class="o">=</span> <span 
class="mi">10</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'offset'</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">args</span><span class="p">[</span><span class="s1">'count'</span><span class="p">]</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">url</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">args</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + + <span class="k">if</span> <span class="n">qwant_categ</span> <span class="o">==</span> <span class="s1">'web-lite'</span><span class="p">:</span> + <span class="k">return</span> <span class="n">parse_web_lite</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> + <span class="k">return</span> <span class="n">parse_web_api</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> + + +<div class="viewcode-block" id="parse_web_lite"> +<a class="viewcode-back" href="../../../dev/engines/online/qwant.html#searx.engines.qwant.parse_web_lite">[docs]</a> +<span class="k">def</span> <span class="nf">parse_web_lite</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Parse results from Qwant-Lite"""</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">dom</span> <span class="o">=</span> <span 
class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//section/article'</span><span class="p">):</span> + <span class="k">if</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s2">"./span[contains(@class, 'tooltip')]"</span><span class="p">):</span> + <span class="c1"># ignore randomly interspersed advertising adds</span> + <span class="k">continue</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s2">"./span[contains(@class, 'url partner')]"</span><span class="p">)),</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'./h2/a'</span><span class="p">)),</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'./p'</span><span class="p">)),</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span 
class="n">results</span></div> + + + +<div class="viewcode-block" id="parse_web_api"> +<a class="viewcode-back" href="../../../dev/engines/online/qwant.html#searx.engines.qwant.parse_web_api">[docs]</a> +<span class="k">def</span> <span class="nf">parse_web_api</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Parse results from Qwant's API"""</span> + <span class="c1"># pylint: disable=too-many-locals, too-many-branches, too-many-statements</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="c1"># load JSON result</span> + <span class="n">search_results</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">search_results</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'data'</span><span class="p">,</span> <span class="p">{})</span> + + <span class="c1"># check for an API error</span> + <span class="k">if</span> <span class="n">search_results</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'status'</span><span class="p">)</span> <span class="o">!=</span> <span class="s1">'success'</span><span class="p">:</span> + <span class="n">error_code</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'error_code'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">error_code</span> <span class="o">==</span> <span class="mi">24</span><span class="p">:</span> + <span class="k">raise</span> <span class="n">SearxEngineTooManyRequestsException</span><span class="p">()</span> + <span class="k">if</span> <span 
class="n">search_results</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"data"</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"error_data"</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"captchaUrl"</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">()</span> + <span class="n">msg</span> <span class="o">=</span> <span class="s2">","</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'message'</span><span class="p">,</span> <span class="p">[</span><span class="s1">'unknown'</span><span class="p">]))</span> + <span class="k">raise</span> <span class="n">SearxEngineAPIException</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">msg</span><span class="si">}</span><span class="s2"> (</span><span class="si">{</span><span class="n">error_code</span><span class="si">}</span><span class="s2">)"</span><span class="p">)</span> + + <span class="c1"># raise for other errors</span> + <span class="n">raise_for_httperror</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">qwant_categ</span> <span class="o">==</span> <span class="s1">'web'</span><span class="p">:</span> + <span class="c1"># The WEB query contains a list named 'mainline'. This list can contain</span> + <span class="c1"># different result types (e.g. 
mainline[0]['type'] returns type of the</span> + <span class="c1"># result items in mainline[0]['items']</span> + <span class="n">mainline</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'result'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'items'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'mainline'</span><span class="p">,</span> <span class="p">{})</span> + <span class="k">else</span><span class="p">:</span> + <span class="c1"># Queries on News, Images and Videos do not have a list named 'mainline'</span> + <span class="c1"># in the response. The result items are directly in the list</span> + <span class="c1"># result['items'].</span> + <span class="n">mainline</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'result'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'items'</span><span class="p">,</span> <span class="p">[])</span> + <span class="n">mainline</span> <span class="o">=</span> <span class="p">[</span> + <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="n">qwant_categ</span><span class="p">,</span> <span class="s1">'items'</span><span class="p">:</span> <span class="n">mainline</span><span class="p">},</span> + <span class="p">]</span> + + <span class="c1"># return empty array if there are no results</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">mainline</span><span class="p">:</span> + <span class="k">return</span> <span class="p">[]</span> + + <span class="k">for</span> <span 
class="n">row</span> <span class="ow">in</span> <span class="n">mainline</span><span class="p">:</span> + <span class="n">mainline_type</span> <span class="o">=</span> <span class="n">row</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'type'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">mainline_type</span> <span class="o">!=</span> <span class="n">qwant_categ</span><span class="p">:</span> + <span class="k">continue</span> + + <span class="k">if</span> <span class="n">mainline_type</span> <span class="o">==</span> <span class="s1">'ads'</span><span class="p">:</span> + <span class="c1"># ignore adds</span> + <span class="k">continue</span> + + <span class="n">mainline_items</span> <span class="o">=</span> <span class="n">row</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'items'</span><span class="p">,</span> <span class="p">[])</span> + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">mainline_items</span><span class="p">:</span> + + <span class="n">title</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'title'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="n">res_url</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'url'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">mainline_type</span> <span class="o">==</span> <span class="s1">'web'</span><span class="p">:</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span 
class="s1">'desc'</span><span class="p">]</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">res_url</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">elif</span> <span class="n">mainline_type</span> <span class="o">==</span> <span class="s1">'news'</span><span class="p">:</span> + + <span class="n">pub_date</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s1">'date'</span><span class="p">]</span> + <span class="k">if</span> <span class="n">pub_date</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">pub_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="n">pub_date</span><span class="p">)</span> + <span class="n">news_media</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'media'</span><span class="p">,</span> <span class="p">[])</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">if</span> <span class="n">news_media</span><span class="p">:</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">news_media</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pict'</span><span 
class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'url'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">res_url</span><span class="p">,</span> + <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">pub_date</span><span class="p">,</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">elif</span> <span class="n">mainline_type</span> <span class="o">==</span> <span class="s1">'images'</span><span class="p">:</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> + <span class="n">img_src</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s1">'media'</span><span class="p">]</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">res_url</span><span class="p">,</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'images.html'</span><span class="p">,</span> + <span class="s1">'thumbnail_src'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span> + <span 
class="s1">'img_src'</span><span class="p">:</span> <span class="n">img_src</span><span class="p">,</span> + <span class="s1">'resolution'</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">item</span><span class="p">[</span><span class="s1">'width'</span><span class="p">]</span><span class="si">}</span><span class="s2"> x </span><span class="si">{</span><span class="n">item</span><span class="p">[</span><span class="s1">'height'</span><span class="p">]</span><span class="si">}</span><span class="s2">"</span><span class="p">,</span> + <span class="s1">'img_format'</span><span class="p">:</span> <span class="n">item</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'thumb_type'</span><span class="p">),</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">elif</span> <span class="n">mainline_type</span> <span class="o">==</span> <span class="s1">'videos'</span><span class="p">:</span> + <span class="c1"># some videos do not have a description: while qwant-video</span> + <span class="c1"># returns an empty string, such video from a qwant-web query</span> + <span class="c1"># miss the 'desc' key.</span> + <span class="n">d</span><span class="p">,</span> <span class="n">s</span><span class="p">,</span> <span class="n">c</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'desc'</span><span class="p">),</span> <span class="n">item</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'source'</span><span class="p">),</span> <span class="n">item</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'channel'</span><span class="p">)</span> + <span class="n">content_parts</span> <span class="o">=</span> <span class="p">[]</span> + <span 
class="k">if</span> <span class="n">d</span><span class="p">:</span> + <span class="n">content_parts</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">d</span><span class="p">)</span> + <span class="k">if</span> <span class="n">s</span><span class="p">:</span> + <span class="n">content_parts</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s2">"</span><span class="si">%s</span><span class="s2">: </span><span class="si">%s</span><span class="s2"> "</span> <span class="o">%</span> <span class="p">(</span><span class="n">gettext</span><span class="p">(</span><span class="s2">"Source"</span><span class="p">),</span> <span class="n">s</span><span class="p">))</span> + <span class="k">if</span> <span class="n">c</span><span class="p">:</span> + <span class="n">content_parts</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s2">"</span><span class="si">%s</span><span class="s2">: </span><span class="si">%s</span><span class="s2"> "</span> <span class="o">%</span> <span class="p">(</span><span class="n">gettext</span><span class="p">(</span><span class="s2">"Channel"</span><span class="p">),</span> <span class="n">c</span><span class="p">))</span> + <span class="n">content</span> <span class="o">=</span> <span class="s1">' // '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">content_parts</span><span class="p">)</span> + <span class="n">length</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s1">'duration'</span><span class="p">]</span> + <span class="k">if</span> <span class="n">length</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">length</span> <span class="o">=</span> <span class="n">timedelta</span><span class="p">(</span><span 
class="n">milliseconds</span><span class="o">=</span><span class="n">length</span><span class="p">)</span> + <span class="n">pub_date</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s1">'date'</span><span class="p">]</span> + <span class="k">if</span> <span class="n">pub_date</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">pub_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="n">pub_date</span><span class="p">)</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> + <span class="c1"># from some locations (DE and others?) the s2 link do</span> + <span class="c1"># response a 'Please wait ..' but does not deliver the thumbnail</span> + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">thumbnail</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'https://s2.qwant.com'</span><span class="p">,</span> <span class="s1">'https://s1.qwant.com'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">res_url</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> + <span class="s1">'iframe_src'</span><span class="p">:</span> <span class="n">get_embeded_stream_url</span><span class="p">(</span><span 
class="n">res_url</span><span class="p">),</span> + <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">pub_date</span><span class="p">,</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'videos.html'</span><span class="p">,</span> + <span class="s1">'length'</span><span class="p">:</span> <span class="n">length</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span></div> + + + +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> + + <span class="c1"># pylint: disable=import-outside-toplevel</span> + <span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">network</span> + <span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">region_tag</span> + <span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extr</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">network</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">about</span><span class="p">[</span><span class="s1">'website'</span><span class="p">])</span> + <span class="n">json_string</span> <span class="o">=</span> <span class="n">extr</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">,</span> <span class="s1">'INITIAL_PROPS = '</span><span class="p">,</span> <span class="s1">'</script>'</span><span class="p">)</span> + + <span class="n">q_initial_props</span> <span 
class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">json_string</span><span class="p">)</span> + <span class="n">q_locales</span> <span class="o">=</span> <span class="n">q_initial_props</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'locales'</span><span class="p">)</span> + <span class="n">eng_tag_list</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span> + + <span class="k">for</span> <span class="n">country</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">q_locales</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="k">for</span> <span class="n">lang</span> <span class="ow">in</span> <span class="n">v</span><span class="p">[</span><span class="s1">'langs'</span><span class="p">]:</span> + <span class="n">_locale</span> <span class="o">=</span> <span class="s2">"</span><span class="si">{lang}</span><span class="s2">_</span><span class="si">{country}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">lang</span><span class="o">=</span><span class="n">lang</span><span class="p">,</span> <span class="n">country</span><span class="o">=</span><span class="n">country</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">qwant_categ</span> <span class="o">==</span> <span class="s1">'news'</span> <span class="ow">and</span> <span class="n">_locale</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">qwant_news_locales</span><span class="p">:</span> + <span class="c1"># qwant-news does not support all locales from qwant-web:</span> + <span class="k">continue</span> + + <span class="n">eng_tag_list</span><span class="o">.</span><span class="n">add</span><span 
class="p">(</span><span class="n">_locale</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">eng_tag</span> <span class="ow">in</span> <span class="n">eng_tag_list</span><span class="p">:</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'_'</span><span class="p">))</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: can't determine babel locale of quant's locale </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">eng_tag</span><span class="p">)</span> + <span class="k">continue</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span 
class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a 
href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/radio_browser.html b/_modules/searx/engines/radio_browser.html new file mode 100644 index 000000000..83c99a979 --- /dev/null +++ b/_modules/searx/engines/radio_browser.html @@ -0,0 +1,288 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.radio_browser — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a 
href="">searx.engines.radio_browser</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.radio_browser</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Search radio stations from RadioBrowser by `Advanced station search API`_.</span> + +<span class="sd">.. _Advanced station search API:</span> +<span class="sd"> https://de1.api.radio-browser.info/#Advanced_station_search</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">import</span> <span class="nn">babel</span> +<span class="kn">from</span> <span class="nn">flask_babel</span> <span class="kn">import</span> <span class="n">gettext</span> + +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> +<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.radio-browser.info/'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q111664849'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://de1.api.radio-browser.info/'</span><span class="p">,</span> + <span 
class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'music'</span><span class="p">,</span> <span class="s1">'radio'</span><span class="p">]</span> + +<span class="n">base_url</span> <span class="o">=</span> <span class="s2">"https://de1.api.radio-browser.info"</span> <span class="c1"># see https://api.radio-browser.info/ for all nodes</span> +<span class="n">number_of_results</span> <span class="o">=</span> <span class="mi">10</span> + +<span class="n">station_filters</span> <span class="o">=</span> <span class="p">[]</span> <span class="c1"># ['countrycode', 'language']</span> +<span class="sd">"""A list of filters to be applied to the search of radio stations. By default</span> +<span class="sd">none filters are applied. Valid filters are:</span> + +<span class="sd">``language``</span> +<span class="sd"> Filter stations by selected language. For instance the ``de`` from ``:de-AU``</span> +<span class="sd"> will be translated to `german` and used in the argument ``language=``.</span> + +<span class="sd">``countrycode``</span> +<span class="sd"> Filter stations by selected country. The 2-digit countrycode of the station</span> +<span class="sd"> comes from the region the user selected. For instance ``:de-AU`` will filter</span> +<span class="sd"> out all stations not in ``AU``.</span> + +<span class="sd">.. 
note::</span> + +<span class="sd"> RadioBrowser has registered a lot of languages and countrycodes unknown to</span> +<span class="sd"> :py:obj:`babel` and note that when searching for radio stations, users are</span> +<span class="sd"> more likely to search by name than by region or language.</span> + +<span class="sd">"""</span> + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + <span class="n">args</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'name'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'order'</span><span class="p">:</span> <span class="s1">'votes'</span><span class="p">,</span> + <span class="s1">'offset'</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">number_of_results</span><span class="p">,</span> + <span class="s1">'limit'</span><span class="p">:</span> <span class="n">number_of_results</span><span class="p">,</span> + <span class="s1">'hidebroken'</span><span class="p">:</span> <span class="s1">'true'</span><span class="p">,</span> + <span class="s1">'reverse'</span><span class="p">:</span> <span class="s1">'true'</span><span class="p">,</span> + <span class="p">}</span> + + <span class="k">if</span> <span class="s1">'language'</span> <span class="ow">in</span> <span class="n">station_filters</span><span class="p">:</span> + <span class="n">lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span 
class="p">])</span> <span class="c1"># type: ignore</span> + <span class="k">if</span> <span class="n">lang</span><span class="p">:</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">lang</span> + + <span class="k">if</span> <span class="s1">'countrycode'</span> <span class="ow">in</span> <span class="n">station_filters</span><span class="p">:</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">))</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="n">countrycode</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span> + <span class="k">if</span> <span class="n">countrycode</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'countrycodes'</span><span class="p">]:</span> <span class="c1"># type: ignore</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'countrycode'</span><span class="p">]</span> <span class="o">=</span> <span class="n">countrycode</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span 
class="si">{</span><span class="n">base_url</span><span class="si">}</span><span class="s2">/json/stations/search?</span><span class="si">{</span><span class="n">urlencode</span><span class="p">(</span><span class="n">args</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span> + <span class="k">return</span> <span class="n">params</span> + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="n">json_resp</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_resp</span><span class="p">:</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'homepage'</span><span class="p">]</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">url</span><span class="p">:</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'url_resolved'</span><span class="p">]</span> + + <span class="n">content</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">tags</span> <span class="o">=</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'tags'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">','</span><span class="p">))</span> + <span class="k">if</span> <span class="n">tags</span><span 
class="p">:</span> + <span class="n">content</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tags</span><span class="p">)</span> + <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'state'</span><span class="p">,</span> <span class="s1">'country'</span><span class="p">]:</span> + <span class="n">v</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> + <span class="k">if</span> <span class="n">v</span><span class="p">:</span> + <span class="n">v</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">v</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> + <span class="n">content</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">v</span><span class="p">)</span> + + <span class="n">metadata</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">codec</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'codec'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">codec</span> <span class="ow">and</span> <span class="n">codec</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">!=</span> <span class="s1">'unknown'</span><span class="p">:</span> + <span class="n">metadata</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">codec</span><span class="si">}</span><span class="s1"> '</span> <span class="o">+</span> <span class="n">gettext</span><span class="p">(</span><span 
class="s1">'radio'</span><span class="p">))</span> + <span class="k">for</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span> <span class="ow">in</span> <span class="p">[</span> + <span class="p">(</span><span class="n">gettext</span><span class="p">(</span><span class="s1">'bitrate'</span><span class="p">),</span> <span class="s1">'bitrate'</span><span class="p">),</span> + <span class="p">(</span><span class="n">gettext</span><span class="p">(</span><span class="s1">'votes'</span><span class="p">),</span> <span class="s1">'votes'</span><span class="p">),</span> + <span class="p">(</span><span class="n">gettext</span><span class="p">(</span><span class="s1">'clicks'</span><span class="p">),</span> <span class="s1">'clickcount'</span><span class="p">),</span> + <span class="p">]:</span> + <span class="n">v</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">y</span><span class="p">)</span> + <span class="k">if</span> <span class="n">v</span><span class="p">:</span> + <span class="n">v</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">v</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> + <span class="n">metadata</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">x</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">v</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> + <span 
class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'name'</span><span class="p">],</span> + <span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'favicon'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"http://"</span><span class="p">,</span> <span class="s2">"https://"</span><span class="p">),</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="s1">' | '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">content</span><span class="p">),</span> + <span class="s1">'metadata'</span><span class="p">:</span> <span class="s1">' | '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">metadata</span><span class="p">),</span> + <span class="s1">'iframe_src'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'url_resolved'</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"http://"</span><span class="p">,</span> <span class="s2">"https://"</span><span class="p">),</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span> + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/radio_browser.html#searx.engines.radio_browser.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span 
class="sd">"""Fetch languages and countrycodes from RadioBrowser</span> + +<span class="sd"> - ``traits.languages``: `list of languages API`_</span> +<span class="sd"> - ``traits.custom['countrycodes']``: `list of countries API`_</span> + +<span class="sd"> .. _list of countries API: https://de1.api.radio-browser.info/#List_of_countries</span> +<span class="sd"> .. _list of languages API: https://de1.api.radio-browser.info/#List_of_languages</span> +<span class="sd"> """</span> + <span class="c1"># pylint: disable=import-outside-toplevel</span> + + <span class="kn">from</span> <span class="nn">babel.core</span> <span class="kn">import</span> <span class="n">get_global</span> + + <span class="n">babel_reg_list</span> <span class="o">=</span> <span class="n">get_global</span><span class="p">(</span><span class="s2">"territory_languages"</span><span class="p">)</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span> + + <span class="n">language_list</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">base_url</span><span class="si">}</span><span class="s1">/json/languages'</span><span class="p">)</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> <span class="c1"># type: ignore</span> + <span class="n">country_list</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">base_url</span><span class="si">}</span><span class="s1">/json/countries'</span><span class="p">)</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> <span class="c1"># type: ignore</span> + + <span class="k">for</span> <span class="n">lang</span> <span class="ow">in</span> <span class="n">language_list</span><span class="p">:</span> + + <span class="n">babel_lang</span> <span 
class="o">=</span> <span class="n">lang</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'iso_639'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">babel_lang</span><span class="p">:</span> + <span class="c1"># the language doesn't have any iso code, and hence can't be parsed</span> + <span class="c1"># print(f"ERROR: lang - no iso code in {lang}")</span> + <span class="k">continue</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">babel_lang</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s2">"-"</span><span class="p">))</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="c1"># print(f"ERROR: language tag {babel_lang} is unknown by babel")</span> + <span class="k">continue</span> + + <span class="n">eng_tag</span> <span class="o">=</span> <span class="n">lang</span><span class="p">[</span><span class="s1">'name'</span><span class="p">]</span> + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span 
class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> + + <span class="n">countrycodes</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span> + <span class="k">for</span> <span class="n">region</span> <span class="ow">in</span> <span class="n">country_list</span><span class="p">:</span> + <span class="c1"># country_list contains duplicates that differ only in upper/lower case</span> + <span class="n">_reg</span> <span class="o">=</span> <span class="n">region</span><span class="p">[</span><span class="s1">'iso_3166_1'</span><span class="p">]</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span> + <span class="k">if</span> <span class="n">_reg</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">babel_reg_list</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"ERROR: region tag </span><span class="si">{</span><span class="n">region</span><span class="p">[</span><span class="s1">'iso_3166_1'</span><span class="p">]</span><span class="si">}</span><span class="s2"> is unknown by babel"</span><span class="p">)</span> + <span class="k">continue</span> + <span class="n">countrycodes</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">_reg</span><span 
class="p">)</span> + + <span class="n">countrycodes</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">countrycodes</span><span class="p">)</span> + <span class="n">countrycodes</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'countrycodes'</span><span class="p">]</span> <span class="o">=</span> <span class="n">countrycodes</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue 
Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/sepiasearch.html b/_modules/searx/engines/sepiasearch.html new file mode 100644 index 000000000..c1253d8b5 --- /dev/null +++ b/_modules/searx/engines/sepiasearch.html @@ -0,0 +1,196 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.sepiasearch — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.sepiasearch</a></li> + 
</ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.sepiasearch</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""SepiaSearch uses the same languages as :py:obj:`Peertube</span> +<span class="sd"><searx.engines.peertube>` and the response is identical to the response from the</span> +<span class="sd">peertube engines.</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span> + +<span class="kn">from</span> <span class="nn">searx.engines.peertube</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span> +<span class="kn">from</span> <span class="nn">searx.engines.peertube</span> <span class="kn">import</span> <span class="p">(</span> + <span class="c1"># pylint: disable=unused-import</span> + <span class="n">video_response</span><span class="p">,</span> + <span class="n">safesearch_table</span><span class="p">,</span> + <span class="n">time_range_table</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span 
class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="c1"># pylint: disable=line-too-long</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://sepiasearch.org'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'videos'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> + +<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://sepiasearch.org'</span> + +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/peertube.html#searx.engines.sepiasearch.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span 
class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Assemble request for the SepiaSearch API"""</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">query</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">False</span> + + <span class="c1"># eng_region = traits.get_region(params['searxng_locale'], 'en_US')</span> + <span class="n">eng_lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="kc">None</span><span class="p">)</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span> + <span class="n">base_url</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">"/"</span><span class="p">)</span> + <span class="o">+</span> <span class="s2">"/api/v1/search/videos?"</span> + <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'search'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'start'</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span><span class="p">,</span> + <span class="s1">'count'</span><span class="p">:</span> <span class="mi">10</span><span class="p">,</span> + <span class="c1"># -createdAt: sort by date ascending / createdAt: date descending</span> + <span class="s1">'sort'</span><span class="p">:</span> <span 
class="s1">'-match'</span><span class="p">,</span> <span class="c1"># sort by *match descending*</span> + <span class="s1">'nsfw'</span><span class="p">:</span> <span class="n">safesearch_table</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]],</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="p">)</span> + + <span class="k">if</span> <span class="n">eng_lang</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">'&languageOneOf[]='</span> <span class="o">+</span> <span class="n">eng_lang</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">'&boostLanguages[]='</span> <span class="o">+</span> <span class="n">eng_lang</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_table</span><span class="p">:</span> + <span class="n">time</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">date</span><span class="p">()</span> <span class="o">+</span> <span class="n">time_range_table</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">'&startDate='</span> <span class="o">+</span> <span class="n">time</span><span class="o">.</span><span 
class="n">isoformat</span><span class="p">()</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + <span class="k">return</span> <span class="n">video_response</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> 
+ <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/sqlite.html b/_modules/searx/engines/sqlite.html new file mode 100644 index 000000000..7fe524c2c --- /dev/null +++ b/_modules/searx/engines/sqlite.html @@ -0,0 +1,216 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.sqlite — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.sqlite</a></li> + </ul> + </div> + + <div 
class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.sqlite</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""SQLite is a small, fast and reliable SQL database engine. It does not require</span> +<span class="sd">any extra dependency.</span> + +<span class="sd">Example</span> +<span class="sd">=======</span> + +<span class="sd">.. _MediathekView: https://mediathekview.de/</span> + +<span class="sd">To demonstrate the power of database engines, here is a more complex example</span> +<span class="sd">which reads from a MediathekView_ (DE) movie database. For this example of the</span> +<span class="sd">SQLite engine download the database:</span> + +<span class="sd">- https://liste.mediathekview.de/filmliste-v2.db.bz2</span> + +<span class="sd">and unpack into ``searx/data/filmliste-v2.db``. To search the database use e.g</span> +<span class="sd">Query to test: ``!mediathekview concert``</span> + +<span class="sd">.. 
code:: yaml</span> + +<span class="sd"> - name: mediathekview</span> +<span class="sd"> engine: sqlite</span> +<span class="sd"> disabled: False</span> +<span class="sd"> categories: general</span> +<span class="sd"> result_template: default.html</span> +<span class="sd"> database: searx/data/filmliste-v2.db</span> +<span class="sd"> query_str: >-</span> +<span class="sd"> SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title,</span> +<span class="sd"> COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url,</span> +<span class="sd"> description AS content</span> +<span class="sd"> FROM film</span> +<span class="sd"> WHERE title LIKE :wildcard OR description LIKE :wildcard</span> +<span class="sd"> ORDER BY duration DESC</span> + +<span class="sd">Implementations</span> +<span class="sd">===============</span> + +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">sqlite3</span> +<span class="kn">import</span> <span class="nn">contextlib</span> + +<span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'offline'</span> + +<span class="n">database</span> <span class="o">=</span> <span class="s2">""</span> +<span class="sd">"""Filename of the SQLite DB."""</span> + +<span class="n">query_str</span> <span class="o">=</span> <span class="s2">""</span> +<span class="sd">"""SQL query that returns the result items."""</span> + +<span class="n">limit</span> <span class="o">=</span> <span class="mi">10</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">result_template</span> <span class="o">=</span> <span class="s1">'key-value.html'</span> + + +<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">):</span> + <span class="k">if</span> <span class="s1">'query_str'</span> <span class="ow">not</span> <span class="ow">in</span> <span 
class="n">engine_settings</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'query_str cannot be empty'</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'query_str'</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'select '</span><span class="p">):</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'only SELECT query is supported'</span><span class="p">)</span> + + +<div class="viewcode-block" id="sqlite_cursor"> +<a class="viewcode-back" href="../../../dev/engines/offline/sql-engines.html#searx.engines.sqlite.sqlite_cursor">[docs]</a> +<span class="nd">@contextlib</span><span class="o">.</span><span class="n">contextmanager</span> +<span class="k">def</span> <span class="nf">sqlite_cursor</span><span class="p">():</span> +<span class="w"> </span><span class="sd">"""Implements a :py:obj:`Context Manager <contextlib.contextmanager>` for a</span> +<span class="sd"> :py:obj:`sqlite3.Cursor`.</span> + +<span class="sd"> Open database in read only mode: if the database doesn't exist. The default</span> +<span class="sd"> mode creates an empty file on the file system. 
See:</span> + +<span class="sd"> * https://docs.python.org/3/library/sqlite3.html#sqlite3.connect</span> +<span class="sd"> * https://www.sqlite.org/uri.html</span> + +<span class="sd"> """</span> + <span class="n">uri</span> <span class="o">=</span> <span class="s1">'file:'</span> <span class="o">+</span> <span class="n">database</span> <span class="o">+</span> <span class="s1">'?mode=ro'</span> + <span class="k">with</span> <span class="n">contextlib</span><span class="o">.</span><span class="n">closing</span><span class="p">(</span><span class="n">sqlite3</span><span class="o">.</span><span class="n">connect</span><span class="p">(</span><span class="n">uri</span><span class="p">,</span> <span class="n">uri</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span> <span class="k">as</span> <span class="n">connect</span><span class="p">:</span> + <span class="n">connect</span><span class="o">.</span><span class="n">row_factory</span> <span class="o">=</span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">Row</span> + <span class="k">with</span> <span class="n">contextlib</span><span class="o">.</span><span class="n">closing</span><span class="p">(</span><span class="n">connect</span><span class="o">.</span><span class="n">cursor</span><span class="p">())</span> <span class="k">as</span> <span class="n">cursor</span><span class="p">:</span> + <span class="k">yield</span> <span class="n">cursor</span></div> + + + +<span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="n">query_params</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span 
class="s1">'wildcard'</span><span class="p">:</span> <span class="sa">r</span><span class="s1">'%'</span> <span class="o">+</span> <span class="n">query</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">' '</span><span class="p">,</span> <span class="sa">r</span><span class="s1">'%'</span><span class="p">)</span> <span class="o">+</span> <span class="sa">r</span><span class="s1">'%'</span><span class="p">,</span> + <span class="s1">'limit'</span><span class="p">:</span> <span class="n">limit</span><span class="p">,</span> + <span class="s1">'offset'</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">limit</span><span class="p">,</span> + <span class="p">}</span> + <span class="n">query_to_run</span> <span class="o">=</span> <span class="n">query_str</span> <span class="o">+</span> <span class="s1">' LIMIT :limit OFFSET :offset'</span> + + <span class="k">with</span> <span class="n">sqlite_cursor</span><span class="p">()</span> <span class="k">as</span> <span class="n">cur</span><span class="p">:</span> + + <span class="n">cur</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">query_to_run</span><span class="p">,</span> <span class="n">query_params</span><span class="p">)</span> + <span class="n">col_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">cn</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">cn</span> <span class="ow">in</span> <span class="n">cur</span><span class="o">.</span><span class="n">description</span><span class="p">]</span> + + <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span 
class="n">cur</span><span class="o">.</span><span class="n">fetchall</span><span class="p">():</span> + <span class="n">item</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">col_names</span><span class="p">,</span> <span class="nb">map</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">row</span><span class="p">)))</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'template'</span><span class="p">]</span> <span class="o">=</span> <span class="n">result_template</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"append result --> </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span> +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference 
internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/startpage.html b/_modules/searx/engines/startpage.html new file mode 100644 index 000000000..e214bb8cd --- /dev/null +++ b/_modules/searx/engines/startpage.html @@ -0,0 +1,610 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.startpage — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.startpage</a></li> + </ul> + 
</div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.startpage</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Startpage's language & region selectors are a mess ..</span> + +<span class="sd">.. _startpage regions:</span> + +<span class="sd">Startpage regions</span> +<span class="sd">=================</span> + +<span class="sd">In the list of regions there are tags we need to map to common region tags::</span> + +<span class="sd"> pt-BR_BR --> pt_BR</span> +<span class="sd"> zh-CN_CN --> zh_Hans_CN</span> +<span class="sd"> zh-TW_TW --> zh_Hant_TW</span> +<span class="sd"> zh-TW_HK --> zh_Hant_HK</span> +<span class="sd"> en-GB_GB --> en_GB</span> + +<span class="sd">and there is at least one tag with a three letter language tag (ISO 639-2)::</span> + +<span class="sd"> fil_PH --> fil_PH</span> + +<span class="sd">The locale code ``no_NO`` from Startpage does not exists and is mapped to</span> +<span class="sd">``nb-NO``::</span> + +<span class="sd"> babel.core.UnknownLocaleError: unknown locale 'no_NO'</span> + +<span class="sd">For reference see languages-subtag at iana; ``no`` is the macrolanguage [1]_ and</span> +<span class="sd">W3C recommends subtag over macrolanguage [2]_.</span> + +<span class="sd">.. [1] `iana: language-subtag-registry</span> +<span class="sd"> <https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry>`_ ::</span> + +<span class="sd"> type: language</span> +<span class="sd"> Subtag: nb</span> +<span class="sd"> Description: Norwegian Bokmål</span> +<span class="sd"> Added: 2005-10-16</span> +<span class="sd"> Suppress-Script: Latn</span> +<span class="sd"> Macrolanguage: no</span> + +<span class="sd">.. [2]</span> +<span class="sd"> Use macrolanguages with care. 
Some language subtags have a Scope field set to</span> +<span class="sd"> macrolanguage, i.e. this primary language subtag encompasses a number of more</span> +<span class="sd"> specific primary language subtags in the registry. ... As we recommended for</span> +<span class="sd"> the collection subtags mentioned above, in most cases you should try to use</span> +<span class="sd"> the more specific subtags ... `W3: The primary language subtag</span> +<span class="sd"> <https://www.w3.org/International/questions/qa-choosing-language-tags#langsubtag>`_</span> + +<span class="sd">.. _startpage languages:</span> + +<span class="sd">Startpage languages</span> +<span class="sd">===================</span> + +<span class="sd">:py:obj:`send_accept_language_header`:</span> +<span class="sd"> The displayed name in Startpage's settings page depend on the location of the</span> +<span class="sd"> IP when ``Accept-Language`` HTTP header is unset. In :py:obj:`fetch_traits`</span> +<span class="sd"> we use::</span> + +<span class="sd"> 'Accept-Language': "en-US,en;q=0.5",</span> +<span class="sd"> ..</span> + +<span class="sd"> to get uniform names independent from the IP).</span> + +<span class="sd">.. _startpage categories:</span> + +<span class="sd">Startpage categories</span> +<span class="sd">====================</span> + +<span class="sd">Startpage's category (for Web-search, News, Videos, ..) is set by</span> +<span class="sd">:py:obj:`startpage_categ` in settings.yml::</span> + +<span class="sd"> - name: startpage</span> +<span class="sd"> engine: startpage</span> +<span class="sd"> startpage_categ: web</span> +<span class="sd"> ...</span> + +<span class="sd">.. hint::</span> + +<span class="sd"> The default category is ``web`` .. 
and other categories than ``web`` are not</span> +<span class="sd"> yet implemented.</span> + +<span class="sd">"""</span> +<span class="c1"># pylint: disable=too-many-statements</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">OrderedDict</span> +<span class="kn">import</span> <span class="nn">re</span> +<span class="kn">from</span> <span class="nn">unicodedata</span> <span class="kn">import</span> <span class="n">normalize</span><span class="p">,</span> <span class="n">combining</span> +<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span> +<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span> + +<span class="kn">import</span> <span class="nn">dateutil.parser</span> +<span class="kn">import</span> <span class="nn">lxml.html</span> +<span class="kn">import</span> <span class="nn">babel.localedata</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">gen_useragent</span> +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span> +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineCaptchaException</span> +<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">region_tag</span> +<span class="kn">from</span> <span 
class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://startpage.com'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q2333295'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="n">startpage_categ</span> <span class="o">=</span> <span class="s1">'web'</span> +<span class="sd">"""Startpage's category, visit :ref:`startpage categories`.</span> +<span class="sd">"""</span> + +<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span> +<span class="sd">"""Startpage tries to guess user's language and territory from the HTTP</span> +<span class="sd">``Accept-Language``. 
Optional the user can select a search-language (can be</span> +<span class="sd">different to the UI language) and a region filter.</span> +<span class="sd">"""</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">max_page</span> <span class="o">=</span> <span class="mi">18</span> +<span class="sd">"""Tested 18 pages maximum (argument ``page``), to be save max is set to 20."""</span> + +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> + +<span class="n">time_range_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'day'</span><span class="p">:</span> <span class="s1">'d'</span><span class="p">,</span> <span class="s1">'week'</span><span class="p">:</span> <span class="s1">'w'</span><span class="p">,</span> <span class="s1">'month'</span><span class="p">:</span> <span class="s1">'m'</span><span class="p">,</span> <span class="s1">'year'</span><span class="p">:</span> <span class="s1">'y'</span><span class="p">}</span> +<span class="n">safesearch_dict</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s1">'0'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'1'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">'1'</span><span class="p">}</span> + +<span class="c1"># search-url</span> +<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://www.startpage.com'</span> +<span class="n">search_url</span> <span 
class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/sp/search'</span> + +<span class="c1"># specific xpath variables</span> +<span class="c1"># ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]</span> +<span class="c1"># not ads: div[@class="result"] are the direct children of div[@id="results"]</span> +<span class="n">search_form_xpath</span> <span class="o">=</span> <span class="s1">'//form[@id="search"]'</span> +<span class="sd">"""XPath of Startpage's origin search form</span> + +<span class="sd">.. code: html</span> + +<span class="sd"> <form action="/sp/search" method="post"></span> +<span class="sd"> <input type="text" name="query" value="" ..></span> +<span class="sd"> <input type="hidden" name="t" value="device"></span> +<span class="sd"> <input type="hidden" name="lui" value="english"></span> +<span class="sd"> <input type="hidden" name="sc" value="Q7Mt5TRqowKB00"></span> +<span class="sd"> <input type="hidden" name="cat" value="web"></span> +<span class="sd"> <input type="hidden" class="abp" id="abp-input" name="abp" value="1"></span> +<span class="sd"> </form></span> +<span class="sd">"""</span> + +<span class="c1"># timestamp of the last fetch of 'sc' code</span> +<span class="n">sc_code_ts</span> <span class="o">=</span> <span class="mi">0</span> +<span class="n">sc_code</span> <span class="o">=</span> <span class="s1">''</span> +<span class="n">sc_code_cache_sec</span> <span class="o">=</span> <span class="mi">30</span> +<span class="sd">"""Time in seconds the sc-code is cached in memory :py:obj:`get_sc_code`."""</span> + + +<div class="viewcode-block" id="get_sc_code"> +<a class="viewcode-back" href="../../../dev/engines/online/startpage.html#searx.engines.startpage.get_sc_code">[docs]</a> +<span class="k">def</span> <span class="nf">get_sc_code</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">,</span> <span class="n">params</span><span 
class="p">):</span> +<span class="w"> </span><span class="sd">"""Get an actual ``sc`` argument from Startpage's search form (HTML page).</span> + +<span class="sd"> Startpage puts a ``sc`` argument on every HTML :py:obj:`search form</span> +<span class="sd"> <search_form_xpath>`. Without this argument Startpage considers the request</span> +<span class="sd"> is from a bot. We do not know what is encoded in the value of the ``sc``</span> +<span class="sd"> argument, but it seems to be a kind of a *time-stamp*.</span> + +<span class="sd"> Startpage's search form generates a new sc-code on each request. This</span> +<span class="sd"> function scrap a new sc-code from Startpage's home page every</span> +<span class="sd"> :py:obj:`sc_code_cache_sec` seconds.</span> + +<span class="sd"> """</span> + + <span class="k">global</span> <span class="n">sc_code_ts</span><span class="p">,</span> <span class="n">sc_code</span> <span class="c1"># pylint: disable=global-statement</span> + + <span class="k">if</span> <span class="n">sc_code</span> <span class="ow">and</span> <span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o"><</span> <span class="p">(</span><span class="n">sc_code_ts</span> <span class="o">+</span> <span class="n">sc_code_cache_sec</span><span class="p">)):</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"get_sc_code: reuse '</span><span class="si">%s</span><span class="s2">'"</span><span class="p">,</span> <span class="n">sc_code</span><span class="p">)</span> + <span class="k">return</span> <span class="n">sc_code</span> + + <span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]}</span> + <span class="n">headers</span><span class="p">[</span><span class="s1">'Origin'</span><span 
class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> + <span class="n">headers</span><span class="p">[</span><span class="s1">'Referer'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/'</span> + <span class="c1"># headers['Connection'] = 'keep-alive'</span> + <span class="c1"># headers['Accept-Encoding'] = 'gzip, deflate, br'</span> + <span class="c1"># headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8'</span> + <span class="c1"># headers['User-Agent'] = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0'</span> + + <span class="c1"># add Accept-Language header</span> + <span class="k">if</span> <span class="n">searxng_locale</span> <span class="o">==</span> <span class="s1">'all'</span><span class="p">:</span> + <span class="n">searxng_locale</span> <span class="o">=</span> <span class="s1">'en-US'</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">send_accept_language_header</span><span class="p">:</span> + <span class="n">ac_lang</span> <span class="o">=</span> <span class="n">locale</span><span class="o">.</span><span class="n">language</span> + <span class="k">if</span> <span class="n">locale</span><span class="o">.</span><span class="n">territory</span><span class="p">:</span> + <span class="n">ac_lang</span> <span class="o">=</span> <span class="s2">"</span><span class="si">%s</span><span class="s2">-</span><span class="si">%s</span><span class="s2">,</span><span class="si">%s</span><span 
class="s2">;q=0.9,*;q=0.5"</span> <span class="o">%</span> <span class="p">(</span> + <span class="n">locale</span><span class="o">.</span><span class="n">language</span><span class="p">,</span> + <span class="n">locale</span><span class="o">.</span><span class="n">territory</span><span class="p">,</span> + <span class="n">locale</span><span class="o">.</span><span class="n">language</span><span class="p">,</span> + <span class="p">)</span> + <span class="n">headers</span><span class="p">[</span><span class="s1">'Accept-Language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ac_lang</span> + + <span class="n">get_sc_url</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/?sc=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">sc_code</span><span class="p">)</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"query new sc time-stamp ... </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">get_sc_url</span><span class="p">)</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"headers: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">headers</span><span class="p">)</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">get_sc_url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span> + + <span class="c1"># ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers)</span> + <span class="c1"># ?? 
https://www.startpage.com/sp/cdn/images/filter-chevron.svg</span> + <span class="c1"># ?? ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21</span> + + <span class="k">if</span> <span class="nb">str</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">url</span><span class="p">)</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'https://www.startpage.com/sp/captcha'</span><span class="p">):</span> <span class="c1"># type: ignore</span> + <span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">(</span> + <span class="n">message</span><span class="o">=</span><span class="s2">"get_sc_code: got redirected to https://www.startpage.com/sp/captcha"</span><span class="p">,</span> + <span class="p">)</span> + + <span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + <span class="k">try</span><span class="p">:</span> + <span class="n">sc_code</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">search_form_xpath</span> <span class="o">+</span> <span class="s1">'//input[@name="sc"]/@value'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="k">except</span> <span class="ne">IndexError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"suspend startpage API --> https://github.com/searxng/searxng/pull/695"</span><span 
class="p">)</span> + <span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">(</span> + <span class="n">message</span><span class="o">=</span><span class="s2">"get_sc_code: [PR-695] query new sc time-stamp failed! (</span><span class="si">%s</span><span class="s2">)"</span> <span class="o">%</span> <span class="n">resp</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="c1"># type: ignore</span> + <span class="p">)</span> <span class="kn">from</span> <span class="nn">exc</span> + + <span class="n">sc_code_ts</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"get_sc_code: new value is: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">sc_code</span><span class="p">)</span> + <span class="k">return</span> <span class="n">sc_code</span></div> + + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/startpage.html#searx.engines.startpage.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Assemble a Startpage request.</span> + +<span class="sd"> To avoid CAPTCHA we need to send a well formed HTTP POST request with a</span> +<span class="sd"> cookie. 
We need to form a request that is identical to the request build by</span> +<span class="sd"> Startpage's search form:</span> + +<span class="sd"> - in the cookie the **region** is selected</span> +<span class="sd"> - in the HTTP POST data the **language** is selected</span> + +<span class="sd"> Additionally the arguments form Startpage's search form needs to be set in</span> +<span class="sd"> HTML POST data / compare ``<input>`` elements: :py:obj:`search_form_xpath`.</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="n">startpage_categ</span> <span class="o">==</span> <span class="s1">'web'</span><span class="p">:</span> + <span class="k">return</span> <span class="n">_request_cat_web</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span> + + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"Startpages's category '%' is not yet implemented."</span><span class="p">,</span> <span class="n">startpage_categ</span><span class="p">)</span> + <span class="k">return</span> <span class="n">params</span></div> + + + +<span class="k">def</span> <span class="nf">_request_cat_web</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + + <span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en-US'</span><span class="p">)</span> + <span class="n">engine_language</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span 
class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en'</span><span class="p">)</span> + + <span class="c1"># build arguments</span> + <span class="n">args</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'cat'</span><span class="p">:</span> <span class="s1">'web'</span><span class="p">,</span> + <span class="s1">'t'</span><span class="p">:</span> <span class="s1">'device'</span><span class="p">,</span> + <span class="s1">'sc'</span><span class="p">:</span> <span class="n">get_sc_code</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">params</span><span class="p">),</span> <span class="c1"># hint: this func needs HTTP headers,</span> + <span class="s1">'with_date'</span><span class="p">:</span> <span class="n">time_range_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">],</span> <span class="s1">''</span><span class="p">),</span> + <span class="p">}</span> + + <span class="k">if</span> <span class="n">engine_language</span><span class="p">:</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'lui'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span> + + <span class="n">args</span><span class="p">[</span><span class="s1">'abp'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span> + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span 
class="s1">'pageno'</span><span class="p">]</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="n">args</span><span class="p">[</span><span class="s1">'page'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> + + <span class="c1"># build cookie</span> + <span class="n">lang_homepage</span> <span class="o">=</span> <span class="s1">'en'</span> + <span class="n">cookie</span> <span class="o">=</span> <span class="n">OrderedDict</span><span class="p">()</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'date_time'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'world'</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'disable_family_filter'</span><span class="p">]</span> <span class="o">=</span> <span class="n">safesearch_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]]</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'disable_open_in_new_window'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'0'</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'enable_post_method'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span> <span class="c1"># hint: POST</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'enable_proxy_safety_suggest'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'enable_stay_control'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'instant_answers'</span><span 
class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'lang_homepage'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'s/device/</span><span class="si">%s</span><span class="s1">/'</span> <span class="o">%</span> <span class="n">lang_homepage</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'num_of_results'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'10'</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'suggestions'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'wt_unit'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'celsius'</span> + + <span class="k">if</span> <span class="n">engine_language</span><span class="p">:</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'language_ui'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span> + + <span class="k">if</span> <span class="n">engine_region</span><span class="p">:</span> + <span class="n">cookie</span><span class="p">[</span><span class="s1">'search_results_region'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_region</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'preferences'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'N1N'</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s2">"</span><span class="si">%s</span><span 
class="s2">EEE</span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">cookie</span><span class="o">.</span><span class="n">items</span><span class="p">()])</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'cookie preferences: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'preferences'</span><span class="p">])</span> + + <span class="c1"># POST request</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"data: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]</span> <span class="o">=</span> <span class="n">args</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'method'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'POST'</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_url</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Origin'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Referer'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span 
class="o">+</span> <span class="s1">'/'</span> + <span class="c1"># is the Accept header needed?</span> + <span class="c1"># params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'</span> + + <span class="k">return</span> <span class="n">params</span> + + +<span class="c1"># get response from search-request</span> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">startpage_categ</span> <span class="o">==</span> <span class="s1">'web'</span><span class="p">:</span> + <span class="k">return</span> <span class="n">_response_cat_web</span><span class="p">(</span><span class="n">dom</span><span class="p">)</span> + + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"Startpages's category '%' is not yet implemented."</span><span class="p">,</span> <span class="n">startpage_categ</span><span class="p">)</span> + <span class="k">return</span> <span class="p">[]</span> + + +<span class="k">def</span> <span class="nf">_response_cat_web</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="c1"># parse results</span> + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@class="w-gl"]/div[contains(@class, 
"result")]'</span><span class="p">):</span> + <span class="n">links</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//a[contains(@class, "result-title result-link")]'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">links</span><span class="p">:</span> + <span class="k">continue</span> + <span class="n">link</span> <span class="o">=</span> <span class="n">links</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">link</span><span class="o">.</span><span class="n">attrib</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">)</span> + + <span class="c1"># block google-ad url's</span> + <span class="k">if</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">"^http(s|)://(www\.)?google\.[a-z]+/aclk.*$"</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span> + <span class="k">continue</span> + + <span class="c1"># block startpage search url's</span> + <span class="k">if</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">"^http(s|)://(www\.)?startpage\.com/do/search\?.*$"</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span> + <span class="k">continue</span> + + <span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">link</span><span class="p">,</span> <span class="s1">'h2'</span><span class="p">))</span> + <span class="n">content</span> <span class="o">=</span> 
<span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//p[contains(@class, "description")]'</span><span class="p">)</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="n">allow_none</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="ow">or</span> <span class="s1">''</span> + + <span class="n">published_date</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="c1"># check if search result starts with something like: "2 Sep 2014 ... "</span> + <span class="k">if</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]</span><span class="si">{2}</span><span class="s2"> [0-9]</span><span class="si">{4}</span><span class="s2"> \.\.\. 
"</span><span class="p">,</span> <span class="n">content</span><span class="p">):</span> + <span class="n">date_pos</span> <span class="o">=</span> <span class="n">content</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'...'</span><span class="p">)</span> <span class="o">+</span> <span class="mi">4</span> + <span class="n">date_string</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="mi">0</span> <span class="p">:</span> <span class="n">date_pos</span> <span class="o">-</span> <span class="mi">5</span><span class="p">]</span> + <span class="c1"># fix content string</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">date_pos</span><span class="p">:]</span> + + <span class="k">try</span><span class="p">:</span> + <span class="n">published_date</span> <span class="o">=</span> <span class="n">dateutil</span><span class="o">.</span><span class="n">parser</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">date_string</span><span class="p">,</span> <span class="n">dayfirst</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> + <span class="k">pass</span> + + <span class="c1"># check if search result starts with something like: "5 days ago ... "</span> + <span class="k">elif</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">"^[0-9]+ days? ago \.\.\. 
"</span><span class="p">,</span> <span class="n">content</span><span class="p">):</span> + <span class="n">date_pos</span> <span class="o">=</span> <span class="n">content</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'...'</span><span class="p">)</span> <span class="o">+</span> <span class="mi">4</span> + <span class="n">date_string</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="mi">0</span> <span class="p">:</span> <span class="n">date_pos</span> <span class="o">-</span> <span class="mi">5</span><span class="p">]</span> + + <span class="c1"># calculate datetime</span> + <span class="n">published_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\d+'</span><span class="p">,</span> <span class="n">date_string</span><span class="p">)</span><span class="o">.</span><span class="n">group</span><span class="p">()))</span> <span class="c1"># type: ignore</span> + + <span class="c1"># fix content string</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">date_pos</span><span class="p">:]</span> + + <span class="k">if</span> <span class="n">published_date</span><span class="p">:</span> + <span class="c1"># append result</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span 
class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">published_date</span><span class="p">})</span> + <span class="k">else</span><span class="p">:</span> + <span class="c1"># append result</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">})</span> + + <span class="c1"># return results</span> + <span class="k">return</span> <span class="n">results</span> + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/startpage.html#searx.engines.startpage.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch :ref:`languages <startpage languages>` and :ref:`regions <startpage</span> +<span class="sd"> regions>` from Startpage."""</span> + <span class="c1"># pylint: disable=too-many-branches</span> + + <span class="n">headers</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'User-Agent'</span><span class="p">:</span> <span class="n">gen_useragent</span><span class="p">(),</span> + <span class="s1">'Accept-Language'</span><span class="p">:</span> <span class="s2">"en-US,en;q=0.5"</span><span class="p">,</span> <span class="c1"># bing needs to set the English language</span> + <span 
class="p">}</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://www.startpage.com/do/settings'</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from Startpage is not OK."</span><span class="p">)</span> + + <span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + <span class="c1"># regions</span> + + <span class="n">sp_region_names</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">option</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//form[@name="settings"]//select[@name="search_results_region"]/option'</span><span class="p">):</span> + <span class="n">sp_region_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">option</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'value'</span><span class="p">))</span> + + <span class="k">for</span> <span class="n">eng_tag</span> <span class="ow">in</span> <span class="n">sp_region_names</span><span class="p">:</span> + <span class="k">if</span> <span class="n">eng_tag</span> <span class="o">==</span> <span 
class="s1">'all'</span><span class="p">:</span> + <span class="k">continue</span> + <span class="n">babel_region_tag</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'no_NO'</span><span class="p">:</span> <span class="s1">'nb_NO'</span><span class="p">}</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">)</span> <span class="c1"># norway</span> + + <span class="k">if</span> <span class="s1">'-'</span> <span class="ow">in</span> <span class="n">babel_region_tag</span><span class="p">:</span> + <span class="n">l</span><span class="p">,</span> <span class="n">r</span> <span class="o">=</span> <span class="n">babel_region_tag</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)</span> + <span class="n">r</span> <span class="o">=</span> <span class="n">r</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">l</span> <span class="o">+</span> <span class="s1">'_'</span> <span class="o">+</span> <span class="n">r</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'_'</span><span class="p">))</span> + + <span class="k">else</span><span class="p">:</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span 
class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">babel_region_tag</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'_'</span><span class="p">))</span> + + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: can't determine babel locale of startpage's locale </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">eng_tag</span><span class="p">)</span> + <span class="k">continue</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> + + <span 
class="c1"># languages</span> + + <span class="n">catalog_engine2code</span> <span class="o">=</span> <span class="p">{</span><span class="n">name</span><span class="o">.</span><span class="n">lower</span><span class="p">():</span> <span class="n">lang_code</span> <span class="k">for</span> <span class="n">lang_code</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="p">(</span><span class="s1">'en'</span><span class="p">)</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span> + + <span class="c1"># get the native name of every language known by babel</span> + + <span class="k">for</span> <span class="n">lang_code</span> <span class="ow">in</span> <span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">lang_code</span><span class="p">:</span> <span class="n">lang_code</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">babel</span><span class="o">.</span><span class="n">localedata</span><span class="o">.</span><span class="n">locale_identifiers</span><span class="p">()):</span> + <span class="n">native_name</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="p">(</span><span class="n">lang_code</span><span class="p">)</span><span class="o">.</span><span class="n">get_language_name</span><span class="p">()</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">native_name</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"ERROR: language name of 
startpage's language </span><span class="si">{</span><span class="n">lang_code</span><span class="si">}</span><span class="s2"> is unknown by babel"</span><span class="p">)</span> + <span class="k">continue</span> + <span class="n">native_name</span> <span class="o">=</span> <span class="n">native_name</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> + <span class="c1"># add native name exactly as it is</span> + <span class="n">catalog_engine2code</span><span class="p">[</span><span class="n">native_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">lang_code</span> + + <span class="c1"># add "normalized" language name (i.e. français becomes francais and español becomes espanol)</span> + <span class="n">unaccented_name</span> <span class="o">=</span> <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">c</span><span class="p">:</span> <span class="ow">not</span> <span class="n">combining</span><span class="p">(</span><span class="n">c</span><span class="p">),</span> <span class="n">normalize</span><span class="p">(</span><span class="s1">'NFKD'</span><span class="p">,</span> <span class="n">native_name</span><span class="p">)))</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">unaccented_name</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">unaccented_name</span><span class="o">.</span><span class="n">encode</span><span class="p">()):</span> + <span class="c1"># add only if result is ascii (otherwise "normalization" didn't work)</span> + <span class="n">catalog_engine2code</span><span class="p">[</span><span class="n">unaccented_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">lang_code</span> + + <span 
class="c1"># values that can't be determined by babel's languages names</span> + + <span class="n">catalog_engine2code</span><span class="o">.</span><span class="n">update</span><span class="p">(</span> + <span class="p">{</span> + <span class="c1"># traditional chinese used in ..</span> + <span class="s1">'fantizhengwen'</span><span class="p">:</span> <span class="s1">'zh_Hant'</span><span class="p">,</span> + <span class="c1"># Korean alphabet</span> + <span class="s1">'hangul'</span><span class="p">:</span> <span class="s1">'ko'</span><span class="p">,</span> + <span class="c1"># Malayalam is one of 22 scheduled languages of India.</span> + <span class="s1">'malayam'</span><span class="p">:</span> <span class="s1">'ml'</span><span class="p">,</span> + <span class="s1">'norsk'</span><span class="p">:</span> <span class="s1">'nb'</span><span class="p">,</span> + <span class="s1">'sinhalese'</span><span class="p">:</span> <span class="s1">'si'</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="n">skip_eng_tags</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'english_uk'</span><span class="p">,</span> <span class="c1"># SearXNG lang 'en' already maps to 'english'</span> + <span class="p">}</span> + + <span class="k">for</span> <span class="n">option</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//form[@name="settings"]//select[@name="language"]/option'</span><span class="p">):</span> + + <span class="n">eng_tag</span> <span class="o">=</span> <span class="n">option</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'value'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">eng_tag</span> <span class="ow">in</span> <span class="n">skip_eng_tags</span><span class="p">:</span> + <span class="k">continue</span> + 
<span class="n">name</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">option</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="c1"># type: ignore</span> + + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">catalog_engine2code</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">sxng_tag</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">catalog_engine2code</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span 
class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input 
type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/tineye.html b/_modules/searx/engines/tineye.html new file mode 100644 index 000000000..99885d2ad --- /dev/null +++ b/_modules/searx/engines/tineye.html @@ -0,0 +1,340 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.tineye — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.tineye</a></li> + </ul> + </div> + + <div 
class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.tineye</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This engine implements *Tineye - reverse image search*</span> + +<span class="sd">Using TinEye, you can search by image or perform what we call a reverse image</span> +<span class="sd">search. You can do that by uploading an image or searching by URL. You can also</span> +<span class="sd">simply drag and drop your images to start your search. TinEye constantly crawls</span> +<span class="sd">the web and adds images to its index. Today, the TinEye index is over 50.2</span> +<span class="sd">billion images `[tineye.com] <https://tineye.com/how>`_.</span> + +<span class="sd">.. hint::</span> + +<span class="sd"> This SearXNG engine only supports *'searching by URL'* and it does not use</span> +<span class="sd"> the official API `[api.tineye.com] <https://api.tineye.com/python/docs/>`_.</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> +<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span> +<span class="kn">from</span> <span class="nn">flask_babel</span> <span class="kn">import</span> <span class="n">gettext</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">()</span> + +<span class="n">about</span> 
<span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://tineye.com'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q2382535'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://api.tineye.com/python/docs/'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'online_url_search'</span> +<span class="sd">""":py:obj:`searx.search.processors.online_url_search`"""</span> + +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">False</span> +<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://tineye.com'</span> +<span class="n">search_string</span> <span class="o">=</span> <span class="s1">'/api/v1/result_json/?page=</span><span class="si">{page}</span><span class="s1">&</span><span class="si">{query}</span><span class="s1">'</span> + +<span class="n">FORMAT_NOT_SUPPORTED</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span> + <span class="s2">"Could not read that image url. This may be due to an unsupported file"</span> + <span class="s2">" format. 
TinEye only supports images that are JPEG, PNG, GIF, BMP, TIFF or WebP."</span> +<span class="p">)</span> +<span class="sd">"""TinEye error message"""</span> + +<span class="n">NO_SIGNATURE_ERROR</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span> + <span class="s2">"The image is too simple to find matches. TinEye requires a basic level of"</span> + <span class="s2">" visual detail to successfully identify matches."</span> +<span class="p">)</span> +<span class="sd">"""TinEye error message"""</span> + +<span class="n">DOWNLOAD_ERROR</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span><span class="s2">"The image could not be downloaded."</span><span class="p">)</span> +<span class="sd">"""TinEye error message"""</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online_url_search/tineye.html#searx.engines.tineye.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Build TinEye HTTP request using ``search_urls`` of a :py:obj:`engine_type`."""</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'raise_for_httperror'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span> + + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'search_urls'</span><span class="p">][</span><span class="s1">'data:image'</span><span class="p">]:</span> + <span class="n">query</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'search_urls'</span><span class="p">][</span><span class="s1">'data:image'</span><span class="p">]</span> + <span class="k">elif</span> <span class="n">params</span><span class="p">[</span><span 
class="s1">'search_urls'</span><span class="p">][</span><span class="s1">'http'</span><span class="p">]:</span> + <span class="n">query</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'search_urls'</span><span class="p">][</span><span class="s1">'http'</span><span class="p">]</span> + + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"query URL: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">query</span><span class="p">)</span> + <span class="n">query</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">query</span><span class="p">})</span> + + <span class="c1"># see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="n">search_string</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">query</span><span class="p">,</span> <span class="n">page</span><span class="o">=</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">])</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'Connection'</span><span class="p">:</span> <span class="s1">'keep-alive'</span><span class="p">,</span> + <span class="s1">'Accept-Encoding'</span><span class="p">:</span> <span class="s1">'gzip, defalte, br'</span><span class="p">,</span> + <span 
class="s1">'Host'</span><span class="p">:</span> <span class="s1">'tineye.com'</span><span class="p">,</span> + <span class="s1">'DNT'</span><span class="p">:</span> <span class="s1">'1'</span><span class="p">,</span> + <span class="s1">'TE'</span><span class="p">:</span> <span class="s1">'trailers'</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="parse_tineye_match"> +<a class="viewcode-back" href="../../../dev/engines/online_url_search/tineye.html#searx.engines.tineye.parse_tineye_match">[docs]</a> +<span class="k">def</span> <span class="nf">parse_tineye_match</span><span class="p">(</span><span class="n">match_json</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Takes parsed JSON from the API server and turns it into a :py:obj:`dict`</span> +<span class="sd"> object.</span> + +<span class="sd"> Attributes `(class Match) <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__</span> + +<span class="sd"> - `image_url`, link to the result image.</span> +<span class="sd"> - `domain`, domain this result was found on.</span> +<span class="sd"> - `score`, a number (0 to 100) that indicates how closely the images match.</span> +<span class="sd"> - `width`, image width in pixels.</span> +<span class="sd"> - `height`, image height in pixels.</span> +<span class="sd"> - `size`, image area in pixels.</span> +<span class="sd"> - `format`, image format.</span> +<span class="sd"> - `filesize`, image size in bytes.</span> +<span class="sd"> - `overlay`, overlay URL.</span> +<span class="sd"> - `tags`, whether this match belongs to a collection or stock domain.</span> + +<span class="sd"> - `backlinks`, a list of Backlink objects pointing to the original websites</span> +<span class="sd"> and image URLs. 
List items are instances of :py:obj:`dict`, (`Backlink</span> +<span class="sd"> <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__):</span> + +<span class="sd"> - `url`, the image URL to the image.</span> +<span class="sd"> - `backlink`, the original website URL.</span> +<span class="sd"> - `crawl_date`, the date the image was crawled.</span> + +<span class="sd"> """</span> + + <span class="c1"># HINT: there exists an alternative backlink dict in the domains list / e.g.::</span> + <span class="c1">#</span> + <span class="c1"># match_json['domains'][0]['backlinks']</span> + + <span class="n">backlinks</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">if</span> <span class="s2">"backlinks"</span> <span class="ow">in</span> <span class="n">match_json</span><span class="p">:</span> + + <span class="k">for</span> <span class="n">backlink_json</span> <span class="ow">in</span> <span class="n">match_json</span><span class="p">[</span><span class="s2">"backlinks"</span><span class="p">]:</span> + <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">backlink_json</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> + <span class="k">continue</span> + + <span class="n">crawl_date</span> <span class="o">=</span> <span class="n">backlink_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"crawl_date"</span><span class="p">)</span> + <span class="k">if</span> <span class="n">crawl_date</span><span class="p">:</span> + <span class="n">crawl_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">crawl_date</span><span class="p">,</span> <span class="s1">'%Y-%m-</span><span class="si">%d</span><span class="s1">'</span><span class="p">)</span> + <span class="k">else</span><span 
class="p">:</span> + <span class="n">crawl_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">min</span> + + <span class="n">backlinks</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">backlink_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"url"</span><span class="p">),</span> + <span class="s1">'backlink'</span><span class="p">:</span> <span class="n">backlink_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"backlink"</span><span class="p">),</span> + <span class="s1">'crawl_date'</span><span class="p">:</span> <span class="n">crawl_date</span><span class="p">,</span> + <span class="s1">'image_name'</span><span class="p">:</span> <span class="n">backlink_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"image_name"</span><span class="p">),</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="p">{</span> + <span class="s1">'image_url'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"image_url"</span><span class="p">),</span> + <span class="s1">'domain'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"domain"</span><span class="p">),</span> + <span class="s1">'score'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"score"</span><span class="p">),</span> + <span class="s1">'width'</span><span class="p">:</span> <span class="n">match_json</span><span 
class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"width"</span><span class="p">),</span> + <span class="s1">'height'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"height"</span><span class="p">),</span> + <span class="s1">'size'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"size"</span><span class="p">),</span> + <span class="s1">'image_format'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"format"</span><span class="p">),</span> + <span class="s1">'filesize'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"filesize"</span><span class="p">),</span> + <span class="s1">'overlay'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"overlay"</span><span class="p">),</span> + <span class="s1">'tags'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"tags"</span><span class="p">),</span> + <span class="s1">'backlinks'</span><span class="p">:</span> <span class="n">backlinks</span><span class="p">,</span> + <span class="p">}</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online_url_search/tineye.html#searx.engines.tineye.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Parse HTTP 
response from TinEye."""</span> + + <span class="c1"># handle the 422 client side errors, and the possible 400 status code error</span> + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="ow">in</span> <span class="p">(</span><span class="mi">400</span><span class="p">,</span> <span class="mi">422</span><span class="p">):</span> + <span class="n">json_data</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + <span class="n">suggestions</span> <span class="o">=</span> <span class="n">json_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'suggestions'</span><span class="p">,</span> <span class="p">{})</span> + <span class="n">message</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'HTTP Status Code: </span><span class="si">{</span><span class="n">resp</span><span class="o">.</span><span class="n">status_code</span><span class="si">}</span><span class="s1">'</span> + + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">422</span><span class="p">:</span> + <span class="n">s_key</span> <span class="o">=</span> <span class="n">suggestions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'key'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> + <span class="k">if</span> <span class="n">s_key</span> <span class="o">==</span> <span class="s2">"Invalid image URL"</span><span class="p">:</span> + <span class="c1"># test https://docs.searxng.org/_static/searxng-wordmark.svg</span> + <span class="n">message</span> <span class="o">=</span> <span class="n">FORMAT_NOT_SUPPORTED</span> + <span class="k">elif</span> <span class="n">s_key</span> <span 
class="o">==</span> <span class="s1">'NO_SIGNATURE_ERROR'</span><span class="p">:</span> + <span class="c1"># test https://pngimg.com/uploads/dot/dot_PNG4.png</span> + <span class="n">message</span> <span class="o">=</span> <span class="n">NO_SIGNATURE_ERROR</span> + <span class="k">elif</span> <span class="n">s_key</span> <span class="o">==</span> <span class="s1">'Download Error'</span><span class="p">:</span> + <span class="c1"># test https://notexists</span> + <span class="n">message</span> <span class="o">=</span> <span class="n">DOWNLOAD_ERROR</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"Unknown suggestion key encountered: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">s_key</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> <span class="c1"># 400</span> + <span class="n">description</span> <span class="o">=</span> <span class="n">suggestions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'description'</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">description</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span> + <span class="n">message</span> <span class="o">=</span> <span class="s1">','</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">description</span><span class="p">)</span> + + <span class="c1"># see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023</span> + <span class="c1"># results.append({'answer': message})</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">message</span><span class="p">)</span> + <span 
class="k">return</span> <span class="p">[]</span> + + <span class="c1"># Raise for all other responses</span> + <span class="n">resp</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">json_data</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + + <span class="k">for</span> <span class="n">match_json</span> <span class="ow">in</span> <span class="n">json_data</span><span class="p">[</span><span class="s1">'matches'</span><span class="p">]:</span> + + <span class="n">tineye_match</span> <span class="o">=</span> <span class="n">parse_tineye_match</span><span class="p">(</span><span class="n">match_json</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'backlinks'</span><span class="p">]:</span> + <span class="k">continue</span> + + <span class="n">backlink</span> <span class="o">=</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'backlinks'</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'images.html'</span><span class="p">,</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">backlink</span><span class="p">[</span><span class="s1">'backlink'</span><span class="p">],</span> + <span class="s1">'thumbnail_src'</span><span class="p">:</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'image_url'</span><span class="p">],</span> + <span class="s1">'source'</span><span class="p">:</span> <span 
class="n">backlink</span><span class="p">[</span><span class="s1">'url'</span><span class="p">],</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">backlink</span><span class="p">[</span><span class="s1">'image_name'</span><span class="p">],</span> + <span class="s1">'img_src'</span><span class="p">:</span> <span class="n">backlink</span><span class="p">[</span><span class="s1">'url'</span><span class="p">],</span> + <span class="s1">'format'</span><span class="p">:</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'image_format'</span><span class="p">],</span> + <span class="s1">'width'</span><span class="p">:</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'width'</span><span class="p">],</span> + <span class="s1">'height'</span><span class="p">:</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'height'</span><span class="p">],</span> + <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">backlink</span><span class="p">[</span><span class="s1">'crawl_date'</span><span class="p">],</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="c1"># append number of results</span> + + <span class="n">number_of_results</span> <span class="o">=</span> <span class="n">json_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'num_matches'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">number_of_results</span><span class="p">:</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'number_of_results'</span><span class="p">:</span> <span class="n">number_of_results</span><span class="p">})</span> + + <span class="k">return</span> <span class="n">results</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + 
<span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" 
value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/torznab.html b/_modules/searx/engines/torznab.html new file mode 100644 index 000000000..e7ff55d68 --- /dev/null +++ b/_modules/searx/engines/torznab.html @@ -0,0 +1,367 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.torznab — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.torznab</a></li> + </ul> + </div> + + 
<div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.torznab</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Torznab_ is an API specification that provides a standardized way to query</span> +<span class="sd">torrent site for content. It is used by a number of torrent applications,</span> +<span class="sd">including Prowlarr_ and Jackett_.</span> + +<span class="sd">Using this engine together with Prowlarr_ or Jackett_ allows you to search</span> +<span class="sd">a huge number of torrent sites which are not directly supported.</span> + +<span class="sd">Configuration</span> +<span class="sd">=============</span> + +<span class="sd">The engine has the following settings:</span> + +<span class="sd">``base_url``:</span> +<span class="sd"> Torznab endpoint URL.</span> + +<span class="sd">``api_key``:</span> +<span class="sd"> The API key to use for authentication.</span> + +<span class="sd">``torznab_categories``:</span> +<span class="sd"> The categories to use for searching. This is a list of category IDs. See</span> +<span class="sd"> Prowlarr-categories_ or Jackett-categories_ for more information.</span> + +<span class="sd">``show_torrent_files``:</span> +<span class="sd"> Whether to show the torrent file in the search results. Be careful as using</span> +<span class="sd"> this with Prowlarr_ or Jackett_ leaks the API key. This should be used only</span> +<span class="sd"> if you are querying a Torznab endpoint without authentication or if the</span> +<span class="sd"> instance is private. Be aware that private trackers may ban you if you share</span> +<span class="sd"> the torrent file. Defaults to ``false``.</span> + +<span class="sd">``show_magnet_links``:</span> +<span class="sd"> Whether to show the magnet link in the search results. 
Be aware that private</span> +<span class="sd"> trackers may ban you if you share the magnet link. Defaults to ``true``.</span> + +<span class="sd">.. _Torznab:</span> +<span class="sd"> https://torznab.github.io/spec-1.3-draft/index.html</span> +<span class="sd">.. _Prowlarr:</span> +<span class="sd"> https://github.com/Prowlarr/Prowlarr</span> +<span class="sd">.. _Jackett:</span> +<span class="sd"> https://github.com/Jackett/Jackett</span> +<span class="sd">.. _Prowlarr-categories:</span> +<span class="sd"> https://wiki.servarr.com/en/prowlarr/cardigann-yml-definition#categories</span> +<span class="sd">.. _Jackett-categories:</span> +<span class="sd"> https://github.com/Jackett/Jackett/wiki/Jackett-Categories</span> + +<span class="sd">Implementations</span> +<span class="sd">===============</span> + +<span class="sd">"""</span> +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Any</span> +<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">quote</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">etree</span> <span class="c1"># type: ignore</span> + +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineAPIException</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span 
class="n">humanize_bytes</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">httpx</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="c1"># engine settings</span> +<span class="n">about</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s2">"https://torznab.github.io/spec-1.3-draft"</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'XML'</span><span class="p">,</span> +<span class="p">}</span> +<span class="n">categories</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'files'</span><span class="p">]</span> +<span class="n">paging</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span> +<span class="n">time_range_support</span><span class="p">:</span> <span class="nb">bool</span> <span 
class="o">=</span> <span class="kc">False</span> + +<span class="c1"># defined in settings.yml</span> +<span class="c1"># example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab"</span> +<span class="n">base_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">''</span> +<span class="n">api_key</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">''</span> +<span class="c1"># https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories</span> +<span class="n">torznab_categories</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> +<span class="n">show_torrent_files</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span> +<span class="n">show_magnet_links</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span> + + +<div class="viewcode-block" id="init"> +<a class="viewcode-back" href="../../../dev/engines/online/torznab.html#searx.engines.torznab.init">[docs]</a> +<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> <span class="c1"># pylint: disable=unused-argument</span> +<span class="w"> </span><span class="sd">"""Initialize the engine."""</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">base_url</span><span class="p">)</span> <span class="o"><</span> <span class="mi">1</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'missing torznab base_url'</span><span class="p">)</span></div> + + + +<div 
class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/torznab.html#searx.engines.torznab.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Build the request params."""</span> + <span class="n">search_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'?t=search&q=</span><span class="si">{search_query}</span><span class="s1">'</span> + + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">api_key</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> + <span class="n">search_url</span> <span class="o">+=</span> <span class="s1">'&apikey=</span><span class="si">{api_key}</span><span class="s1">'</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">torznab_categories</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> + <span class="n">search_url</span> <span class="o">+=</span> <span class="s1">'&cat=</span><span class="si">{torznab_categories}</span><span class="s1">'</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span 
class="n">search_url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> + <span class="n">search_query</span><span class="o">=</span><span class="n">quote</span><span class="p">(</span><span class="n">query</span><span class="p">),</span> <span class="n">api_key</span><span class="o">=</span><span class="n">api_key</span><span class="p">,</span> <span class="n">torznab_categories</span><span class="o">=</span><span class="s2">","</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="nb">str</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">torznab_categories</span><span class="p">])</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online/torznab.html#searx.engines.torznab.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">:</span> <span class="n">httpx</span><span class="o">.</span><span class="n">Response</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]:</span> +<span class="w"> </span><span class="sd">"""Parse the XML response and return a list of results."""</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">search_results</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">XML</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">content</span><span class="p">)</span> 
+ + <span class="c1"># handle errors: https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes</span> + <span class="k">if</span> <span class="n">search_results</span><span class="o">.</span><span class="n">tag</span> <span class="o">==</span> <span class="s2">"error"</span><span class="p">:</span> + <span class="k">raise</span> <span class="n">SearxEngineAPIException</span><span class="p">(</span><span class="n">search_results</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"description"</span><span class="p">))</span> + + <span class="n">channel</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span> <span class="o">=</span> <span class="n">search_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + + <span class="n">item</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span> + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">channel</span><span class="o">.</span><span class="n">iterfind</span><span class="p">(</span><span class="s1">'item'</span><span class="p">):</span> + <span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="n">build_result</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">result</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span></div> + + + +<div class="viewcode-block" id="build_result"> +<a class="viewcode-back" 
href="../../../dev/engines/online/torznab.html#searx.engines.torznab.build_result">[docs]</a> +<span class="k">def</span> <span class="nf">build_result</span><span class="p">(</span><span class="n">item</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Build a result from a XML item."""</span> + + <span class="c1"># extract attributes from XML</span> + <span class="c1"># see https://torznab.github.io/spec-1.3-draft/torznab/Specification-v1.3.html#predefined-attributes</span> + <span class="n">enclosure</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'enclosure'</span><span class="p">)</span> + <span class="n">enclosure_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">if</span> <span class="n">enclosure</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">enclosure_url</span> <span class="o">=</span> <span class="n">enclosure</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'url'</span><span class="p">)</span> + + <span class="n">filesize</span> <span class="o">=</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'size'</span><span class="p">)</span> + 
<span class="k">if</span> <span class="ow">not</span> <span class="n">filesize</span> <span class="ow">and</span> <span class="n">enclosure</span><span class="p">:</span> + <span class="n">filesize</span> <span class="o">=</span> <span class="n">enclosure</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'length'</span><span class="p">)</span> + + <span class="n">guid</span> <span class="o">=</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'guid'</span><span class="p">)</span> + <span class="n">comments</span> <span class="o">=</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'comments'</span><span class="p">)</span> + <span class="n">pubDate</span> <span class="o">=</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'pubDate'</span><span class="p">)</span> + <span class="n">seeders</span> <span class="o">=</span> <span class="n">get_torznab_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'seeders'</span><span class="p">)</span> + <span class="n">leechers</span> <span class="o">=</span> <span class="n">get_torznab_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'leechers'</span><span class="p">)</span> + <span class="n">peers</span> <span class="o">=</span> <span class="n">get_torznab_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'peers'</span><span class="p">)</span> + + <span class="c1"># map attributes to searx result</span> + <span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span 
class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'torrent.html'</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">),</span> + <span class="s1">'filesize'</span><span class="p">:</span> <span class="n">humanize_bytes</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">filesize</span><span class="p">))</span> <span class="k">if</span> <span class="n">filesize</span> <span class="k">else</span> <span class="kc">None</span><span class="p">,</span> + <span class="s1">'files'</span><span class="p">:</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'files'</span><span class="p">),</span> + <span class="s1">'seed'</span><span class="p">:</span> <span class="n">seeders</span><span class="p">,</span> + <span class="s1">'leech'</span><span class="p">:</span> <span class="n">_map_leechers</span><span class="p">(</span><span class="n">leechers</span><span class="p">,</span> <span class="n">seeders</span><span class="p">,</span> <span class="n">peers</span><span class="p">),</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">_map_result_url</span><span class="p">(</span><span class="n">guid</span><span class="p">,</span> <span class="n">comments</span><span class="p">),</span> + <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">_map_published_date</span><span class="p">(</span><span class="n">pubDate</span><span class="p">),</span> + <span class="s1">'torrentfile'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + 
<span class="s1">'magnetlink'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="p">}</span> + + <span class="n">link</span> <span class="o">=</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'link'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">show_torrent_files</span><span class="p">:</span> + <span class="n">result</span><span class="p">[</span><span class="s1">'torrentfile'</span><span class="p">]</span> <span class="o">=</span> <span class="n">_map_torrent_file</span><span class="p">(</span><span class="n">link</span><span class="p">,</span> <span class="n">enclosure_url</span><span class="p">)</span> + <span class="k">if</span> <span class="n">show_magnet_links</span><span class="p">:</span> + <span class="n">magneturl</span> <span class="o">=</span> <span class="n">get_torznab_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'magneturl'</span><span class="p">)</span> + <span class="n">result</span><span class="p">[</span><span class="s1">'magnetlink'</span><span class="p">]</span> <span class="o">=</span> <span class="n">_map_magnet_link</span><span class="p">(</span><span class="n">magneturl</span><span class="p">,</span> <span class="n">guid</span><span class="p">,</span> <span class="n">enclosure_url</span><span class="p">,</span> <span class="n">link</span><span class="p">)</span> + <span class="k">return</span> <span class="n">result</span></div> + + + +<span class="k">def</span> <span class="nf">_map_result_url</span><span class="p">(</span><span class="n">guid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">comments</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span 
class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">if</span> <span class="n">guid</span> <span class="ow">and</span> <span class="n">guid</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'http'</span><span class="p">):</span> + <span class="k">return</span> <span class="n">guid</span> + <span class="k">if</span> <span class="n">comments</span> <span class="ow">and</span> <span class="n">comments</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'http'</span><span class="p">):</span> + <span class="k">return</span> <span class="n">comments</span> + <span class="k">return</span> <span class="kc">None</span> + + +<span class="k">def</span> <span class="nf">_map_leechers</span><span class="p">(</span><span class="n">leechers</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">seeders</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">peers</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">if</span> <span class="n">leechers</span><span class="p">:</span> + <span class="k">return</span> <span class="n">leechers</span> + <span class="k">if</span> <span class="n">seeders</span> <span class="ow">and</span> <span class="n">peers</span><span class="p">:</span> + <span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="nb">int</span><span 
class="p">(</span><span class="n">peers</span><span class="p">)</span> <span class="o">-</span> <span class="nb">int</span><span class="p">(</span><span class="n">seeders</span><span class="p">))</span> + <span class="k">return</span> <span class="kc">None</span> + + +<span class="k">def</span> <span class="nf">_map_published_date</span><span class="p">(</span><span class="n">pubDate</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">datetime</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">if</span> <span class="n">pubDate</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">try</span><span class="p">:</span> + <span class="k">return</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">pubDate</span><span class="p">,</span> <span class="s1">'</span><span class="si">%a</span><span class="s1">, </span><span class="si">%d</span><span class="s1"> %b %Y %H:%M:%S %z'</span><span class="p">)</span> + <span class="k">except</span> <span class="p">(</span><span class="ne">ValueError</span><span class="p">,</span> <span class="ne">TypeError</span><span class="p">)</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"ignore exception (publishedDate): </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span> + <span class="k">return</span> <span class="kc">None</span> + + +<span class="k">def</span> <span class="nf">_map_torrent_file</span><span class="p">(</span><span class="n">link</span><span class="p">:</span> 
<span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">enclosure_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">if</span> <span class="n">link</span> <span class="ow">and</span> <span class="n">link</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'http'</span><span class="p">):</span> + <span class="k">return</span> <span class="n">link</span> + <span class="k">if</span> <span class="n">enclosure_url</span> <span class="ow">and</span> <span class="n">enclosure_url</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'http'</span><span class="p">):</span> + <span class="k">return</span> <span class="n">enclosure_url</span> + <span class="k">return</span> <span class="kc">None</span> + + +<span class="k">def</span> <span class="nf">_map_magnet_link</span><span class="p">(</span> + <span class="n">magneturl</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> + <span class="n">guid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> + <span class="n">enclosure_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> + <span class="n">link</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> +<span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span 
class="p">:</span> + <span class="k">if</span> <span class="n">magneturl</span> <span class="ow">and</span> <span class="n">magneturl</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'magnet'</span><span class="p">):</span> + <span class="k">return</span> <span class="n">magneturl</span> + <span class="k">if</span> <span class="n">guid</span> <span class="ow">and</span> <span class="n">guid</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'magnet'</span><span class="p">):</span> + <span class="k">return</span> <span class="n">guid</span> + <span class="k">if</span> <span class="n">enclosure_url</span> <span class="ow">and</span> <span class="n">enclosure_url</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'magnet'</span><span class="p">):</span> + <span class="k">return</span> <span class="n">enclosure_url</span> + <span class="k">if</span> <span class="n">link</span> <span class="ow">and</span> <span class="n">link</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'magnet'</span><span class="p">):</span> + <span class="k">return</span> <span class="n">link</span> + <span class="k">return</span> <span class="kc">None</span> + + +<div class="viewcode-block" id="get_attribute"> +<a class="viewcode-back" href="../../../dev/engines/online/torznab.html#searx.engines.torznab.get_attribute">[docs]</a> +<span class="k">def</span> <span class="nf">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span><span class="p">,</span> <span class="n">property_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span 
class="kc">None</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Get attribute from item."""</span> + <span class="n">property_element</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="n">property_name</span><span class="p">)</span> + <span class="k">if</span> <span class="n">property_element</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">property_element</span><span class="o">.</span><span class="n">text</span> + <span class="k">return</span> <span class="kc">None</span></div> + + + +<div class="viewcode-block" id="get_torznab_attribute"> +<a class="viewcode-back" href="../../../dev/engines/online/torznab.html#searx.engines.torznab.get_torznab_attribute">[docs]</a> +<span class="k">def</span> <span class="nf">get_torznab_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span><span class="p">,</span> <span class="n">attribute_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Get torznab special attribute from item."""</span> + <span class="n">element</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span> + 
<span class="s1">'.//torznab:attr[@name="</span><span class="si">{attribute_name}</span><span class="s1">"]'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">attribute_name</span><span class="o">=</span><span class="n">attribute_name</span><span class="p">),</span> + <span class="p">{</span><span class="s1">'torznab'</span><span class="p">:</span> <span class="s1">'http://torznab.com/schemas/2015/feed'</span><span class="p">},</span> + <span class="p">)</span> + <span class="k">if</span> <span class="n">element</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">element</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span> + <span class="k">return</span> <span class="kc">None</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a 
class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/voidlinux.html b/_modules/searx/engines/voidlinux.html new file mode 100644 index 000000000..418b87284 --- /dev/null +++ b/_modules/searx/engines/voidlinux.html @@ -0,0 +1,201 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.voidlinux — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.voidlinux</a></li> + </ul> + 
</div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.voidlinux</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""SearXNG engine for `Void Linux binary packages`_. Void is a general purpose</span> +<span class="sd">operating system, based on the monolithic Linux kernel. Its package system</span> +<span class="sd">allows you to quickly install, update and remove software; software is provided</span> +<span class="sd">in binary packages or can be built directly from sources with the help of the</span> +<span class="sd">XBPS source packages collection.</span> + +<span class="sd">.. _Void Linux binary packages: https://voidlinux.org/packages/</span> + +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">re</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">quote_plus</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">humanize_bytes</span> + +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'website'</span><span class="p">:</span> <span class="s1">'https://voidlinux.org/packages/'</span><span class="p">,</span> + <span class="s1">'wikidata_id'</span><span class="p">:</span> <span class="s1">'Q19310966'</span><span class="p">,</span> + <span class="s1">'use_official_api'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s1">'official_api_documentation'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s1">'require_api_key'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s1">'results'</span><span class="p">:</span> 
<span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'packages'</span><span class="p">,</span> <span class="s1">'it'</span><span class="p">]</span> + +<span class="n">base_url</span> <span class="o">=</span> <span class="s2">"https://xq-api.voidlinux.org"</span> +<span class="n">pkg_repo_url</span> <span class="o">=</span> <span class="s2">"https://github.com/void-linux/void-packages"</span> + +<span class="n">void_arch</span> <span class="o">=</span> <span class="s1">'x86_64'</span> +<span class="sd">"""Default architecture to search for. For valid values see :py:obj:`ARCH_RE`"""</span> + +<span class="n">ARCH_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'aarch64-musl|armv6l-musl|armv7l-musl|x86_64-musl|aarch64|armv6l|armv7l|i686|x86_64'</span><span class="p">)</span> +<span class="sd">"""Regular expression that match a architecture in the query string."""</span> + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + <span class="n">arch_path</span> <span class="o">=</span> <span class="n">ARCH_RE</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">query</span><span class="p">)</span> + <span class="k">if</span> <span class="n">arch_path</span><span class="p">:</span> + <span class="n">arch_path</span> <span class="o">=</span> <span class="n">arch_path</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> + <span class="n">query</span> <span class="o">=</span> <span class="n">query</span><span class="o">.</span><span class="n">replace</span><span 
class="p">(</span><span class="n">arch_path</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">arch_path</span> <span class="o">=</span> <span class="n">void_arch</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">base_url</span><span class="si">}</span><span class="s2">/v1/query/</span><span class="si">{</span><span class="n">arch_path</span><span class="si">}</span><span class="s2">?q=</span><span class="si">{</span><span class="n">quote_plus</span><span class="p">(</span><span class="n">query</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span> + <span class="k">return</span> <span class="n">params</span> + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online/void.html#searx.engines.voidlinux.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""</span> +<span class="sd"> At Void Linux, several packages sometimes share the same source code</span> +<span class="sd"> (template) and therefore also have the same URL. 
Results with identical</span> +<span class="sd"> URLs are merged as one result for SearXNG.</span> +<span class="sd"> """</span> + + <span class="n">packages</span> <span class="o">=</span> <span class="p">{}</span> + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()[</span><span class="s1">'data'</span><span class="p">]:</span> + + <span class="c1"># 32bit and dbg packages don't have their own package templates</span> + <span class="n">github_slug</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="sa">r</span><span class="s2">"-(32bit|dbg)$"</span><span class="p">,</span> <span class="s2">""</span><span class="p">,</span> <span class="n">result</span><span class="p">[</span><span class="s1">'name'</span><span class="p">])</span> + <span class="n">pkg_url</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">pkg_repo_url</span><span class="si">}</span><span class="s2">/tree/master/srcpkgs/</span><span class="si">{</span><span class="n">github_slug</span><span class="si">}</span><span class="s2">"</span> + + <span class="n">pkg_list</span> <span class="o">=</span> <span class="n">packages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">pkg_url</span><span class="p">,</span> <span class="p">[])</span> + <span class="n">pkg_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'name'</span><span class="p">],</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"</span><span 
class="si">{</span><span class="n">result</span><span class="p">[</span><span class="s1">'short_desc'</span><span class="p">]</span><span class="si">}</span><span class="s2"> - </span><span class="si">{</span><span class="n">humanize_bytes</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s1">'filename_size'</span><span class="p">])</span><span class="si">}</span><span class="s2">"</span><span class="p">,</span> + <span class="s1">'package_name'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'name'</span><span class="p">],</span> + <span class="s1">'version'</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"v</span><span class="si">{</span><span class="n">result</span><span class="p">[</span><span class="s1">'version'</span><span class="p">]</span><span class="si">}</span><span class="s2">_</span><span class="si">{</span><span class="n">result</span><span class="p">[</span><span class="s1">'revision'</span><span class="p">]</span><span class="si">}</span><span class="s2">"</span><span class="p">,</span> + <span class="s1">'tags'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'repository'</span><span class="p">],</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="n">packages</span><span class="p">[</span><span class="n">pkg_url</span><span class="p">]</span> <span class="o">=</span> <span class="n">pkg_list</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">pkg_url</span><span class="p">,</span> <span class="n">pkg_list</span> <span class="ow">in</span> <span class="n">packages</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span 
class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">pkg_url</span><span class="p">,</span> + <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'packages.html'</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="s1">' | '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">x</span><span class="p">[</span><span class="s1">'title'</span><span class="p">]</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">pkg_list</span><span class="p">),</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">pkg_list</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'content'</span><span class="p">],</span> + <span class="s1">'package_name'</span><span class="p">:</span> <span class="s1">' | '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">x</span><span class="p">[</span><span class="s1">'package_name'</span><span class="p">]</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">pkg_list</span><span class="p">),</span> + <span class="s1">'version'</span><span class="p">:</span> <span class="n">pkg_list</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'version'</span><span class="p">],</span> + <span class="s1">'tags'</span><span class="p">:</span> <span class="p">[</span><span class="n">x</span><span class="p">[</span><span class="s1">'tags'</span><span class="p">]</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">pkg_list</span><span class="p">],</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="k">return</span> <span class="n">results</span></div> + +</pre></div> + + <div 
class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" 
autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/wikidata.html b/_modules/searx/engines/wikidata.html new file mode 100644 index 000000000..c939f4e71 --- /dev/null +++ b/_modules/searx/engines/wikidata.html @@ -0,0 +1,910 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.wikidata — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.wikidata</a></li> + </ul> + </div> + 
+ <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.wikidata</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This module implements the Wikidata engine. Some implementations are shared</span> +<span class="sd">from :ref:`wikipedia engine`.</span> + +<span class="sd">"""</span> +<span class="c1"># pylint: disable=missing-class-docstring</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">from</span> <span class="nn">hashlib</span> <span class="kn">import</span> <span class="n">md5</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span><span class="p">,</span> <span class="n">unquote</span> +<span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">loads</span> + +<span class="kn">from</span> <span class="nn">dateutil.parser</span> <span class="kn">import</span> <span class="n">isoparse</span> +<span class="kn">from</span> <span class="nn">babel.dates</span> <span class="kn">import</span> <span class="n">format_datetime</span><span class="p">,</span> <span class="n">format_date</span><span class="p">,</span> <span class="n">format_time</span><span class="p">,</span> <span class="n">get_datetime_format</span> + +<span class="kn">from</span> <span class="nn">searx.data</span> <span class="kn">import</span> <span class="n">WIKIDATA_UNITS</span> +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">post</span><span class="p">,</span> <span class="n">get</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span 
class="n">searx_useragent</span><span class="p">,</span> <span class="n">get_string_replaces_function</span> +<span class="kn">from</span> <span class="nn">searx.external_urls</span> <span class="kn">import</span> <span class="n">get_external_url</span><span class="p">,</span> <span class="n">get_earth_coordinates_url</span><span class="p">,</span> <span class="n">area_to_osm_zoom</span> +<span class="kn">from</span> <span class="nn">searx.engines.wikipedia</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">fetch_wikimedia_traits</span><span class="p">,</span> + <span class="n">get_wiki_params</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://wikidata.org/'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q2013'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://query.wikidata.org/'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span 
class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="n">display_type</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"infobox"</span><span class="p">]</span> +<span class="sd">"""A list of display types composed from ``infobox`` and ``list``. The latter</span> +<span class="sd">one will add a hit to the result list. The first one will show a hit in the</span> +<span class="sd">info box. Both values can be set, or one of the two can be set."""</span> + + +<span class="c1"># SPARQL</span> +<span class="n">SPARQL_ENDPOINT_URL</span> <span class="o">=</span> <span class="s1">'https://query.wikidata.org/sparql'</span> +<span class="n">SPARQL_EXPLAIN_URL</span> <span class="o">=</span> <span class="s1">'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'</span> +<span class="n">WIKIDATA_PROPERTIES</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'P434'</span><span class="p">:</span> <span class="s1">'MusicBrainz'</span><span class="p">,</span> + <span class="s1">'P435'</span><span class="p">:</span> <span class="s1">'MusicBrainz'</span><span class="p">,</span> + <span class="s1">'P436'</span><span class="p">:</span> <span class="s1">'MusicBrainz'</span><span class="p">,</span> + <span class="s1">'P966'</span><span class="p">:</span> <span class="s1">'MusicBrainz'</span><span class="p">,</span> + <span class="s1">'P345'</span><span class="p">:</span> <span class="s1">'IMDb'</span><span class="p">,</span> + <span class="s1">'P2397'</span><span class="p">:</span> <span class="s1">'YouTube'</span><span class="p">,</span> + <span class="s1">'P1651'</span><span class="p">:</span> <span class="s1">'YouTube'</span><span class="p">,</span> + <span class="s1">'P2002'</span><span class="p">:</span> <span class="s1">'Twitter'</span><span class="p">,</span> + <span class="s1">'P2013'</span><span class="p">:</span> <span 
class="s1">'Facebook'</span><span class="p">,</span> + <span class="s1">'P2003'</span><span class="p">:</span> <span class="s1">'Instagram'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># SERVICE wikibase:mwapi : https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/MWAPI</span> +<span class="c1"># SERVICE wikibase:label: https://en.wikibooks.org/wiki/SPARQL/SERVICE_-_Label#Manual_Label_SERVICE</span> +<span class="c1"># https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates</span> +<span class="c1"># https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format#Data_model</span> +<span class="c1"># optimization:</span> +<span class="c1"># * https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/query_optimization</span> +<span class="c1"># * https://github.com/blazegraph/database/wiki/QueryHints</span> +<span class="n">QUERY_TEMPLATE</span> <span class="o">=</span> <span class="s2">"""</span> +<span class="s2">SELECT ?item ?itemLabel ?itemDescription ?lat ?long %SELECT%</span> +<span class="s2">WHERE</span> +<span class="s2">{</span> +<span class="s2"> SERVICE wikibase:mwapi {</span> +<span class="s2"> bd:serviceParam wikibase:endpoint "www.wikidata.org";</span> +<span class="s2"> wikibase:api "EntitySearch";</span> +<span class="s2"> wikibase:limit 1;</span> +<span class="s2"> mwapi:search "%QUERY%";</span> +<span class="s2"> mwapi:language "%LANGUAGE%".</span> +<span class="s2"> ?item wikibase:apiOutputItem mwapi:item.</span> +<span class="s2"> }</span> +<span class="s2"> hint:Prior hint:runFirst "true".</span> + +<span class="s2"> %WHERE%</span> + +<span class="s2"> SERVICE wikibase:label {</span> +<span class="s2"> bd:serviceParam wikibase:language "%LANGUAGE%,en".</span> +<span class="s2"> ?item rdfs:label ?itemLabel .</span> +<span class="s2"> ?item schema:description ?itemDescription .</span> +<span class="s2"> %WIKIBASE_LABELS%</span> +<span class="s2"> }</span> + +<span 
class="s2">}</span> +<span class="s2">GROUP BY ?item ?itemLabel ?itemDescription ?lat ?long </span><span class="si">%G</span><span class="s2">ROUP_BY%</span> +<span class="s2">"""</span> + +<span class="c1"># Get the calendar names and the property names</span> +<span class="n">QUERY_PROPERTY_NAMES</span> <span class="o">=</span> <span class="s2">"""</span> +<span class="s2">SELECT ?item ?name</span> +<span class="s2">WHERE {</span> +<span class="s2"> {</span> +<span class="s2"> SELECT ?item</span> +<span class="s2"> WHERE { ?item wdt:P279* wd:Q12132 }</span> +<span class="s2"> } UNION {</span> +<span class="s2"> VALUES ?item { %ATTRIBUTES% }</span> +<span class="s2"> }</span> +<span class="s2"> OPTIONAL { ?item rdfs:label ?name. }</span> +<span class="s2">}</span> +<span class="s2">"""</span> + +<span class="c1"># see the property "dummy value" of https://www.wikidata.org/wiki/Q2013 (Wikidata)</span> +<span class="c1"># hard coded here to avoid to an additional SPARQL request when the server starts</span> +<span class="n">DUMMY_ENTITY_URLS</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span> + <span class="s2">"http://www.wikidata.org/entity/"</span> <span class="o">+</span> <span class="n">wid</span> <span class="k">for</span> <span class="n">wid</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">"Q4115189"</span><span class="p">,</span> <span class="s2">"Q13406268"</span><span class="p">,</span> <span class="s2">"Q15397819"</span><span class="p">,</span> <span class="s2">"Q17339402"</span><span class="p">)</span> +<span class="p">)</span> + + +<span class="c1"># https://www.w3.org/TR/sparql11-query/#rSTRING_LITERAL1</span> +<span class="c1"># https://lists.w3.org/Archives/Public/public-rdf-dawg/2011OctDec/0175.html</span> +<span class="n">sparql_string_escape</span> <span class="o">=</span> <span class="n">get_string_replaces_function</span><span class="p">(</span> + <span class="c1"># fmt: off</span> + 
<span class="p">{</span> + <span class="s1">'</span><span class="se">\t</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\t</span><span class="s1">'</span><span class="p">,</span> + <span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\n</span><span class="s1">'</span><span class="p">,</span> + <span class="s1">'</span><span class="se">\r</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\r</span><span class="s1">'</span><span class="p">,</span> + <span class="s1">'</span><span class="se">\b</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\b</span><span class="s1">'</span><span class="p">,</span> + <span class="s1">'</span><span class="se">\f</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\f</span><span class="s1">'</span><span class="p">,</span> + <span class="s1">'</span><span class="se">\"</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\"</span><span class="s1">'</span><span class="p">,</span> + <span class="s1">'</span><span class="se">\'</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\'</span><span class="s1">'</span><span class="p">,</span> + <span class="s1">'</span><span class="se">\\</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\\</span><span class="s1">'</span> + <span class="p">}</span> + <span class="c1"># fmt: on</span> +<span class="p">)</span> + +<span class="n">replace_http_by_https</span> <span class="o">=</span> <span class="n">get_string_replaces_function</span><span class="p">({</span><span class="s1">'http:'</span><span class="p">:</span> <span class="s1">'https:'</span><span 
class="p">})</span> + + +<span class="k">def</span> <span class="nf">get_headers</span><span class="p">():</span> + <span class="c1"># user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits</span> + <span class="k">return</span> <span class="p">{</span><span class="s1">'Accept'</span><span class="p">:</span> <span class="s1">'application/sparql-results+json'</span><span class="p">,</span> <span class="s1">'User-Agent'</span><span class="p">:</span> <span class="n">searx_useragent</span><span class="p">()}</span> + + +<span class="k">def</span> <span class="nf">get_label_for_entity</span><span class="p">(</span><span class="n">entity_id</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="n">name</span> <span class="o">=</span> <span class="n">WIKIDATA_PROPERTIES</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">entity_id</span><span class="p">)</span> + <span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">name</span> <span class="o">=</span> <span class="n">WIKIDATA_PROPERTIES</span><span class="o">.</span><span class="n">get</span><span class="p">((</span><span class="n">entity_id</span><span class="p">,</span> <span class="n">language</span><span class="p">))</span> + <span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">name</span> <span class="o">=</span> <span class="n">WIKIDATA_PROPERTIES</span><span class="o">.</span><span class="n">get</span><span class="p">((</span><span class="n">entity_id</span><span class="p">,</span> <span class="n">language</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]))</span> + 
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">name</span> <span class="o">=</span> <span class="n">WIKIDATA_PROPERTIES</span><span class="o">.</span><span class="n">get</span><span class="p">((</span><span class="n">entity_id</span><span class="p">,</span> <span class="s1">'en'</span><span class="p">))</span> + <span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">name</span> <span class="o">=</span> <span class="n">entity_id</span> + <span class="k">return</span> <span class="n">name</span> + + +<span class="k">def</span> <span class="nf">send_wikidata_query</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'GET'</span><span class="p">):</span> + <span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">'GET'</span><span class="p">:</span> + <span class="c1"># query will be cached by wikidata</span> + <span class="n">http_response</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">SPARQL_ENDPOINT_URL</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">}),</span> <span class="n">headers</span><span class="o">=</span><span class="n">get_headers</span><span class="p">())</span> + <span class="k">else</span><span class="p">:</span> + <span class="c1"># query won't be cached by wikidata</span> + <span class="n">http_response</span> <span class="o">=</span> <span class="n">post</span><span class="p">(</span><span class="n">SPARQL_ENDPOINT_URL</span><span class="p">,</span> <span 
class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">},</span> <span class="n">headers</span><span class="o">=</span><span class="n">get_headers</span><span class="p">())</span> + <span class="k">if</span> <span class="n">http_response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">!=</span> <span class="mi">200</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'SPARQL endpoint error </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">http_response</span><span class="o">.</span><span class="n">content</span><span class="o">.</span><span class="n">decode</span><span class="p">())</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'request time </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">http_response</span><span class="o">.</span><span class="n">elapsed</span><span class="p">))</span> + <span class="n">http_response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span> + <span class="k">return</span> <span class="n">loads</span><span class="p">(</span><span class="n">http_response</span><span class="o">.</span><span class="n">content</span><span class="o">.</span><span class="n">decode</span><span class="p">())</span> + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + + <span class="n">eng_tag</span><span class="p">,</span> <span class="n">_wiki_netloc</span> <span class="o">=</span> <span 
class="n">get_wiki_params</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="p">)</span> + <span class="n">query</span><span class="p">,</span> <span class="n">attributes</span> <span class="o">=</span> <span class="n">get_query</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">)</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"request --> language </span><span class="si">%s</span><span class="s2"> // len(attributes): </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">attributes</span><span class="p">))</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'method'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'POST'</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">SPARQL_ENDPOINT_URL</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">}</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span> <span class="o">=</span> <span class="n">get_headers</span><span class="p">()</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> + <span class="n">params</span><span 
class="p">[</span><span class="s1">'attributes'</span><span class="p">]</span> <span class="o">=</span> <span class="n">attributes</span> + + <span class="k">return</span> <span class="n">params</span> + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">jsonresponse</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">content</span><span class="o">.</span><span class="n">decode</span><span class="p">())</span> + + <span class="n">language</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> + <span class="n">attributes</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s1">'attributes'</span><span class="p">]</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"request --> language </span><span class="si">%s</span><span class="s2"> // len(attributes): </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">language</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">attributes</span><span class="p">))</span> + + <span class="n">seen_entities</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span> + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">jsonresponse</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'results'</span><span 
class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'bindings'</span><span class="p">,</span> <span class="p">[]):</span> + <span class="n">attribute_result</span> <span class="o">=</span> <span class="p">{</span><span class="n">key</span><span class="p">:</span> <span class="n">value</span><span class="p">[</span><span class="s1">'value'</span><span class="p">]</span> <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">result</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span> + <span class="n">entity_url</span> <span class="o">=</span> <span class="n">attribute_result</span><span class="p">[</span><span class="s1">'item'</span><span class="p">]</span> + <span class="k">if</span> <span class="n">entity_url</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">seen_entities</span> <span class="ow">and</span> <span class="n">entity_url</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">DUMMY_ENTITY_URLS</span><span class="p">:</span> + <span class="n">seen_entities</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">entity_url</span><span class="p">)</span> + <span class="n">results</span> <span class="o">+=</span> <span class="n">get_results</span><span class="p">(</span><span class="n">attribute_result</span><span class="p">,</span> <span class="n">attributes</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'The SPARQL request returns duplicate entities: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span 
class="nb">str</span><span class="p">(</span><span class="n">attribute_result</span><span class="p">))</span> + + <span class="k">return</span> <span class="n">results</span> + + +<span class="n">_IMG_SRC_DEFAULT_URL_PREFIX</span> <span class="o">=</span> <span class="s2">"https://commons.wikimedia.org/wiki/Special:FilePath/"</span> +<span class="n">_IMG_SRC_NEW_URL_PREFIX</span> <span class="o">=</span> <span class="s2">"https://upload.wikimedia.org/wikipedia/commons/thumb/"</span> + + +<div class="viewcode-block" id="get_thumbnail"> +<a class="viewcode-back" href="../../../dev/engines/online/wikipedia.html#searx.engines.wikidata.get_thumbnail">[docs]</a> +<span class="k">def</span> <span class="nf">get_thumbnail</span><span class="p">(</span><span class="n">img_src</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Get Thumbnail image from wikimedia commons</span> + +<span class="sd"> Images from commons.wikimedia.org are (HTTP) redirected to</span> +<span class="sd"> upload.wikimedia.org. 
The redirected URL can be calculated by this</span> +<span class="sd"> function.</span> + +<span class="sd"> - https://stackoverflow.com/a/33691240</span> + +<span class="sd"> """</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'get_thumbnail(): </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">img_src</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">img_src</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">_IMG_SRC_DEFAULT_URL_PREFIX</span> <span class="ow">in</span> <span class="n">img_src</span><span class="o">.</span><span class="n">split</span><span class="p">()[</span><span class="mi">0</span><span class="p">]:</span> + <span class="n">img_src_name</span> <span class="o">=</span> <span class="n">unquote</span><span class="p">(</span><span class="n">img_src</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">_IMG_SRC_DEFAULT_URL_PREFIX</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"?"</span><span class="p">,</span> <span class="mi">1</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"%20"</span><span class="p">,</span> <span class="s2">"_"</span><span class="p">))</span> + <span class="n">img_src_name_first</span> <span class="o">=</span> <span class="n">img_src_name</span> + <span class="n">img_src_name_second</span> <span class="o">=</span> <span class="n">img_src_name</span> + + <span class="k">if</span> <span class="s2">".svg"</span> <span class="ow">in</span> <span class="n">img_src_name</span><span 
class="o">.</span><span class="n">split</span><span class="p">()[</span><span class="mi">0</span><span class="p">]:</span> + <span class="n">img_src_name_second</span> <span class="o">=</span> <span class="n">img_src_name</span> <span class="o">+</span> <span class="s2">".png"</span> + + <span class="n">img_src_size</span> <span class="o">=</span> <span class="n">img_src</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">_IMG_SRC_DEFAULT_URL_PREFIX</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"?"</span><span class="p">,</span> <span class="mi">1</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span> + <span class="n">img_src_size</span> <span class="o">=</span> <span class="n">img_src_size</span><span class="p">[</span><span class="n">img_src_size</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="s2">"="</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span> <span class="p">:</span> <span class="n">img_src_size</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="s2">"&"</span><span class="p">)]</span> + <span class="n">img_src_name_md5</span> <span class="o">=</span> <span class="n">md5</span><span class="p">(</span><span class="n">img_src_name</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s2">"utf-8"</span><span class="p">))</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span> + <span class="n">img_src</span> <span class="o">=</span> <span class="p">(</span> + <span class="n">_IMG_SRC_NEW_URL_PREFIX</span> + <span class="o">+</span> <span class="n">img_src_name_md5</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + <span 
class="o">+</span> <span class="s2">"/"</span> + <span class="o">+</span> <span class="n">img_src_name_md5</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="mi">2</span><span class="p">]</span> + <span class="o">+</span> <span class="s2">"/"</span> + <span class="o">+</span> <span class="n">img_src_name_first</span> + <span class="o">+</span> <span class="s2">"/"</span> + <span class="o">+</span> <span class="n">img_src_size</span> + <span class="o">+</span> <span class="s2">"px-"</span> + <span class="o">+</span> <span class="n">img_src_name_second</span> + <span class="p">)</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'get_thumbnail() redirected: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">img_src</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">img_src</span></div> + + + +<span class="k">def</span> <span class="nf">get_results</span><span class="p">(</span><span class="n">attribute_result</span><span class="p">,</span> <span class="n">attributes</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="c1"># pylint: disable=too-many-branches</span> + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">infobox_title</span> <span class="o">=</span> <span class="n">attribute_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'itemLabel'</span><span class="p">)</span> + <span class="n">infobox_id</span> <span class="o">=</span> <span class="n">attribute_result</span><span class="p">[</span><span class="s1">'item'</span><span class="p">]</span> + <span class="n">infobox_id_lang</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">infobox_urls</span> <span class="o">=</span> <span 
class="p">[]</span> + <span class="n">infobox_attributes</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">infobox_content</span> <span class="o">=</span> <span class="n">attribute_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'itemDescription'</span><span class="p">,</span> <span class="p">[])</span> + <span class="n">img_src</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">img_src_priority</span> <span class="o">=</span> <span class="mi">0</span> + + <span class="k">for</span> <span class="n">attribute</span> <span class="ow">in</span> <span class="n">attributes</span><span class="p">:</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">attribute</span><span class="o">.</span><span class="n">get_str</span><span class="p">(</span><span class="n">attribute_result</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span> + <span class="k">if</span> <span class="n">value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">value</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">:</span> + <span class="n">attribute_type</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">attribute</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">attribute_type</span> <span class="ow">in</span> <span class="p">(</span><span class="n">WDURLAttribute</span><span class="p">,</span> <span class="n">WDArticle</span><span class="p">):</span> + <span class="c1"># get_select() method : there is group_concat(distinct ...;separator=", ")</span> + <span class="c1"># split the value here</span> + <span class="k">for</span> <span class="n">url</span> <span class="ow">in</span> <span class="n">value</span><span class="o">.</span><span 
class="n">split</span><span class="p">(</span><span class="s1">', '</span><span class="p">):</span> + <span class="n">infobox_urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">attribute</span><span class="o">.</span><span class="n">get_label</span><span class="p">(</span><span class="n">language</span><span class="p">),</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="o">**</span><span class="n">attribute</span><span class="o">.</span><span class="n">kwargs</span><span class="p">})</span> + <span class="c1"># "normal" results (not infobox) include official website and Wikipedia links.</span> + <span class="k">if</span> <span class="s2">"list"</span> <span class="ow">in</span> <span class="n">display_type</span> <span class="ow">and</span> <span class="p">(</span><span class="n">attribute</span><span class="o">.</span><span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'official'</span><span class="p">)</span> <span class="ow">or</span> <span class="n">attribute_type</span> <span class="o">==</span> <span class="n">WDArticle</span><span class="p">):</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">infobox_title</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="n">infobox_content</span><span class="p">})</span> + + <span class="c1"># update the infobox_id with the wikipedia URL</span> + <span class="c1"># first the local wikipedia URL, and as fallback the english wikipedia URL</span> + <span class="k">if</span> <span 
class="n">attribute_type</span> <span class="o">==</span> <span class="n">WDArticle</span> <span class="ow">and</span> <span class="p">(</span> + <span class="p">(</span><span class="n">attribute</span><span class="o">.</span><span class="n">language</span> <span class="o">==</span> <span class="s1">'en'</span> <span class="ow">and</span> <span class="n">infobox_id_lang</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">or</span> <span class="n">attribute</span><span class="o">.</span><span class="n">language</span> <span class="o">!=</span> <span class="s1">'en'</span> + <span class="p">):</span> + <span class="n">infobox_id_lang</span> <span class="o">=</span> <span class="n">attribute</span><span class="o">.</span><span class="n">language</span> + <span class="n">infobox_id</span> <span class="o">=</span> <span class="n">url</span> + <span class="k">elif</span> <span class="n">attribute_type</span> <span class="o">==</span> <span class="n">WDImageAttribute</span><span class="p">:</span> + <span class="c1"># this attribute is an image.</span> + <span class="c1"># replace the current image only the priority is lower</span> + <span class="c1"># (the infobox contain only one image).</span> + <span class="k">if</span> <span class="n">attribute</span><span class="o">.</span><span class="n">priority</span> <span class="o">></span> <span class="n">img_src_priority</span><span class="p">:</span> + <span class="n">img_src</span> <span class="o">=</span> <span class="n">get_thumbnail</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> + <span class="n">img_src_priority</span> <span class="o">=</span> <span class="n">attribute</span><span class="o">.</span><span class="n">priority</span> + <span class="k">elif</span> <span class="n">attribute_type</span> <span class="o">==</span> <span class="n">WDGeoAttribute</span><span class="p">:</span> + <span class="c1"># geocoordinate link</span> + 
<span class="c1"># use the area to get the OSM zoom</span> + <span class="c1"># Note: ignore the unit (must be km² otherwise the calculation is wrong)</span> + <span class="c1"># Should use normalized value p:P2046/psn:P2046/wikibase:quantityAmount</span> + <span class="n">area</span> <span class="o">=</span> <span class="n">attribute_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'P2046'</span><span class="p">)</span> + <span class="n">osm_zoom</span> <span class="o">=</span> <span class="n">area_to_osm_zoom</span><span class="p">(</span><span class="n">area</span><span class="p">)</span> <span class="k">if</span> <span class="n">area</span> <span class="k">else</span> <span class="mi">19</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">attribute</span><span class="o">.</span><span class="n">get_geo_url</span><span class="p">(</span><span class="n">attribute_result</span><span class="p">,</span> <span class="n">osm_zoom</span><span class="o">=</span><span class="n">osm_zoom</span><span class="p">)</span> + <span class="k">if</span> <span class="n">url</span><span class="p">:</span> + <span class="n">infobox_urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">attribute</span><span class="o">.</span><span class="n">get_label</span><span class="p">(</span><span class="n">language</span><span class="p">),</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'entity'</span><span class="p">:</span> <span class="n">attribute</span><span class="o">.</span><span class="n">name</span><span class="p">})</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">infobox_attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span 
class="p">{</span><span class="s1">'label'</span><span class="p">:</span> <span class="n">attribute</span><span class="o">.</span><span class="n">get_label</span><span class="p">(</span><span class="n">language</span><span class="p">),</span> <span class="s1">'value'</span><span class="p">:</span> <span class="n">value</span><span class="p">,</span> <span class="s1">'entity'</span><span class="p">:</span> <span class="n">attribute</span><span class="o">.</span><span class="n">name</span><span class="p">}</span> + <span class="p">)</span> + + <span class="k">if</span> <span class="n">infobox_id</span><span class="p">:</span> + <span class="n">infobox_id</span> <span class="o">=</span> <span class="n">replace_http_by_https</span><span class="p">(</span><span class="n">infobox_id</span><span class="p">)</span> + + <span class="c1"># add the wikidata URL at the end</span> + <span class="n">infobox_urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="s1">'Wikidata'</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">attribute_result</span><span class="p">[</span><span class="s1">'item'</span><span class="p">]})</span> + + <span class="k">if</span> <span class="p">(</span> + <span class="s2">"list"</span> <span class="ow">in</span> <span class="n">display_type</span> + <span class="ow">and</span> <span class="n">img_src</span> <span class="ow">is</span> <span class="kc">None</span> + <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">infobox_attributes</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span> + <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">infobox_urls</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span> + <span class="ow">and</span> <span 
class="nb">len</span><span class="p">(</span><span class="n">infobox_content</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span> + <span class="p">):</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">infobox_urls</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'url'</span><span class="p">],</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">infobox_title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">infobox_content</span><span class="p">})</span> + <span class="k">elif</span> <span class="s2">"infobox"</span> <span class="ow">in</span> <span class="n">display_type</span><span class="p">:</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'infobox'</span><span class="p">:</span> <span class="n">infobox_title</span><span class="p">,</span> + <span class="s1">'id'</span><span class="p">:</span> <span class="n">infobox_id</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">infobox_content</span><span class="p">,</span> + <span class="s1">'img_src'</span><span class="p">:</span> <span class="n">img_src</span><span class="p">,</span> + <span class="s1">'urls'</span><span class="p">:</span> <span class="n">infobox_urls</span><span class="p">,</span> + <span class="s1">'attributes'</span><span class="p">:</span> <span class="n">infobox_attributes</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="k">return</span> <span class="n">results</span> + + +<span class="k">def</span> <span class="nf">get_query</span><span class="p">(</span><span 
class="n">query</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="n">attributes</span> <span class="o">=</span> <span class="n">get_attributes</span><span class="p">(</span><span class="n">language</span><span class="p">)</span> + <span class="n">select</span> <span class="o">=</span> <span class="p">[</span><span class="n">a</span><span class="o">.</span><span class="n">get_select</span><span class="p">()</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">attributes</span><span class="p">]</span> + <span class="n">where</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">s</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> <span class="p">[</span><span class="n">a</span><span class="o">.</span><span class="n">get_where</span><span class="p">()</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">attributes</span><span class="p">]))</span> + <span class="n">wikibase_label</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">s</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> <span class="p">[</span><span class="n">a</span><span class="o">.</span><span class="n">get_wikibase_label</span><span class="p">()</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">attributes</span><span class="p">]))</span> + 
<span class="n">group_by</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">s</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> <span class="p">[</span><span class="n">a</span><span class="o">.</span><span class="n">get_group_by</span><span class="p">()</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">attributes</span><span class="p">]))</span> + <span class="n">query</span> <span class="o">=</span> <span class="p">(</span> + <span class="n">QUERY_TEMPLATE</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%QUERY%'</span><span class="p">,</span> <span class="n">sparql_string_escape</span><span class="p">(</span><span class="n">query</span><span class="p">))</span> + <span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%SELECT%'</span><span class="p">,</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">select</span><span class="p">))</span> + <span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%WHERE%'</span><span class="p">,</span> <span class="s1">'</span><span class="se">\n</span><span class="s1"> '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">where</span><span class="p">))</span> + <span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%WIKIBASE_LABELS%'</span><span class="p">,</span> <span class="s1">'</span><span class="se">\n</span><span class="s1"> '</span><span class="o">.</span><span class="n">join</span><span 
class="p">(</span><span class="n">wikibase_label</span><span class="p">))</span> + <span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">%G</span><span class="s1">ROUP_BY%'</span><span class="p">,</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">group_by</span><span class="p">))</span> + <span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%LANGUAGE%'</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span> + <span class="p">)</span> + <span class="k">return</span> <span class="n">query</span><span class="p">,</span> <span class="n">attributes</span> + + +<span class="k">def</span> <span class="nf">get_attributes</span><span class="p">(</span><span class="n">language</span><span class="p">):</span> + <span class="c1"># pylint: disable=too-many-statements</span> + <span class="n">attributes</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">def</span> <span class="nf">add_value</span><span class="p">(</span><span class="n">name</span><span class="p">):</span> + <span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">))</span> + + <span class="k">def</span> <span class="nf">add_amount</span><span class="p">(</span><span class="n">name</span><span class="p">):</span> + <span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDAmountAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">))</span> + + <span class="k">def</span> <span class="nf">add_label</span><span class="p">(</span><span class="n">name</span><span class="p">):</span> + <span class="n">attributes</span><span 
class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDLabelAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">))</span> + + <span class="k">def</span> <span class="nf">add_url</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> + <span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDURLAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">))</span> + + <span class="k">def</span> <span class="nf">add_image</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span> + <span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDImageAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="p">,</span> <span class="n">priority</span><span class="p">))</span> + + <span class="k">def</span> <span class="nf">add_date</span><span class="p">(</span><span class="n">name</span><span class="p">):</span> + <span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDDateAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">))</span> + + <span class="c1"># Dates</span> + <span class="k">for</span> <span class="n">p</span> <span 
class="ow">in</span> <span class="p">[</span> + <span class="s1">'P571'</span><span class="p">,</span> <span class="c1"># inception date</span> + <span class="s1">'P576'</span><span class="p">,</span> <span class="c1"># dissolution date</span> + <span class="s1">'P580'</span><span class="p">,</span> <span class="c1"># start date</span> + <span class="s1">'P582'</span><span class="p">,</span> <span class="c1"># end date</span> + <span class="s1">'P569'</span><span class="p">,</span> <span class="c1"># date of birth</span> + <span class="s1">'P570'</span><span class="p">,</span> <span class="c1"># date of death</span> + <span class="s1">'P619'</span><span class="p">,</span> <span class="c1"># date of spacecraft launch</span> + <span class="s1">'P620'</span><span class="p">,</span> + <span class="p">]:</span> <span class="c1"># date of spacecraft landing</span> + <span class="n">add_date</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="p">[</span> + <span class="s1">'P27'</span><span class="p">,</span> <span class="c1"># country of citizenship</span> + <span class="s1">'P495'</span><span class="p">,</span> <span class="c1"># country of origin</span> + <span class="s1">'P17'</span><span class="p">,</span> <span class="c1"># country</span> + <span class="s1">'P159'</span><span class="p">,</span> + <span class="p">]:</span> <span class="c1"># headquarters location</span> + <span class="n">add_label</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> + + <span class="c1"># Places</span> + <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="p">[</span> + <span class="s1">'P36'</span><span class="p">,</span> <span class="c1"># capital</span> + <span class="s1">'P35'</span><span class="p">,</span> <span class="c1"># head of state</span> + <span class="s1">'P6'</span><span 
class="p">,</span> <span class="c1"># head of government</span> + <span class="s1">'P122'</span><span class="p">,</span> <span class="c1"># basic form of government</span> + <span class="s1">'P37'</span><span class="p">,</span> + <span class="p">]:</span> <span class="c1"># official language</span> + <span class="n">add_label</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> + + <span class="n">add_value</span><span class="p">(</span><span class="s1">'P1082'</span><span class="p">)</span> <span class="c1"># population</span> + <span class="n">add_amount</span><span class="p">(</span><span class="s1">'P2046'</span><span class="p">)</span> <span class="c1"># area</span> + <span class="n">add_amount</span><span class="p">(</span><span class="s1">'P281'</span><span class="p">)</span> <span class="c1"># postal code</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P38'</span><span class="p">)</span> <span class="c1"># currency</span> + <span class="n">add_amount</span><span class="p">(</span><span class="s1">'P2048'</span><span class="p">)</span> <span class="c1"># height (building)</span> + + <span class="c1"># Media</span> + <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="p">[</span> + <span class="s1">'P400'</span><span class="p">,</span> <span class="c1"># platform (videogames, computing)</span> + <span class="s1">'P50'</span><span class="p">,</span> <span class="c1"># author</span> + <span class="s1">'P170'</span><span class="p">,</span> <span class="c1"># creator</span> + <span class="s1">'P57'</span><span class="p">,</span> <span class="c1"># director</span> + <span class="s1">'P175'</span><span class="p">,</span> <span class="c1"># performer</span> + <span class="s1">'P178'</span><span class="p">,</span> <span class="c1"># developer</span> + <span class="s1">'P162'</span><span class="p">,</span> <span class="c1"># producer</span> + <span 
class="s1">'P176'</span><span class="p">,</span> <span class="c1"># manufacturer</span> + <span class="s1">'P58'</span><span class="p">,</span> <span class="c1"># screenwriter</span> + <span class="s1">'P272'</span><span class="p">,</span> <span class="c1"># production company</span> + <span class="s1">'P264'</span><span class="p">,</span> <span class="c1"># record label</span> + <span class="s1">'P123'</span><span class="p">,</span> <span class="c1"># publisher</span> + <span class="s1">'P449'</span><span class="p">,</span> <span class="c1"># original network</span> + <span class="s1">'P750'</span><span class="p">,</span> <span class="c1"># distributed by</span> + <span class="s1">'P86'</span><span class="p">,</span> + <span class="p">]:</span> <span class="c1"># composer</span> + <span class="n">add_label</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> + + <span class="n">add_date</span><span class="p">(</span><span class="s1">'P577'</span><span class="p">)</span> <span class="c1"># publication date</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P136'</span><span class="p">)</span> <span class="c1"># genre (music, film, artistic...)</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P364'</span><span class="p">)</span> <span class="c1"># original language</span> + <span class="n">add_value</span><span class="p">(</span><span class="s1">'P212'</span><span class="p">)</span> <span class="c1"># ISBN-13</span> + <span class="n">add_value</span><span class="p">(</span><span class="s1">'P957'</span><span class="p">)</span> <span class="c1"># ISBN-10</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P275'</span><span class="p">)</span> <span class="c1"># copyright license</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P277'</span><span class="p">)</span> <span class="c1"># programming language</span> + 
<span class="n">add_value</span><span class="p">(</span><span class="s1">'P348'</span><span class="p">)</span> <span class="c1"># version</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P840'</span><span class="p">)</span> <span class="c1"># narrative location</span> + + <span class="c1"># Languages</span> + <span class="n">add_value</span><span class="p">(</span><span class="s1">'P1098'</span><span class="p">)</span> <span class="c1"># number of speakers</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P282'</span><span class="p">)</span> <span class="c1"># writing system</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P1018'</span><span class="p">)</span> <span class="c1"># language regulatory body</span> + <span class="n">add_value</span><span class="p">(</span><span class="s1">'P218'</span><span class="p">)</span> <span class="c1"># language code (ISO 639-1)</span> + + <span class="c1"># Other</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P169'</span><span class="p">)</span> <span class="c1"># ceo</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P112'</span><span class="p">)</span> <span class="c1"># founded by</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P1454'</span><span class="p">)</span> <span class="c1"># legal form (company, organization)</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P137'</span><span class="p">)</span> <span class="c1"># operator (service, facility, ...)</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P1029'</span><span class="p">)</span> <span class="c1"># crew members (tripulation)</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P225'</span><span class="p">)</span> <span class="c1"># taxon name</span> + <span 
class="n">add_value</span><span class="p">(</span><span class="s1">'P274'</span><span class="p">)</span> <span class="c1"># chemical formula</span> + <span class="n">add_label</span><span class="p">(</span><span class="s1">'P1346'</span><span class="p">)</span> <span class="c1"># winner (sports, contests, ...)</span> + <span class="n">add_value</span><span class="p">(</span><span class="s1">'P1120'</span><span class="p">)</span> <span class="c1"># number of deaths</span> + <span class="n">add_value</span><span class="p">(</span><span class="s1">'P498'</span><span class="p">)</span> <span class="c1"># currency code (ISO 4217)</span> + + <span class="c1"># URL</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P856'</span><span class="p">,</span> <span class="n">official</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="c1"># official website</span> + <span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDArticle</span><span class="p">(</span><span class="n">language</span><span class="p">))</span> <span class="c1"># wikipedia (user language)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">language</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'en'</span><span class="p">):</span> + <span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDArticle</span><span class="p">(</span><span class="s1">'en'</span><span class="p">))</span> <span class="c1"># wikipedia (english)</span> + + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P1324'</span><span class="p">)</span> <span class="c1"># source code repository</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P1581'</span><span class="p">)</span> <span class="c1"># 
blog</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P434'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'musicbrainz_artist'</span><span class="p">)</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P435'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'musicbrainz_work'</span><span class="p">)</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P436'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'musicbrainz_release_group'</span><span class="p">)</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P966'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'musicbrainz_label'</span><span class="p">)</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P345'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'imdb_id'</span><span class="p">)</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P2397'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'youtube_channel'</span><span class="p">)</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P1651'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'youtube_video'</span><span class="p">)</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P2002'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'twitter_profile'</span><span class="p">)</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P2013'</span><span class="p">,</span> <span 
class="n">url_id</span><span class="o">=</span><span class="s1">'facebook_profile'</span><span class="p">)</span> + <span class="n">add_url</span><span class="p">(</span><span class="s1">'P2003'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'instagram_profile'</span><span class="p">)</span> + + <span class="c1"># Map</span> + <span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDGeoAttribute</span><span class="p">(</span><span class="s1">'P625'</span><span class="p">))</span> + + <span class="c1"># Image</span> + <span class="n">add_image</span><span class="p">(</span><span class="s1">'P15'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># route map</span> + <span class="n">add_image</span><span class="p">(</span><span class="s1">'P242'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># locator map</span> + <span class="n">add_image</span><span class="p">(</span><span class="s1">'P154'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># logo</span> + <span class="n">add_image</span><span class="p">(</span><span class="s1">'P18'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span 
class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># image</span> + <span class="n">add_image</span><span class="p">(</span><span class="s1">'P41'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># flag</span> + <span class="n">add_image</span><span class="p">(</span><span class="s1">'P2716'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">6</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># collage</span> + <span class="n">add_image</span><span class="p">(</span><span class="s1">'P2910'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">7</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># icon</span> + + <span class="k">return</span> <span class="n">attributes</span> + + +<span class="k">class</span> <span class="nc">WDAttribute</span><span class="p">:</span> + <span class="vm">__slots__</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'name'</span><span class="p">,)</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span> + + <span class="k">def</span> <span class="nf">get_select</span><span class="p">(</span><span 
class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s1">'(group_concat(distinct ?</span><span class="si">{name}</span><span class="s1">;separator=", ") as ?</span><span class="si">{name}</span><span class="s1">s)'</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_label</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="k">return</span> <span class="n">get_label_for_entity</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">"OPTIONAL { ?item wdt:</span><span class="si">{name}</span><span class="s2"> ?</span><span class="si">{name}</span><span class="s2"> . 
}"</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_wikibase_label</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">""</span> + + <span class="k">def</span> <span class="nf">get_group_by</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">""</span> + + <span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> <span class="c1"># pylint: disable=unused-argument</span> + <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'s'</span><span class="p">)</span> + + <span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s1">'<'</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span> <span class="o">+</span> <span class="s1">':'</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'>'</span> + + +<span 
class="k">class</span> <span class="nc">WDAmountAttribute</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span> + <span class="k">def</span> <span class="nf">get_select</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s1">'?</span><span class="si">{name}</span><span class="s1"> ?</span><span class="si">{name}</span><span class="s1">Unit'</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">""" OPTIONAL { ?item p:</span><span class="si">{name}</span><span class="s2"> ?</span><span class="si">{name}</span><span class="s2">Node .</span> +<span class="s2"> ?</span><span class="si">{name}</span><span class="s2">Node rdf:type wikibase:BestRank ; ps:</span><span class="si">{name}</span><span class="s2"> ?</span><span class="si">{name}</span><span class="s2"> .</span> +<span class="s2"> OPTIONAL { ?</span><span class="si">{name}</span><span class="s2">Node psv:</span><span class="si">{name}</span><span class="s2">/wikibase:quantityUnit ?</span><span class="si">{name}</span><span class="s2">Unit. 
} }"""</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span> + <span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span> + <span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_group_by</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_select</span><span class="p">()</span> + + <span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + <span class="n">unit</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s2">"Unit"</span><span class="p">)</span> + <span class="k">if</span> <span class="n">unit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">unit</span> <span class="o">=</span> <span class="n">unit</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'http://www.wikidata.org/entity/'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> + <span class="k">return</span> <span class="n">value</span> <span class="o">+</span> <span class="s2">" "</span> 
<span class="o">+</span> <span class="n">get_label_for_entity</span><span class="p">(</span><span class="n">unit</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span> + <span class="k">return</span> <span class="n">value</span> + + +<span class="k">class</span> <span class="nc">WDArticle</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span> + + <span class="vm">__slots__</span> <span class="o">=</span> <span class="s1">'language'</span><span class="p">,</span> <span class="s1">'kwargs'</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">language</span><span class="p">,</span> <span class="n">kwargs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="s1">'wikipedia'</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">language</span> <span class="o">=</span> <span class="n">language</span> + <span class="bp">self</span><span class="o">.</span><span class="n">kwargs</span> <span class="o">=</span> <span class="n">kwargs</span> <span class="ow">or</span> <span class="p">{}</span> + + <span class="k">def</span> <span class="nf">get_label</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="c1"># language parameter is ignored</span> + <span class="k">return</span> <span class="s2">"Wikipedia (</span><span class="si">{language}</span><span class="s2">)"</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{language}</span><span class="s1">'</span><span class="p">,</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">language</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_select</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">"?article</span><span class="si">{language}</span><span class="s2"> ?articleName</span><span class="si">{language}</span><span class="s2">"</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{language}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">language</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">"""OPTIONAL { ?article</span><span class="si">{language}</span><span class="s2"> schema:about ?item ;</span> +<span class="s2"> schema:inLanguage "</span><span class="si">{language}</span><span class="s2">" ;</span> +<span class="s2"> schema:isPartOf <https://</span><span class="si">{language}</span><span class="s2">.wikipedia.org/> ;</span> +<span class="s2"> schema:name ?articleName</span><span class="si">{language}</span><span class="s2"> . 
}"""</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span> + <span class="s1">'</span><span class="si">{language}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">language</span> + <span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_group_by</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_select</span><span class="p">()</span> + + <span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="n">key</span> <span class="o">=</span> <span class="s1">'article</span><span class="si">{language}</span><span class="s1">'</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{language}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">language</span><span class="p">)</span> + <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> + + +<span class="k">class</span> <span class="nc">WDLabelAttribute</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span> + <span class="k">def</span> <span class="nf">get_select</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s1">'(group_concat(distinct ?</span><span class="si">{name}</span><span class="s1">Label;separator=", ") as ?</span><span class="si">{name}</span><span 
class="s1">Labels)'</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">"OPTIONAL { ?item wdt:</span><span class="si">{name}</span><span class="s2"> ?</span><span class="si">{name}</span><span class="s2"> . }"</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_wikibase_label</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">"?</span><span class="si">{name}</span><span class="s2"> rdfs:label ?</span><span class="si">{name}</span><span class="s2">Label ."</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span 
class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'Labels'</span><span class="p">)</span> + + +<span class="k">class</span> <span class="nc">WDURLAttribute</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span> + + <span class="n">HTTP_WIKIMEDIA_IMAGE</span> <span class="o">=</span> <span class="s1">'http://commons.wikimedia.org/wiki/Special:FilePath/'</span> + + <span class="vm">__slots__</span> <span class="o">=</span> <span class="s1">'url_id'</span><span class="p">,</span> <span class="s1">'kwargs'</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">kwargs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">url_id</span> <span class="o">=</span> <span class="n">url_id</span> + <span class="bp">self</span><span class="o">.</span><span class="n">kwargs</span> <span class="o">=</span> <span class="n">kwargs</span> + + <span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span 
class="s1">'s'</span><span class="p">)</span> + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">url_id</span> <span class="ow">and</span> <span class="n">value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">value</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">:</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">value</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">','</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">url_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">url_id</span> + <span class="k">if</span> <span class="n">value</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="n">WDURLAttribute</span><span class="o">.</span><span class="n">HTTP_WIKIMEDIA_IMAGE</span><span class="p">):</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">value</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">WDURLAttribute</span><span class="o">.</span><span class="n">HTTP_WIKIMEDIA_IMAGE</span><span class="p">)</span> <span class="p">:]</span> + <span class="n">url_id</span> <span class="o">=</span> <span class="s1">'wikimedia_image'</span> + <span class="k">return</span> <span class="n">get_external_url</span><span class="p">(</span><span class="n">url_id</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> + <span class="k">return</span> <span class="n">value</span> + + +<span class="k">class</span> <span class="nc">WDGeoAttribute</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span> + <span class="k">def</span> <span class="nf">get_label</span><span 
class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">"OpenStreetMap"</span> + + <span class="k">def</span> <span class="nf">get_select</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">"?</span><span class="si">{name}</span><span class="s2">Lat ?</span><span class="si">{name}</span><span class="s2">Long"</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">"""OPTIONAL { ?item p:</span><span class="si">{name}</span><span class="s2">/psv:</span><span class="si">{name}</span><span class="s2"> [</span> +<span class="s2"> wikibase:geoLatitude ?</span><span class="si">{name}</span><span class="s2">Lat ;</span> +<span class="s2"> wikibase:geoLongitude ?</span><span class="si">{name}</span><span class="s2">Long ] }"""</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span> + <span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span> + <span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_group_by</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_select</span><span class="p">()</span> + + <span class="k">def</span> <span 
class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="n">latitude</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'Lat'</span><span class="p">)</span> + <span class="n">longitude</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'Long'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">latitude</span> <span class="ow">and</span> <span class="n">longitude</span><span class="p">:</span> + <span class="k">return</span> <span class="n">latitude</span> <span class="o">+</span> <span class="s1">' '</span> <span class="o">+</span> <span class="n">longitude</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="k">def</span> <span class="nf">get_geo_url</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">osm_zoom</span><span class="o">=</span><span class="mi">19</span><span class="p">):</span> + <span class="n">latitude</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'Lat'</span><span class="p">)</span> + <span class="n">longitude</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span 
class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'Long'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">latitude</span> <span class="ow">and</span> <span class="n">longitude</span><span class="p">:</span> + <span class="k">return</span> <span class="n">get_earth_coordinates_url</span><span class="p">(</span><span class="n">latitude</span><span class="p">,</span> <span class="n">longitude</span><span class="p">,</span> <span class="n">osm_zoom</span><span class="p">)</span> + <span class="k">return</span> <span class="kc">None</span> + + +<span class="k">class</span> <span class="nc">WDImageAttribute</span><span class="p">(</span><span class="n">WDURLAttribute</span><span class="p">):</span> + + <span class="vm">__slots__</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'priority'</span><span class="p">,)</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">100</span><span class="p">):</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">priority</span> <span class="o">=</span> <span class="n">priority</span> + + +<span class="k">class</span> <span class="nc">WDDateAttribute</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span> + <span class="k">def</span> <span 
class="nf">get_select</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s1">'?</span><span class="si">{name}</span><span class="s1"> ?</span><span class="si">{name}</span><span class="s1">timePrecision ?</span><span class="si">{name}</span><span class="s1">timeZone ?</span><span class="si">{name}</span><span class="s1">timeCalendar'</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="c1"># To remove duplicate, add</span> + <span class="c1"># FILTER NOT EXISTS { ?item p:{name}/psv:{name}/wikibase:timeValue ?{name}bis FILTER (?{name}bis < ?{name}) }</span> + <span class="c1"># this filter is too slow, so the response function ignore duplicate results</span> + <span class="c1"># (see the seen_entities variable)</span> + <span class="k">return</span> <span class="s2">"""OPTIONAL { ?item p:</span><span class="si">{name}</span><span class="s2">/psv:</span><span class="si">{name}</span><span class="s2"> [</span> +<span class="s2"> wikibase:timeValue ?</span><span class="si">{name}</span><span class="s2"> ;</span> +<span class="s2"> wikibase:timePrecision ?</span><span class="si">{name}</span><span class="s2">timePrecision ;</span> +<span class="s2"> wikibase:timeTimezone ?</span><span class="si">{name}</span><span class="s2">timeZone ;</span> +<span class="s2"> wikibase:timeCalendarModel ?</span><span class="si">{name}</span><span class="s2">timeCalendar ] . 
}</span> +<span class="s2"> hint:Prior hint:rangeSafe true;"""</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span> + <span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span> + <span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_group_by</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_select</span><span class="p">()</span> + + <span class="k">def</span> <span class="nf">format_8</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span> <span class="c1"># pylint: disable=unused-argument</span> + <span class="c1"># precision: less than a year</span> + <span class="k">return</span> <span class="n">value</span> + + <span class="k">def</span> <span class="nf">format_9</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span> + <span class="n">year</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> + <span class="c1"># precision: year</span> + <span class="k">if</span> <span class="n">year</span> <span class="o"><</span> <span class="mi">1584</span><span class="p">:</span> + <span class="k">if</span> <span class="n">year</span> <span class="o"><</span> <span class="mi">0</span><span class="p">:</span> + <span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="n">year</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> + <span 
class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="n">year</span><span class="p">)</span> + <span class="n">timestamp</span> <span class="o">=</span> <span class="n">isoparse</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> + <span class="k">return</span> <span class="n">format_date</span><span class="p">(</span><span class="n">timestamp</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s1">'yyyy'</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">format_10</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span> + <span class="c1"># precision: month</span> + <span class="n">timestamp</span> <span class="o">=</span> <span class="n">isoparse</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> + <span class="k">return</span> <span class="n">format_date</span><span class="p">(</span><span class="n">timestamp</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s1">'MMMM y'</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">format_11</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span> + <span class="c1"># precision: day</span> + <span class="n">timestamp</span> <span class="o">=</span> <span class="n">isoparse</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> + <span class="k">return</span> <span 
class="n">format_date</span><span class="p">(</span><span class="n">timestamp</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s1">'full'</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">format_13</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span> + <span class="n">timestamp</span> <span class="o">=</span> <span class="n">isoparse</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> + <span class="c1"># precision: minute</span> + <span class="k">return</span> <span class="p">(</span> + <span class="n">get_datetime_format</span><span class="p">(</span><span class="nb">format</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">)</span> + <span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"'"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span> + <span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{0}</span><span class="s1">'</span><span class="p">,</span> <span class="n">format_time</span><span class="p">(</span><span class="n">timestamp</span><span class="p">,</span> <span class="s1">'full'</span><span class="p">,</span> <span class="n">tzinfo</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">))</span> + <span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{1}</span><span class="s1">'</span><span 
class="p">,</span> <span class="n">format_date</span><span class="p">(</span><span class="n">timestamp</span><span class="p">,</span> <span class="s1">'short'</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">))</span> + <span class="p">)</span> + + <span class="k">def</span> <span class="nf">format_14</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span> + <span class="c1"># precision: second.</span> + <span class="k">return</span> <span class="n">format_datetime</span><span class="p">(</span><span class="n">isoparse</span><span class="p">(</span><span class="n">value</span><span class="p">),</span> <span class="nb">format</span><span class="o">=</span><span class="s1">'full'</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">)</span> + + <span class="n">DATE_FORMAT</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'0'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">1000000000</span><span class="p">),</span> + <span class="s1">'1'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">100000000</span><span class="p">),</span> + <span class="s1">'2'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">10000000</span><span class="p">),</span> + <span class="s1">'3'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">1000000</span><span class="p">),</span> + <span class="s1">'4'</span><span class="p">:</span> <span class="p">(</span><span 
class="s1">'format_8'</span><span class="p">,</span> <span class="mi">100000</span><span class="p">),</span> + <span class="s1">'5'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">10000</span><span class="p">),</span> + <span class="s1">'6'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">1000</span><span class="p">),</span> + <span class="s1">'7'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">100</span><span class="p">),</span> + <span class="s1">'8'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span> + <span class="s1">'9'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_9'</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="c1"># year</span> + <span class="s1">'10'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_10'</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="c1"># month</span> + <span class="s1">'11'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_11'</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> <span class="c1"># day</span> + <span class="s1">'12'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_13'</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> <span class="c1"># hour (not supported by babel, display minute)</span> + <span class="s1">'13'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_13'</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> <span 
class="c1"># minute</span> + <span class="s1">'14'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_14'</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> <span class="c1"># second</span> + <span class="p">}</span> + + <span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + <span class="k">if</span> <span class="n">value</span> <span class="o">==</span> <span class="s1">''</span> <span class="ow">or</span> <span class="n">value</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + <span class="n">precision</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'timePrecision'</span><span class="p">)</span> + <span class="n">date_format</span> <span class="o">=</span> <span class="n">WDDateAttribute</span><span class="o">.</span><span class="n">DATE_FORMAT</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">precision</span><span class="p">)</span> + <span class="k">if</span> <span class="n">date_format</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">format_method</span> <span class="o">=</span> <span class="nb">getattr</span><span 
class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">date_format</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> + <span class="n">precision</span> <span class="o">=</span> <span class="n">date_format</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> + <span class="k">try</span><span class="p">:</span> + <span class="k">if</span> <span class="n">precision</span> <span class="o">>=</span> <span class="mi">1</span><span class="p">:</span> + <span class="n">t</span> <span class="o">=</span> <span class="n">value</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">value</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'-'</span><span class="p">):</span> + <span class="n">value</span> <span class="o">=</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">t</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">t</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + <span class="k">return</span> <span class="n">format_method</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span> + <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span> + <span class="k">return</span> <span class="n">value</span> + <span class="k">return</span> <span class="n">value</span> + + +<span class="k">def</span> <span class="nf">debug_explain_wikidata_query</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span 
class="n">method</span><span class="o">=</span><span class="s1">'GET'</span><span class="p">):</span> + <span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">'GET'</span><span class="p">:</span> + <span class="n">http_response</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">SPARQL_EXPLAIN_URL</span> <span class="o">+</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">}),</span> <span class="n">headers</span><span class="o">=</span><span class="n">get_headers</span><span class="p">())</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">http_response</span> <span class="o">=</span> <span class="n">post</span><span class="p">(</span><span class="n">SPARQL_EXPLAIN_URL</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">},</span> <span class="n">headers</span><span class="o">=</span><span class="n">get_headers</span><span class="p">())</span> + <span class="n">http_response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span> + <span class="k">return</span> <span class="n">http_response</span><span class="o">.</span><span class="n">content</span> + + +<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> <span class="c1"># pylint: disable=unused-argument</span> + <span class="c1"># WIKIDATA_PROPERTIES : add unit symbols</span> + <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> 
<span class="n">WIKIDATA_UNITS</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="n">WIKIDATA_PROPERTIES</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">v</span><span class="p">[</span><span class="s1">'symbol'</span><span class="p">]</span> + + <span class="c1"># WIKIDATA_PROPERTIES : add property labels</span> + <span class="n">wikidata_property_names</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">attribute</span> <span class="ow">in</span> <span class="n">get_attributes</span><span class="p">(</span><span class="s1">'en'</span><span class="p">):</span> + <span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">attribute</span><span class="p">)</span> <span class="ow">in</span> <span class="p">(</span><span class="n">WDAttribute</span><span class="p">,</span> <span class="n">WDAmountAttribute</span><span class="p">,</span> <span class="n">WDURLAttribute</span><span class="p">,</span> <span class="n">WDDateAttribute</span><span class="p">,</span> <span class="n">WDLabelAttribute</span><span class="p">):</span> + <span class="k">if</span> <span class="n">attribute</span><span class="o">.</span><span class="n">name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">WIKIDATA_PROPERTIES</span><span class="p">:</span> + <span class="n">wikidata_property_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s2">"wd:"</span> <span class="o">+</span> <span class="n">attribute</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + <span class="n">query</span> <span class="o">=</span> <span class="n">QUERY_PROPERTY_NAMES</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%ATTRIBUTES%'</span><span 
class="p">,</span> <span class="s2">" "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">wikidata_property_names</span><span class="p">))</span> + <span class="n">jsonresponse</span> <span class="o">=</span> <span class="n">send_wikidata_query</span><span class="p">(</span><span class="n">query</span><span class="p">)</span> + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">jsonresponse</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'results'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'bindings'</span><span class="p">,</span> <span class="p">{}):</span> + <span class="n">name</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'name'</span><span class="p">][</span><span class="s1">'value'</span><span class="p">]</span> + <span class="n">lang</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'name'</span><span class="p">][</span><span class="s1">'xml:lang'</span><span class="p">]</span> + <span class="n">entity_id</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'item'</span><span class="p">][</span><span class="s1">'value'</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'http://www.wikidata.org/entity/'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span> + <span class="n">WIKIDATA_PROPERTIES</span><span class="p">[(</span><span class="n">entity_id</span><span class="p">,</span> <span class="n">lang</span><span class="p">)]</span> <span class="o">=</span> <span class="n">name</span><span class="o">.</span><span class="n">capitalize</span><span 
class="p">()</span> + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/wikipedia.html#searx.engines.wikidata.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Uses languages evaluated from :py:obj:`wikipedia.fetch_wikimedia_traits</span> +<span class="sd"> <searx.engines.wikipedia.fetch_wikimedia_traits>` and removes</span> + +<span class="sd"> - ``traits.custom['wiki_netloc']``: wikidata does not have net-locations for</span> +<span class="sd"> the languages and the list of all</span> + +<span class="sd"> - ``traits.custom['WIKIPEDIA_LANGUAGES']``: not used in the wikipedia engine</span> + +<span class="sd"> """</span> + + <span class="n">fetch_wikimedia_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">)</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'WIKIPEDIA_LANGUAGES'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a 
class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/wikipedia.html b/_modules/searx/engines/wikipedia.html new file mode 100644 index 000000000..71fc01fd0 --- /dev/null +++ b/_modules/searx/engines/wikipedia.html @@ -0,0 +1,443 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.wikipedia — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.wikipedia</a></li> + </ul> + 
</div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.wikipedia</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""This module implements the Wikipedia engine. Some of this implementations</span> +<span class="sd">are shared by other engines:</span> + +<span class="sd">- :ref:`wikidata engine`</span> + +<span class="sd">The list of supported languages is :py:obj:`fetched <fetch_wikimedia_traits>` from</span> +<span class="sd">the article linked by :py:obj:`list_of_wikipedias`.</span> + +<span class="sd">Unlike traditional search engines, wikipedia does not support one Wikipedia for</span> +<span class="sd">all languages, but there is one Wikipedia for each supported language. Some of</span> +<span class="sd">these Wikipedias have a LanguageConverter_ enabled</span> +<span class="sd">(:py:obj:`rest_v1_summary_url`).</span> + +<span class="sd">A LanguageConverter_ (LC) is a system based on language variants that</span> +<span class="sd">automatically converts the content of a page into a different variant. A variant</span> +<span class="sd">is mostly the same language in a different script.</span> + +<span class="sd">- `Wikipedias in multiple writing systems`_</span> +<span class="sd">- `Automatic conversion between traditional and simplified Chinese characters`_</span> + +<span class="sd">PR-2554_:</span> +<span class="sd"> The Wikipedia link returned by the API is still the same in all cases</span> +<span class="sd"> (`https://zh.wikipedia.org/wiki/出租車`_) but if your browser's</span> +<span class="sd"> ``Accept-Language`` is set to any of ``zh``, ``zh-CN``, ``zh-TW``, ``zh-HK``</span> +<span class="sd"> or .. 
Wikipedia's LC automatically returns the desired script in their</span> +<span class="sd"> web-page.</span> + +<span class="sd"> - You can test the API here: https://reqbin.com/gesg2kvx</span> + +<span class="sd">.. _https://zh.wikipedia.org/wiki/出租車:</span> +<span class="sd"> https://zh.wikipedia.org/wiki/%E5%87%BA%E7%A7%9F%E8%BB%8A</span> + +<span class="sd">To support Wikipedia's LanguageConverter_, a SearXNG request to Wikipedia uses</span> +<span class="sd">:py:obj:`get_wiki_params` and :py:obj:`wiki_lc_locale_variants' in the</span> +<span class="sd">:py:obj:`fetch_wikimedia_traits` function.</span> + +<span class="sd">To test in SearXNG, query for ``!wp 出租車`` with each of the available Chinese</span> +<span class="sd">options:</span> + +<span class="sd">- ``!wp 出租車 :zh`` should show 出租車</span> +<span class="sd">- ``!wp 出租車 :zh-CN`` should show 出租车</span> +<span class="sd">- ``!wp 出租車 :zh-TW`` should show 計程車</span> +<span class="sd">- ``!wp 出租車 :zh-HK`` should show 的士</span> +<span class="sd">- ``!wp 出租車 :zh-SG`` should show 德士</span> + +<span class="sd">.. _LanguageConverter:</span> +<span class="sd"> https://www.mediawiki.org/wiki/Writing_systems#LanguageConverter</span> +<span class="sd">.. _Wikipedias in multiple writing systems:</span> +<span class="sd"> https://meta.wikimedia.org/wiki/Wikipedias_in_multiple_writing_systems</span> +<span class="sd">.. _Automatic conversion between traditional and simplified Chinese characters:</span> +<span class="sd"> https://en.wikipedia.org/wiki/Chinese_Wikipedia#Automatic_conversion_between_traditional_and_simplified_Chinese_characters</span> +<span class="sd">.. 
_PR-2554: https://github.com/searx/searx/pull/2554</span> + +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">urllib.parse</span> +<span class="kn">import</span> <span class="nn">babel</span> + +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">utils</span> +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">network</span> <span class="k">as</span> <span class="n">_network</span> +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">locales</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.wikipedia.org/'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q52'</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://en.wikipedia.org/api/'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="n">display_type</span> <span class="o">=</span> <span 
class="p">[</span><span class="s2">"infobox"</span><span class="p">]</span> +<span class="sd">"""A list of display types composed from ``infobox`` and ``list``. The latter</span> +<span class="sd">one will add a hit to the result list. The first one will show a hit in the</span> +<span class="sd">info box. Both values can be set, or one of the two can be set."""</span> + +<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span> +<span class="sd">"""The HTTP ``Accept-Language`` header is needed for wikis where</span> +<span class="sd">LanguageConverter_ is enabled."""</span> + +<span class="n">list_of_wikipedias</span> <span class="o">=</span> <span class="s1">'https://meta.wikimedia.org/wiki/List_of_Wikipedias'</span> +<span class="sd">"""`List of all wikipedias <https://meta.wikimedia.org/wiki/List_of_Wikipedias>`_</span> +<span class="sd">"""</span> + +<span class="n">wikipedia_article_depth</span> <span class="o">=</span> <span class="s1">'https://meta.wikimedia.org/wiki/Wikipedia_article_depth'</span> +<span class="sd">"""The *editing depth* of Wikipedia is one of several possible rough indicators</span> +<span class="sd">of the encyclopedia's collaborative quality, showing how frequently its articles</span> +<span class="sd">are updated. 
The measurement of depth was introduced after some limitations of</span> +<span class="sd">the classic measurement of article count were realized.</span> +<span class="sd">"""</span> + +<span class="n">rest_v1_summary_url</span> <span class="o">=</span> <span class="s1">'https://</span><span class="si">{wiki_netloc}</span><span class="s1">/api/rest_v1/page/summary/</span><span class="si">{title}</span><span class="s1">'</span> +<span class="sd">"""</span> +<span class="sd">`wikipedia rest_v1 summary API`_:</span> +<span class="sd"> The summary response includes an extract of the first paragraph of the page in</span> +<span class="sd"> plain text and HTML as well as the type of page. This is useful for page</span> +<span class="sd"> previews (fka. Hovercards, aka. Popups) on the web and link previews in the</span> +<span class="sd"> apps.</span> + +<span class="sd">HTTP ``Accept-Language`` header (:py:obj:`send_accept_language_header`):</span> +<span class="sd"> The desired language variant code for wikis where LanguageConverter_ is</span> +<span class="sd"> enabled.</span> + +<span class="sd">.. 
_wikipedia rest_v1 summary API:</span> +<span class="sd"> https://en.wikipedia.org/api/rest_v1/#/Page%20content/get_page_summary__title_</span> + +<span class="sd">"""</span> + +<span class="n">wiki_lc_locale_variants</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"zh"</span><span class="p">:</span> <span class="p">(</span> + <span class="s2">"zh-CN"</span><span class="p">,</span> + <span class="s2">"zh-HK"</span><span class="p">,</span> + <span class="s2">"zh-MO"</span><span class="p">,</span> + <span class="s2">"zh-MY"</span><span class="p">,</span> + <span class="s2">"zh-SG"</span><span class="p">,</span> + <span class="s2">"zh-TW"</span><span class="p">,</span> + <span class="p">),</span> + <span class="s2">"zh-classical"</span><span class="p">:</span> <span class="p">(</span><span class="s2">"zh-classical"</span><span class="p">,),</span> +<span class="p">}</span> +<span class="sd">"""Mapping rule of the LanguageConverter_ to map a language and its variants to</span> +<span class="sd">a Locale (used in the HTTP ``Accept-Language`` header). For example see `LC</span> +<span class="sd">Chinese`_.</span> + +<span class="sd">.. 
_LC Chinese:</span> +<span class="sd"> https://meta.wikimedia.org/wiki/Wikipedias_in_multiple_writing_systems#Chinese</span> +<span class="sd">"""</span> + +<span class="n">wikipedia_script_variants</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"zh"</span><span class="p">:</span> <span class="p">(</span> + <span class="s2">"zh_Hant"</span><span class="p">,</span> + <span class="s2">"zh_Hans"</span><span class="p">,</span> + <span class="p">)</span> +<span class="p">}</span> + + +<div class="viewcode-block" id="get_wiki_params"> +<a class="viewcode-back" href="../../../dev/engines/online/wikipedia.html#searx.engines.wikipedia.get_wiki_params">[docs]</a> +<span class="k">def</span> <span class="nf">get_wiki_params</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">eng_traits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns the Wikipedia language tag and the netloc that fits to the</span> +<span class="sd"> ``sxng_locale``. 
To support LanguageConverter_ this function rates a locale</span> +<span class="sd"> (region) higher than a language (compare :py:obj:`wiki_lc_locale_variants`).</span> + +<span class="sd"> """</span> + <span class="n">eng_tag</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="s1">'en'</span><span class="p">))</span> + <span class="n">wiki_netloc</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">,</span> <span class="s1">'en.wikipedia.org'</span><span class="p">)</span> + <span class="k">return</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="n">wiki_netloc</span></div> + + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/wikipedia.html#searx.engines.wikipedia.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Assemble a request (`wikipedia rest_v1 summary API`_)."""</span> + <span class="k">if</span> <span class="n">query</span><span class="o">.</span><span class="n">islower</span><span class="p">():</span> + <span class="n">query</span> <span class="o">=</span> <span class="n">query</span><span class="o">.</span><span class="n">title</span><span class="p">()</span> + + <span 
class="n">_eng_tag</span><span class="p">,</span> <span class="n">wiki_netloc</span> <span class="o">=</span> <span class="n">get_wiki_params</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="p">)</span> + <span class="n">title</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">quote</span><span class="p">(</span><span class="n">query</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">rest_v1_summary_url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">wiki_netloc</span><span class="o">=</span><span class="n">wiki_netloc</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'raise_for_httperror'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'soft_max_redirects'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">2</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + +<span class="c1"># get response from search-request</span> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">404</span><span class="p">:</span> + 
<span class="k">return</span> <span class="p">[]</span> + <span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">400</span><span class="p">:</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">api_result</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span> + <span class="k">pass</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">if</span> <span class="p">(</span> + <span class="n">api_result</span><span class="p">[</span><span class="s1">'type'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'https://mediawiki.org/wiki/HyperSwitch/errors/bad_request'</span> + <span class="ow">and</span> <span class="n">api_result</span><span class="p">[</span><span class="s1">'detail'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'title-invalid-characters'</span> + <span class="p">):</span> + <span class="k">return</span> <span class="p">[]</span> + + <span class="n">_network</span><span class="o">.</span><span class="n">raise_for_httperror</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> + + <span class="n">api_result</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> + <span class="n">title</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">html_to_text</span><span class="p">(</span><span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'titles'</span><span class="p">,</span> <span class="p">{})</span><span 
class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'display'</span><span class="p">)</span> <span class="ow">or</span> <span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'title'</span><span class="p">))</span> + <span class="n">wikipedia_link</span> <span class="o">=</span> <span class="n">api_result</span><span class="p">[</span><span class="s1">'content_urls'</span><span class="p">][</span><span class="s1">'desktop'</span><span class="p">][</span><span class="s1">'page'</span><span class="p">]</span> + + <span class="k">if</span> <span class="s2">"list"</span> <span class="ow">in</span> <span class="n">display_type</span> <span class="ow">or</span> <span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'type'</span><span class="p">)</span> <span class="o">!=</span> <span class="s1">'standard'</span><span class="p">:</span> + <span class="c1"># show item in the result list if 'list' is in the display options or it</span> + <span class="c1"># is a item that can't be displayed in a infobox.</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">wikipedia_link</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'description'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)})</span> + + <span class="k">if</span> <span class="s2">"infobox"</span> <span class="ow">in</span> <span class="n">display_type</span><span class="p">:</span> + <span class="k">if</span> <span 
class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'type'</span><span class="p">)</span> <span class="o">==</span> <span class="s1">'standard'</span><span class="p">:</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'infobox'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'id'</span><span class="p">:</span> <span class="n">wikipedia_link</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'extract'</span><span class="p">,</span> <span class="s1">''</span><span class="p">),</span> + <span class="s1">'img_src'</span><span class="p">:</span> <span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'thumbnail'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'source'</span><span class="p">),</span> + <span class="s1">'urls'</span><span class="p">:</span> <span class="p">[{</span><span class="s1">'title'</span><span class="p">:</span> <span class="s1">'Wikipedia'</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">wikipedia_link</span><span class="p">}],</span> + <span class="p">}</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">results</span> + + +<span class="c1"># Nonstandard language codes</span> +<span class="c1">#</span> +<span class="c1"># These Wikipedias use language codes that do not conform to the ISO 639</span> +<span class="c1"># standard (which is how wiki subdomains are chosen nowadays).</span> + +<span 
class="n">lang_map</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">LOCALE_BEST_MATCH</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> +<span class="n">lang_map</span><span class="o">.</span><span class="n">update</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'be-tarask'</span><span class="p">:</span> <span class="s1">'bel'</span><span class="p">,</span> + <span class="s1">'ak'</span><span class="p">:</span> <span class="s1">'aka'</span><span class="p">,</span> + <span class="s1">'als'</span><span class="p">:</span> <span class="s1">'gsw'</span><span class="p">,</span> + <span class="s1">'bat-smg'</span><span class="p">:</span> <span class="s1">'sgs'</span><span class="p">,</span> + <span class="s1">'cbk-zam'</span><span class="p">:</span> <span class="s1">'cbk'</span><span class="p">,</span> + <span class="s1">'fiu-vro'</span><span class="p">:</span> <span class="s1">'vro'</span><span class="p">,</span> + <span class="s1">'map-bms'</span><span class="p">:</span> <span class="s1">'map'</span><span class="p">,</span> + <span class="s1">'no'</span><span class="p">:</span> <span class="s1">'nb-NO'</span><span class="p">,</span> + <span class="s1">'nrm'</span><span class="p">:</span> <span class="s1">'nrf'</span><span class="p">,</span> + <span class="s1">'roa-rup'</span><span class="p">:</span> <span class="s1">'rup'</span><span class="p">,</span> + <span class="s1">'nds-nl'</span><span class="p">:</span> <span class="s1">'nds'</span><span class="p">,</span> + <span class="c1">#'simple: – invented code used for the Simple English Wikipedia (not the official IETF code en-simple)</span> + <span class="s1">'zh-min-nan'</span><span class="p">:</span> <span class="s1">'nan'</span><span class="p">,</span> + <span class="s1">'zh-yue'</span><span class="p">:</span> <span class="s1">'yue'</span><span class="p">,</span> + <span class="s1">'an'</span><span 
class="p">:</span> <span class="s1">'arg'</span><span class="p">,</span> + <span class="p">}</span> +<span class="p">)</span> + + +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> + <span class="n">fetch_wikimedia_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">)</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"WIKIPEDIA_LANGUAGES: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="nb">len</span><span class="p">(</span><span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'WIKIPEDIA_LANGUAGES'</span><span class="p">]))</span> + + +<div class="viewcode-block" id="fetch_wikimedia_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/wikipedia.html#searx.engines.wikipedia.fetch_wikimedia_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_wikimedia_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch languages from Wikipedia. Not all languages from the</span> +<span class="sd"> :py:obj:`list_of_wikipedias` are supported by SearXNG locales, only those</span> +<span class="sd"> known from :py:obj:`searx.locales.LOCALE_NAMES` or those with a minimal</span> +<span class="sd"> :py:obj:`editing depth <wikipedia_article_depth>`.</span> + +<span class="sd"> The location of the Wikipedia address of a language is mapped in a</span> +<span class="sd"> :py:obj:`custom field <searx.enginelib.traits.EngineTraits.custom>`</span> +<span class="sd"> (``wiki_netloc``). Here is a reduced example:</span> + +<span class="sd"> .. 
code:: python</span> + +<span class="sd"> traits.custom['wiki_netloc'] = {</span> +<span class="sd"> "en": "en.wikipedia.org",</span> +<span class="sd"> ..</span> +<span class="sd"> "gsw": "als.wikipedia.org",</span> +<span class="sd"> ..</span> +<span class="sd"> "zh": "zh.wikipedia.org",</span> +<span class="sd"> "zh-classical": "zh-classical.wikipedia.org"</span> +<span class="sd"> }</span> +<span class="sd"> """</span> + <span class="c1"># pylint: disable=too-many-branches</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'WIKIPEDIA_LANGUAGES'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="c1"># insert alias to map from a script or region to a wikipedia variant</span> + + <span class="k">for</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="n">sxng_tag_list</span> <span class="ow">in</span> <span class="n">wikipedia_script_variants</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="k">for</span> <span class="n">sxng_tag</span> <span class="ow">in</span> <span class="n">sxng_tag_list</span><span class="p">:</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> + <span class="k">for</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="n">sxng_tag_list</span> <span class="ow">in</span> <span class="n">wiki_lc_locale_variants</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span 
class="k">for</span> <span class="n">sxng_tag</span> <span class="ow">in</span> <span class="n">sxng_tag_list</span><span class="p">:</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">_network</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">list_of_wikipedias</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from Wikipedia is not OK."</span><span class="p">)</span> + + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//table[contains(@class,"sortable")]//tbody/tr'</span><span class="p">):</span> + + <span class="n">cols</span> <span class="o">=</span> <span class="n">row</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'./td'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">cols</span><span class="p">:</span> + <span class="k">continue</span> + <span class="n">cols</span> <span class="o">=</span> <span class="p">[</span><span class="n">c</span><span class="o">.</span><span class="n">text_content</span><span class="p">()</span><span 
class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">]</span> + + <span class="n">depth</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">cols</span><span class="p">[</span><span class="mi">11</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">'0'</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">','</span><span class="p">,</span> <span class="s1">''</span><span class="p">))</span> + <span class="n">articles</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">cols</span><span class="p">[</span><span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">','</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">','</span><span class="p">,</span> <span class="s1">''</span><span class="p">))</span> + + <span class="n">eng_tag</span> <span class="o">=</span> <span class="n">cols</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span> + <span class="n">wiki_url</span> <span class="o">=</span> <span class="n">row</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'./td[4]/a/@href'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">wiki_url</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">urlparse</span><span 
class="p">(</span><span class="n">wiki_url</span><span class="p">)</span> + + <span class="k">try</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">lang_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">),</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">))</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="c1"># print("ERROR: %s [%s] is unknown by babel" % (cols[0], eng_tag))</span> + <span class="k">continue</span> + <span class="k">finally</span><span class="p">:</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'WIKIPEDIA_LANGUAGES'</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">sxng_tag</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">locales</span><span class="o">.</span><span class="n">LOCALE_NAMES</span><span class="p">:</span> + + <span class="k">if</span> <span class="n">articles</span> <span class="o"><</span> <span class="mi">10000</span><span class="p">:</span> + <span class="c1"># exclude languages with too few articles</span> + <span class="k">continue</span> + + <span class="k">if</span> <span class="nb">int</span><span class="p">(</span><span 
class="n">depth</span><span class="p">)</span> <span class="o"><</span> <span class="mi">20</span><span class="p">:</span> + <span class="c1"># Rough indicator of a Wikipedia’s quality, showing how</span> + <span class="c1"># frequently its articles are updated.</span> + <span class="k">continue</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span> + <span class="k">continue</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">][</span><span class="n">eng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">wiki_url</span><span class="o">.</span><span class="n">netloc</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span 
class="p">[</span><span class="s1">'WIKIPEDIA_LANGUAGES'</span><span class="p">]</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div 
class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/xpath.html b/_modules/searx/engines/xpath.html new file mode 100644 index 000000000..e5839f37b --- /dev/null +++ b/_modules/searx/engines/xpath.html @@ -0,0 +1,446 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.xpath — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.xpath</a></li> + </ul> + </div> + + <div 
class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.xpath</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""The XPath engine is a *generic* engine with which it is possible to configure</span> +<span class="sd">engines in the settings.</span> + +<span class="sd">.. _XPath selector: https://quickref.me/xpath.html#xpath-selectors</span> + +<span class="sd">Configuration</span> +<span class="sd">=============</span> + +<span class="sd">Request:</span> + +<span class="sd">- :py:obj:`search_url`</span> +<span class="sd">- :py:obj:`lang_all`</span> +<span class="sd">- :py:obj:`soft_max_redirects`</span> +<span class="sd">- :py:obj:`method`</span> +<span class="sd">- :py:obj:`request_body`</span> +<span class="sd">- :py:obj:`cookies`</span> +<span class="sd">- :py:obj:`headers`</span> + +<span class="sd">Paging:</span> + +<span class="sd">- :py:obj:`paging`</span> +<span class="sd">- :py:obj:`page_size`</span> +<span class="sd">- :py:obj:`first_page_num`</span> + +<span class="sd">Time Range:</span> + +<span class="sd">- :py:obj:`time_range_support`</span> +<span class="sd">- :py:obj:`time_range_url`</span> +<span class="sd">- :py:obj:`time_range_map`</span> + +<span class="sd">Safe-Search:</span> + +<span class="sd">- :py:obj:`safe_search_support`</span> +<span class="sd">- :py:obj:`safe_search_map`</span> + +<span class="sd">Response:</span> + +<span class="sd">- :py:obj:`no_result_for_http_status`</span> + +<span class="sd">`XPath selector`_:</span> + +<span class="sd">- :py:obj:`results_xpath`</span> +<span class="sd">- :py:obj:`url_xpath`</span> +<span class="sd">- :py:obj:`title_xpath`</span> +<span class="sd">- :py:obj:`content_xpath`</span> +<span class="sd">- :py:obj:`thumbnail_xpath`</span> +<span class="sd">- :py:obj:`suggestion_xpath`</span> + + +<span class="sd">Example</span> 
+<span class="sd">=======</span> + +<span class="sd">Here is a simple example of a XPath engine configured in the :ref:`settings</span> +<span class="sd">engine` section, further read :ref:`engines-dev`.</span> + +<span class="sd">.. code:: yaml</span> + +<span class="sd"> - name : bitbucket</span> +<span class="sd"> engine : xpath</span> +<span class="sd"> paging : True</span> +<span class="sd"> search_url : https://bitbucket.org/repo/all/{pageno}?name={query}</span> +<span class="sd"> url_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]/@href</span> +<span class="sd"> title_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]</span> +<span class="sd"> content_xpath : //article[@class="repo-summary"]/p</span> + +<span class="sd">Implementations</span> +<span class="sd">===============</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span> + +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">extract_url</span><span class="p">,</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">eval_xpath_list</span> +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">raise_for_httperror</span> + +<span class="n">search_url</span> <span class="o">=</span> <span class="kc">None</span> +<span class="sd">"""</span> +<span class="sd">Search URL of the engine. 
Example::</span> + +<span class="sd"> https://example.org/?search={query}&page={pageno}{time_range}{safe_search}</span> + +<span class="sd">Replacements are:</span> + +<span class="sd">``{query}``:</span> +<span class="sd"> Search terms from user.</span> + +<span class="sd">``{pageno}``:</span> +<span class="sd"> Page number if engine supports paging :py:obj:`paging`</span> + +<span class="sd">``{lang}``:</span> +<span class="sd"> ISO 639-1 language code (en, de, fr ..)</span> + +<span class="sd">``{time_range}``:</span> +<span class="sd"> :py:obj:`URL parameter <time_range_url>` if engine :py:obj:`supports time</span> +<span class="sd"> range <time_range_support>`. The value for the parameter is taken from</span> +<span class="sd"> :py:obj:`time_range_map`.</span> + +<span class="sd">``{safe_search}``:</span> +<span class="sd"> Safe-search :py:obj:`URL parameter <safe_search_map>` if engine</span> +<span class="sd"> :py:obj:`supports safe-search <safe_search_support>`. The ``{safe_search}``</span> +<span class="sd"> replacement is taken from the :py:obj:`safes_search_map`. Filter results::</span> + +<span class="sd"> 0: none, 1: moderate, 2:strict</span> + +<span class="sd"> If not supported, the URL parameter is an empty string.</span> + +<span class="sd">"""</span> + +<span class="n">lang_all</span> <span class="o">=</span> <span class="s1">'en'</span> +<span class="sd">'''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is</span> +<span class="sd">selected.</span> +<span class="sd">'''</span> + +<span class="n">no_result_for_http_status</span> <span class="o">=</span> <span class="p">[]</span> +<span class="sd">'''Return empty result for these HTTP status codes instead of throwing an error.</span> + +<span class="sd">.. 
code:: yaml</span> + +<span class="sd"> no_result_for_http_status: []</span> +<span class="sd">'''</span> + +<span class="n">soft_max_redirects</span> <span class="o">=</span> <span class="mi">0</span> +<span class="sd">'''Maximum redirects, soft limit. Record an error but don't stop the engine'''</span> + +<span class="n">results_xpath</span> <span class="o">=</span> <span class="s1">''</span> +<span class="sd">'''`XPath selector`_ for the list of result items'''</span> + +<span class="n">url_xpath</span> <span class="o">=</span> <span class="kc">None</span> +<span class="sd">'''`XPath selector`_ of result's ``url``.'''</span> + +<span class="n">content_xpath</span> <span class="o">=</span> <span class="kc">None</span> +<span class="sd">'''`XPath selector`_ of result's ``content``.'''</span> + +<span class="n">title_xpath</span> <span class="o">=</span> <span class="kc">None</span> +<span class="sd">'''`XPath selector`_ of result's ``title``.'''</span> + +<span class="n">thumbnail_xpath</span> <span class="o">=</span> <span class="kc">False</span> +<span class="sd">'''`XPath selector`_ of result's ``thumbnail``.'''</span> + +<span class="n">suggestion_xpath</span> <span class="o">=</span> <span class="s1">''</span> +<span class="sd">'''`XPath selector`_ of result's ``suggestion``.'''</span> + +<span class="n">cached_xpath</span> <span class="o">=</span> <span class="s1">''</span> +<span class="n">cached_url</span> <span class="o">=</span> <span class="s1">''</span> + +<span class="n">cookies</span> <span class="o">=</span> <span class="p">{}</span> +<span class="sd">'''Some engines might offer different result based on cookies.</span> +<span class="sd">Possible use-case: To set safesearch cookie.'''</span> + +<span class="n">headers</span> <span class="o">=</span> <span class="p">{}</span> +<span class="sd">'''Some engines might offer different result based headers. 
Possible use-case:</span> +<span class="sd">To set header to moderate.'''</span> + +<span class="n">method</span> <span class="o">=</span> <span class="s1">'GET'</span> +<span class="sd">'''Some engines might require to do POST requests for search.'''</span> + +<span class="n">request_body</span> <span class="o">=</span> <span class="s1">''</span> +<span class="sd">'''The body of the request. This can only be used if different :py:obj:`method`</span> +<span class="sd">is set, e.g. ``POST``. For formatting see the documentation of :py:obj:`search_url`::</span> + +<span class="sd"> search={query}&page={pageno}{time_range}{safe_search}</span> +<span class="sd">'''</span> + +<span class="n">paging</span> <span class="o">=</span> <span class="kc">False</span> +<span class="sd">'''Engine supports paging [True or False].'''</span> + +<span class="n">page_size</span> <span class="o">=</span> <span class="mi">1</span> +<span class="sd">'''Number of results on each page. Only needed if the site requires not a page</span> +<span class="sd">number, but an offset.'''</span> + +<span class="n">first_page_num</span> <span class="o">=</span> <span class="mi">1</span> +<span class="sd">'''Number of the first page (usually 0 or 1).'''</span> + +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">False</span> +<span class="sd">'''Engine supports search time range.'''</span> + +<span class="n">time_range_url</span> <span class="o">=</span> <span class="s1">'&hours=</span><span class="si">{time_range_val}</span><span class="s1">'</span> +<span class="sd">'''Time range URL parameter in the in :py:obj:`search_url`. If no time range is</span> +<span class="sd">requested by the user, the URL parameter is an empty string. The</span> +<span class="sd">``{time_range_val}`` replacement is taken from the :py:obj:`time_range_map`.</span> + +<span class="sd">.. 
code:: yaml</span> + +<span class="sd"> time_range_url : '&days={time_range_val}'</span> +<span class="sd">'''</span> + +<span class="n">time_range_map</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'day'</span><span class="p">:</span> <span class="mi">24</span><span class="p">,</span> + <span class="s1">'week'</span><span class="p">:</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">7</span><span class="p">,</span> + <span class="s1">'month'</span><span class="p">:</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">30</span><span class="p">,</span> + <span class="s1">'year'</span><span class="p">:</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">365</span><span class="p">,</span> +<span class="p">}</span> +<span class="sd">'''Maps time range value from user to ``{time_range_val}`` in</span> +<span class="sd">:py:obj:`time_range_url`.</span> + +<span class="sd">.. code:: yaml</span> + +<span class="sd"> time_range_map:</span> +<span class="sd"> day: 1</span> +<span class="sd"> week: 7</span> +<span class="sd"> month: 30</span> +<span class="sd"> year: 365</span> +<span class="sd">'''</span> + +<span class="n">safe_search_support</span> <span class="o">=</span> <span class="kc">False</span> +<span class="sd">'''Engine supports safe-search.'''</span> + +<span class="n">safe_search_map</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s1">'&filter=none'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'&filter=moderate'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">'&filter=strict'</span><span class="p">}</span> +<span class="sd">'''Maps safe-search value to ``{safe_search}`` in :py:obj:`search_url`.</span> + +<span class="sd">.. 
code:: yaml</span> + +<span class="sd"> safesearch: true</span> +<span class="sd"> safes_search_map:</span> +<span class="sd"> 0: '&filter=none'</span> +<span class="sd"> 1: '&filter=moderate'</span> +<span class="sd"> 2: '&filter=strict'</span> + +<span class="sd">'''</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/xpath.html#searx.engines.xpath.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">'''Build request parameters (see :ref:`engine request`).'''</span> + <span class="n">lang</span> <span class="o">=</span> <span class="n">lang_all</span> + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">!=</span> <span class="s1">'all'</span><span class="p">:</span> + <span class="n">lang</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">][:</span><span class="mi">2</span><span class="p">]</span> + + <span class="n">time_range</span> <span class="o">=</span> <span class="s1">''</span> + <span class="k">if</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'time_range'</span><span class="p">):</span> + <span class="n">time_range_val</span> <span class="o">=</span> <span class="n">time_range_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'time_range'</span><span class="p">))</span> + <span class="n">time_range</span> <span class="o">=</span> <span class="n">time_range_url</span><span 
class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">time_range_val</span><span class="o">=</span><span class="n">time_range_val</span><span class="p">)</span> + + <span class="n">safe_search</span> <span class="o">=</span> <span class="s1">''</span> + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]:</span> + <span class="n">safe_search</span> <span class="o">=</span> <span class="n">safe_search_map</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]]</span> + + <span class="n">fargs</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'query'</span><span class="p">:</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">})[</span><span class="mi">2</span><span class="p">:],</span> + <span class="s1">'lang'</span><span class="p">:</span> <span class="n">lang</span><span class="p">,</span> + <span class="s1">'pageno'</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">page_size</span> <span class="o">+</span> <span class="n">first_page_num</span><span class="p">,</span> + <span class="s1">'time_range'</span><span class="p">:</span> <span class="n">time_range</span><span class="p">,</span> + <span class="s1">'safe_search'</span><span class="p">:</span> <span class="n">safe_search</span><span class="p">,</span> + <span class="p">}</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span 
class="p">(</span><span class="n">cookies</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">headers</span><span class="p">)</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="n">fargs</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'method'</span><span class="p">]</span> <span class="o">=</span> <span class="n">method</span> + + <span class="k">if</span> <span class="n">request_body</span><span class="p">:</span> + <span class="c1"># don't url-encode the query if it's in the request body</span> + <span class="n">fargs</span><span class="p">[</span><span class="s1">'query'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]</span> <span class="o">=</span> <span class="n">request_body</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="n">fargs</span><span class="p">)</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'soft_max_redirects'</span><span class="p">]</span> <span class="o">=</span> <span class="n">soft_max_redirects</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'raise_for_httperror'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" 
href="../../../dev/engines/xpath.html#searx.engines.xpath.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> <span class="c1"># pylint: disable=too-many-branches</span> +<span class="w"> </span><span class="sd">'''Scrape *results* from the response (see :ref:`engine results`).'''</span> + <span class="k">if</span> <span class="n">no_result_for_http_status</span> <span class="ow">and</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="ow">in</span> <span class="n">no_result_for_http_status</span><span class="p">:</span> + <span class="k">return</span> <span class="p">[]</span> + + <span class="n">raise_for_httperror</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">:</span> + <span class="k">return</span> <span class="n">results</span> + + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + <span class="n">is_onion</span> <span class="o">=</span> <span class="s1">'onions'</span> <span class="ow">in</span> <span class="n">categories</span> + + <span class="k">if</span> <span class="n">results_xpath</span><span class="p">:</span> + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">results_xpath</span><span class="p">):</span> + + <span class="n">url</span> <span 
class="o">=</span> <span class="n">extract_url</span><span class="p">(</span><span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">url_xpath</span><span class="p">,</span> <span class="n">min_len</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span> <span class="n">search_url</span><span class="p">)</span> + <span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">title_xpath</span><span class="p">,</span> <span class="n">min_len</span><span class="o">=</span><span class="mi">1</span><span class="p">))</span> + <span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">content_xpath</span><span class="p">))</span> + <span class="n">tmp_result</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">}</span> + + <span class="c1"># add thumbnail if available</span> + <span class="k">if</span> <span class="n">thumbnail_xpath</span><span class="p">:</span> + <span class="n">thumbnail_xpath_result</span> <span class="o">=</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">thumbnail_xpath</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span 
class="n">thumbnail_xpath_result</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> + <span class="n">tmp_result</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> <span class="o">=</span> <span class="n">extract_url</span><span class="p">(</span><span class="n">thumbnail_xpath_result</span><span class="p">,</span> <span class="n">search_url</span><span class="p">)</span> + + <span class="c1"># add alternative cached url if available</span> + <span class="k">if</span> <span class="n">cached_xpath</span><span class="p">:</span> + <span class="n">tmp_result</span><span class="p">[</span><span class="s1">'cached_url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">cached_url</span> <span class="o">+</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">cached_xpath</span><span class="p">,</span> <span class="n">min_len</span><span class="o">=</span><span class="mi">1</span><span class="p">))</span> + + <span class="k">if</span> <span class="n">is_onion</span><span class="p">:</span> + <span class="n">tmp_result</span><span class="p">[</span><span class="s1">'is_onion'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span> + + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tmp_result</span><span class="p">)</span> + + <span class="k">else</span><span class="p">:</span> + <span class="k">if</span> <span class="n">cached_xpath</span><span class="p">:</span> + <span class="k">for</span> <span class="n">url</span><span class="p">,</span> <span class="n">title</span><span class="p">,</span> <span class="n">content</span><span class="p">,</span> <span class="n">cached</span> <span class="ow">in</span> <span 
class="nb">zip</span><span class="p">(</span> + <span class="p">(</span><span class="n">extract_url</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">search_url</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">url_xpath</span><span class="p">)),</span> + <span class="nb">map</span><span class="p">(</span><span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">title_xpath</span><span class="p">)),</span> + <span class="nb">map</span><span class="p">(</span><span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">content_xpath</span><span class="p">)),</span> + <span class="nb">map</span><span class="p">(</span><span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">cached_xpath</span><span class="p">)),</span> + <span class="p">):</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> + <span class="s1">'cached_url'</span><span class="p">:</span> <span class="n">cached_url</span> <span class="o">+</span> <span 
class="n">cached</span><span class="p">,</span> + <span class="s1">'is_onion'</span><span class="p">:</span> <span class="n">is_onion</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">for</span> <span class="n">url</span><span class="p">,</span> <span class="n">title</span><span class="p">,</span> <span class="n">content</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span> + <span class="p">(</span><span class="n">extract_url</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">search_url</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">url_xpath</span><span class="p">)),</span> + <span class="nb">map</span><span class="p">(</span><span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">title_xpath</span><span class="p">)),</span> + <span class="nb">map</span><span class="p">(</span><span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">content_xpath</span><span class="p">)),</span> + <span class="p">):</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> 
<span class="s1">'is_onion'</span><span class="p">:</span> <span class="n">is_onion</span><span class="p">})</span> + + <span class="k">if</span> <span class="n">suggestion_xpath</span><span class="p">:</span> + <span class="k">for</span> <span class="n">suggestion</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">suggestion_xpath</span><span class="p">):</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)})</span> + + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"found </span><span class="si">%s</span><span class="s2"> results"</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">results</span><span class="p">))</span> + <span class="k">return</span> <span class="n">results</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a 
class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/yahoo.html b/_modules/searx/engines/yahoo.html new file mode 100644 index 000000000..affb5cf22 --- /dev/null +++ b/_modules/searx/engines/yahoo.html @@ -0,0 +1,312 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.yahoo — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.yahoo</a></li> + </ul> + </div> + + <div 
class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.yahoo</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Yahoo Search (Web)</span> + +<span class="sd">Languages are supported by mapping the language to a domain. If domain is not</span> +<span class="sd">found in :py:obj:`lang2domain` URL ``<lang>.search.yahoo.com`` is used.</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">unquote</span><span class="p">,</span> + <span class="n">urlencode</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">eval_xpath_getindex</span><span class="p">,</span> + <span class="n">eval_xpath_list</span><span class="p">,</span> + <span class="n">extract_text</span><span class="p">,</span> + <span class="n">html_to_text</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> + +<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> + +<span class="c1"># about</span> +<span class="n">about</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://search.yahoo.com/'</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span 
class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developer.yahoo.com/api/'</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span> +<span class="p">}</span> + +<span class="c1"># engine dependent config</span> +<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span> +<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span> +<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span> +<span class="c1"># send_accept_language_header = True</span> + +<span class="n">time_range_dict</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'day'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'1d'</span><span class="p">,</span> <span class="s1">'d'</span><span class="p">),</span> + <span class="s1">'week'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'1w'</span><span class="p">,</span> <span class="s1">'w'</span><span class="p">),</span> + <span class="s1">'month'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'1m'</span><span class="p">,</span> <span class="s1">'m'</span><span class="p">),</span> +<span class="p">}</span> + +<span class="n">lang2domain</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'zh_chs'</span><span class="p">:</span> <span class="s1">'hk.search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'zh_cht'</span><span class="p">:</span> <span 
class="s1">'tw.search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'any'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'en'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'bg'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'cs'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'da'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'el'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'et'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'he'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'hr'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'ja'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'ko'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'sk'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> + <span class="s1">'sl'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span> +<span class="p">}</span> +<span class="sd">"""Map language to domain"""</span> + +<span class="n">locale_aliases</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'zh'</span><span class="p">:</span> <span class="s1">'zh_Hans'</span><span class="p">,</span> + <span class="s1">'zh-HK'</span><span class="p">:</span> <span 
class="s1">'zh_Hans'</span><span class="p">,</span> + <span class="s1">'zh-CN'</span><span class="p">:</span> <span class="s1">'zh_Hans'</span><span class="p">,</span> <span class="c1"># dead since 2015 / routed to hk.search.yahoo.com</span> + <span class="s1">'zh-TW'</span><span class="p">:</span> <span class="s1">'zh_Hant'</span><span class="p">,</span> +<span class="p">}</span> + + +<div class="viewcode-block" id="request"> +<a class="viewcode-back" href="../../../dev/engines/online/yahoo.html#searx.engines.yahoo.request">[docs]</a> +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""build request"""</span> + + <span class="n">lang</span> <span class="o">=</span> <span class="n">locale_aliases</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">],</span> <span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">lang</span><span class="p">:</span> + <span class="n">lang</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">lang</span><span class="p">,</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> + + <span class="n">offset</span> <span class="o">=</span> <span 
class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">7</span> <span class="o">+</span> <span class="mi">1</span> + <span class="n">age</span><span class="p">,</span> <span class="n">btf</span> <span class="o">=</span> <span class="n">time_range_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">],</span> <span class="p">(</span><span class="s1">''</span><span class="p">,</span> <span class="s1">''</span><span class="p">))</span> + + <span class="n">args</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'p'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> + <span class="s1">'ei'</span><span class="p">:</span> <span class="s1">'UTF-8'</span><span class="p">,</span> + <span class="s1">'fl'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> + <span class="s1">'vl'</span><span class="p">:</span> <span class="s1">'lang_'</span> <span class="o">+</span> <span class="n">lang</span><span class="p">,</span> + <span class="s1">'btf'</span><span class="p">:</span> <span class="n">btf</span><span class="p">,</span> + <span class="s1">'fr2'</span><span class="p">:</span> <span class="s1">'time'</span><span class="p">,</span> + <span class="s1">'age'</span><span class="p">:</span> <span class="n">age</span><span class="p">,</span> + <span class="s1">'b'</span><span class="p">:</span> <span class="n">offset</span><span class="p">,</span> + <span class="s1">'xargs'</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> + <span class="p">}</span> + <span class="p">)</span> + + <span 
class="n">domain</span> <span class="o">=</span> <span class="n">lang2domain</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">lang</span><span class="p">,</span> <span class="s1">'</span><span class="si">%s</span><span class="s1">.search.yahoo.com'</span> <span class="o">%</span> <span class="n">lang</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'https://</span><span class="si">%s</span><span class="s1">/search?</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span> + <span class="k">return</span> <span class="n">params</span></div> + + + +<div class="viewcode-block" id="parse_url"> +<a class="viewcode-back" href="../../../dev/engines/online/yahoo.html#searx.engines.yahoo.parse_url">[docs]</a> +<span class="k">def</span> <span class="nf">parse_url</span><span class="p">(</span><span class="n">url_string</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""remove yahoo-specific tracking-url"""</span> + + <span class="n">endings</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'/RS'</span><span class="p">,</span> <span class="s1">'/RK'</span><span class="p">]</span> + <span class="n">endpositions</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">start</span> <span class="o">=</span> <span class="n">url_string</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'http'</span><span class="p">,</span> <span class="n">url_string</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'/RU='</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span 
class="p">)</span> + + <span class="k">for</span> <span class="n">ending</span> <span class="ow">in</span> <span class="n">endings</span><span class="p">:</span> + <span class="n">endpos</span> <span class="o">=</span> <span class="n">url_string</span><span class="o">.</span><span class="n">rfind</span><span class="p">(</span><span class="n">ending</span><span class="p">)</span> + <span class="k">if</span> <span class="n">endpos</span> <span class="o">></span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span> + <span class="n">endpositions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">endpos</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">start</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="n">endpositions</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> + <span class="k">return</span> <span class="n">url_string</span> + + <span class="n">end</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">endpositions</span><span class="p">)</span> + <span class="k">return</span> <span class="n">unquote</span><span class="p">(</span><span class="n">url_string</span><span class="p">[</span><span class="n">start</span><span class="p">:</span><span class="n">end</span><span class="p">])</span></div> + + + +<div class="viewcode-block" id="response"> +<a class="viewcode-back" href="../../../dev/engines/online/yahoo.html#searx.engines.yahoo.response">[docs]</a> +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""parse response"""</span> + + <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> + <span 
class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="c1"># parse results</span> + <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class,"algo-sr")]'</span><span class="p">):</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//h3/a/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">continue</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">parse_url</span><span class="p">(</span><span class="n">url</span><span class="p">)</span> + + <span class="n">title</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//h3//a/@aria-label'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span> + <span class="n">title</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">title</span><span class="p">)</span> + <span class="n">content</span> <span class="o">=</span> <span 
class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "compText")]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span> + <span class="n">content</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="n">allow_none</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + + <span class="c1"># append result</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">{</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> + <span class="c1"># title sometimes contains HTML tags / see</span> + <span class="c1"># https://github.com/searxng/searxng/issues/3790</span> + <span class="s1">'title'</span><span class="p">:</span> <span class="s2">" "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">html_to_text</span><span class="p">(</span><span class="n">title</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">()),</span> + <span class="s1">'content'</span><span class="p">:</span> <span class="s2">" "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">html_to_text</span><span class="p">(</span><span class="n">content</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">()),</span> + <span 
class="p">}</span> + <span class="p">)</span> + + <span class="k">for</span> <span class="n">suggestion</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class, "AlsoTry")]//table//a'</span><span class="p">):</span> + <span class="c1"># append suggestion</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)})</span> + + <span class="k">return</span> <span class="n">results</span></div> + + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/yahoo.html#searx.engines.yahoo.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Fetch languages from yahoo"""</span> + + <span class="c1"># pylint: disable=import-outside-toplevel</span> + <span class="kn">import</span> <span class="nn">babel</span> + <span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">network</span> + <span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">'any'</span> + + <span class="n">resp</span> <span class="o">=</span> <span class="n">network</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'https://search.yahoo.com/preferences/languages'</span><span 
class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from yahoo is not OK."</span><span class="p">)</span> + + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + <span class="n">offset</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="s1">'lang_'</span><span class="p">)</span> + + <span class="n">eng2sxng</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'zh_chs'</span><span class="p">:</span> <span class="s1">'zh_Hans'</span><span class="p">,</span> <span class="s1">'zh_cht'</span><span class="p">:</span> <span class="s1">'zh_Hant'</span><span class="p">}</span> + + <span class="k">for</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class, "lang-item")]/input/@value'</span><span class="p">):</span> + <span class="n">eng_tag</span> <span class="o">=</span> <span class="n">val</span><span class="p">[</span><span class="n">offset</span><span class="p">:]</span> + + <span class="k">try</span><span class="p">:</span> + <span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">eng2sxng</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span 
class="p">,</span> <span class="n">eng_tag</span><span class="p">)))</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s1">'ERROR: unknown language --> </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">eng_tag</span><span class="p">)</span> + <span class="k">continue</span> + + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a 
href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div 
class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/engines/zlibrary.html b/_modules/searx/engines/zlibrary.html new file mode 100644 index 000000000..6a986d111 --- /dev/null +++ b/_modules/searx/engines/zlibrary.html @@ -0,0 +1,366 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.engines.zlibrary — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.engines.zlibrary</a></li> + </ul> + </div> + 
+ <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.engines.zlibrary</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""`Z-Library`_ (abbreviated as z-lib, formerly BookFinder) is a shadow library</span> +<span class="sd">project for file-sharing access to scholarly journal articles, academic texts</span> +<span class="sd">and general-interest books. It began as a mirror of Library Genesis, from which</span> +<span class="sd">most of its books originate.</span> + +<span class="sd">.. _Z-Library: https://zlibrary-global.se/</span> + +<span class="sd">Configuration</span> +<span class="sd">=============</span> + +<span class="sd">The engine has the following additional settings:</span> + +<span class="sd">- :py:obj:`zlib_year_from`</span> +<span class="sd">- :py:obj:`zlib_year_to`</span> +<span class="sd">- :py:obj:`zlib_ext`</span> + +<span class="sd">With this options a SearXNG maintainer is able to configure **additional**</span> +<span class="sd">engines for specific searches in Z-Library. For example a engine to search</span> +<span class="sd">only for EPUB from 2010 to 2020.</span> + +<span class="sd">.. 
code:: yaml</span> + +<span class="sd"> - name: z-library 2010s epub</span> +<span class="sd"> engine: zlibrary</span> +<span class="sd"> shortcut: zlib2010s</span> +<span class="sd"> zlib_year_from: '2010'</span> +<span class="sd"> zlib_year_to: '2020'</span> +<span class="sd"> zlib_ext: 'EPUB'</span> + +<span class="sd">Implementations</span> +<span class="sd">===============</span> + +<span class="sd">"""</span> +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Optional</span> +<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">quote</span> +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> +<span class="kn">from</span> <span class="nn">flask_babel</span> <span class="kn">import</span> <span class="n">gettext</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">eval_xpath_list</span> +<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span> +<span class="kn">from</span> <span class="nn">searx.data</span> <span class="kn">import</span> <span class="n">ENGINE_TRAITS</span> +<span 
class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxException</span> + +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">httpx</span> + <span class="kn">import</span> <span class="nn">logging</span> + + <span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span> + +<span class="c1"># about</span> +<span class="n">about</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"website"</span><span class="p">:</span> <span class="s2">"https://zlibrary-global.se"</span><span class="p">,</span> + <span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s2">"Q104863992"</span><span class="p">,</span> + <span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> + <span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="s2">"results"</span><span class="p">:</span> <span class="s2">"HTML"</span><span class="p">,</span> +<span class="p">}</span> + +<span class="n">categories</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"files"</span><span class="p">]</span> +<span class="n">paging</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span> +<span 
class="n">base_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"https://zlibrary-global.se"</span> + +<span class="n">zlib_year_from</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span> +<span class="sd">"""Filter z-library's results by year from. E.g '2010'.</span> +<span class="sd">"""</span> + +<span class="n">zlib_year_to</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span> +<span class="sd">"""Filter z-library's results by year to. E.g. '2010'.</span> +<span class="sd">"""</span> + +<span class="n">zlib_ext</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span> +<span class="sd">"""Filter z-library's results by a file ending. Common filters for example are</span> +<span class="sd">``PDF`` and ``EPUB``.</span> +<span class="sd">"""</span> + + +<div class="viewcode-block" id="init"> +<a class="viewcode-back" href="../../../dev/engines/online/zlibrary.html#searx.engines.zlibrary.init">[docs]</a> +<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> <span class="c1"># pylint: disable=unused-argument</span> +<span class="w"> </span><span class="sd">"""Check of engine's settings."""</span> + <span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> <span class="o">=</span> <span class="n">EngineTraits</span><span class="p">(</span><span class="o">**</span><span class="n">ENGINE_TRAITS</span><span class="p">[</span><span class="s2">"z-library"</span><span class="p">])</span> + + <span class="k">if</span> <span class="n">zlib_ext</span> <span class="ow">and</span> <span class="n">zlib_ext</span> 
<span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ext"</span><span class="p">]:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"invalid setting ext: </span><span class="si">{</span><span class="n">zlib_ext</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + <span class="k">if</span> <span class="n">zlib_year_from</span> <span class="ow">and</span> <span class="n">zlib_year_from</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_from"</span><span class="p">]:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"invalid setting year_from: </span><span class="si">{</span><span class="n">zlib_year_from</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + <span class="k">if</span> <span class="n">zlib_year_to</span> <span class="ow">and</span> <span class="n">zlib_year_to</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_to"</span><span class="p">]:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"invalid setting year_to: </span><span class="si">{</span><span class="n">zlib_year_to</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span></div> + + + +<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span class="nb">str</span><span 
class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span> + <span class="n">lang</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">"language"</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">search_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="p">(</span> + <span class="n">base_url</span> + <span class="o">+</span> <span class="s2">"/s/</span><span class="si">{search_query}</span><span class="s2">/?page=</span><span class="si">{pageno}</span><span class="s2">"</span> + <span class="o">+</span> <span class="s2">"&yearFrom=</span><span class="si">{zlib_year_from}</span><span class="s2">"</span> + <span class="o">+</span> <span class="s2">"&yearTo=</span><span class="si">{zlib_year_to}</span><span class="s2">"</span> + <span class="o">+</span> <span class="s2">"&languages[]=</span><span class="si">{lang}</span><span class="s2">"</span> + <span class="o">+</span> <span class="s2">"&extensions[]=</span><span class="si">{zlib_ext}</span><span class="s2">"</span> + <span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_url</span><span class="o">.</span><span class="n">format</span><span 
class="p">(</span> + <span class="n">search_query</span><span class="o">=</span><span class="n">quote</span><span class="p">(</span><span class="n">query</span><span class="p">),</span> + <span class="n">pageno</span><span class="o">=</span><span class="n">params</span><span class="p">[</span><span class="s2">"pageno"</span><span class="p">],</span> + <span class="n">lang</span><span class="o">=</span><span class="n">lang</span><span class="p">,</span> + <span class="n">zlib_year_from</span><span class="o">=</span><span class="n">zlib_year_from</span><span class="p">,</span> + <span class="n">zlib_year_to</span><span class="o">=</span><span class="n">zlib_year_to</span><span class="p">,</span> + <span class="n">zlib_ext</span><span class="o">=</span><span class="n">zlib_ext</span><span class="p">,</span> + <span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s2">"verify"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span> + <span class="k">return</span> <span class="n">params</span> + + +<span class="k">def</span> <span class="nf">domain_is_seized</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span> + <span class="k">return</span> <span class="nb">bool</span><span class="p">(</span><span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//title'</span><span class="p">)</span> <span class="ow">and</span> <span class="s2">"seized"</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//title'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">lower</span><span class="p">())</span> + + +<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span 
class="n">resp</span><span class="p">:</span> <span class="n">httpx</span><span class="o">.</span><span class="n">Response</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]:</span> + <span class="n">results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">domain_is_seized</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span> + <span class="k">raise</span> <span class="n">SearxException</span><span class="p">(</span><span class="sa">f</span><span class="s2">"zlibrary domain is seized: </span><span class="si">{</span><span class="n">base_url</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'</span><span class="p">):</span> + <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">_parse_result</span><span class="p">(</span><span class="n">item</span><span class="p">))</span> + + <span class="k">return</span> <span 
class="n">results</span> + + +<span class="k">def</span> <span class="nf">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="n">selector</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="n">selector</span><span class="p">))</span> + + +<span class="n">i18n_language</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span><span class="s2">"Language"</span><span class="p">)</span> +<span class="n">i18n_book_rating</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span><span class="s2">"Book rating"</span><span class="p">)</span> +<span class="n">i18n_file_quality</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span><span class="s2">"File quality"</span><span class="p">)</span> + + +<span class="k">def</span> <span class="nf">_parse_result</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span> + + <span class="n">author_elements</span> <span class="o">=</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[@class="authors"]//a[@itemprop="author"]'</span><span class="p">)</span> + + <span class="n">result</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"template"</span><span class="p">:</span> <span class="s2">"paper.html"</span><span 
class="p">,</span> + <span class="s2">"url"</span><span class="p">:</span> <span class="n">base_url</span> <span class="o">+</span> <span class="n">item</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'(.//a[starts-with(@href, "/book/")])[1]/@href'</span><span class="p">)[</span><span class="mi">0</span><span class="p">],</span> + <span class="s2">"title"</span><span class="p">:</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//*[@itemprop="name"]'</span><span class="p">),</span> + <span class="s2">"authors"</span><span class="p">:</span> <span class="p">[</span><span class="n">extract_text</span><span class="p">(</span><span class="n">author</span><span class="p">)</span> <span class="k">for</span> <span class="n">author</span> <span class="ow">in</span> <span class="n">author_elements</span><span class="p">],</span> + <span class="s2">"publisher"</span><span class="p">:</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//a[@title="Publisher"]'</span><span class="p">),</span> + <span class="s2">"type"</span><span class="p">:</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "property__file")]//div[contains(@class, "property_value")]'</span><span class="p">),</span> + <span class="p">}</span> + + <span class="n">thumbnail</span> <span class="o">=</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//img[contains(@class, "cover")]/@data-src'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">thumbnail</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/'</span><span 
class="p">):</span> + <span class="n">result</span><span class="p">[</span><span class="s2">"thumbnail"</span><span class="p">]</span> <span class="o">=</span> <span class="n">thumbnail</span> + + <span class="n">year</span> <span class="o">=</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "property_year")]//div[contains(@class, "property_value")]'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">year</span><span class="p">:</span> + <span class="n">result</span><span class="p">[</span><span class="s2">"publishedDate"</span><span class="p">]</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="s1">'%Y'</span><span class="p">)</span> + + <span class="n">content</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">language</span> <span class="o">=</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "property_language")]//div[contains(@class, "property_value")]'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">language</span><span class="p">:</span> + <span class="n">content</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">i18n_language</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">language</span><span class="o">.</span><span class="n">capitalize</span><span class="p">()</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + <span class="n">book_rating</span> <span class="o">=</span> <span class="n">_text</span><span class="p">(</span><span 
class="n">item</span><span class="p">,</span> <span class="s1">'.//span[contains(@class, "book-rating-interest-score")]'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">book_rating</span> <span class="ow">and</span> <span class="nb">float</span><span class="p">(</span><span class="n">book_rating</span><span class="p">):</span> + <span class="n">content</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">i18n_book_rating</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">book_rating</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + <span class="n">file_quality</span> <span class="o">=</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//span[contains(@class, "book-rating-quality-score")]'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">file_quality</span> <span class="ow">and</span> <span class="nb">float</span><span class="p">(</span><span class="n">file_quality</span><span class="p">):</span> + <span class="n">content</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">i18n_file_quality</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">file_quality</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + <span class="n">result</span><span class="p">[</span><span class="s2">"content"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">" | "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">content</span><span class="p">)</span> + + <span class="k">return</span> <span 
class="n">result</span> + + +<div class="viewcode-block" id="fetch_traits"> +<a class="viewcode-back" href="../../../dev/engines/online/zlibrary.html#searx.engines.zlibrary.fetch_traits">[docs]</a> +<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Fetch languages and other search arguments from zlibrary's search form."""</span> + <span class="c1"># pylint: disable=import-outside-toplevel, too-many-branches</span> + + <span class="kn">import</span> <span class="nn">babel</span> + <span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span> + <span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span> + + <span class="k">def</span> <span class="nf">_use_old_values</span><span class="p">():</span> + <span class="c1"># don't change anything, re-use the existing values</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="n">ENGINE_TRAITS</span><span class="p">[</span><span class="s2">"z-library"</span><span class="p">][</span><span class="s2">"all_locale"</span><span class="p">]</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span> <span class="o">=</span> <span class="n">ENGINE_TRAITS</span><span class="p">[</span><span class="s2">"z-library"</span><span class="p">][</span><span class="s2">"custom"</span><span class="p">]</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span> <span class="o">=</span> 
<span class="n">ENGINE_TRAITS</span><span class="p">[</span><span class="s2">"z-library"</span><span class="p">][</span><span class="s2">"languages"</span><span class="p">]</span> + + <span class="k">try</span><span class="p">:</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">base_url</span><span class="p">,</span> <span class="n">verify</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> + <span class="k">except</span> <span class="n">SearxException</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"ERROR: zlibrary domain '</span><span class="si">{</span><span class="n">base_url</span><span class="si">}</span><span class="s2">' is seized?"</span><span class="p">)</span> + <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">" --> </span><span class="si">{</span><span class="n">exc</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + <span class="n">_use_old_values</span><span class="p">()</span> + <span class="k">return</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span> + <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Response from zlibrary's search page is not OK."</span><span class="p">)</span> + <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span> + + <span class="k">if</span> <span 
class="n">domain_is_seized</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span> + <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"ERROR: zlibrary domain is seized: </span><span class="si">{</span><span class="n">base_url</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + <span class="n">_use_old_values</span><span class="p">()</span> + <span class="k">return</span> + + <span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s2">""</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ext"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_from"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_to"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="k">for</span> <span class="n">year</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//div[@id='advSearch-noJS']//select[@id='sf_yearFrom']/option"</span><span class="p">):</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_from"</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">year</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span 
class="p">))</span> + + <span class="k">for</span> <span class="n">year</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//div[@id='advSearch-noJS']//select[@id='sf_yearTo']/option"</span><span class="p">):</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_to"</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">year</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">))</span> + + <span class="k">for</span> <span class="n">ext</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//div[@id='advSearch-noJS']//select[@id='sf_extensions']/option"</span><span class="p">):</span> + <span class="n">value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">ext</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span> + <span class="k">if</span> <span class="n">value</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">value</span> <span class="o">=</span> <span class="s2">""</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ext"</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> + + <span class="c1"># Handle languages</span> + <span class="c1"># Z-library uses 
English names for languages, so we need to map them to their respective locales</span> + <span class="n">language_name_locale_map</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + <span class="k">for</span> <span class="n">locale</span> <span class="ow">in</span> <span class="n">babel</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">localedata</span><span class="o">.</span><span class="n">locale_identifiers</span><span class="p">():</span> <span class="c1"># type: ignore</span> + <span class="c1"># Create a Locale object for the current locale</span> + <span class="n">loc</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">locale</span><span class="p">)</span> + <span class="k">if</span> <span class="n">loc</span><span class="o">.</span><span class="n">english_name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">continue</span> + <span class="n">language_name_locale_map</span><span class="p">[</span><span class="n">loc</span><span class="o">.</span><span class="n">english_name</span><span class="o">.</span><span class="n">lower</span><span class="p">()]</span> <span class="o">=</span> <span class="n">loc</span> <span class="c1"># type: ignore</span> + + <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//div[@id='advSearch-noJS']//select[@id='sf_languages']/option"</span><span class="p">):</span> + <span 
class="n">eng_lang</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span> + <span class="k">if</span> <span class="n">eng_lang</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">continue</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">language_name_locale_map</span><span class="p">[</span><span class="n">eng_lang</span><span class="o">.</span><span class="n">lower</span><span class="p">()]</span> + <span class="k">except</span> <span class="ne">KeyError</span><span class="p">:</span> + <span class="c1"># silently ignore unknown languages</span> + <span class="c1"># print("ERROR: %s is unknown by babel" % (eng_lang))</span> + <span class="k">continue</span> + <span class="n">sxng_lang</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)</span> + <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">)</span> + <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span> + <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_lang</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span 
class="n">conflict</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">))</span> + <span class="k">continue</span> + <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_lang</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a 
href="../engines.html">searx.engines</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/exceptions.html b/_modules/searx/exceptions.html new file mode 100644 index 000000000..9af3468d4 --- /dev/null +++ b/_modules/searx/exceptions.html @@ -0,0 +1,255 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.exceptions — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.exceptions</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for 
searx.exceptions</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Exception types raised by SearXNG modules.</span> +<span class="sd">"""</span> +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Union</span> + + +<div class="viewcode-block" id="SearxException"> +<a class="viewcode-back" href="../../src/searx.exceptions.html#searx.exceptions.SearxException">[docs]</a> +<span class="k">class</span> <span class="nc">SearxException</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Base SearXNG exception."""</span></div> + + + +<div class="viewcode-block" id="SearxParameterException"> +<a class="viewcode-back" href="../../src/searx.exceptions.html#searx.exceptions.SearxParameterException">[docs]</a> +<span class="k">class</span> <span class="nc">SearxParameterException</span><span class="p">(</span><span class="n">SearxException</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Raised when a query is missing a required parameter"""</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span> + <span class="k">if</span> <span class="n">value</span> <span class="o">==</span> <span class="s1">''</span> <span class="ow">or</span> <span class="n">value</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">message</span> <span class="o">=</span> <span class="s1">'Empty '</span> <span class="o">+</span> <span 
class="n">name</span> <span class="o">+</span> <span class="s1">' parameter'</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">message</span> <span class="o">=</span> <span class="s1">'Invalid value "'</span> <span class="o">+</span> <span class="n">value</span> <span class="o">+</span> <span class="s1">'" for parameter '</span> <span class="o">+</span> <span class="n">name</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">message</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">message</span> <span class="o">=</span> <span class="n">message</span> + <span class="bp">self</span><span class="o">.</span><span class="n">parameter_name</span> <span class="o">=</span> <span class="n">name</span> + <span class="bp">self</span><span class="o">.</span><span class="n">parameter_value</span> <span class="o">=</span> <span class="n">value</span></div> + + + +<div class="viewcode-block" id="SearxSettingsException"> +<a class="viewcode-back" href="../../src/searx.exceptions.html#searx.exceptions.SearxSettingsException">[docs]</a> +<span class="k">class</span> <span class="nc">SearxSettingsException</span><span class="p">(</span><span class="n">SearxException</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Error while loading the settings"""</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="ne">Exception</span><span class="p">],</span> <span class="n">filename</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]):</span> + 
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">message</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">message</span> <span class="o">=</span> <span class="n">message</span> + <span class="bp">self</span><span class="o">.</span><span class="n">filename</span> <span class="o">=</span> <span class="n">filename</span></div> + + + +<div class="viewcode-block" id="SearxEngineException"> +<a class="viewcode-back" href="../../src/searx.exceptions.html#searx.exceptions.SearxEngineException">[docs]</a> +<span class="k">class</span> <span class="nc">SearxEngineException</span><span class="p">(</span><span class="n">SearxException</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Error inside an engine"""</span></div> + + + +<div class="viewcode-block" id="SearxXPathSyntaxException"> +<a class="viewcode-back" href="../../src/searx.exceptions.html#searx.exceptions.SearxXPathSyntaxException">[docs]</a> +<span class="k">class</span> <span class="nc">SearxXPathSyntaxException</span><span class="p">(</span><span class="n">SearxEngineException</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Syntax error in a XPATH"""</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">xpath_spec</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">)</span> <span class="o">+</span> <span class="s2">" "</span> <span class="o">+</span> <span class="n">message</span><span class="p">)</span> + <span 
class="bp">self</span><span class="o">.</span><span class="n">message</span> <span class="o">=</span> <span class="n">message</span> + <span class="c1"># str(xpath_spec) to deal with str and XPath instance</span> + <span class="bp">self</span><span class="o">.</span><span class="n">xpath_str</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="SearxEngineResponseException"> +<a class="viewcode-back" href="../../src/searx.exceptions.html#searx.exceptions.SearxEngineResponseException">[docs]</a> +<span class="k">class</span> <span class="nc">SearxEngineResponseException</span><span class="p">(</span><span class="n">SearxEngineException</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Impossible to parse the result of an engine"""</span></div> + + + +<div class="viewcode-block" id="SearxEngineAPIException"> +<a class="viewcode-back" href="../../src/searx.exceptions.html#searx.exceptions.SearxEngineAPIException">[docs]</a> +<span class="k">class</span> <span class="nc">SearxEngineAPIException</span><span class="p">(</span><span class="n">SearxEngineResponseException</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""The website has returned an application error"""</span></div> + + + +<div class="viewcode-block" id="SearxEngineAccessDeniedException"> +<a class="viewcode-back" href="../../src/searx.exceptions.html#searx.exceptions.SearxEngineAccessDeniedException">[docs]</a> +<span class="k">class</span> <span class="nc">SearxEngineAccessDeniedException</span><span class="p">(</span><span class="n">SearxEngineResponseException</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""The website is blocking the access"""</span> + + <span class="n">SUSPEND_TIME_SETTING</span> <span class="o">=</span> <span 
class="s2">"search.suspended_times.SearxEngineAccessDenied"</span> +<span class="w"> </span><span class="sd">"""This setting contains the default suspended time (default 86400 sec / 1</span> +<span class="sd"> day)."""</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">suspended_time</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">message</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">'Access denied'</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Generic exception to raise when an engine denies access to the results.</span> + +<span class="sd"> :param suspended_time: How long the engine is going to be suspended in</span> +<span class="sd"> seconds. Defaults to None.</span> +<span class="sd"> :type suspended_time: int, None</span> +<span class="sd"> :param message: Internal message. 
Defaults to ``Access denied``</span> +<span class="sd"> :type message: str</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="n">suspended_time</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">suspended_time</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_default_suspended_time</span><span class="p">()</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">message</span> <span class="o">+</span> <span class="s1">', suspended_time='</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">suspended_time</span><span class="p">))</span> + <span class="bp">self</span><span class="o">.</span><span class="n">suspended_time</span> <span class="o">=</span> <span class="n">suspended_time</span> + <span class="bp">self</span><span class="o">.</span><span class="n">message</span> <span class="o">=</span> <span class="n">message</span> + + <span class="k">def</span> <span class="nf">_get_default_suspended_time</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> + <span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">get_setting</span> <span class="c1"># pylint: disable=C0415</span> + + <span class="k">return</span> <span class="n">get_setting</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">SUSPEND_TIME_SETTING</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="SearxEngineCaptchaException"> +<a class="viewcode-back" href="../../src/searx.exceptions.html#searx.exceptions.SearxEngineCaptchaException">[docs]</a> +<span class="k">class</span> <span 
class="nc">SearxEngineCaptchaException</span><span class="p">(</span><span class="n">SearxEngineAccessDeniedException</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""The website has returned a CAPTCHA."""</span> + + <span class="n">SUSPEND_TIME_SETTING</span> <span class="o">=</span> <span class="s2">"search.suspended_times.SearxEngineCaptcha"</span> +<span class="w"> </span><span class="sd">"""This setting contains the default suspended time (default 86400 sec / 1</span> +<span class="sd"> day)."""</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">suspended_time</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">message</span><span class="o">=</span><span class="s1">'CAPTCHA'</span><span class="p">):</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">message</span><span class="o">=</span><span class="n">message</span><span class="p">,</span> <span class="n">suspended_time</span><span class="o">=</span><span class="n">suspended_time</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="SearxEngineTooManyRequestsException"> +<a class="viewcode-back" href="../../src/searx.exceptions.html#searx.exceptions.SearxEngineTooManyRequestsException">[docs]</a> +<span class="k">class</span> <span class="nc">SearxEngineTooManyRequestsException</span><span class="p">(</span><span class="n">SearxEngineAccessDeniedException</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""The website has returned a Too Many Requests status code</span> + +<span class="sd"> By default, searx stops sending requests to this engine for 1 hour.</span> 
+<span class="sd"> """</span> + + <span class="n">SUSPEND_TIME_SETTING</span> <span class="o">=</span> <span class="s2">"search.suspended_times.SearxEngineTooManyRequests"</span> +<span class="w"> </span><span class="sd">"""This settings contains the default suspended time (default 3660 sec / 1</span> +<span class="sd"> hour)."""</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">suspended_time</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">message</span><span class="o">=</span><span class="s1">'Too many request'</span><span class="p">):</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">message</span><span class="o">=</span><span class="n">message</span><span class="p">,</span> <span class="n">suspended_time</span><span class="o">=</span><span class="n">suspended_time</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="SearxEngineXPathException"> +<a class="viewcode-back" href="../../src/searx.exceptions.html#searx.exceptions.SearxEngineXPathException">[docs]</a> +<span class="k">class</span> <span class="nc">SearxEngineXPathException</span><span class="p">(</span><span class="n">SearxEngineResponseException</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Error while getting the result of an XPath expression"""</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">xpath_spec</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span> + <span class="nb">super</span><span class="p">()</span><span 
class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">)</span> <span class="o">+</span> <span class="s2">" "</span> <span class="o">+</span> <span class="n">message</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">message</span> <span class="o">=</span> <span class="n">message</span> + <span class="c1"># str(xpath_spec) to deal with str and XPath instance</span> + <span class="bp">self</span><span class="o">.</span><span class="n">xpath_str</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">)</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a 
href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/favicons/cache.html b/_modules/searx/favicons/cache.html new file mode 100644 index 000000000..d6cb25fbe --- /dev/null +++ b/_modules/searx/favicons/cache.html @@ -0,0 +1,643 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.favicons.cache — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.favicons.cache</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div 
class="body" role="main"> + + <h1>Source code for searx.favicons.cache</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Implementations for caching favicons.</span> + +<span class="sd">:py:obj:`FaviconCacheConfig`:</span> +<span class="sd"> Configuration of the favicon cache</span> + +<span class="sd">:py:obj:`FaviconCache`:</span> +<span class="sd"> Abstract base class for the implementation of a favicon cache.</span> + +<span class="sd">:py:obj:`FaviconCacheSQLite`:</span> +<span class="sd"> Favicon cache that manages the favicon BLOBs in a SQLite DB.</span> + +<span class="sd">:py:obj:`FaviconCacheNull`:</span> +<span class="sd"> Fallback solution if the configured cache cannot be used for system reasons.</span> + +<span class="sd">----</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Literal</span> + +<span class="kn">import</span> <span class="nn">os</span> +<span class="kn">import</span> <span class="nn">abc</span> +<span class="kn">import</span> <span class="nn">dataclasses</span> +<span class="kn">import</span> <span class="nn">hashlib</span> +<span class="kn">import</span> <span class="nn">logging</span> +<span class="kn">import</span> <span class="nn">sqlite3</span> +<span class="kn">import</span> <span class="nn">tempfile</span> +<span class="kn">import</span> <span class="nn">time</span> +<span class="kn">import</span> <span class="nn">typer</span> + +<span class="kn">import</span> <span class="nn">msgspec</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">sqlitedb</span> +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span 
class="n">logger</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">humanize_bytes</span><span class="p">,</span> <span class="n">humanize_number</span> + +<span class="n">CACHE</span><span class="p">:</span> <span class="s2">"FaviconCache"</span> +<span class="n">FALLBACK_ICON</span> <span class="o">=</span> <span class="sa">b</span><span class="s2">"FALLBACK_ICON"</span> + +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'favicons.cache'</span><span class="p">)</span> +<span class="n">app</span> <span class="o">=</span> <span class="n">typer</span><span class="o">.</span><span class="n">Typer</span><span class="p">()</span> + + +<div class="viewcode-block" id="state"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.state">[docs]</a> +<span class="nd">@app</span><span class="o">.</span><span class="n">command</span><span class="p">()</span> +<span class="k">def</span> <span class="nf">state</span><span class="p">():</span> +<span class="w"> </span><span class="sd">"""show state of the cache"""</span> + <span class="nb">print</span><span class="p">(</span><span class="n">CACHE</span><span class="o">.</span><span class="n">state</span><span class="p">()</span><span class="o">.</span><span class="n">report</span><span class="p">())</span></div> + + + +<div class="viewcode-block" id="maintenance"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.maintenance">[docs]</a> +<span class="nd">@app</span><span class="o">.</span><span class="n">command</span><span class="p">()</span> +<span class="k">def</span> <span class="nf">maintenance</span><span class="p">(</span><span class="n">force</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span 
class="kc">True</span><span class="p">,</span> <span class="n">debug</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""perform maintenance of the cache"""</span> + <span class="n">root_log</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">()</span> + <span class="k">if</span> <span class="n">debug</span><span class="p">:</span> + <span class="n">root_log</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">DEBUG</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">root_log</span><span class="o">.</span><span class="n">handlers</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">handler</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">StreamHandler</span><span class="p">()</span> + <span class="n">handler</span><span class="o">.</span><span class="n">setFormatter</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">Formatter</span><span class="p">(</span><span class="s2">"</span><span class="si">%(message)s</span><span class="s2">"</span><span class="p">))</span> + <span class="n">logger</span><span class="o">.</span><span class="n">addHandler</span><span class="p">(</span><span class="n">handler</span><span class="p">)</span> + <span class="n">logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">DEBUG</span><span class="p">)</span> + + <span class="n">state_t0</span> <span class="o">=</span> <span class="n">CACHE</span><span class="o">.</span><span 
class="n">state</span><span class="p">()</span> + <span class="n">CACHE</span><span class="o">.</span><span class="n">maintenance</span><span class="p">(</span><span class="n">force</span><span class="o">=</span><span class="n">force</span><span class="p">)</span> + <span class="n">state_t1</span> <span class="o">=</span> <span class="n">CACHE</span><span class="o">.</span><span class="n">state</span><span class="p">()</span> + <span class="n">state_delta</span> <span class="o">=</span> <span class="n">state_t0</span> <span class="o">-</span> <span class="n">state_t1</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"The cache has been reduced by:"</span><span class="p">)</span> + <span class="nb">print</span><span class="p">(</span><span class="n">state_delta</span><span class="o">.</span><span class="n">report</span><span class="p">(</span><span class="s2">"</span><span class="se">\n</span><span class="s2">- </span><span class="si">{descr}</span><span class="s2">: </span><span class="si">{val}</span><span class="s2">"</span><span class="p">)</span><span class="o">.</span><span class="n">lstrip</span><span class="p">(</span><span class="s2">"</span><span class="se">\n</span><span class="s2">"</span><span class="p">))</span></div> + + + +<div class="viewcode-block" id="init"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.init">[docs]</a> +<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">cfg</span><span class="p">:</span> <span class="s2">"FaviconCacheConfig"</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Initialization of a global ``CACHE``"""</span> + + <span class="k">global</span> <span class="n">CACHE</span> <span class="c1"># pylint: disable=global-statement</span> + <span class="k">if</span> <span class="n">cfg</span><span class="o">.</span><span class="n">db_type</span> <span class="o">==</span> <span 
class="s2">"sqlite"</span><span class="p">:</span> + <span class="k">if</span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">sqlite_version_info</span> <span class="o"><=</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">35</span><span class="p">):</span> + <span class="n">logger</span><span class="o">.</span><span class="n">critical</span><span class="p">(</span> + <span class="s2">"Disable favicon caching completely: SQLite library (</span><span class="si">%s</span><span class="s2">) is too old! (require >= 3.35)"</span><span class="p">,</span> + <span class="n">sqlite3</span><span class="o">.</span><span class="n">sqlite_version</span><span class="p">,</span> + <span class="p">)</span> + <span class="n">CACHE</span> <span class="o">=</span> <span class="n">FaviconCacheNull</span><span class="p">(</span><span class="n">cfg</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">CACHE</span> <span class="o">=</span> <span class="n">FaviconCacheSQLite</span><span class="p">(</span><span class="n">cfg</span><span class="p">)</span> + <span class="k">elif</span> <span class="n">cfg</span><span class="o">.</span><span class="n">db_type</span> <span class="o">==</span> <span class="s2">"mem"</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"Favicons are cached in memory, don't use this in production!"</span><span class="p">)</span> + <span class="n">CACHE</span> <span class="o">=</span> <span class="n">FaviconCacheMEM</span><span class="p">(</span><span class="n">cfg</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"favicons db_type '</span><span class="si">{</span><span 
class="n">cfg</span><span class="o">.</span><span class="n">db_type</span><span class="si">}</span><span class="s2">' is unknown"</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="FaviconCacheConfig"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheConfig">[docs]</a> +<span class="k">class</span> <span class="nc">FaviconCacheConfig</span><span class="p">(</span><span class="n">msgspec</span><span class="o">.</span><span class="n">Struct</span><span class="p">):</span> <span class="c1"># pylint: disable=too-few-public-methods</span> +<span class="w"> </span><span class="sd">"""Configuration of the favicon cache."""</span> + + <span class="n">db_type</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="s2">"sqlite"</span><span class="p">,</span> <span class="s2">"mem"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"sqlite"</span> +<span class="w"> </span><span class="sd">"""Type of the database:</span> + +<span class="sd"> ``sqlite``:</span> +<span class="sd"> :py:obj:`.cache.FaviconCacheSQLite`</span> + +<span class="sd"> ``mem``:</span> +<span class="sd"> :py:obj:`.cache.FaviconCacheMEM` (not recommended)</span> +<span class="sd"> """</span> + + <span class="n">db_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">gettempdir</span><span class="p">()</span> <span class="o">+</span> <span class="n">os</span><span class="o">.</span><span class="n">sep</span> <span class="o">+</span> <span class="s2">"faviconcache.db"</span> +<span class="w"> </span><span class="sd">"""URL of the SQLite DB, the path to the database file."""</span> + + <span class="n">HOLD_TIME</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">60</span> <span class="o">*</span> <span 
class="mi">60</span> <span class="o">*</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">30</span> <span class="c1"># 30 days</span> +<span class="w"> </span><span class="sd">"""Hold time (default in sec.), after which a BLOB is removed from the cache."""</span> + + <span class="n">LIMIT_TOTAL_BYTES</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1024</span> <span class="o">*</span> <span class="mi">1024</span> <span class="o">*</span> <span class="mi">50</span> <span class="c1"># 50 MB</span> +<span class="w"> </span><span class="sd">"""Maximum of bytes (default) stored in the cache of all blobs. Note: The</span> +<span class="sd"> limit is only reached at each maintenance interval after which the oldest</span> +<span class="sd"> BLOBs are deleted; the limit is exceeded during the maintenance period. If</span> +<span class="sd"> the maintenance period is *too long* or maintenance is switched off</span> +<span class="sd"> completely, the cache grows uncontrollably."""</span> + + <span class="n">BLOB_MAX_BYTES</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1024</span> <span class="o">*</span> <span class="mi">20</span> <span class="c1"># 20 KB</span> +<span class="w"> </span><span class="sd">"""The maximum BLOB size in bytes that a favicon may have so that it can be</span> +<span class="sd"> saved in the cache. 
If the favicon is larger, it is not saved in the cache</span> +<span class="sd"> and must be requested by the client via the proxy."""</span> + + <span class="n">MAINTENANCE_PERIOD</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">60</span> +<span class="w"> </span><span class="sd">"""Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to</span> +<span class="sd"> ``auto``."""</span> + + <span class="n">MAINTENANCE_MODE</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="s2">"auto"</span><span class="p">,</span> <span class="s2">"off"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"auto"</span> +<span class="w"> </span><span class="sd">"""Type of maintenance mode</span> + +<span class="sd"> ``auto``:</span> +<span class="sd"> Maintenance is carried out automatically as part of the maintenance</span> +<span class="sd"> intervals (:py:obj:`MAINTENANCE_PERIOD`); no external process is required.</span> + +<span class="sd"> ``off``:</span> +<span class="sd"> Maintenance is switched off and must be carried out by an external process</span> +<span class="sd"> if required.</span> +<span class="sd"> """</span></div> + + + +<div class="viewcode-block" id="FaviconCacheStats"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheStats">[docs]</a> +<span class="nd">@dataclasses</span><span class="o">.</span><span class="n">dataclass</span> +<span class="k">class</span> <span class="nc">FaviconCacheStats</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Dataclass wich provides information on the status of the cache."""</span> + + <span class="n">favicons</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span> 
+ <span class="nb">bytes</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">domains</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">resolvers</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="n">field_descr</span> <span class="o">=</span> <span class="p">(</span> + <span class="p">(</span><span class="s2">"favicons"</span><span class="p">,</span> <span class="s2">"number of favicons in cache"</span><span class="p">,</span> <span class="n">humanize_number</span><span class="p">),</span> + <span class="p">(</span><span class="s2">"bytes"</span><span class="p">,</span> <span class="s2">"total size (approx. 
bytes) of cache"</span><span class="p">,</span> <span class="n">humanize_bytes</span><span class="p">),</span> + <span class="p">(</span><span class="s2">"domains"</span><span class="p">,</span> <span class="s2">"total number of domains in cache"</span><span class="p">,</span> <span class="n">humanize_number</span><span class="p">),</span> + <span class="p">(</span><span class="s2">"resolvers"</span><span class="p">,</span> <span class="s2">"number of resolvers"</span><span class="p">,</span> <span class="nb">str</span><span class="p">),</span> + <span class="p">)</span> + + <span class="k">def</span> <span class="fm">__sub__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> <span class="o">-></span> <span class="n">FaviconCacheStats</span><span class="p">:</span> + <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="p">):</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"unsupported operand type(s) for +: '</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="si">}</span><span class="s2">' and '</span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span><span class="si">}</span><span class="s2">'"</span><span class="p">)</span> + <span class="n">kwargs</span> <span class="o">=</span> <span class="p">{}</span> + <span class="k">for</span> <span class="n">field</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">_</span> <span class="ow">in</span> <span class="bp">self</span><span 
class="o">.</span><span class="n">field_descr</span><span class="p">:</span> + <span class="n">self_val</span><span class="p">,</span> <span class="n">other_val</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">),</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">field</span><span class="p">)</span> + <span class="k">if</span> <span class="kc">None</span> <span class="ow">in</span> <span class="p">(</span><span class="n">self_val</span><span class="p">,</span> <span class="n">other_val</span><span class="p">):</span> + <span class="k">continue</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">self_val</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> + <span class="n">kwargs</span><span class="p">[</span><span class="n">field</span><span class="p">]</span> <span class="o">=</span> <span class="n">self_val</span> <span class="o">-</span> <span class="n">other_val</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">kwargs</span><span class="p">[</span><span class="n">field</span><span class="p">]</span> <span class="o">=</span> <span class="n">self_val</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">report</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fmt</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"</span><span class="si">{descr}</span><span class="s2">: </span><span class="si">{val}</span><span 
class="se">\n</span><span class="s2">"</span><span class="p">):</span> + <span class="n">s</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">field</span><span class="p">,</span> <span class="n">descr</span><span class="p">,</span> <span class="n">cast</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">field_descr</span><span class="p">:</span> + <span class="n">val</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">)</span> + <span class="k">if</span> <span class="n">val</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">val</span> <span class="o">=</span> <span class="s2">"--"</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">val</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">val</span><span class="p">)</span> + <span class="n">s</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">fmt</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">descr</span><span class="o">=</span><span class="n">descr</span><span class="p">,</span> <span class="n">val</span><span class="o">=</span><span class="n">val</span><span class="p">))</span> + <span class="k">return</span> <span class="s2">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">s</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="FaviconCache"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCache">[docs]</a> +<span class="k">class</span> <span class="nc">FaviconCache</span><span class="p">(</span><span class="n">abc</span><span 
class="o">.</span><span class="n">ABC</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Abstract base class for the implementation of a favicon cache."""</span> + + <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span> + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> <span class="n">FaviconCacheConfig</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""An instance of the favicon cache is build up from the configuration."""</span> + + <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span> + <span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">resolver</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">authority</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">tuple</span><span class="p">[</span><span class="kc">None</span> <span class="o">|</span> <span class="nb">bytes</span><span class="p">,</span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">str</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Returns ``None`` or the tuple of ``(data, mime)`` that has been</span> +<span class="sd"> registered in the cache. 
The ``None`` indicates that there was no entry</span> +<span class="sd"> in the cache."""</span> + +<div class="viewcode-block" id="FaviconCache.set"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCache.set">[docs]</a> + <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span> + <span class="k">def</span> <span class="nf">set</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">resolver</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">authority</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">mime</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="nb">bytes</span> <span class="o">|</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Set data and mime-type in the cache. If data is None, the</span> +<span class="sd"> :py:obj:`FALLBACK_ICON` is registered. 
in the cache."""</span></div> + + +<div class="viewcode-block" id="FaviconCache.state"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCache.state">[docs]</a> + <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span> + <span class="k">def</span> <span class="nf">state</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">FaviconCacheStats</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns a :py:obj:`FaviconCacheStats` (key/values) with information</span> +<span class="sd"> on the state of the cache."""</span></div> + + +<div class="viewcode-block" id="FaviconCache.maintenance"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCache.maintenance">[docs]</a> + <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span> + <span class="k">def</span> <span class="nf">maintenance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">force</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Performs maintenance on the cache"""</span></div> +</div> + + + +<div class="viewcode-block" id="FaviconCacheNull"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheNull">[docs]</a> +<span class="k">class</span> <span class="nc">FaviconCacheNull</span><span class="p">(</span><span class="n">FaviconCache</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""A dummy favicon cache that caches nothing / a fallback solution. 
The</span> +<span class="sd"> NullCache is used when more efficient caches such as the</span> +<span class="sd"> :py:obj:`FaviconCacheSQLite` cannot be used because, for example, the SQLite</span> +<span class="sd"> library is only available in an old version and does not meet the</span> +<span class="sd"> requirements."""</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> <span class="n">FaviconCacheConfig</span><span class="p">):</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">resolver</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">authority</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">tuple</span><span class="p">[</span><span class="kc">None</span> <span class="o">|</span> <span class="nb">bytes</span><span class="p">,</span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">str</span><span class="p">]:</span> + <span class="k">return</span> <span class="kc">None</span> + +<div class="viewcode-block" id="FaviconCacheNull.set"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheNull.set">[docs]</a> + <span class="k">def</span> <span class="nf">set</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">resolver</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">authority</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">mime</span><span class="p">:</span> 
<span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="nb">bytes</span> <span class="o">|</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">False</span></div> + + +<div class="viewcode-block" id="FaviconCacheNull.state"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheNull.state">[docs]</a> + <span class="k">def</span> <span class="nf">state</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="n">FaviconCacheStats</span><span class="p">(</span><span class="n">favicons</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span></div> + + +<div class="viewcode-block" id="FaviconCacheNull.maintenance"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheNull.maintenance">[docs]</a> + <span class="k">def</span> <span class="nf">maintenance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">force</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + <span class="k">pass</span></div> +</div> + + + +<div class="viewcode-block" id="FaviconCacheSQLite"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheSQLite">[docs]</a> +<span class="k">class</span> <span class="nc">FaviconCacheSQLite</span><span class="p">(</span><span class="n">sqlitedb</span><span class="o">.</span><span class="n">SQLiteAppl</span><span class="p">,</span> <span class="n">FaviconCache</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Favicon cache that manages the favicon BLOBs in a SQLite DB. 
The DB</span> +<span class="sd"> model in the SQLite DB is implemented using the abstract class</span> +<span class="sd"> :py:obj:`sqlitedb.SQLiteAppl`.</span> + +<span class="sd"> The following configurations are required / supported:</span> + +<span class="sd"> - :py:obj:`FaviconCacheConfig.db_url`</span> +<span class="sd"> - :py:obj:`FaviconCacheConfig.HOLD_TIME`</span> +<span class="sd"> - :py:obj:`FaviconCacheConfig.LIMIT_TOTAL_BYTES`</span> +<span class="sd"> - :py:obj:`FaviconCacheConfig.BLOB_MAX_BYTES`</span> +<span class="sd"> - :py:obj:`MAINTENANCE_PERIOD`</span> +<span class="sd"> - :py:obj:`MAINTENANCE_MODE`</span> +<span class="sd"> """</span> + + <span class="n">DB_SCHEMA</span> <span class="o">=</span> <span class="mi">1</span> + + <span class="n">DDL_BLOBS</span> <span class="o">=</span> <span class="s2">"""</span><span class="se">\</span> +<span class="s2">CREATE TABLE IF NOT EXISTS blobs (</span> +<span class="s2"> sha256 TEXT,</span> +<span class="s2"> bytes_c INTEGER,</span> +<span class="s2"> mime TEXT NOT NULL,</span> +<span class="s2"> data BLOB NOT NULL,</span> +<span class="s2"> PRIMARY KEY (sha256))"""</span> + +<span class="w"> </span><span class="sd">"""Table to store BLOB objects by their sha256 hash values."""</span> + + <span class="n">DDL_BLOB_MAP</span> <span class="o">=</span> <span class="s2">"""</span><span class="se">\</span> +<span class="s2">CREATE TABLE IF NOT EXISTS blob_map (</span> +<span class="s2"> m_time INTEGER DEFAULT (strftime('</span><span class="si">%s</span><span class="s2">', 'now')), -- last modified (unix epoch) time in sec.</span> +<span class="s2"> sha256 TEXT,</span> +<span class="s2"> resolver TEXT,</span> +<span class="s2"> authority TEXT,</span> +<span class="s2"> PRIMARY KEY (resolver, authority))"""</span> + +<span class="w"> </span><span class="sd">"""Table to map from (resolver, authority) to sha256 hash values."""</span> + + <span class="n">DDL_CREATE_TABLES</span> <span class="o">=</span> <span 
class="p">{</span> + <span class="s2">"blobs"</span><span class="p">:</span> <span class="n">DDL_BLOBS</span><span class="p">,</span> + <span class="s2">"blob_map"</span><span class="p">:</span> <span class="n">DDL_BLOB_MAP</span><span class="p">,</span> + <span class="p">}</span> + + <span class="n">SQL_DROP_LEFTOVER_BLOBS</span> <span class="o">=</span> <span class="p">(</span> + <span class="s2">"DELETE FROM blobs WHERE sha256 IN ("</span> + <span class="s2">" SELECT b.sha256"</span> + <span class="s2">" FROM blobs b"</span> + <span class="s2">" LEFT JOIN blob_map bm"</span> + <span class="s2">" ON b.sha256 = bm.sha256"</span> + <span class="s2">" WHERE bm.sha256 IS NULL)"</span> + <span class="p">)</span> +<span class="w"> </span><span class="sd">"""Delete blobs.sha256 (BLOBs) no longer in blob_map.sha256."""</span> + + <span class="n">SQL_ITER_BLOBS_SHA256_BYTES_C</span> <span class="o">=</span> <span class="p">(</span> + <span class="s2">"SELECT b.sha256, b.bytes_c FROM blobs b"</span> + <span class="s2">" JOIN blob_map bm "</span> + <span class="s2">" ON b.sha256 = bm.sha256"</span> + <span class="s2">" ORDER BY bm.m_time ASC"</span> + <span class="p">)</span> + + <span class="n">SQL_INSERT_BLOBS</span> <span class="o">=</span> <span class="p">(</span> + <span class="s2">"INSERT INTO blobs (sha256, bytes_c, mime, data) VALUES (?, ?, ?, ?)"</span> + <span class="s2">" ON CONFLICT (sha256) DO NOTHING"</span> + <span class="p">)</span> <span class="c1"># fmt: skip</span> + + <span class="n">SQL_INSERT_BLOB_MAP</span> <span class="o">=</span> <span class="p">(</span> + <span class="s2">"INSERT INTO blob_map (sha256, resolver, authority) VALUES (?, ?, ?)"</span> + <span class="s2">" ON CONFLICT DO UPDATE "</span> + <span class="s2">" SET sha256=excluded.sha256, m_time=strftime('</span><span class="si">%s</span><span class="s2">', 'now')"</span> + <span class="p">)</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span 
class="bp">self</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> <span class="n">FaviconCacheConfig</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""An instance of the favicon cache is build up from the configuration."""</span> <span class="c1">#</span> + + <span class="k">if</span> <span class="n">cfg</span><span class="o">.</span><span class="n">db_url</span> <span class="o">==</span> <span class="s2">":memory:"</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">critical</span><span class="p">(</span><span class="s2">"don't use SQLite DB in :memory: in production!!"</span><span class="p">)</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">cfg</span><span class="o">.</span><span class="n">db_url</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span> <span class="o">=</span> <span class="n">cfg</span> + + <span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">resolver</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">authority</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">tuple</span><span class="p">[</span><span class="kc">None</span> <span class="o">|</span> <span class="nb">bytes</span><span class="p">,</span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">str</span><span class="p">]:</span> + + <span class="n">sql</span> <span class="o">=</span> <span class="s2">"SELECT sha256 FROM blob_map WHERE resolver = ? 
AND authority = ?"</span> + <span class="n">res</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">DB</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">sql</span><span class="p">,</span> <span class="p">(</span><span class="n">resolver</span><span class="p">,</span> <span class="n">authority</span><span class="p">))</span><span class="o">.</span><span class="n">fetchone</span><span class="p">()</span> + <span class="k">if</span> <span class="n">res</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="n">sha256</span> <span class="o">=</span> <span class="n">res</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + <span class="k">if</span> <span class="n">sha256</span> <span class="o">==</span> <span class="n">FALLBACK_ICON</span><span class="p">:</span> + <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> + + <span class="n">sql</span> <span class="o">=</span> <span class="s2">"SELECT data, mime FROM blobs WHERE sha256 = ?"</span> + <span class="n">res</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">DB</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">sql</span><span class="p">,</span> <span class="p">(</span><span class="n">sha256</span><span class="p">,))</span><span class="o">.</span><span class="n">fetchone</span><span class="p">()</span> + <span class="k">if</span> <span class="n">res</span> <span class="ow">is</span> <span 
class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span class="n">res</span> + <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> + +<div class="viewcode-block" id="FaviconCacheSQLite.set"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheSQLite.set">[docs]</a> + <span class="k">def</span> <span class="nf">set</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">resolver</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">authority</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">mime</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="nb">bytes</span> <span class="o">|</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> + + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span><span class="o">.</span><span class="n">MAINTENANCE_MODE</span> <span class="o">==</span> <span class="s2">"auto"</span> <span class="ow">and</span> <span class="nb">int</span><span class="p">(</span><span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">())</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">next_maintenance_time</span><span class="p">:</span> + <span class="c1"># Should automatic maintenance be moved to a new thread?</span> + <span class="bp">self</span><span class="o">.</span><span class="n">maintenance</span><span 
class="p">()</span> + + <span class="k">if</span> <span class="n">data</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">mime</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span> + <span class="s2">"favicon resolver </span><span class="si">%s</span><span class="s2"> tries to cache mime-type None for authority </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> + <span class="n">resolver</span><span class="p">,</span> + <span class="n">authority</span><span class="p">,</span> + <span class="p">)</span> + <span class="k">return</span> <span class="kc">False</span> + + <span class="n">bytes_c</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span> <span class="ow">or</span> <span class="sa">b</span><span class="s2">""</span><span class="p">)</span> + <span class="k">if</span> <span class="n">bytes_c</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span><span class="o">.</span><span class="n">BLOB_MAX_BYTES</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span> + <span class="s2">"favicon of resolver: </span><span class="si">%s</span><span class="s2"> / authority: </span><span class="si">%s</span><span class="s2"> to big to cache (bytes: </span><span class="si">%s</span><span class="s2">) "</span> <span class="o">%</span> <span class="p">(</span><span class="n">resolver</span><span class="p">,</span> <span class="n">authority</span><span class="p">,</span> <span class="n">bytes_c</span><span class="p">)</span> + <span class="p">)</span> + <span class="k">return</span> <span class="kc">False</span> + + <span 
class="k">if</span> <span class="n">data</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">sha256</span> <span class="o">=</span> <span class="n">FALLBACK_ICON</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">sha256</span> <span class="o">=</span> <span class="n">hashlib</span><span class="o">.</span><span class="n">sha256</span><span class="p">(</span><span class="n">data</span><span class="p">)</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span> + + <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">connect</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span> + <span class="k">if</span> <span class="n">sha256</span> <span class="o">!=</span> <span class="n">FALLBACK_ICON</span><span class="p">:</span> + <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">SQL_INSERT_BLOBS</span><span class="p">,</span> <span class="p">(</span><span class="n">sha256</span><span class="p">,</span> <span class="n">bytes_c</span><span class="p">,</span> <span class="n">mime</span><span class="p">,</span> <span class="n">data</span><span class="p">))</span> + <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">SQL_INSERT_BLOB_MAP</span><span class="p">,</span> <span class="p">(</span><span class="n">sha256</span><span class="p">,</span> <span class="n">resolver</span><span class="p">,</span> <span class="n">authority</span><span class="p">))</span> + + <span class="k">return</span> <span class="kc">True</span></div> + + + <span class="nd">@property</span> + <span class="k">def</span> <span 
class="nf">next_maintenance_time</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns (unix epoch) time of the next maintenance."""</span> + + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span><span class="o">.</span><span class="n">MAINTENANCE_PERIOD</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">m_time</span><span class="p">(</span><span class="s2">"LAST_MAINTENANCE"</span><span class="p">)</span> + +<div class="viewcode-block" id="FaviconCacheSQLite.maintenance"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheSQLite.maintenance">[docs]</a> + <span class="k">def</span> <span class="nf">maintenance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">force</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + + <span class="c1"># Prevent parallel DB maintenance cycles from other DB connections</span> + <span class="c1"># (e.g. 
in multi thread or process environments).</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">force</span> <span class="ow">and</span> <span class="nb">int</span><span class="p">(</span><span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">())</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">next_maintenance_time</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"no maintenance required yet, next maintenance interval is in the future"</span><span class="p">)</span> + <span class="k">return</span> + <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">"LAST_MAINTENANCE"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span> <span class="c1"># hint: this (also) sets the m_time of the property!</span> + + <span class="c1"># do maintenance tasks</span> + + <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">connect</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span> + + <span class="c1"># drop items not in HOLD time</span> + <span class="n">res</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span> + <span class="sa">f</span><span class="s2">"DELETE FROM blob_map"</span> + <span class="sa">f</span><span class="s2">" WHERE cast(m_time as integer) < cast(strftime('%s', 'now') as integer) - </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">cfg</span><span class="o">.</span><span class="n">HOLD_TIME</span><span class="si">}</span><span class="s2">"</span> + <span 
class="p">)</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"dropped </span><span class="si">%s</span><span class="s2"> obsolete blob_map items from db"</span><span class="p">,</span> <span class="n">res</span><span class="o">.</span><span class="n">rowcount</span><span class="p">)</span> + <span class="n">res</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">SQL_DROP_LEFTOVER_BLOBS</span><span class="p">)</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"dropped </span><span class="si">%s</span><span class="s2"> obsolete BLOBS from db"</span><span class="p">,</span> <span class="n">res</span><span class="o">.</span><span class="n">rowcount</span><span class="p">)</span> + + <span class="c1"># drop old items to be in LIMIT_TOTAL_BYTES</span> + <span class="n">total_bytes</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="s2">"SELECT SUM(bytes_c) FROM blobs"</span><span class="p">)</span><span class="o">.</span><span class="n">fetchone</span><span class="p">()[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">or</span> <span class="mi">0</span> + <span class="k">if</span> <span class="n">total_bytes</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span><span class="o">.</span><span class="n">LIMIT_TOTAL_BYTES</span><span class="p">:</span> + + <span class="n">x</span> <span class="o">=</span> <span class="n">total_bytes</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span><span class="o">.</span><span 
class="n">LIMIT_TOTAL_BYTES</span> + <span class="n">c</span> <span class="o">=</span> <span class="mi">0</span> + <span class="n">sha_list</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">SQL_ITER_BLOBS_SHA256_BYTES_C</span><span class="p">):</span> + <span class="n">sha256</span><span class="p">,</span> <span class="n">bytes_c</span> <span class="o">=</span> <span class="n">row</span> + <span class="n">sha_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sha256</span><span class="p">)</span> + <span class="n">c</span> <span class="o">+=</span> <span class="n">bytes_c</span> + <span class="k">if</span> <span class="n">c</span> <span class="o">></span> <span class="n">x</span><span class="p">:</span> + <span class="k">break</span> + <span class="k">if</span> <span class="n">sha_list</span><span class="p">:</span> + <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="s2">"DELETE FROM blobs WHERE sha256 IN ('</span><span class="si">%s</span><span class="s2">')"</span> <span class="o">%</span> <span class="s2">"','"</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">sha_list</span><span class="p">))</span> + <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="s2">"DELETE FROM blob_map WHERE sha256 IN ('</span><span class="si">%s</span><span class="s2">')"</span> <span class="o">%</span> <span class="s2">"','"</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">sha_list</span><span class="p">))</span> + <span 
class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"dropped </span><span class="si">%s</span><span class="s2"> blobs with total size of </span><span class="si">%s</span><span class="s2"> bytes"</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">sha_list</span><span class="p">),</span> <span class="n">c</span><span class="p">)</span></div> + + + <span class="k">def</span> <span class="nf">_query_val</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sql</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> + <span class="n">val</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">DB</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">sql</span><span class="p">)</span><span class="o">.</span><span class="n">fetchone</span><span class="p">()</span> + <span class="k">if</span> <span class="n">val</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">val</span> <span class="o">=</span> <span class="n">val</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + <span class="k">if</span> <span class="n">val</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">val</span> <span class="o">=</span> <span class="n">default</span> + <span class="k">return</span> <span class="n">val</span> + +<div class="viewcode-block" id="FaviconCacheSQLite.state"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheSQLite.state">[docs]</a> + <span class="k">def</span> <span class="nf">state</span><span class="p">(</span><span class="bp">self</span><span 
class="p">)</span> <span class="o">-></span> <span class="n">FaviconCacheStats</span><span class="p">:</span> + <span class="k">return</span> <span class="n">FaviconCacheStats</span><span class="p">(</span> + <span class="n">favicons</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_query_val</span><span class="p">(</span><span class="s2">"SELECT count(*) FROM blobs"</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> + <span class="nb">bytes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_query_val</span><span class="p">(</span><span class="s2">"SELECT SUM(bytes_c) FROM blobs"</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> + <span class="n">domains</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_query_val</span><span class="p">(</span><span class="s2">"SELECT count(*) FROM (SELECT authority FROM blob_map GROUP BY authority)"</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> + <span class="n">resolvers</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_query_val</span><span class="p">(</span><span class="s2">"SELECT count(*) FROM (SELECT resolver FROM blob_map GROUP BY resolver)"</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> + <span class="p">)</span></div> +</div> + + + +<div class="viewcode-block" id="FaviconCacheMEM"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheMEM">[docs]</a> +<span class="k">class</span> <span class="nc">FaviconCacheMEM</span><span class="p">(</span><span class="n">FaviconCache</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Favicon cache in process' memory. 
Its just a POC that stores the</span> +<span class="sd"> favicons in the memory of the process.</span> + +<span class="sd"> .. attention::</span> + +<span class="sd"> Don't use it in production, it will blow up your memory!!</span> + +<span class="sd"> """</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cfg</span><span class="p">):</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span> <span class="o">=</span> <span class="n">cfg</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_data</span> <span class="o">=</span> <span class="p">{}</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_sha_mime</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">resolver</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">authority</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">bytes</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">]:</span> + + <span class="n">sha</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sha_mime</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">resolver</span><span class="si">}</span><span class="s2">:</span><span 
class="si">{</span><span class="n">authority</span><span class="si">}</span><span class="s2">"</span><span class="p">,</span> <span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">))</span> + <span class="k">if</span> <span class="n">sha</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + <span class="n">data</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sha</span><span class="p">)</span> + <span class="k">if</span> <span class="n">data</span> <span class="o">==</span> <span class="n">FALLBACK_ICON</span><span class="p">:</span> + <span class="n">data</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> + +<div class="viewcode-block" id="FaviconCacheMEM.set"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheMEM.set">[docs]</a> + <span class="k">def</span> <span class="nf">set</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">resolver</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">authority</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">mime</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="nb">bytes</span> <span class="o">|</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> + + <span 
class="k">if</span> <span class="n">data</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">FALLBACK_ICON</span> + <span class="n">mime</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="k">elif</span> <span class="n">mime</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span> + <span class="s2">"favicon resolver </span><span class="si">%s</span><span class="s2"> tries to cache mime-type None for authority </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> + <span class="n">resolver</span><span class="p">,</span> + <span class="n">authority</span><span class="p">,</span> + <span class="p">)</span> + <span class="k">return</span> <span class="kc">False</span> + + <span class="n">digest</span> <span class="o">=</span> <span class="n">hashlib</span><span class="o">.</span><span class="n">sha256</span><span class="p">(</span><span class="n">data</span><span class="p">)</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_data</span><span class="p">[</span><span class="n">digest</span><span class="p">]</span> <span class="o">=</span> <span class="n">data</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_sha_mime</span><span class="p">[</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">resolver</span><span class="si">}</span><span class="s2">:</span><span class="si">{</span><span class="n">authority</span><span class="si">}</span><span class="s2">"</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">digest</span><span class="p">,</span> <span 
class="n">mime</span><span class="p">)</span> + <span class="k">return</span> <span class="kc">True</span></div> + + +<div class="viewcode-block" id="FaviconCacheMEM.state"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheMEM.state">[docs]</a> + <span class="k">def</span> <span class="nf">state</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="n">FaviconCacheStats</span><span class="p">(</span><span class="n">favicons</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_data</span><span class="o">.</span><span class="n">keys</span><span class="p">()))</span></div> + + +<div class="viewcode-block" id="FaviconCacheMEM.maintenance"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.cache.FaviconCacheMEM.maintenance">[docs]</a> + <span class="k">def</span> <span class="nf">maintenance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">force</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + <span class="k">pass</span></div> +</div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference 
internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/favicons/config.html b/_modules/searx/favicons/config.html new file mode 100644 index 000000000..e7e184c3b --- /dev/null +++ b/_modules/searx/favicons/config.html @@ -0,0 +1,175 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.favicons.config — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.favicons.config</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div 
class="body" role="main"> + + <h1>Source code for searx.favicons.config</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="c1"># pylint: disable=missing-module-docstring</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="kn">import</span> <span class="nn">pathlib</span> +<span class="kn">import</span> <span class="nn">msgspec</span> + +<span class="kn">from</span> <span class="nn">.cache</span> <span class="kn">import</span> <span class="n">FaviconCacheConfig</span> +<span class="kn">from</span> <span class="nn">.proxy</span> <span class="kn">import</span> <span class="n">FaviconProxyConfig</span> + +<span class="n">CONFIG_SCHEMA</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span> +<span class="sd">"""Version of the configuration schema."""</span> + +<span class="n">TOML_CACHE_CFG</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="s2">"FaviconConfig"</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> +<span class="sd">"""Cache config objects by TOML's filename."""</span> + +<span class="n">DEFAULT_CFG_TOML_PATH</span> <span class="o">=</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span><span class="p">(</span><span class="vm">__file__</span><span class="p">)</span><span class="o">.</span><span class="n">parent</span> <span class="o">/</span> <span class="s2">"favicons.toml"</span> + + +<div class="viewcode-block" id="FaviconConfig"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.config.FaviconConfig">[docs]</a> +<span class="k">class</span> <span class="nc">FaviconConfig</span><span class="p">(</span><span 
class="n">msgspec</span><span class="o">.</span><span class="n">Struct</span><span class="p">):</span> <span class="c1"># pylint: disable=too-few-public-methods</span> +<span class="w"> </span><span class="sd">"""The class aggregates configurations of the favicon tools"""</span> + + <span class="n">cfg_schema</span><span class="p">:</span> <span class="nb">int</span> +<span class="w"> </span><span class="sd">"""Config's schema version. The specification of the version of the schema</span> +<span class="sd"> is mandatory, currently only version :py:obj:`CONFIG_SCHEMA` is supported.</span> +<span class="sd"> By specifying a version, it is possible to ensure downward compatibility in</span> +<span class="sd"> the event of future changes to the configuration schema"""</span> + + <span class="n">cache</span><span class="p">:</span> <span class="n">FaviconCacheConfig</span> <span class="o">=</span> <span class="n">msgspec</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="n">default_factory</span><span class="o">=</span><span class="n">FaviconCacheConfig</span><span class="p">)</span> +<span class="w"> </span><span class="sd">"""Setup of the :py:obj:`.cache.FaviconCacheConfig`."""</span> + + <span class="n">proxy</span><span class="p">:</span> <span class="n">FaviconProxyConfig</span> <span class="o">=</span> <span class="n">msgspec</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="n">default_factory</span><span class="o">=</span><span class="n">FaviconProxyConfig</span><span class="p">)</span> +<span class="w"> </span><span class="sd">"""Setup of the :py:obj:`.proxy.FaviconProxyConfig`."""</span> + +<div class="viewcode-block" id="FaviconConfig.from_toml_file"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.config.FaviconConfig.from_toml_file">[docs]</a> + <span class="nd">@classmethod</span> + <span class="k">def</span> <span 
class="nf">from_toml_file</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">cfg_file</span><span class="p">:</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span><span class="p">,</span> <span class="n">use_cache</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FaviconConfig"</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Create a config object from a TOML file, the ``use_cache`` argument</span> +<span class="sd"> specifies whether a cache should be used.</span> +<span class="sd"> """</span> + + <span class="n">cached</span> <span class="o">=</span> <span class="n">TOML_CACHE_CFG</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">cfg_file</span><span class="p">))</span> + <span class="k">if</span> <span class="n">use_cache</span> <span class="ow">and</span> <span class="n">cached</span><span class="p">:</span> + <span class="k">return</span> <span class="n">cached</span> + + <span class="k">with</span> <span class="n">cfg_file</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="s2">"rb"</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span> + + <span class="n">cfg</span> <span class="o">=</span> <span class="n">msgspec</span><span class="o">.</span><span class="n">toml</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="n">_FaviconConfig</span><span class="p">)</span> + <span 
class="n">schema</span> <span class="o">=</span> <span class="n">cfg</span><span class="o">.</span><span class="n">favicons</span><span class="o">.</span><span class="n">cfg_schema</span> + <span class="k">if</span> <span class="n">schema</span> <span class="o">!=</span> <span class="n">CONFIG_SCHEMA</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> + <span class="sa">f</span><span class="s2">"config schema version </span><span class="si">{</span><span class="n">CONFIG_SCHEMA</span><span class="si">}</span><span class="s2"> is needed, version </span><span class="si">{</span><span class="n">schema</span><span class="si">}</span><span class="s2"> is given in </span><span class="si">{</span><span class="n">cfg_file</span><span class="si">}</span><span class="s2">"</span> + <span class="p">)</span> + + <span class="n">cfg</span> <span class="o">=</span> <span class="n">cfg</span><span class="o">.</span><span class="n">favicons</span> + <span class="k">if</span> <span class="n">use_cache</span> <span class="ow">and</span> <span class="n">cached</span><span class="p">:</span> + <span class="n">TOML_CACHE_CFG</span><span class="p">[</span><span class="nb">str</span><span class="p">(</span><span class="n">cfg_file</span><span class="o">.</span><span class="n">resolve</span><span class="p">())]</span> <span class="o">=</span> <span class="n">cfg</span> + + <span class="k">return</span> <span class="n">cfg</span></div> +</div> + + + +<span class="k">class</span> <span class="nc">_FaviconConfig</span><span class="p">(</span><span class="n">msgspec</span><span class="o">.</span><span class="n">Struct</span><span class="p">):</span> <span class="c1"># pylint: disable=too-few-public-methods</span> + <span class="c1"># wrapper struct for root object "favicons."</span> + <span class="n">favicons</span><span class="p">:</span> <span class="n">FaviconConfig</span> +</pre></div> + + <div class="clearer"></div> + 
</div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> 
+<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/favicons/proxy.html b/_modules/searx/favicons/proxy.html new file mode 100644 index 000000000..d780e2ecc --- /dev/null +++ b/_modules/searx/favicons/proxy.html @@ -0,0 +1,362 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.favicons.proxy — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.favicons.proxy</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div 
class="body" role="main"> + + <h1>Source code for searx.favicons.proxy</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Implementations for a favicon proxy"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Callable</span> + +<span class="kn">import</span> <span class="nn">importlib</span> +<span class="kn">import</span> <span class="nn">base64</span> +<span class="kn">import</span> <span class="nn">pathlib</span> +<span class="kn">import</span> <span class="nn">urllib.parse</span> + +<span class="kn">import</span> <span class="nn">flask</span> +<span class="kn">from</span> <span class="nn">httpx</span> <span class="kn">import</span> <span class="n">HTTPError</span> +<span class="kn">import</span> <span class="nn">msgspec</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">get_setting</span> + +<span class="kn">from</span> <span class="nn">searx.webutils</span> <span class="kn">import</span> <span class="n">new_hmac</span><span class="p">,</span> <span class="n">is_hmac_of</span> +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineResponseException</span> + +<span class="kn">from</span> <span class="nn">.resolvers</span> <span class="kn">import</span> <span class="n">DEFAULT_RESOLVER_MAP</span> +<span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">cache</span> + +<span class="n">DEFAULT_FAVICON_URL</span> <span class="o">=</span> <span class="p">{}</span> +<span class="n">CFG</span><span class="p">:</span> <span class="n">FaviconProxyConfig</span> <span class="o">=</span> <span 
class="kc">None</span> <span class="c1"># type: ignore</span> + + +<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">cfg</span><span class="p">:</span> <span class="n">FaviconProxyConfig</span><span class="p">):</span> + <span class="k">global</span> <span class="n">CFG</span> <span class="c1"># pylint: disable=global-statement</span> + <span class="n">CFG</span> <span class="o">=</span> <span class="n">cfg</span> + + +<span class="k">def</span> <span class="nf">_initial_resolver_map</span><span class="p">():</span> + <span class="n">d</span> <span class="o">=</span> <span class="p">{}</span> + <span class="n">name</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">get_setting</span><span class="p">(</span><span class="s2">"search.favicon_resolver"</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="k">if</span> <span class="n">name</span><span class="p">:</span> + <span class="n">func</span> <span class="o">=</span> <span class="n">DEFAULT_RESOLVER_MAP</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="k">if</span> <span class="n">func</span><span class="p">:</span> + <span class="n">d</span> <span class="o">=</span> <span class="p">{</span><span class="n">name</span><span class="p">:</span> <span class="sa">f</span><span class="s2">"searx.favicons.resolvers.</span><span class="si">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s2">"</span><span class="p">}</span> + <span class="k">return</span> <span class="n">d</span> + + +<div class="viewcode-block" id="FaviconProxyConfig"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.FaviconProxyConfig">[docs]</a> +<span 
class="k">class</span> <span class="nc">FaviconProxyConfig</span><span class="p">(</span><span class="n">msgspec</span><span class="o">.</span><span class="n">Struct</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Configuration of the favicon proxy."""</span> + + <span class="n">max_age</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">7</span> <span class="c1"># seven days</span> +<span class="w"> </span><span class="sd">"""HTTP header Cache-Control_ ``max-age``</span> + +<span class="sd"> .. _Cache-Control: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control</span> +<span class="sd"> """</span> + + <span class="n">secret_key</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">get_setting</span><span class="p">(</span><span class="s2">"server.secret_key"</span><span class="p">)</span> <span class="c1"># type: ignore</span> +<span class="w"> </span><span class="sd">"""By default, the value from :ref:`server.secret_key <settings server>`</span> +<span class="sd"> setting is used."""</span> + + <span class="n">resolver_timeout</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">get_setting</span><span class="p">(</span><span class="s2">"outgoing.request_timeout"</span><span class="p">)</span> <span class="c1"># type: ignore</span> +<span class="w"> </span><span class="sd">"""Timeout which the resolvers should not exceed, is usually passed to the</span> +<span class="sd"> outgoing request of the resolver. 
By default, the value from</span> +<span class="sd"> :ref:`outgoing.request_timeout <settings outgoing>` setting is used."""</span> + + <span class="n">resolver_map</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">msgspec</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="n">default_factory</span><span class="o">=</span><span class="n">_initial_resolver_map</span><span class="p">)</span> +<span class="w"> </span><span class="sd">"""The resolver_map is a key / value dictionary where the key is the name of</span> +<span class="sd"> the resolver and the value is the fully qualifying name (fqn) of resolver's</span> +<span class="sd"> function (the callable). The resolvers from the python module</span> +<span class="sd"> :py:obj:`searx.favicons.resolver` are available by default."""</span> + +<div class="viewcode-block" id="FaviconProxyConfig.get_resolver"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.FaviconProxyConfig.get_resolver">[docs]</a> + <span class="k">def</span> <span class="nf">get_resolver</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">Callable</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns the callable object (function) of the resolver with the</span> +<span class="sd"> ``name``. 
If no resolver is registered for the ``name``, ``None`` is</span> +<span class="sd"> returned.</span> +<span class="sd"> """</span> + <span class="n">fqn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">resolver_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="k">if</span> <span class="n">fqn</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + <span class="n">mod_name</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">func_name</span> <span class="o">=</span> <span class="n">fqn</span><span class="o">.</span><span class="n">rpartition</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)</span> + <span class="n">mod</span> <span class="o">=</span> <span class="n">importlib</span><span class="o">.</span><span class="n">import_module</span><span class="p">(</span><span class="n">mod_name</span><span class="p">)</span> + <span class="n">func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">mod</span><span class="p">,</span> <span class="n">func_name</span><span class="p">)</span> + <span class="k">if</span> <span class="n">func</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"resolver </span><span class="si">{</span><span class="n">fqn</span><span class="si">}</span><span class="s2"> is not implemented"</span><span class="p">)</span> + <span class="k">return</span> <span class="n">func</span></div> + + + <span class="n">favicon_path</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span 
class="n">get_setting</span><span class="p">(</span><span class="s2">"ui.static_path"</span><span class="p">)</span> <span class="o">+</span> <span class="s2">"/themes/</span><span class="si">{theme}</span><span class="s2">/img/empty_favicon.svg"</span> <span class="c1"># type: ignore</span> + <span class="n">favicon_mime_type</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"image/svg+xml"</span> + +<div class="viewcode-block" id="FaviconProxyConfig.favicon"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.FaviconProxyConfig.favicon">[docs]</a> + <span class="k">def</span> <span class="nf">favicon</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">replacements</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns pathname and mimetype of the default favicon."""</span> + <span class="k">return</span> <span class="p">(</span> + <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">favicon_path</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="n">replacements</span><span class="p">)),</span> + <span class="bp">self</span><span class="o">.</span><span class="n">favicon_mime_type</span><span class="p">,</span> + <span class="p">)</span></div> + + +<div class="viewcode-block" id="FaviconProxyConfig.favicon_data_url"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.FaviconProxyConfig.favicon_data_url">[docs]</a> + <span class="k">def</span> <span class="nf">favicon_data_url</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">replacements</span><span class="p">):</span> +<span class="w"> 
</span><span class="sd">"""Returns data image URL of the default favicon."""</span> + + <span class="n">cache_key</span> <span class="o">=</span> <span class="s2">", "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">x</span><span class="si">}</span><span class="s2">:</span><span class="si">{</span><span class="n">replacements</span><span class="p">[</span><span class="n">x</span><span class="p">]</span><span class="si">}</span><span class="s2">"</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">replacements</span><span class="o">.</span><span class="n">keys</span><span class="p">()),</span> <span class="n">key</span><span class="o">=</span><span class="nb">str</span><span class="p">))</span> + <span class="n">data_url</span> <span class="o">=</span> <span class="n">DEFAULT_FAVICON_URL</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cache_key</span><span class="p">)</span> + <span class="k">if</span> <span class="n">data_url</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">data_url</span> + + <span class="n">fav</span><span class="p">,</span> <span class="n">mimetype</span> <span class="o">=</span> <span class="n">CFG</span><span class="o">.</span><span class="n">favicon</span><span class="p">(</span><span class="o">**</span><span class="n">replacements</span><span class="p">)</span> + <span class="c1"># hint: encoding utf-8 limits favicons to be a SVG image</span> + <span class="k">with</span> <span class="n">fav</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="s2">"r"</span><span 
class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8"</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span> + <span class="n">data_url</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span> + + <span class="n">data_url</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">quote</span><span class="p">(</span><span class="n">data_url</span><span class="p">)</span> + <span class="n">data_url</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"data:</span><span class="si">{</span><span class="n">mimetype</span><span class="si">}</span><span class="s2">;utf8,</span><span class="si">{</span><span class="n">data_url</span><span class="si">}</span><span class="s2">"</span> + <span class="n">DEFAULT_FAVICON_URL</span><span class="p">[</span><span class="n">cache_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">data_url</span> + <span class="k">return</span> <span class="n">data_url</span></div> +</div> + + + +<div class="viewcode-block" id="favicon_proxy"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.favicon_proxy">[docs]</a> +<span class="k">def</span> <span class="nf">favicon_proxy</span><span class="p">():</span> +<span class="w"> </span><span class="sd">"""REST API of SearXNG's favicon proxy service</span> + +<span class="sd"> ::</span> + +<span class="sd"> /favicon_proxy?authority=<...>&h=<...></span> + +<span class="sd"> ``authority``:</span> +<span class="sd"> Domain name :rfc:`3986` / see :py:obj:`favicon_url`</span> + +<span class="sd"> ``h``:</span> +<span class="sd"> HMAC :rfc:`2104`, build up from the :ref:`server.secret_key <settings</span> +<span class="sd"> server>` setting.</span> + +<span class="sd"> 
"""</span> + <span class="n">authority</span> <span class="o">=</span> <span class="n">flask</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'authority'</span><span class="p">)</span> + + <span class="c1"># malformed request or RFC 3986 authority</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">authority</span> <span class="ow">or</span> <span class="s2">"/"</span> <span class="ow">in</span> <span class="n">authority</span><span class="p">:</span> + <span class="k">return</span> <span class="s1">''</span><span class="p">,</span> <span class="mi">400</span> + + <span class="c1"># malformed request / does not have authorisation</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">is_hmac_of</span><span class="p">(</span> + <span class="n">CFG</span><span class="o">.</span><span class="n">secret_key</span><span class="p">,</span> + <span class="n">authority</span><span class="o">.</span><span class="n">encode</span><span class="p">(),</span> + <span class="n">flask</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">args</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'h'</span><span class="p">,</span> <span class="s1">''</span><span class="p">),</span> + <span class="p">):</span> + <span class="k">return</span> <span class="s1">''</span><span class="p">,</span> <span class="mi">400</span> + + <span class="n">resolver</span> <span class="o">=</span> <span class="n">flask</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">preferences</span><span class="o">.</span><span class="n">get_value</span><span class="p">(</span><span class="s1">'favicon_resolver'</span><span class="p">)</span> <span class="c1"># type: 
ignore</span> + <span class="c1"># if resolver is empty or not valid, just return HTTP 400.</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resolver</span> <span class="ow">or</span> <span class="n">resolver</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">CFG</span><span class="o">.</span><span class="n">resolver_map</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span> + <span class="k">return</span> <span class="s2">""</span><span class="p">,</span> <span class="mi">400</span> + + <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span class="n">search_favicon</span><span class="p">(</span><span class="n">resolver</span><span class="p">,</span> <span class="n">authority</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">data</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">mime</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">resp</span> <span class="o">=</span> <span class="n">flask</span><span class="o">.</span><span class="n">Response</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">mimetype</span><span class="o">=</span><span class="n">mime</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">resp</span><span class="o">.</span><span class="n">headers</span><span class="p">[</span><span class="s1">'Cache-Control'</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"max-age=</span><span class="si">{</span><span class="n">CFG</span><span class="o">.</span><span class="n">max_age</span><span class="si">}</span><span class="s2">"</span> + <span class="k">return</span> <span class="n">resp</span> + + 
<span class="c1"># return default favicon from static path</span> + <span class="n">theme</span> <span class="o">=</span> <span class="n">flask</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">preferences</span><span class="o">.</span><span class="n">get_value</span><span class="p">(</span><span class="s2">"theme"</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">fav</span><span class="p">,</span> <span class="n">mimetype</span> <span class="o">=</span> <span class="n">CFG</span><span class="o">.</span><span class="n">favicon</span><span class="p">(</span><span class="n">theme</span><span class="o">=</span><span class="n">theme</span><span class="p">)</span> + <span class="k">return</span> <span class="n">flask</span><span class="o">.</span><span class="n">send_from_directory</span><span class="p">(</span><span class="n">fav</span><span class="o">.</span><span class="n">parent</span><span class="p">,</span> <span class="n">fav</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">mimetype</span><span class="o">=</span><span class="n">mimetype</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="search_favicon"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.search_favicon">[docs]</a> +<span class="k">def</span> <span class="nf">search_favicon</span><span class="p">(</span><span class="n">resolver</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">authority</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">tuple</span><span class="p">[</span><span class="kc">None</span> <span class="o">|</span> <span class="nb">bytes</span><span class="p">,</span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">str</span><span class="p">]:</span> +<span 
class="w"> </span><span class="sd">"""Sends the request to the favicon resolver and returns a tuple for the</span> +<span class="sd"> favicon. The tuple consists of ``(data, mime)``, if the resolver has not</span> +<span class="sd"> determined a favicon, both values are ``None``.</span> + +<span class="sd"> ``data``:</span> +<span class="sd"> Binary data of the favicon.</span> + +<span class="sd"> ``mime``:</span> +<span class="sd"> Mime type of the favicon.</span> + +<span class="sd"> """</span> + + <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + + <span class="n">func</span> <span class="o">=</span> <span class="n">CFG</span><span class="o">.</span><span class="n">get_resolver</span><span class="p">(</span><span class="n">resolver</span><span class="p">)</span> + <span class="k">if</span> <span class="n">func</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> + + <span class="c1"># to avoid superfluous requests to the resolver, first look in the cache</span> + <span class="n">data_mime</span> <span class="o">=</span> <span class="n">cache</span><span class="o">.</span><span class="n">CACHE</span><span class="p">(</span><span class="n">resolver</span><span class="p">,</span> <span class="n">authority</span><span class="p">)</span> + <span class="k">if</span> <span class="n">data_mime</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">data_mime</span> + + <span class="k">try</span><span class="p">:</span> + <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span 
class="n">func</span><span class="p">(</span><span class="n">authority</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="n">CFG</span><span class="o">.</span><span class="n">resolver_timeout</span><span class="p">)</span> + <span class="k">if</span> <span class="n">data</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">mime</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + + <span class="k">except</span> <span class="p">(</span><span class="n">HTTPError</span><span class="p">,</span> <span class="n">SearxEngineResponseException</span><span class="p">):</span> + <span class="k">pass</span> + + <span class="n">cache</span><span class="o">.</span><span class="n">CACHE</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">resolver</span><span class="p">,</span> <span class="n">authority</span><span class="p">,</span> <span class="n">mime</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span> + <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">mime</span></div> + + + +<div class="viewcode-block" id="favicon_url"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.favicon_url">[docs]</a> +<span class="k">def</span> <span class="nf">favicon_url</span><span class="p">(</span><span class="n">authority</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Function to generate the image URL used for favicons 
in SearXNG's result</span> +<span class="sd"> lists. The ``authority`` argument (aka netloc / :rfc:`3986`) is usually a</span> +<span class="sd"> (sub-) domain name. This function is used in the HTML (jinja) templates.</span> + +<span class="sd"> .. code:: html</span> + +<span class="sd"> <div class="favicon"></span> +<span class="sd"> <img src="{{ favicon_url(result.parsed_url.netloc) }}"></span> +<span class="sd"> </div></span> + +<span class="sd"> The returned URL is a route to :py:obj:`favicon_proxy` REST API.</span> + +<span class="sd"> If the favicon is already in the cache, the returned URL is a `data URL`_</span> +<span class="sd"> (something like ``data:image/png;base64,...``). By generating a data url from</span> +<span class="sd"> the :py:obj:`.cache.FaviconCache`, additional HTTP roundtripps via the</span> +<span class="sd"> :py:obj:`favicon_proxy` are saved. However, it must also be borne in mind</span> +<span class="sd"> that data urls are not cached in the client (web browser).</span> + +<span class="sd"> .. 
_data URL: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs</span> + +<span class="sd"> """</span> + + <span class="n">resolver</span> <span class="o">=</span> <span class="n">flask</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">preferences</span><span class="o">.</span><span class="n">get_value</span><span class="p">(</span><span class="s1">'favicon_resolver'</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="c1"># if resolver is empty or not valid, just return nothing.</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">resolver</span> <span class="ow">or</span> <span class="n">resolver</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">CFG</span><span class="o">.</span><span class="n">resolver_map</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span> + <span class="k">return</span> <span class="s2">""</span> + + <span class="n">data_mime</span> <span class="o">=</span> <span class="n">cache</span><span class="o">.</span><span class="n">CACHE</span><span class="p">(</span><span class="n">resolver</span><span class="p">,</span> <span class="n">authority</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">data_mime</span> <span class="o">==</span> <span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">):</span> + <span class="c1"># we have already checked, the resolver does not have a favicon</span> + <span class="n">theme</span> <span class="o">=</span> <span class="n">flask</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">preferences</span><span class="o">.</span><span class="n">get_value</span><span class="p">(</span><span class="s2">"theme"</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span 
class="k">return</span> <span class="n">CFG</span><span class="o">.</span><span class="n">favicon_data_url</span><span class="p">(</span><span class="n">theme</span><span class="o">=</span><span class="n">theme</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">data_mime</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span class="n">data_mime</span> + <span class="k">return</span> <span class="sa">f</span><span class="s2">"data:</span><span class="si">{</span><span class="n">mime</span><span class="si">}</span><span class="s2">;base64,</span><span class="si">{</span><span class="nb">str</span><span class="p">(</span><span class="n">base64</span><span class="o">.</span><span class="n">b64encode</span><span class="p">(</span><span class="n">data</span><span class="p">),</span><span class="w"> </span><span class="s1">'utf-8'</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span> <span class="c1"># type: ignore</span> + + <span class="n">h</span> <span class="o">=</span> <span class="n">new_hmac</span><span class="p">(</span><span class="n">CFG</span><span class="o">.</span><span class="n">secret_key</span><span class="p">,</span> <span class="n">authority</span><span class="o">.</span><span class="n">encode</span><span class="p">())</span> + <span class="n">proxy_url</span> <span class="o">=</span> <span class="n">flask</span><span class="o">.</span><span class="n">url_for</span><span class="p">(</span><span class="s1">'favicon_proxy'</span><span class="p">)</span> + <span class="n">query</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">urlencode</span><span class="p">({</span><span class="s2">"authority"</span><span class="p">:</span> <span 
class="n">authority</span><span class="p">,</span> <span class="s2">"h"</span><span class="p">:</span> <span class="n">h</span><span class="p">})</span> + <span class="k">return</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">proxy_url</span><span class="si">}</span><span class="s2">?</span><span class="si">{</span><span class="n">query</span><span class="si">}</span><span class="s2">"</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + 
<li><a href="../../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/favicons/resolvers.html b/_modules/searx/favicons/resolvers.html new file mode 100644 index 000000000..da6cbc657 --- /dev/null +++ b/_modules/searx/favicons/resolvers.html @@ -0,0 +1,216 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.favicons.resolvers — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.favicons.resolvers</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div 
class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.favicons.resolvers</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Implementations of the favicon *resolvers* that are available in the favicon</span> +<span class="sd">proxy by default. A *resolver* is a function that obtains the favicon from an</span> +<span class="sd">external source. The *resolver* function receives two arguments (``domain,</span> +<span class="sd">timeout``) and returns a tuple ``(data, mime)``.</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"DEFAULT_RESOLVER_MAP"</span><span class="p">,</span> <span class="s2">"allesedv"</span><span class="p">,</span> <span class="s2">"duckduckgo"</span><span class="p">,</span> <span class="s2">"google"</span><span class="p">,</span> <span class="s2">"yandex"</span><span class="p">]</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Callable</span> +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">network</span> +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">logger</span> + +<span class="n">DEFAULT_RESOLVER_MAP</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]</span> +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'favicons.resolvers'</span><span class="p">)</span> + + +<span 
class="k">def</span> <span class="nf">_req_args</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> + <span class="c1"># add the request arguments from the searx.network</span> + <span class="n">d</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"raise_for_httperror"</span><span class="p">:</span> <span class="kc">False</span><span class="p">}</span> + <span class="n">d</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">kwargs</span><span class="p">)</span> + <span class="k">return</span> <span class="n">d</span> + + +<div class="viewcode-block" id="allesedv"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.resolvers.allesedv">[docs]</a> +<span class="k">def</span> <span class="nf">allesedv</span><span class="p">(</span><span class="n">domain</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="nb">tuple</span><span class="p">[</span><span class="kc">None</span> <span class="o">|</span> <span class="nb">bytes</span><span class="p">,</span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">str</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Favicon Resolver from allesedv.com / https://favicon.allesedv.com/"""</span> + <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="n">url</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"https://f1.allesedv.com/32/</span><span class="si">{</span><span class="n">domain</span><span class="si">}</span><span class="s2">"</span> + 
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"fetch favicon from: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span> + + <span class="c1"># will just return a 200 regardless of the favicon existing or not</span> + <span class="c1"># sometimes will be correct size, sometimes not</span> + <span class="n">response</span> <span class="o">=</span> <span class="n">network</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="o">**</span><span class="n">_req_args</span><span class="p">(</span><span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">))</span> + <span class="k">if</span> <span class="n">response</span> <span class="ow">and</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">200</span><span class="p">:</span> + <span class="n">mime</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">headers</span><span class="p">[</span><span class="s1">'Content-Type'</span><span class="p">]</span> + <span class="k">if</span> <span class="n">mime</span> <span class="o">!=</span> <span class="s1">'image/gif'</span><span class="p">:</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">content</span> + <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">mime</span></div> + + + +<div class="viewcode-block" id="duckduckgo"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.resolvers.duckduckgo">[docs]</a> +<span class="k">def</span> <span class="nf">duckduckgo</span><span class="p">(</span><span 
class="n">domain</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="nb">tuple</span><span class="p">[</span><span class="kc">None</span> <span class="o">|</span> <span class="nb">bytes</span><span class="p">,</span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">str</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Favicon Resolver from duckduckgo.com / https://blog.jim-nielsen.com/2021/displaying-favicons-for-any-domain/"""</span> + <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="n">url</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"https://icons.duckduckgo.com/ip2/</span><span class="si">{</span><span class="n">domain</span><span class="si">}</span><span class="s2">.ico"</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"fetch favicon from: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span> + + <span class="c1"># will return a 404 if the favicon does not exist and a 200 if it does,</span> + <span class="n">response</span> <span class="o">=</span> <span class="n">network</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="o">**</span><span class="n">_req_args</span><span class="p">(</span><span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">))</span> + <span class="k">if</span> <span class="n">response</span> <span 
class="ow">and</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">200</span><span class="p">:</span> + <span class="c1"># api will respond with a 32x32 png image</span> + <span class="n">mime</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">headers</span><span class="p">[</span><span class="s1">'Content-Type'</span><span class="p">]</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">content</span> + <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">mime</span></div> + + + +<div class="viewcode-block" id="google"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.resolvers.google">[docs]</a> +<span class="k">def</span> <span class="nf">google</span><span class="p">(</span><span class="n">domain</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="nb">tuple</span><span class="p">[</span><span class="kc">None</span> <span class="o">|</span> <span class="nb">bytes</span><span class="p">,</span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">str</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Favicon Resolver from google.com"""</span> + <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + + <span class="c1"># URL https://www.google.com/s2/favicons?sz=32&domain={domain}" will be</span> + <span class="c1"># redirected (HTTP 301 Moved Permanently) to 
t1.gstatic.com/faviconV2:</span> + <span class="n">url</span> <span class="o">=</span> <span class="p">(</span> + <span class="sa">f</span><span class="s2">"https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL"</span> + <span class="sa">f</span><span class="s2">"&url=https://</span><span class="si">{</span><span class="n">domain</span><span class="si">}</span><span class="s2">&size=32"</span> + <span class="p">)</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"fetch favicon from: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span> + + <span class="c1"># will return a 404 if the favicon does not exist and a 200 if it does,</span> + <span class="n">response</span> <span class="o">=</span> <span class="n">network</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="o">**</span><span class="n">_req_args</span><span class="p">(</span><span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">))</span> + <span class="k">if</span> <span class="n">response</span> <span class="ow">and</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">200</span><span class="p">:</span> + <span class="c1"># api will respond with a 32x32 png image</span> + <span class="n">mime</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">headers</span><span class="p">[</span><span class="s1">'Content-Type'</span><span class="p">]</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">content</span> + <span class="k">return</span> <span class="n">data</span><span 
class="p">,</span> <span class="n">mime</span></div> + + + +<div class="viewcode-block" id="yandex"> +<a class="viewcode-back" href="../../../src/searx.favicons.html#searx.favicons.resolvers.yandex">[docs]</a> +<span class="k">def</span> <span class="nf">yandex</span><span class="p">(</span><span class="n">domain</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="nb">tuple</span><span class="p">[</span><span class="kc">None</span> <span class="o">|</span> <span class="nb">bytes</span><span class="p">,</span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">str</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Favicon Resolver from yandex.com"""</span> + <span class="n">data</span><span class="p">,</span> <span class="n">mime</span> <span class="o">=</span> <span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="n">url</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"https://favicon.yandex.net/favicon/</span><span class="si">{</span><span class="n">domain</span><span class="si">}</span><span class="s2">"</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"fetch favicon from: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span> + + <span class="c1"># api will respond with a 16x16 png image, if it doesn't exist, it will be a</span> + <span class="c1"># 1x1 png image (70 bytes)</span> + <span class="n">response</span> <span class="o">=</span> <span class="n">network</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span 
class="p">,</span> <span class="o">**</span><span class="n">_req_args</span><span class="p">(</span><span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">))</span> + <span class="k">if</span> <span class="n">response</span> <span class="ow">and</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">200</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">content</span><span class="p">)</span> <span class="o">></span> <span class="mi">70</span><span class="p">:</span> + <span class="n">mime</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">headers</span><span class="p">[</span><span class="s1">'Content-Type'</span><span class="p">]</span> + <span class="n">data</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">content</span> + <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">mime</span></div> + + + +<span class="n">DEFAULT_RESOLVER_MAP</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"allesedv"</span><span class="p">:</span> <span class="n">allesedv</span><span class="p">,</span> + <span class="s2">"duckduckgo"</span><span class="p">:</span> <span class="n">duckduckgo</span><span class="p">,</span> + <span class="s2">"google"</span><span class="p">:</span> <span class="n">google</span><span class="p">,</span> + <span class="s2">"yandex"</span><span class="p">:</span> <span class="n">yandex</span><span class="p">,</span> +<span class="p">}</span> +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + 
<p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" 
role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/infopage.html b/_modules/searx/infopage.html new file mode 100644 index 000000000..34ee50c2d --- /dev/null +++ b/_modules/searx/infopage.html @@ -0,0 +1,316 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.infopage — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.infopage</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.infopage</h1><div 
class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Render SearXNG instance documentation.</span> + +<span class="sd">Usage in a Flask app route:</span> + +<span class="sd">.. code:: python</span> + +<span class="sd"> from searx import infopage</span> + +<span class="sd"> _INFO_PAGES = infopage.InfoPageSet(infopage.MistletoePage)</span> + +<span class="sd"> @app.route('/info/<pagename>', methods=['GET'])</span> +<span class="sd"> def info(pagename):</span> + +<span class="sd"> locale = request.preferences.get_value('locale')</span> +<span class="sd"> page = _INFO_PAGES.get_page(pagename, locale)</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'InfoPage'</span><span class="p">,</span> <span class="s1">'InfoPageSet'</span><span class="p">]</span> + +<span class="kn">import</span> <span class="nn">os</span> +<span class="kn">import</span> <span class="nn">os.path</span> +<span class="kn">import</span> <span class="nn">logging</span> +<span class="kn">import</span> <span class="nn">typing</span> + +<span class="kn">import</span> <span class="nn">urllib.parse</span> +<span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">cached_property</span> +<span class="kn">import</span> <span class="nn">jinja2</span> +<span class="kn">from</span> <span class="nn">flask.helpers</span> <span class="kn">import</span> <span class="n">url_for</span> +<span class="kn">from</span> <span class="nn">markdown_it</span> <span class="kn">import</span> <span class="n">MarkdownIt</span> + +<span class="kn">from</span> <span class="nn">..</span> <span class="kn">import</span> <span class="n">get_setting</span> +<span class="kn">from</span> <span 
class="nn">..version</span> <span class="kn">import</span> <span class="n">GIT_URL</span> +<span class="kn">from</span> <span class="nn">..locales</span> <span class="kn">import</span> <span class="n">LOCALE_NAMES</span> + + +<span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s1">'searx.infopage'</span><span class="p">)</span> +<span class="n">_INFO_FOLDER</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="vm">__file__</span><span class="p">))</span> +<span class="n">INFO_PAGES</span><span class="p">:</span> <span class="s1">'InfoPageSet'</span> + + +<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="n">name</span><span class="p">):</span> + <span class="k">if</span> <span class="n">name</span> <span class="o">==</span> <span class="s1">'INFO_PAGES'</span><span class="p">:</span> + <span class="k">global</span> <span class="n">INFO_PAGES</span> <span class="c1"># pylint: disable=global-statement</span> + <span class="n">INFO_PAGES</span> <span class="o">=</span> <span class="n">InfoPageSet</span><span class="p">()</span> + <span class="k">return</span> <span class="n">INFO_PAGES</span> + + <span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"module </span><span class="si">{</span><span class="vm">__name__</span><span class="si">!r}</span><span class="s2"> has no attribute </span><span class="si">{</span><span class="n">name</span><span class="si">!r}</span><span class="s2">"</span><span class="p">)</span> + + +<div 
class="viewcode-block" id="InfoPage"> +<a class="viewcode-back" href="../../src/searx.infopage.html#searx.infopage.InfoPage">[docs]</a> +<span class="k">class</span> <span class="nc">InfoPage</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""A page of the :py:obj:`online documentation <InfoPageSet>`."""</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fname</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">fname</span> <span class="o">=</span> <span class="n">fname</span> + + <span class="nd">@cached_property</span> + <span class="k">def</span> <span class="nf">raw_content</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Raw content of the page (without any jinja rendering)"""</span> + <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fname</span><span class="p">,</span> <span class="s1">'r'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span> + <span class="k">return</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span> + + <span class="nd">@cached_property</span> + <span class="k">def</span> <span class="nf">content</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Content of the page (rendered in a Jinja context)"""</span> + <span class="n">ctx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_ctx</span><span class="p">()</span> + <span 
class="n">template</span> <span class="o">=</span> <span class="n">jinja2</span><span class="o">.</span><span class="n">Environment</span><span class="p">()</span><span class="o">.</span><span class="n">from_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">raw_content</span><span class="p">)</span> + <span class="k">return</span> <span class="n">template</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="o">**</span><span class="n">ctx</span><span class="p">)</span> + + <span class="nd">@cached_property</span> + <span class="k">def</span> <span class="nf">title</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Title of the content (without any markup)"""</span> + <span class="n">t</span> <span class="o">=</span> <span class="s2">""</span> + <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">raw_content</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">):</span> + <span class="k">if</span> <span class="n">l</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'# '</span><span class="p">):</span> + <span class="n">t</span> <span class="o">=</span> <span class="n">l</span><span class="o">.</span><span class="n">strip</span><span class="p">(</span><span class="s1">'# '</span><span class="p">)</span> + <span class="k">return</span> <span class="n">t</span> + + <span class="nd">@cached_property</span> + <span class="k">def</span> <span class="nf">html</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Render Markdown (CommonMark_) to HTML by using 
markdown-it-py_.</span> + +<span class="sd"> .. _CommonMark: https://commonmark.org/</span> +<span class="sd"> .. _markdown-it-py: https://github.com/executablebooks/markdown-it-py</span> + +<span class="sd"> """</span> + <span class="k">return</span> <span class="p">(</span> + <span class="n">MarkdownIt</span><span class="p">(</span><span class="s2">"commonmark"</span><span class="p">,</span> <span class="p">{</span><span class="s2">"typographer"</span><span class="p">:</span> <span class="kc">True</span><span class="p">})</span><span class="o">.</span><span class="n">enable</span><span class="p">([</span><span class="s2">"replacements"</span><span class="p">,</span> <span class="s2">"smartquotes"</span><span class="p">])</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">content</span><span class="p">)</span> + <span class="p">)</span> + +<div class="viewcode-block" id="InfoPage.get_ctx"> +<a class="viewcode-back" href="../../src/searx.infopage.html#searx.infopage.InfoPage.get_ctx">[docs]</a> + <span class="k">def</span> <span class="nf">get_ctx</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Jinja context to render :py:obj:`InfoPage.content`"""</span> + + <span class="k">def</span> <span class="nf">_md_link</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">url_for</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">_external</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="k">return</span> <span class="s2">"[</span><span class="si">%s</span><span class="s2">](</span><span class="si">%s</span><span class="s2">)"</span> <span 
class="o">%</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">_md_search</span><span class="p">(</span><span class="n">query</span><span class="p">):</span> + <span class="n">url</span> <span class="o">=</span> <span class="s1">'</span><span class="si">%s</span><span class="s1">?q=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">url_for</span><span class="p">(</span><span class="s1">'search'</span><span class="p">,</span> <span class="n">_external</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">quote</span><span class="p">(</span><span class="n">query</span><span class="p">))</span> + <span class="k">return</span> <span class="s1">'[</span><span class="si">%s</span><span class="s1">](</span><span class="si">%s</span><span class="s1">)'</span> <span class="o">%</span> <span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span> + + <span class="n">ctx</span> <span class="o">=</span> <span class="p">{}</span> + <span class="n">ctx</span><span class="p">[</span><span class="s1">'GIT_URL'</span><span class="p">]</span> <span class="o">=</span> <span class="n">GIT_URL</span> + <span class="n">ctx</span><span class="p">[</span><span class="s1">'get_setting'</span><span class="p">]</span> <span class="o">=</span> <span class="n">get_setting</span> + <span class="n">ctx</span><span class="p">[</span><span class="s1">'link'</span><span class="p">]</span> <span class="o">=</span> <span class="n">_md_link</span> + <span class="n">ctx</span><span class="p">[</span><span class="s1">'search'</span><span class="p">]</span> <span class="o">=</span> <span 
class="n">_md_search</span> + + <span class="k">return</span> <span class="n">ctx</span></div> + + + <span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="sa">f</span><span class="s1">'<</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> fname=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">fname</span><span class="si">!r}</span><span class="s1">>'</span></div> + + + +<div class="viewcode-block" id="InfoPageSet"> +<a class="viewcode-back" href="../../src/searx.infopage.html#searx.infopage.InfoPageSet">[docs]</a> +<span class="k">class</span> <span class="nc">InfoPageSet</span><span class="p">:</span> <span class="c1"># pylint: disable=too-few-public-methods</span> +<span class="w"> </span><span class="sd">"""Cached rendering of the online documentation a SearXNG instance has.</span> + +<span class="sd"> :param page_class: render online documentation by :py:obj:`InfoPage` parser.</span> +<span class="sd"> :type page_class: :py:obj:`InfoPage`</span> + +<span class="sd"> :param info_folder: information directory</span> +<span class="sd"> :type info_folder: str</span> +<span class="sd"> """</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> + <span class="bp">self</span><span class="p">,</span> <span class="n">page_class</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">typing</span><span class="o">.</span><span class="n">Type</span><span class="p">[</span><span class="n">InfoPage</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span 
class="p">,</span> <span class="n">info_folder</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> + <span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">page_class</span> <span class="o">=</span> <span class="n">page_class</span> <span class="ow">or</span> <span class="n">InfoPage</span> + <span class="bp">self</span><span class="o">.</span><span class="n">folder</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">info_folder</span> <span class="ow">or</span> <span class="n">_INFO_FOLDER</span> +<span class="w"> </span><span class="sd">"""location of the Markdown files"""</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">CACHE</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">tuple</span><span class="p">,</span> <span class="n">typing</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">InfoPage</span><span class="p">]]</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">locale_default</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">'en'</span> +<span class="w"> </span><span class="sd">"""default language"""</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">locales</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span> + <span class="n">locale</span><span 
class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'_'</span><span class="p">,</span> <span class="s1">'-'</span><span class="p">)</span> <span class="k">for</span> <span class="n">locale</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="n">_INFO_FOLDER</span><span class="p">)</span> <span class="k">if</span> <span class="n">locale</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'_'</span><span class="p">,</span> <span class="s1">'-'</span><span class="p">)</span> <span class="ow">in</span> <span class="n">LOCALE_NAMES</span> + <span class="p">]</span> +<span class="w"> </span><span class="sd">"""list of supported languages (aka locales)"""</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">toc</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span> + <span class="s1">'search-syntax'</span><span class="p">,</span> + <span class="s1">'about'</span><span class="p">,</span> + <span class="s1">'donate'</span><span class="p">,</span> + <span class="p">]</span> +<span class="w"> </span><span class="sd">"""list of articles in the online documentation"""</span> + +<div class="viewcode-block" id="InfoPageSet.get_page"> +<a class="viewcode-back" href="../../src/searx.infopage.html#searx.infopage.InfoPageSet.get_page">[docs]</a> + <span class="k">def</span> <span class="nf">get_page</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pagename</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">locale</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span 
class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Return ``pagename`` instance of :py:obj:`InfoPage`</span> + +<span class="sd"> :param pagename: name of the page, a value from :py:obj:`InfoPageSet.toc`</span> +<span class="sd"> :type pagename: str</span> + +<span class="sd"> :param locale: language of the page, e.g. ``en``, ``zh_Hans_CN``</span> +<span class="sd"> (default: :py:obj:`InfoPageSet.i18n_origin`)</span> +<span class="sd"> :type locale: str</span> + +<span class="sd"> """</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">locale</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">locale_default</span> + + <span class="k">if</span> <span class="n">pagename</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">toc</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + <span class="k">if</span> <span class="n">locale</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">locales</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">cache_key</span> <span class="o">=</span> <span class="p">(</span><span class="n">pagename</span><span class="p">,</span> <span class="n">locale</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">cache_key</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">CACHE</span><span class="p">:</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">CACHE</span><span class="p">[</span><span 
class="n">cache_key</span><span class="p">]</span> + + <span class="c1"># not yet instantiated</span> + + <span class="n">fname</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">folder</span><span class="p">,</span> <span class="n">locale</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">'_'</span><span class="p">),</span> <span class="n">pagename</span><span class="p">)</span> <span class="o">+</span> <span class="s1">'.md'</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">fname</span><span class="p">):</span> + <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'file </span><span class="si">%s</span><span class="s1"> does not exists'</span><span class="p">,</span> <span class="n">fname</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">CACHE</span><span class="p">[</span><span class="n">cache_key</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">page</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">page_class</span><span class="p">(</span><span class="n">fname</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">CACHE</span><span class="p">[</span><span class="n">cache_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">page</span> + 
<span class="k">return</span> <span class="n">page</span></div> + + +<div class="viewcode-block" id="InfoPageSet.iter_pages"> +<a class="viewcode-back" href="../../src/searx.infopage.html#searx.infopage.InfoPageSet.iter_pages">[docs]</a> + <span class="k">def</span> <span class="nf">iter_pages</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">locale</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">fallback_to_default</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Iterate over all pages of the TOC"""</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">locale</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">locale_default</span> + <span class="k">for</span> <span class="n">page_name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">toc</span><span class="p">:</span> + <span class="n">page_locale</span> <span class="o">=</span> <span class="n">locale</span> + <span class="n">page</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_page</span><span class="p">(</span><span class="n">page_name</span><span class="p">,</span> <span class="n">locale</span><span class="p">)</span> + <span class="k">if</span> <span class="n">fallback_to_default</span> <span class="ow">and</span> <span class="n">page</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">page_locale</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">locale_default</span> 
+ <span class="n">page</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_page</span><span class="p">(</span><span class="n">page_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">locale_default</span><span class="p">)</span> + <span class="k">if</span> <span class="n">page</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="c1"># page is None if the page was deleted by the administrator</span> + <span class="k">yield</span> <span class="n">page_name</span><span class="p">,</span> <span class="n">page_locale</span><span class="p">,</span> <span class="n">page</span></div> +</div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + 
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/limiter.html b/_modules/searx/limiter.html new file mode 100644 index 000000000..6497e0b86 --- /dev/null +++ b/_modules/searx/limiter.html @@ -0,0 +1,360 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.limiter — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.limiter</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.limiter</h1><div 
class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Bot protection / IP rate limitation. The intention of rate limitation is to</span> +<span class="sd">limit suspicious requests from an IP. The motivation behind this is the fact</span> +<span class="sd">that SearXNG passes through requests from bots and is thus classified as a bot</span> +<span class="sd">itself. As a result, the SearXNG engine then receives a CAPTCHA or is blocked</span> +<span class="sd">by the search engine (the origin) in some other way.</span> + +<span class="sd">To avoid blocking, the requests from bots to SearXNG must also be blocked, this</span> +<span class="sd">is the task of the limiter. To perform this task, the limiter uses the methods</span> +<span class="sd">from the :ref:`botdetection`:</span> + +<span class="sd">- Analysis of the HTTP header in the request / :ref:`botdetection probe headers`</span> +<span class="sd"> can be easily bypassed.</span> + +<span class="sd">- Block and pass lists in which IPs are listed / :ref:`botdetection ip_lists`</span> +<span class="sd"> are hard to maintain, since the IPs of bots are not all known and change over</span> +<span class="sd"> the time.</span> + +<span class="sd">- Detection & dynamically :ref:`botdetection rate limit` of bots based on the</span> +<span class="sd"> behavior of the requests. For dynamically changeable IP lists a Redis</span> +<span class="sd"> database is needed.</span> + +<span class="sd">The prerequisite for IP based methods is the correct determination of the IP of</span> +<span class="sd">the client. The IP of the client is determined via the X-Forwarded-For_ HTTP</span> +<span class="sd">header.</span> + +<span class="sd">.. 
attention::</span> + +<span class="sd"> A correct setup of the HTTP request headers ``X-Forwarded-For`` and</span> +<span class="sd"> ``X-Real-IP`` is essential to be able to assign a request to an IP correctly:</span> + +<span class="sd"> - `NGINX RequestHeader`_</span> +<span class="sd"> - `Apache RequestHeader`_</span> + +<span class="sd">.. _X-Forwarded-For:</span> +<span class="sd"> https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For</span> +<span class="sd">.. _NGINX RequestHeader:</span> +<span class="sd"> https://docs.searxng.org/admin/installation-nginx.html#nginx-s-searxng-site</span> +<span class="sd">.. _Apache RequestHeader:</span> +<span class="sd"> https://docs.searxng.org/admin/installation-apache.html#apache-s-searxng-site</span> + +<span class="sd">Enable Limiter</span> +<span class="sd">==============</span> + +<span class="sd">To enable the limiter activate:</span> + +<span class="sd">.. code:: yaml</span> + +<span class="sd"> server:</span> +<span class="sd"> ...</span> +<span class="sd"> limiter: true # rate limit the number of request on the instance, block some bots</span> + +<span class="sd">and set the redis-url connection. Check the value, it depends on your redis DB</span> +<span class="sd">(see :ref:`settings redis`), by example:</span> + +<span class="sd">.. code:: yaml</span> + +<span class="sd"> redis:</span> +<span class="sd"> url: unix:///usr/local/searxng-redis/run/redis.sock?db=0</span> + + +<span class="sd">Configure Limiter</span> +<span class="sd">=================</span> + +<span class="sd">The methods of :ref:`botdetection` the limiter uses are configured in a local</span> +<span class="sd">file ``/etc/searxng/limiter.toml``. The defaults are shown in limiter.toml_ /</span> +<span class="sd">Don't copy all values to your local configuration, just enable what you need by</span> +<span class="sd">overwriting the defaults. 
For instance to activate the ``link_token`` method in</span> +<span class="sd">the :ref:`botdetection.ip_limit` you only need to set this option to ``true``:</span> + +<span class="sd">.. code:: toml</span> + +<span class="sd"> [botdetection.ip_limit]</span> +<span class="sd"> link_token = true</span> + +<span class="sd">.. _limiter.toml:</span> + +<span class="sd">``limiter.toml``</span> +<span class="sd">================</span> + +<span class="sd">In this file the limiter finds the configuration of the :ref:`botdetection`:</span> + +<span class="sd">- :ref:`botdetection ip_lists`</span> +<span class="sd">- :ref:`botdetection rate limit`</span> +<span class="sd">- :ref:`botdetection probe headers`</span> + +<span class="sd">.. kernel-include:: $SOURCEDIR/limiter.toml</span> +<span class="sd"> :code: toml</span> + +<span class="sd">Implementation</span> +<span class="sd">==============</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> +<span class="kn">import</span> <span class="nn">sys</span> + +<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span> +<span class="kn">from</span> <span class="nn">ipaddress</span> <span class="kn">import</span> <span class="n">ip_address</span> +<span class="kn">import</span> <span class="nn">flask</span> +<span class="kn">import</span> <span class="nn">werkzeug</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">logger</span><span class="p">,</span> + <span class="n">redisdb</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">botdetection</span> +<span class="kn">from</span> <span class="nn">searx.botdetection</span> <span class="kn">import</span> 
<span class="p">(</span> + <span class="n">config</span><span class="p">,</span> + <span class="n">http_accept</span><span class="p">,</span> + <span class="n">http_accept_encoding</span><span class="p">,</span> + <span class="n">http_accept_language</span><span class="p">,</span> + <span class="n">http_user_agent</span><span class="p">,</span> + <span class="n">ip_limit</span><span class="p">,</span> + <span class="n">ip_lists</span><span class="p">,</span> + <span class="n">get_network</span><span class="p">,</span> + <span class="n">get_real_ip</span><span class="p">,</span> + <span class="n">dump_request</span><span class="p">,</span> +<span class="p">)</span> + +<span class="c1"># the configuration are limiter.toml and "limiter" in settings.yml so, for</span> +<span class="c1"># coherency, the logger is "limiter"</span> +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'limiter'</span><span class="p">)</span> + +<span class="n">CFG</span><span class="p">:</span> <span class="n">config</span><span class="o">.</span><span class="n">Config</span> <span class="o">=</span> <span class="kc">None</span> <span class="c1"># type: ignore</span> +<span class="n">_INSTALLED</span> <span class="o">=</span> <span class="kc">False</span> + +<span class="n">LIMITER_CFG_SCHEMA</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="vm">__file__</span><span class="p">)</span><span class="o">.</span><span class="n">parent</span> <span class="o">/</span> <span class="s2">"limiter.toml"</span> +<span class="sd">"""Base configuration (schema) of the botdetection."""</span> + +<span class="n">CFG_DEPRECATED</span> <span class="o">=</span> <span class="p">{</span> + <span class="c1"># "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. 
Don't use it in your real project config."</span> +<span class="p">}</span> + + +<span class="k">def</span> <span class="nf">get_cfg</span><span class="p">()</span> <span class="o">-></span> <span class="n">config</span><span class="o">.</span><span class="n">Config</span><span class="p">:</span> + <span class="k">global</span> <span class="n">CFG</span> <span class="c1"># pylint: disable=global-statement</span> + + <span class="k">if</span> <span class="n">CFG</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">settings_loader</span> <span class="c1"># pylint: disable=import-outside-toplevel</span> + + <span class="n">cfg_file</span> <span class="o">=</span> <span class="p">(</span><span class="n">settings_loader</span><span class="o">.</span><span class="n">get_user_cfg_folder</span><span class="p">()</span> <span class="ow">or</span> <span class="n">Path</span><span class="p">(</span><span class="s2">"/etc/searxng"</span><span class="p">))</span> <span class="o">/</span> <span class="s2">"limiter.toml"</span> + <span class="n">CFG</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">Config</span><span class="o">.</span><span class="n">from_toml</span><span class="p">(</span><span class="n">LIMITER_CFG_SCHEMA</span><span class="p">,</span> <span class="n">cfg_file</span><span class="p">,</span> <span class="n">CFG_DEPRECATED</span><span class="p">)</span> + <span class="k">return</span> <span class="n">CFG</span> + + +<span class="k">def</span> <span class="nf">filter_request</span><span class="p">(</span><span class="n">request</span><span class="p">:</span> <span class="n">flask</span><span class="o">.</span><span class="n">Request</span><span class="p">)</span> <span class="o">-></span> <span class="n">werkzeug</span><span class="o">.</span><span class="n">Response</span> 
<span class="o">|</span> <span class="kc">None</span><span class="p">:</span> + <span class="c1"># pylint: disable=too-many-return-statements</span> + + <span class="n">cfg</span> <span class="o">=</span> <span class="n">get_cfg</span><span class="p">()</span> + <span class="n">real_ip</span> <span class="o">=</span> <span class="n">ip_address</span><span class="p">(</span><span class="n">get_real_ip</span><span class="p">(</span><span class="n">request</span><span class="p">))</span> + <span class="n">network</span> <span class="o">=</span> <span class="n">get_network</span><span class="p">(</span><span class="n">real_ip</span><span class="p">,</span> <span class="n">cfg</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">request</span><span class="o">.</span><span class="n">path</span> <span class="o">==</span> <span class="s1">'/healthz'</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="c1"># link-local</span> + + <span class="k">if</span> <span class="n">network</span><span class="o">.</span><span class="n">is_link_local</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="c1"># block- & pass- lists</span> + <span class="c1">#</span> + <span class="c1"># 1. The IP of the request is first checked against the pass-list; if the IP</span> + <span class="c1"># matches an entry in the list, the request is not blocked.</span> + <span class="c1"># 2. If no matching entry is found in the pass-list, then a check is made against</span> + <span class="c1"># the block list; if the IP matches an entry in the list, the request is</span> + <span class="c1"># blocked.</span> + <span class="c1"># 3. 
If the IP is not in either list, the request is not blocked.</span> + + <span class="n">match</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="n">ip_lists</span><span class="o">.</span><span class="n">pass_ip</span><span class="p">(</span><span class="n">real_ip</span><span class="p">,</span> <span class="n">cfg</span><span class="p">)</span> + <span class="k">if</span> <span class="n">match</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"PASS </span><span class="si">%s</span><span class="s2">: matched PASSLIST - </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">network</span><span class="o">.</span><span class="n">compressed</span><span class="p">,</span> <span class="n">msg</span><span class="p">)</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">match</span><span class="p">,</span> <span class="n">msg</span> <span class="o">=</span> <span class="n">ip_lists</span><span class="o">.</span><span class="n">block_ip</span><span class="p">(</span><span class="n">real_ip</span><span class="p">,</span> <span class="n">cfg</span><span class="p">)</span> + <span class="k">if</span> <span class="n">match</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"BLOCK </span><span class="si">%s</span><span class="s2">: matched BLOCKLIST - </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">network</span><span class="o">.</span><span class="n">compressed</span><span class="p">,</span> <span class="n">msg</span><span class="p">)</span> + <span class="k">return</span> <span class="n">flask</span><span class="o">.</span><span class="n">make_response</span><span class="p">((</span><span 
class="s1">'IP is on BLOCKLIST - </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">msg</span><span class="p">,</span> <span class="mi">429</span><span class="p">))</span> + + <span class="c1"># methods applied on /</span> + + <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="p">[</span> + <span class="n">http_user_agent</span><span class="p">,</span> + <span class="p">]:</span> + <span class="n">val</span> <span class="o">=</span> <span class="n">func</span><span class="o">.</span><span class="n">filter_request</span><span class="p">(</span><span class="n">network</span><span class="p">,</span> <span class="n">request</span><span class="p">,</span> <span class="n">cfg</span><span class="p">)</span> + <span class="k">if</span> <span class="n">val</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">val</span> + + <span class="c1"># methods applied on /search</span> + + <span class="k">if</span> <span class="n">request</span><span class="o">.</span><span class="n">path</span> <span class="o">==</span> <span class="s1">'/search'</span><span class="p">:</span> + + <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="p">[</span> + <span class="n">http_accept</span><span class="p">,</span> + <span class="n">http_accept_encoding</span><span class="p">,</span> + <span class="n">http_accept_language</span><span class="p">,</span> + <span class="n">http_user_agent</span><span class="p">,</span> + <span class="n">ip_limit</span><span class="p">,</span> + <span class="p">]:</span> + <span class="n">val</span> <span class="o">=</span> <span class="n">func</span><span class="o">.</span><span class="n">filter_request</span><span class="p">(</span><span class="n">network</span><span class="p">,</span> <span class="n">request</span><span 
class="p">,</span> <span class="n">cfg</span><span class="p">)</span> + <span class="k">if</span> <span class="n">val</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">val</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">"OK </span><span class="si">{</span><span class="n">network</span><span class="si">}</span><span class="s2">: %s"</span><span class="p">,</span> <span class="n">dump_request</span><span class="p">(</span><span class="n">flask</span><span class="o">.</span><span class="n">request</span><span class="p">))</span> + <span class="k">return</span> <span class="kc">None</span> + + +<div class="viewcode-block" id="pre_request"> +<a class="viewcode-back" href="../../admin/searx.limiter.html#searx.limiter.pre_request">[docs]</a> +<span class="k">def</span> <span class="nf">pre_request</span><span class="p">():</span> +<span class="w"> </span><span class="sd">"""See :py:obj:`flask.Flask.before_request`"""</span> + <span class="k">return</span> <span class="n">filter_request</span><span class="p">(</span><span class="n">flask</span><span class="o">.</span><span class="n">request</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="is_installed"> +<a class="viewcode-back" href="../../admin/searx.limiter.html#searx.limiter.is_installed">[docs]</a> +<span class="k">def</span> <span class="nf">is_installed</span><span class="p">():</span> +<span class="w"> </span><span class="sd">"""Returns ``True`` if limiter is active and a redis DB is available."""</span> + <span class="k">return</span> <span class="n">_INSTALLED</span></div> + + + +<div class="viewcode-block" id="initialize"> +<a class="viewcode-back" href="../../admin/searx.limiter.html#searx.limiter.initialize">[docs]</a> +<span class="k">def</span> <span 
class="nf">initialize</span><span class="p">(</span><span class="n">app</span><span class="p">:</span> <span class="n">flask</span><span class="o">.</span><span class="n">Flask</span><span class="p">,</span> <span class="n">settings</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Install the limiter"""</span> + <span class="k">global</span> <span class="n">_INSTALLED</span> <span class="c1"># pylint: disable=global-statement</span> + + <span class="c1"># even if the limiter is not activated, the botdetection must be activated</span> + <span class="c1"># (e.g. the self_info plugin uses the botdetection to get client IP)</span> + + <span class="n">cfg</span> <span class="o">=</span> <span class="n">get_cfg</span><span class="p">()</span> + <span class="n">redis_client</span> <span class="o">=</span> <span class="n">redisdb</span><span class="o">.</span><span class="n">client</span><span class="p">()</span> + <span class="n">botdetection</span><span class="o">.</span><span class="n">init</span><span class="p">(</span><span class="n">cfg</span><span class="p">,</span> <span class="n">redis_client</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">settings</span><span class="p">[</span><span class="s1">'server'</span><span class="p">][</span><span class="s1">'limiter'</span><span class="p">]</span> <span class="ow">or</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'server'</span><span class="p">][</span><span class="s1">'public_instance'</span><span class="p">]):</span> + <span class="k">return</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">redis_client</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span> + <span class="s2">"The limiter requires Redis, please consult the documentation: "</span> + <span 
class="s2">"https://docs.searxng.org/admin/searx.limiter.html"</span> + <span class="p">)</span> + <span class="k">if</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'server'</span><span class="p">][</span><span class="s1">'public_instance'</span><span class="p">]:</span> + <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> + <span class="k">return</span> + + <span class="n">_INSTALLED</span> <span class="o">=</span> <span class="kc">True</span> + + <span class="k">if</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'server'</span><span class="p">][</span><span class="s1">'public_instance'</span><span class="p">]:</span> + <span class="c1"># overwrite limiter.toml setting</span> + <span class="n">cfg</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s1">'botdetection.ip_limit.link_token'</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span> + + <span class="n">app</span><span class="o">.</span><span class="n">before_request</span><span class="p">(</span><span class="n">pre_request</span><span class="p">)</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" 
href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/locales.html b/_modules/searx/locales.html new file mode 100644 index 000000000..a4cacc7fc --- /dev/null +++ b/_modules/searx/locales.html @@ -0,0 +1,594 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.locales — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.locales</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.locales</h1><div 
class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""</span> +<span class="sd">SearXNG’s locale data</span> +<span class="sd">=====================</span> + +<span class="sd">The variables :py:obj:`RTL_LOCALES` and :py:obj:`LOCALE_NAMES` are loaded from</span> +<span class="sd">:origin:`searx/data/locales.json` / see :py:obj:`locales_initialize` and</span> +<span class="sd">:ref:`update_locales.py`.</span> + +<span class="sd">.. hint::</span> + +<span class="sd"> Whenever the value of :py:obj:`ADDITIONAL_TRANSLATIONS` or</span> +<span class="sd"> :py:obj:`LOCALE_BEST_MATCH` is modified, the</span> +<span class="sd"> :origin:`searx/data/locales.json` needs to be rebuild::</span> + +<span class="sd"> ./manage data.locales</span> + +<span class="sd">SearXNG's locale codes</span> +<span class="sd">======================</span> + +<span class="sd">.. automodule:: searx.sxng_locales</span> +<span class="sd"> :members:</span> + + +<span class="sd">SearXNG’s locale implementations</span> +<span class="sd">================================</span> +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span> + +<span class="kn">import</span> <span class="nn">babel</span> +<span class="kn">from</span> <span class="nn">babel.support</span> <span class="kn">import</span> <span class="n">Translations</span> +<span class="kn">import</span> <span class="nn">babel.languages</span> +<span class="kn">import</span> <span class="nn">babel.core</span> +<span class="kn">import</span> <span class="nn">flask_babel</span> +<span class="kn">import</span> <span class="nn">flask</span> +<span class="kn">from</span> <span class="nn">flask.ctx</span> <span class="kn">import</span> <span 
class="n">has_request_context</span> +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">data</span><span class="p">,</span> + <span class="n">logger</span><span class="p">,</span> + <span class="n">searx_dir</span><span class="p">,</span> +<span class="p">)</span> + +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'locales'</span><span class="p">)</span> + + +<span class="c1"># safe before monkey patching flask_babel.get_translations</span> +<span class="n">_flask_babel_get_translations</span> <span class="o">=</span> <span class="n">flask_babel</span><span class="o">.</span><span class="n">get_translations</span> + +<span class="n">LOCALE_NAMES</span> <span class="o">=</span> <span class="p">{}</span> +<span class="sd">"""Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR' (see</span> +<span class="sd">:py:obj:`locales_initialize`).</span> + +<span class="sd">:meta hide-value:</span> +<span class="sd">"""</span> + +<span class="n">RTL_LOCALES</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span> +<span class="sd">"""List of *Right-To-Left* locales e.g. 
'he' or 'fa-IR' (see</span> +<span class="sd">:py:obj:`locales_initialize`)."""</span> + +<span class="n">ADDITIONAL_TRANSLATIONS</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"dv"</span><span class="p">:</span> <span class="s2">"ދިވެހި (Dhivehi)"</span><span class="p">,</span> + <span class="s2">"oc"</span><span class="p">:</span> <span class="s2">"Occitan"</span><span class="p">,</span> + <span class="s2">"szl"</span><span class="p">:</span> <span class="s2">"Ślōnski (Silesian)"</span><span class="p">,</span> + <span class="s2">"pap"</span><span class="p">:</span> <span class="s2">"Papiamento"</span><span class="p">,</span> +<span class="p">}</span> +<span class="sd">"""Additional languages SearXNG has translations for but not supported by</span> +<span class="sd">python-babel (see :py:obj:`locales_initialize`)."""</span> + +<span class="n">LOCALE_BEST_MATCH</span> <span class="o">=</span> <span class="p">{</span> + <span class="s2">"dv"</span><span class="p">:</span> <span class="s2">"si"</span><span class="p">,</span> + <span class="s2">"oc"</span><span class="p">:</span> <span class="s1">'fr-FR'</span><span class="p">,</span> + <span class="s2">"szl"</span><span class="p">:</span> <span class="s2">"pl"</span><span class="p">,</span> + <span class="s2">"nl-BE"</span><span class="p">:</span> <span class="s2">"nl"</span><span class="p">,</span> + <span class="s2">"zh-HK"</span><span class="p">:</span> <span class="s2">"zh-Hant-TW"</span><span class="p">,</span> + <span class="s2">"pap"</span><span class="p">:</span> <span class="s2">"pt-BR"</span><span class="p">,</span> +<span class="p">}</span> +<span class="sd">"""Map a locale we do not have a translations for to a locale we have a</span> +<span class="sd">translation for. 
By example: use Taiwan version of the translation for Hong</span> +<span class="sd">Kong."""</span> + + +<span class="k">def</span> <span class="nf">localeselector</span><span class="p">():</span> + <span class="n">locale</span> <span class="o">=</span> <span class="s1">'en'</span> + <span class="k">if</span> <span class="n">has_request_context</span><span class="p">():</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">flask</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">preferences</span><span class="o">.</span><span class="n">get_value</span><span class="p">(</span><span class="s1">'locale'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">value</span><span class="p">:</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">value</span> + + <span class="c1"># first, set the language that is not supported by babel</span> + <span class="k">if</span> <span class="n">locale</span> <span class="ow">in</span> <span class="n">ADDITIONAL_TRANSLATIONS</span><span class="p">:</span> + <span class="n">flask</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">form</span><span class="p">[</span><span class="s1">'use-translation'</span><span class="p">]</span> <span class="o">=</span> <span class="n">locale</span> + + <span class="c1"># second, map locale to a value python-babel supports</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">LOCALE_BEST_MATCH</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">locale</span><span class="p">,</span> <span class="n">locale</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">locale</span> <span class="o">==</span> <span class="s1">''</span><span class="p">:</span> + <span class="c1"># if there is an error loading the preferences</span> + <span class="c1"># the 
locale is going to be ''</span> + <span class="n">locale</span> <span class="o">=</span> <span class="s1">'en'</span> + + <span class="c1"># babel uses underscore instead of hyphen.</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">locale</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">'_'</span><span class="p">)</span> + <span class="k">return</span> <span class="n">locale</span> + + +<div class="viewcode-block" id="get_translations"> +<a class="viewcode-back" href="../../src/searx.locales.html#searx.locales.get_translations">[docs]</a> +<span class="k">def</span> <span class="nf">get_translations</span><span class="p">():</span> +<span class="w"> </span><span class="sd">"""Monkey patch of :py:obj:`flask_babel.get_translations`"""</span> + <span class="k">if</span> <span class="n">has_request_context</span><span class="p">():</span> + <span class="n">use_translation</span> <span class="o">=</span> <span class="n">flask</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">form</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'use-translation'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">use_translation</span> <span class="ow">in</span> <span class="n">ADDITIONAL_TRANSLATIONS</span><span class="p">:</span> + <span class="n">babel_ext</span> <span class="o">=</span> <span class="n">flask_babel</span><span class="o">.</span><span class="n">current_app</span><span class="o">.</span><span class="n">extensions</span><span class="p">[</span><span class="s1">'babel'</span><span class="p">]</span> + <span class="k">return</span> <span class="n">Translations</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">babel_ext</span><span class="o">.</span><span 
class="n">translation_directories</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">use_translation</span><span class="p">)</span> + <span class="k">return</span> <span class="n">_flask_babel_get_translations</span><span class="p">()</span></div> + + + +<span class="n">_TR_LOCALES</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> + + +<div class="viewcode-block" id="get_translation_locales"> +<a class="viewcode-back" href="../../src/searx.locales.html#searx.locales.get_translation_locales">[docs]</a> +<span class="k">def</span> <span class="nf">get_translation_locales</span><span class="p">()</span> <span class="o">-></span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Returns the list of translation locales (*underscore*). 
The list is</span> +<span class="sd"> generated from the translation folders in :origin:`searx/translations`"""</span> + + <span class="k">global</span> <span class="n">_TR_LOCALES</span> <span class="c1"># pylint:disable=global-statement</span> + <span class="k">if</span> <span class="n">_TR_LOCALES</span><span class="p">:</span> + <span class="k">return</span> <span class="n">_TR_LOCALES</span> + + <span class="n">tr_locales</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">folder</span> <span class="ow">in</span> <span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="n">searx_dir</span><span class="p">)</span> <span class="o">/</span> <span class="s1">'translations'</span><span class="p">)</span><span class="o">.</span><span class="n">iterdir</span><span class="p">():</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">folder</span><span class="o">.</span><span class="n">is_dir</span><span class="p">():</span> + <span class="k">continue</span> + <span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">folder</span> <span class="o">/</span> <span class="s1">'LC_MESSAGES'</span><span class="p">)</span><span class="o">.</span><span class="n">is_dir</span><span class="p">():</span> + <span class="k">continue</span> + <span class="n">tr_locales</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">folder</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> + <span class="n">_TR_LOCALES</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">tr_locales</span><span class="p">)</span> + <span class="k">return</span> <span class="n">_TR_LOCALES</span></div> + + + +<div class="viewcode-block" id="locales_initialize"> +<a class="viewcode-back" 
href="../../src/searx.locales.html#searx.locales.locales_initialize">[docs]</a> +<span class="k">def</span> <span class="nf">locales_initialize</span><span class="p">():</span> +<span class="w"> </span><span class="sd">"""Initialize locales environment of the SearXNG session.</span> + +<span class="sd"> - monkey patch :py:obj:`flask_babel.get_translations` by :py:obj:`get_translations`</span> +<span class="sd"> - init global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`</span> +<span class="sd"> """</span> + <span class="n">flask_babel</span><span class="o">.</span><span class="n">get_translations</span> <span class="o">=</span> <span class="n">get_translations</span> + <span class="n">LOCALE_NAMES</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">LOCALES</span><span class="p">[</span><span class="s2">"LOCALE_NAMES"</span><span class="p">])</span> + <span class="n">RTL_LOCALES</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">LOCALES</span><span class="p">[</span><span class="s2">"RTL_LOCALES"</span><span class="p">])</span></div> + + + +<div class="viewcode-block" id="region_tag"> +<a class="viewcode-back" href="../../src/searx.locales.html#searx.locales.region_tag">[docs]</a> +<span class="k">def</span> <span class="nf">region_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">:</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns SearXNG's region tag from the locale (e.g. 
zh-TW, en-US)."""</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">locale</span><span class="o">.</span><span class="n">territory</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'babel.Locale </span><span class="si">%s</span><span class="s1">: missed a territory'</span> <span class="o">%</span> <span class="n">locale</span><span class="p">)</span> + <span class="k">return</span> <span class="n">locale</span><span class="o">.</span><span class="n">language</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">locale</span><span class="o">.</span><span class="n">territory</span></div> + + + +<div class="viewcode-block" id="language_tag"> +<a class="viewcode-back" href="../../src/searx.locales.html#searx.locales.language_tag">[docs]</a> +<span class="k">def</span> <span class="nf">language_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">:</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns SearXNG's language tag from the locale; if the locale has a script,</span> +<span class="sd"> the tag includes the script name (e.g. 
en, zh_Hant).</span> +<span class="sd"> """</span> + <span class="n">sxng_lang</span> <span class="o">=</span> <span class="n">locale</span><span class="o">.</span><span class="n">language</span> + <span class="k">if</span> <span class="n">locale</span><span class="o">.</span><span class="n">script</span><span class="p">:</span> + <span class="n">sxng_lang</span> <span class="o">+=</span> <span class="s1">'_'</span> <span class="o">+</span> <span class="n">locale</span><span class="o">.</span><span class="n">script</span> + <span class="k">return</span> <span class="n">sxng_lang</span></div> + + + +<div class="viewcode-block" id="get_locale"> +<a class="viewcode-back" href="../../src/searx.locales.html#searx.locales.get_locale">[docs]</a> +<span class="k">def</span> <span class="nf">get_locale</span><span class="p">(</span><span class="n">locale_tag</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns a :py:obj:`babel.Locale` object parsed from argument</span> +<span class="sd"> ``locale_tag``"""</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">locale_tag</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span> + <span class="k">return</span> <span class="n">locale</span> + + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span 
class="k">return</span> <span class="kc">None</span></div> + + + +<div class="viewcode-block" id="get_official_locales"> +<a class="viewcode-back" href="../../src/searx.locales.html#searx.locales.get_official_locales">[docs]</a> +<span class="k">def</span> <span class="nf">get_official_locales</span><span class="p">(</span> + <span class="n">territory</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">languages</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">regional</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">de_facto</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span> +<span class="p">)</span> <span class="o">-></span> <span class="nb">set</span><span class="p">[</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Returns a list of :py:obj:`babel.Locale` with languages from</span> +<span class="sd"> :py:obj:`babel.languages.get_official_languages`.</span> + +<span class="sd"> :param territory: The territory (country or region) code.</span> + +<span class="sd"> :param languages: A list of language codes the languages from</span> +<span class="sd"> :py:obj:`babel.languages.get_official_languages` should be in</span> +<span class="sd"> (intersection). 
If this argument is ``None``, all official languages in</span> +<span class="sd"> this territory are used.</span> + +<span class="sd"> :param regional: If the regional flag is set, then languages which are</span> +<span class="sd"> regionally official are also returned.</span> + +<span class="sd"> :param de_facto: If the de_facto flag is set to `False`, then languages</span> +<span class="sd"> which are “de facto” official are not returned.</span> + +<span class="sd"> """</span> + <span class="n">ret_val</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span> + <span class="n">o_languages</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get_official_languages</span><span class="p">(</span><span class="n">territory</span><span class="p">,</span> <span class="n">regional</span><span class="o">=</span><span class="n">regional</span><span class="p">,</span> <span class="n">de_facto</span><span class="o">=</span><span class="n">de_facto</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">languages</span><span class="p">:</span> + <span class="n">languages</span> <span class="o">=</span> <span class="p">[</span><span class="n">l</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="n">languages</span><span class="p">]</span> + <span class="n">o_languages</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">l</span> <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="n">o_languages</span> <span class="k">if</span> <span class="n">l</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="ow">in</span> <span class="n">languages</span><span class="p">)</span> + + <span 
class="k">for</span> <span class="n">lang</span> <span class="ow">in</span> <span class="n">o_languages</span><span class="p">:</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">lang</span> <span class="o">+</span> <span class="s1">'_'</span> <span class="o">+</span> <span class="n">territory</span><span class="p">)</span> + <span class="n">ret_val</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">locale</span><span class="p">)</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="k">continue</span> + + <span class="k">return</span> <span class="n">ret_val</span></div> + + + +<div class="viewcode-block" id="get_engine_locale"> +<a class="viewcode-back" href="../../src/searx.locales.html#searx.locales.get_engine_locale">[docs]</a> +<span class="k">def</span> <span class="nf">get_engine_locale</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">,</span> <span class="n">engine_locales</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Return engine's language (aka locale) string that best fits to argument</span> +<span class="sd"> ``searxng_locale``.</span> + +<span class="sd"> Argument ``engine_locales`` is a python dict that maps *SearXNG locales* to</span> +<span class="sd"> corresponding *engine locales*::</span> + +<span class="sd"> <engine>: {</span> +<span class="sd"> # SearXNG string : engine-string</span> +<span class="sd"> 'ca-ES' : 'ca_ES',</span> +<span class="sd"> 'fr-BE' : 'fr_BE',</span> +<span 
class="sd"> 'fr-CA' : 'fr_CA',</span> +<span class="sd"> 'fr-CH' : 'fr_CH',</span> +<span class="sd"> 'fr' : 'fr_FR',</span> +<span class="sd"> ...</span> +<span class="sd"> 'pl-PL' : 'pl_PL',</span> +<span class="sd"> 'pt-PT' : 'pt_PT'</span> +<span class="sd"> ..</span> +<span class="sd"> 'zh' : 'zh'</span> +<span class="sd"> 'zh_Hans' : 'zh'</span> +<span class="sd"> 'zh_Hant' : 'zh_TW'</span> +<span class="sd"> }</span> + +<span class="sd"> .. hint::</span> + +<span class="sd"> The *SearXNG locale* string has to be known by babel!</span> + +<span class="sd"> If there is no direct 1:1 mapping, this function tries to narrow down</span> +<span class="sd"> the engine's language (locale). If no value can be determined by these</span> +<span class="sd"> approximation attempts, the ``default`` value is returned.</span> + +<span class="sd"> Assumptions:</span> + +<span class="sd"> A. When the user selects a language the results should be optimized according to</span> +<span class="sd"> the selected language.</span> + +<span class="sd"> B. When the user selects a language and a territory the results should be</span> +<span class="sd"> optimized with first priority on territory and second on language.</span> + +<span class="sd"> First approximation rule (*by territory*):</span> + +<span class="sd"> When the user selects a locale with territory (and a language), the</span> +<span class="sd"> territory has priority over the language. If any of the official languages</span> +<span class="sd"> in the territory is supported by the engine (``engine_locales``) it will</span> +<span class="sd"> be used.</span> + +<span class="sd"> Second approximation rule (*by language*):</span> + +<span class="sd"> If "First approximation rule" brings no result or the user selects only a</span> +<span class="sd"> language without a territory. 
Check in which territories the language</span> +<span class="sd"> has an official status and if one of these territories is supported by the</span> +<span class="sd"> engine.</span> + +<span class="sd"> """</span> + <span class="c1"># pylint: disable=too-many-branches, too-many-return-statements</span> + + <span class="n">engine_locale</span> <span class="o">=</span> <span class="n">engine_locales</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">engine_locale</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="c1"># There was a 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language</span> + <span class="c1"># "zh --> zh"), no need to narrow language-script nor territory.</span> + <span class="k">return</span> <span class="n">engine_locale</span> + + <span class="k">try</span><span class="p">:</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">searxng_locale</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span 
class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="k">return</span> <span class="n">default</span> + + <span class="n">searxng_lang</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)</span> + <span class="n">engine_locale</span> <span class="o">=</span> <span class="n">engine_locales</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">searxng_lang</span><span class="p">)</span> + <span class="k">if</span> <span class="n">engine_locale</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="c1"># There was a 1:1 mapping (e.g. "zh-HK --> zh_Hant" or "zh-CN --> zh_Hans")</span> + <span class="k">return</span> <span class="n">engine_locale</span> + + <span class="c1"># SearXNG's selected locale is not supported by the engine ..</span> + + <span class="k">if</span> <span class="n">locale</span><span class="o">.</span><span class="n">territory</span><span class="p">:</span> + <span class="c1"># Try to narrow by *official* languages in the territory (??-XX).</span> + + <span class="k">for</span> <span class="n">official_language</span> <span class="ow">in</span> <span class="n">babel</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get_official_languages</span><span class="p">(</span><span class="n">locale</span><span class="o">.</span><span class="n">territory</span><span class="p">,</span> <span class="n">de_facto</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> + <span class="n">searxng_locale</span> <span class="o">=</span> 
<span class="n">official_language</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">locale</span><span class="o">.</span><span class="n">territory</span> + <span class="n">engine_locale</span> <span class="o">=</span> <span class="n">engine_locales</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">)</span> + <span class="k">if</span> <span class="n">engine_locale</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">engine_locale</span> + + <span class="c1"># Engine does not support one of the official languages in the territory or</span> + <span class="c1"># there is only a language selected without a territory.</span> + + <span class="c1"># Now let's have a look if the searxng_lang (the language selected by the</span> + <span class="c1"># user) is an official language in other territories. 
If so, check if</span> + <span class="c1"># engine does support the searxng_lang in this other territory.</span> + + <span class="k">if</span> <span class="n">locale</span><span class="o">.</span><span class="n">language</span><span class="p">:</span> + + <span class="n">terr_lang_dict</span> <span class="o">=</span> <span class="p">{}</span> + <span class="k">for</span> <span class="n">territory</span><span class="p">,</span> <span class="n">langs</span> <span class="ow">in</span> <span class="n">babel</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">get_global</span><span class="p">(</span><span class="s2">"territory_languages"</span><span class="p">)</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">langs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">searxng_lang</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'official_status'</span><span class="p">):</span> + <span class="k">continue</span> + <span class="n">terr_lang_dict</span><span class="p">[</span><span class="n">territory</span><span class="p">]</span> <span class="o">=</span> <span class="n">langs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">searxng_lang</span><span class="p">)</span> + + <span class="c1"># first: check fr-FR, de-DE .. 
is supported by the engine</span> + <span class="c1"># exception: 'en' --> 'en-US'</span> + + <span class="n">territory</span> <span class="o">=</span> <span class="n">locale</span><span class="o">.</span><span class="n">language</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span> + <span class="k">if</span> <span class="n">territory</span> <span class="o">==</span> <span class="s1">'EN'</span><span class="p">:</span> + <span class="n">territory</span> <span class="o">=</span> <span class="s1">'US'</span> + + <span class="k">if</span> <span class="n">terr_lang_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">territory</span><span class="p">):</span> + <span class="n">searxng_locale</span> <span class="o">=</span> <span class="n">locale</span><span class="o">.</span><span class="n">language</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">territory</span> + <span class="n">engine_locale</span> <span class="o">=</span> <span class="n">engine_locales</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">)</span> + <span class="k">if</span> <span class="n">engine_locale</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">engine_locale</span> + + <span class="c1"># second: sort by population_percent and take first match</span> + + <span class="c1"># drawback of "population percent": if there is a territory with a</span> + <span class="c1"># small number of people (e.g 100) but the majority speaks the</span> + <span class="c1"># language, then the percentage might be 100% (--> 100 people) but in</span> + <span class="c1"># a different territory with more people (e.g. 
10.000) where only 10%</span> + <span class="c1"># speak the language the total amount of speaker is higher (--> 200</span> + <span class="c1"># people).</span> + <span class="c1">#</span> + <span class="c1"># By example: The population of Saint-Martin is 33.000, of which 100%</span> + <span class="c1"># speak French, but this is less than the 30% of the approximately 2.5</span> + <span class="c1"># million Belgian citizens</span> + <span class="c1">#</span> + <span class="c1"># - 'fr-MF', 'population_percent': 100.0, 'official_status': 'official'</span> + <span class="c1"># - 'fr-BE', 'population_percent': 38.0, 'official_status': 'official'</span> + + <span class="n">terr_lang_list</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">terr_lang_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="n">terr_lang_list</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">))</span> + + <span class="k">for</span> <span class="n">territory</span><span class="p">,</span> <span class="n">_lang</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">terr_lang_list</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">item</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="s1">'population_percent'</span><span class="p">],</span> <span class="n">reverse</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> + <span class="n">searxng_locale</span> <span class="o">=</span> <span class="n">locale</span><span 
class="o">.</span><span class="n">language</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">territory</span> + <span class="n">engine_locale</span> <span class="o">=</span> <span class="n">engine_locales</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">)</span> + <span class="k">if</span> <span class="n">engine_locale</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">engine_locale</span> + + <span class="c1"># No luck: narrow by "language from territory" and "territory from language"</span> + <span class="c1"># does not fit to a locale supported by the engine.</span> + + <span class="k">if</span> <span class="n">engine_locale</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">engine_locale</span> <span class="o">=</span> <span class="n">default</span> + + <span class="k">return</span> <span class="n">default</span></div> + + + +<div class="viewcode-block" id="match_locale"> +<a class="viewcode-back" href="../../src/searx.locales.html#searx.locales.match_locale">[docs]</a> +<span class="k">def</span> <span class="nf">match_locale</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">locale_tag_list</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">fallback</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span 
class="p">:</span> +<span class="w"> </span><span class="sd">"""Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``.</span> + +<span class="sd"> :param str searxng_locale: SearXNG's internal representation of locale (de,</span> +<span class="sd"> de-DE, fr-BE, zh, zh-CN, zh-TW ..).</span> + +<span class="sd"> :param list locale_tag_list: The list of locale tags to select from</span> + +<span class="sd"> :param str fallback: fallback locale tag (if unset --> ``None``)</span> + +<span class="sd"> The rules to find a match are implemented in :py:obj:`get_engine_locale`,</span> +<span class="sd"> the ``engine_locales`` is build up by :py:obj:`build_engine_locales`.</span> + +<span class="sd"> .. hint::</span> + +<span class="sd"> The *SearXNG locale* string and the members of ``locale_tag_list`` has to</span> +<span class="sd"> be known by babel! The :py:obj:`ADDITIONAL_TRANSLATIONS` are used in the</span> +<span class="sd"> UI and are not known by babel --> will be ignored.</span> +<span class="sd"> """</span> + + <span class="c1"># searxng_locale = 'es'</span> + <span class="c1"># locale_tag_list = ['es-AR', 'es-ES', 'es-MX']</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">searxng_locale</span><span class="p">:</span> + <span class="k">return</span> <span class="n">fallback</span> + + <span class="n">locale</span> <span class="o">=</span> <span class="n">get_locale</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">)</span> + <span class="k">if</span> <span class="n">locale</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">fallback</span> + + <span class="c1"># normalize to a SearXNG locale that can be passed to get_engine_locale</span> + + <span class="n">searxng_locale</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">locale</span><span 
class="p">)</span> + <span class="k">if</span> <span class="n">locale</span><span class="o">.</span><span class="n">territory</span><span class="p">:</span> + <span class="n">searxng_locale</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)</span> + + <span class="c1"># clean up locale_tag_list</span> + + <span class="n">tag_list</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">tag</span> <span class="ow">in</span> <span class="n">locale_tag_list</span><span class="p">:</span> + <span class="k">if</span> <span class="n">tag</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'all'</span><span class="p">,</span> <span class="s1">'auto'</span><span class="p">)</span> <span class="ow">or</span> <span class="n">tag</span> <span class="ow">in</span> <span class="n">ADDITIONAL_TRANSLATIONS</span><span class="p">:</span> + <span class="k">continue</span> + <span class="n">tag_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tag</span><span class="p">)</span> + + <span class="c1"># emulate fetch_traits</span> + <span class="n">engine_locales</span> <span class="o">=</span> <span class="n">build_engine_locales</span><span class="p">(</span><span class="n">tag_list</span><span class="p">)</span> + <span class="k">return</span> <span class="n">get_engine_locale</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">,</span> <span class="n">engine_locales</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="n">fallback</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="build_engine_locales"> +<a class="viewcode-back" href="../../src/searx.locales.html#searx.locales.build_engine_locales">[docs]</a> +<span class="k">def</span> <span 
class="nf">build_engine_locales</span><span class="p">(</span><span class="n">tag_list</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]):</span> +<span class="w"> </span><span class="sd">"""From a list of locale tags a dictionary is build that can be passed by</span> +<span class="sd"> argument ``engine_locales`` to :py:obj:`get_engine_locale`. This function</span> +<span class="sd"> is mainly used by :py:obj:`match_locale` and is similar to what the</span> +<span class="sd"> ``fetch_traits(..)`` function of engines do.</span> + +<span class="sd"> If there are territory codes in the ``tag_list`` that have a *script code*</span> +<span class="sd"> additional keys are added to the returned dictionary.</span> + +<span class="sd"> .. code:: python</span> + +<span class="sd"> >>> import locales</span> +<span class="sd"> >>> engine_locales = locales.build_engine_locales(['en', 'en-US', 'zh', 'zh-CN', 'zh-TW'])</span> +<span class="sd"> >>> engine_locales</span> +<span class="sd"> {</span> +<span class="sd"> 'en': 'en', 'en-US': 'en-US',</span> +<span class="sd"> 'zh': 'zh', 'zh-CN': 'zh-CN', 'zh_Hans': 'zh-CN',</span> +<span class="sd"> 'zh-TW': 'zh-TW', 'zh_Hant': 'zh-TW'</span> +<span class="sd"> }</span> +<span class="sd"> >>> get_engine_locale('zh-Hans', engine_locales)</span> +<span class="sd"> 'zh-CN'</span> + +<span class="sd"> This function is a good example to understand the language/region model</span> +<span class="sd"> of SearXNG:</span> + +<span class="sd"> SearXNG only distinguishes between **search languages** and **search</span> +<span class="sd"> regions**, by adding the *script-tags*, languages with *script-tags* can</span> +<span class="sd"> be assigned to the **regions** that SearXNG supports.</span> + +<span class="sd"> """</span> + <span class="n">engine_locales</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="k">for</span> <span 
class="n">tag</span> <span class="ow">in</span> <span class="n">tag_list</span><span class="p">:</span> + <span class="n">locale</span> <span class="o">=</span> <span class="n">get_locale</span><span class="p">(</span><span class="n">tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">locale</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"build_engine_locales: skip locale tag </span><span class="si">%s</span><span class="s2"> / unknown by babel"</span><span class="p">,</span> <span class="n">tag</span><span class="p">)</span> + <span class="k">continue</span> + <span class="k">if</span> <span class="n">locale</span><span class="o">.</span><span class="n">territory</span><span class="p">:</span> + <span class="n">engine_locales</span><span class="p">[</span><span class="n">region_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)]</span> <span class="o">=</span> <span class="n">tag</span> + <span class="k">if</span> <span class="n">locale</span><span class="o">.</span><span class="n">script</span><span class="p">:</span> + <span class="n">engine_locales</span><span class="p">[</span><span class="n">language_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)]</span> <span class="o">=</span> <span class="n">tag</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">engine_locales</span><span class="p">[</span><span class="n">language_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)]</span> <span class="o">=</span> <span class="n">tag</span> + <span class="k">return</span> <span class="n">engine_locales</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" 
role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + 
</div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/plugins/unit_converter.html b/_modules/searx/plugins/unit_converter.html new file mode 100644 index 000000000..04f0edea1 --- /dev/null +++ b/_modules/searx/plugins/unit_converter.html @@ -0,0 +1,370 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.plugins.unit_converter — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.plugins.unit_converter</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> 
+ <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.plugins.unit_converter</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""A plugin for converting measured values from one unit to another unit (a</span> +<span class="sd">unit converter).</span> + +<span class="sd">The plugin looks up the symbols (given in the query term) in a list of</span> +<span class="sd">converters, each converter is one item in the list (compare</span> +<span class="sd">:py:obj:`ADDITIONAL_UNITS`). If the symbols are ambiguous, the matching units</span> +<span class="sd">of measurement are evaluated. The weighting in the evaluation results from the</span> +<span class="sd">sorting of the :py:obj:`list of unit converters<symbol_to_si>`.</span> + +<span class="sd">Enable in ``settings.yml``:</span> + +<span class="sd">.. code:: yaml</span> + +<span class="sd"> enabled_plugins:</span> +<span class="sd"> ..</span> +<span class="sd"> - 'Unit converter plugin'</span> + +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">re</span> +<span class="kn">import</span> <span class="nn">babel.numbers</span> +<span class="kn">from</span> <span class="nn">flask_babel</span> <span class="kn">import</span> <span class="n">gettext</span><span class="p">,</span> <span class="n">get_locale</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">data</span> + + +<span class="n">name</span> <span class="o">=</span> <span class="s2">"Unit converter plugin"</span> +<span class="n">description</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span><span class="s2">"Convert between units"</span><span class="p">)</span> +<span class="n">default_on</span> <span class="o">=</span> <span class="kc">True</span> + +<span class="n">plugin_id</span> <span class="o">=</span> <span 
class="s2">"unit_converter"</span> +<span class="n">preference_section</span> <span class="o">=</span> <span class="s2">"general"</span> + +<span class="n">CONVERT_KEYWORDS</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"in"</span><span class="p">,</span> <span class="s2">"to"</span><span class="p">,</span> <span class="s2">"as"</span><span class="p">]</span> + +<span class="c1"># inspired from https://stackoverflow.com/a/42475086</span> +<span class="n">RE_MEASURE</span> <span class="o">=</span> <span class="sa">r</span><span class="s1">'''</span> +<span class="s1">(?P<sign>[-+]?) # +/- or nothing for positive</span> +<span class="s1">(\s*) # separator: white space or nothing</span> +<span class="s1">(?P<number>[\d\.,]*) # number: 1,000.00 (en) or 1.000,00 (de)</span> +<span class="s1">(?P<E>[eE][-+]?\d+)? # scientific notation: e(+/-)2 (*10^2)</span> +<span class="s1">(\s*) # separator: white space or nothing</span> +<span class="s1">(?P<unit>\S+) # unit of measure</span> +<span class="s1">'''</span> + + +<span class="n">ADDITIONAL_UNITS</span> <span class="o">=</span> <span class="p">[</span> + <span class="p">{</span> + <span class="s2">"si_name"</span><span class="p">:</span> <span class="s2">"Q11579"</span><span class="p">,</span> + <span class="s2">"symbol"</span><span class="p">:</span> <span class="s2">"°C"</span><span class="p">,</span> + <span class="s2">"to_si"</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">val</span><span class="p">:</span> <span class="n">val</span> <span class="o">+</span> <span class="mf">273.15</span><span class="p">,</span> + <span class="s2">"from_si"</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">val</span><span class="p">:</span> <span class="n">val</span> <span class="o">-</span> <span class="mf">273.15</span><span class="p">,</span> + <span class="p">},</span> + <span class="p">{</span> + <span class="s2">"si_name"</span><span 
class="p">:</span> <span class="s2">"Q11579"</span><span class="p">,</span> + <span class="s2">"symbol"</span><span class="p">:</span> <span class="s2">"°F"</span><span class="p">,</span> + <span class="s2">"to_si"</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">val</span><span class="p">:</span> <span class="p">(</span><span class="n">val</span> <span class="o">+</span> <span class="mf">459.67</span><span class="p">)</span> <span class="o">*</span> <span class="mi">5</span> <span class="o">/</span> <span class="mi">9</span><span class="p">,</span> + <span class="s2">"from_si"</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">val</span><span class="p">:</span> <span class="p">(</span><span class="n">val</span> <span class="o">*</span> <span class="mi">9</span> <span class="o">/</span> <span class="mi">5</span><span class="p">)</span> <span class="o">-</span> <span class="mf">459.67</span><span class="p">,</span> + <span class="p">},</span> +<span class="p">]</span> +<span class="sd">"""Additional items to convert from a measure unit to a SI unit (vice versa).</span> + +<span class="sd">.. 
code:: python</span> + +<span class="sd"> {</span> +<span class="sd"> "si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)</span> +<span class="sd"> "symbol": "°C", # symbol of the measure unit</span> +<span class="sd"> "to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit</span> +<span class="sd"> "from_si": lambda val: val - 273.15, # convert SI value (val) measure unit</span> +<span class="sd"> },</span> +<span class="sd"> {</span> +<span class="sd"> "si_name": "Q11573",</span> +<span class="sd"> "symbol": "mi",</span> +<span class="sd"> "to_si": 1609.344, # convert measure value (val) to SI unit</span> +<span class="sd"> "from_si": 1 / 1609.344 # convert SI value (val) measure unit</span> +<span class="sd"> },</span> + +<span class="sd">The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)</span> +<span class="sd">or a callable_ (val in / converted value returned).</span> + +<span class="sd">.. _callable: https://docs.python.org/3/glossary.html#term-callable</span> +<span class="sd">"""</span> + + +<span class="n">ALIAS_SYMBOLS</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'°C'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'C'</span><span class="p">,),</span> + <span class="s1">'°F'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'F'</span><span class="p">,),</span> + <span class="s1">'mi'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'L'</span><span class="p">,),</span> +<span class="p">}</span> +<span class="sd">"""Alias symbols for known unit of measure symbols / by example::</span> + +<span class="sd"> '°C': ('C', ...), # list of alias symbols for °C (Q69362731)</span> +<span class="sd"> '°F': ('F', ...), # list of alias symbols for °F (Q99490479)</span> +<span class="sd"> 'mi': ('L',), # list of alias symbols for mi (Q253276)</span> +<span class="sd">"""</span> + + +<span 
class="n">SYMBOL_TO_SI</span> <span class="o">=</span> <span class="p">[]</span> + + +<div class="viewcode-block" id="symbol_to_si"> +<a class="viewcode-back" href="../../../src/searx.plugins.unit_converter.html#searx.plugins.unit_converter.symbol_to_si">[docs]</a> +<span class="k">def</span> <span class="nf">symbol_to_si</span><span class="p">():</span> +<span class="w"> </span><span class="sd">"""Generates a list of tuples, each tuple is a measure unit and the fields</span> +<span class="sd"> in the tuple are:</span> + +<span class="sd"> 0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)</span> + +<span class="sd"> 1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')</span> + +<span class="sd"> 2. Factor to get SI value from measure unit (e.g. 1mi is equal to SI 1m</span> +<span class="sd"> multiplied by 1609.344)</span> + +<span class="sd"> 3. Factor to get measure value from from SI value (e.g. SI 100m is equal to</span> +<span class="sd"> 100mi divided by 1609.344)</span> + +<span class="sd"> The returned list is sorted, the first items are created from</span> +<span class="sd"> ``WIKIDATA_UNITS``, the second group of items is build from</span> +<span class="sd"> :py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.</span> + +<span class="sd"> If you search this list for a symbol, then a match with a symbol from</span> +<span class="sd"> Wikidata has the highest weighting (first hit in the list), followed by the</span> +<span class="sd"> symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is</span> +<span class="sd"> given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.</span> + +<span class="sd"> """</span> + + <span class="k">global</span> <span class="n">SYMBOL_TO_SI</span> <span class="c1"># pylint: disable=global-statement</span> + <span class="k">if</span> <span class="n">SYMBOL_TO_SI</span><span class="p">:</span> + <span class="k">return</span> <span 
class="n">SYMBOL_TO_SI</span> + + <span class="c1"># filter out units which can't be normalized to a SI unit and filter out</span> + <span class="c1"># units without a symbol / arcsecond does not have a symbol</span> + <span class="c1"># https://www.wikidata.org/wiki/Q829073</span> + + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">data</span><span class="o">.</span><span class="n">WIKIDATA_UNITS</span><span class="o">.</span><span class="n">values</span><span class="p">():</span> + <span class="k">if</span> <span class="n">item</span><span class="p">[</span><span class="s1">'to_si_factor'</span><span class="p">]</span> <span class="ow">and</span> <span class="n">item</span><span class="p">[</span><span class="s1">'symbol'</span><span class="p">]:</span> + <span class="n">SYMBOL_TO_SI</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">(</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'symbol'</span><span class="p">],</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'si_name'</span><span class="p">],</span> + <span class="mi">1</span> <span class="o">/</span> <span class="n">item</span><span class="p">[</span><span class="s1">'to_si_factor'</span><span class="p">],</span> <span class="c1"># from_si</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'to_si_factor'</span><span class="p">],</span> <span class="c1"># to_si</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'symbol'</span><span class="p">],</span> + <span class="p">)</span> + <span class="p">)</span> + + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">ADDITIONAL_UNITS</span><span class="p">:</span> + <span class="n">SYMBOL_TO_SI</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">(</span> + <span 
class="n">item</span><span class="p">[</span><span class="s1">'symbol'</span><span class="p">],</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'si_name'</span><span class="p">],</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'from_si'</span><span class="p">],</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'to_si'</span><span class="p">],</span> + <span class="n">item</span><span class="p">[</span><span class="s1">'symbol'</span><span class="p">],</span> + <span class="p">)</span> + <span class="p">)</span> + + <span class="n">alias_items</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">SYMBOL_TO_SI</span><span class="p">:</span> + <span class="k">for</span> <span class="n">alias</span> <span class="ow">in</span> <span class="n">ALIAS_SYMBOLS</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="p">()):</span> + <span class="n">alias_items</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> + <span class="p">(</span> + <span class="n">alias</span><span class="p">,</span> + <span class="n">item</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> + <span class="n">item</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="c1"># from_si</span> + <span class="n">item</span><span class="p">[</span><span class="mi">3</span><span class="p">],</span> <span class="c1"># to_si</span> + <span class="n">item</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="c1"># origin unit</span> + <span class="p">)</span> + <span class="p">)</span> + <span class="n">SYMBOL_TO_SI</span> <span class="o">=</span> <span 
class="n">SYMBOL_TO_SI</span> <span class="o">+</span> <span class="n">alias_items</span> + <span class="k">return</span> <span class="n">SYMBOL_TO_SI</span></div> + + + +<span class="k">def</span> <span class="nf">_parse_text_and_convert</span><span class="p">(</span><span class="n">search</span><span class="p">,</span> <span class="n">from_query</span><span class="p">,</span> <span class="n">to_query</span><span class="p">):</span> + + <span class="c1"># pylint: disable=too-many-branches, too-many-locals</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">from_query</span> <span class="ow">and</span> <span class="n">to_query</span><span class="p">):</span> + <span class="k">return</span> + + <span class="n">measured</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">RE_MEASURE</span><span class="p">,</span> <span class="n">from_query</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">VERBOSE</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">measured</span> <span class="ow">and</span> <span class="n">measured</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s1">'number'</span><span class="p">),</span> <span class="n">measured</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s1">'unit'</span><span class="p">)):</span> + <span class="k">return</span> + + <span class="c1"># Symbols are not unique, if there are several hits for the from-unit, then</span> + <span class="c1"># the correct one must be determined by comparing it with the to-unit</span> + <span class="c1"># https://github.com/searxng/searxng/pull/3378#issuecomment-2080974863</span> + + <span class="c1"># first: collecting possible units</span> + + 
<span class="n">source_list</span><span class="p">,</span> <span class="n">target_list</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span> + + <span class="k">for</span> <span class="n">symbol</span><span class="p">,</span> <span class="n">si_name</span><span class="p">,</span> <span class="n">from_si</span><span class="p">,</span> <span class="n">to_si</span><span class="p">,</span> <span class="n">orig_symbol</span> <span class="ow">in</span> <span class="n">symbol_to_si</span><span class="p">():</span> + + <span class="k">if</span> <span class="n">symbol</span> <span class="o">==</span> <span class="n">measured</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s1">'unit'</span><span class="p">):</span> + <span class="n">source_list</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">si_name</span><span class="p">,</span> <span class="n">to_si</span><span class="p">))</span> + <span class="k">if</span> <span class="n">symbol</span> <span class="o">==</span> <span class="n">to_query</span><span class="p">:</span> + <span class="n">target_list</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">si_name</span><span class="p">,</span> <span class="n">from_si</span><span class="p">,</span> <span class="n">orig_symbol</span><span class="p">))</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">source_list</span> <span class="ow">and</span> <span class="n">target_list</span><span class="p">):</span> + <span class="k">return</span> + + <span class="n">source_to_si</span> <span class="o">=</span> <span class="n">target_from_si</span> <span class="o">=</span> <span class="n">target_symbol</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="c1"># second: find the right unit by comparing list of from-units with list of 
to-units</span> + + <span class="k">for</span> <span class="n">source</span> <span class="ow">in</span> <span class="n">source_list</span><span class="p">:</span> + <span class="k">for</span> <span class="n">target</span> <span class="ow">in</span> <span class="n">target_list</span><span class="p">:</span> + <span class="k">if</span> <span class="n">source</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="n">target</span><span class="p">[</span><span class="mi">0</span><span class="p">]:</span> <span class="c1"># compare si_name</span> + <span class="n">source_to_si</span> <span class="o">=</span> <span class="n">source</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> + <span class="n">target_from_si</span> <span class="o">=</span> <span class="n">target</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> + <span class="n">target_symbol</span> <span class="o">=</span> <span class="n">target</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">source_to_si</span> <span class="ow">and</span> <span class="n">target_from_si</span><span class="p">):</span> + <span class="k">return</span> + + <span class="n">_locale</span> <span class="o">=</span> <span class="n">get_locale</span><span class="p">()</span> <span class="ow">or</span> <span class="s1">'en_US'</span> + + <span class="n">value</span> <span class="o">=</span> <span class="n">measured</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s1">'sign'</span><span class="p">)</span> <span class="o">+</span> <span class="n">measured</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s1">'number'</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span 
class="n">measured</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s1">'E'</span><span class="p">)</span> <span class="ow">or</span> <span class="s1">''</span><span class="p">)</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">numbers</span><span class="o">.</span><span class="n">parse_decimal</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">_locale</span><span class="p">)</span> + + <span class="c1"># convert value to SI unit</span> + + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">source_to_si</span><span class="p">,</span> <span class="p">(</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">)):</span> + <span class="n">value</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="o">*</span> <span class="n">source_to_si</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">source_to_si</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="n">value</span><span class="p">))</span> + + <span class="c1"># convert value from SI unit to target unit</span> + + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">target_from_si</span><span class="p">,</span> <span class="p">(</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">)):</span> + <span class="n">value</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="o">*</span> <span 
class="n">target_from_si</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">value</span> <span class="o">=</span> <span class="n">target_from_si</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="n">value</span><span class="p">))</span> + + <span class="k">if</span> <span class="n">measured</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s1">'E'</span><span class="p">):</span> + <span class="c1"># when incoming notation is scientific, outgoing notation is scientific</span> + <span class="n">result</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">numbers</span><span class="o">.</span><span class="n">format_scientific</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">_locale</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">result</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">numbers</span><span class="o">.</span><span class="n">format_decimal</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">_locale</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s1">'#,##0.##########;-#'</span><span class="p">)</span> + + <span class="n">search</span><span class="o">.</span><span class="n">result_container</span><span class="o">.</span><span class="n">answers</span><span class="p">[</span><span class="s1">'conversion'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'answer'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">result</span><span 
class="si">}</span><span class="s1"> </span><span class="si">{</span><span class="n">target_symbol</span><span class="si">}</span><span class="s1">'</span><span class="p">}</span> + + +<span class="k">def</span> <span class="nf">post_search</span><span class="p">(</span><span class="n">_request</span><span class="p">,</span> <span class="n">search</span><span class="p">):</span> + <span class="c1"># only convert between units on the first page</span> + <span class="k">if</span> <span class="n">search</span><span class="o">.</span><span class="n">search_query</span><span class="o">.</span><span class="n">pageno</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">True</span> + + <span class="n">query</span> <span class="o">=</span> <span class="n">search</span><span class="o">.</span><span class="n">search_query</span><span class="o">.</span><span class="n">query</span> + <span class="n">query_parts</span> <span class="o">=</span> <span class="n">query</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">" "</span><span class="p">)</span> + + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">query_parts</span><span class="p">)</span> <span class="o"><</span> <span class="mi">3</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">True</span> + + <span class="k">for</span> <span class="n">query_part</span> <span class="ow">in</span> <span class="n">query_parts</span><span class="p">:</span> + <span class="k">for</span> <span class="n">keyword</span> <span class="ow">in</span> <span class="n">CONVERT_KEYWORDS</span><span class="p">:</span> + <span class="k">if</span> <span class="n">query_part</span> <span class="o">==</span> <span class="n">keyword</span><span class="p">:</span> + <span class="n">from_query</span><span class="p">,</span> <span class="n">to_query</span> <span 
class="o">=</span> <span class="n">query</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">keyword</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> + <span class="n">_parse_text_and_convert</span><span class="p">(</span><span class="n">search</span><span class="p">,</span> <span class="n">from_query</span><span class="o">.</span><span class="n">strip</span><span class="p">(),</span> <span class="n">to_query</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span> + <span class="k">return</span> <span class="kc">True</span> + + <span class="k">return</span> <span class="kc">True</span> +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a 
href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/redislib.html b/_modules/searx/redislib.html new file mode 100644 index 000000000..d8a5efc91 --- /dev/null +++ b/_modules/searx/redislib.html @@ -0,0 +1,362 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.redislib — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.redislib</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.redislib</h1><div 
class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""A collection of convenient functions and redis/lua scripts.</span> + +<span class="sd">This code was partial inspired by the `Bullet-Proofing Lua Scripts in RedisPy`_</span> +<span class="sd">article.</span> + +<span class="sd">.. _Bullet-Proofing Lua Scripts in RedisPy:</span> +<span class="sd"> https://redis.com/blog/bullet-proofing-lua-scripts-in-redispy/</span> + +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">hmac</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">get_setting</span> + +<span class="n">LUA_SCRIPT_STORAGE</span> <span class="o">=</span> <span class="p">{}</span> +<span class="sd">"""A global dictionary to cache client's ``Script`` objects, used by</span> +<span class="sd">:py:obj:`lua_script_storage`"""</span> + + +<div class="viewcode-block" id="lua_script_storage"> +<a class="viewcode-back" href="../../src/searx.redislib.html#searx.redislib.lua_script_storage">[docs]</a> +<span class="k">def</span> <span class="nf">lua_script_storage</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">script</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns a redis :py:obj:`Script</span> +<span class="sd"> <redis.commands.core.CoreCommands.register_script>` instance.</span> + +<span class="sd"> Due to performance reason the ``Script`` object is instantiated only once</span> +<span class="sd"> for a client (``client.register_script(..)``) and is cached in</span> +<span class="sd"> :py:obj:`LUA_SCRIPT_STORAGE`.</span> + +<span class="sd"> """</span> + + <span class="c1"># redis connection can be closed, lets use the id() of the redis connector</span> + <span class="c1"># as key in the script-storage:</span> + <span class="n">client_id</span> <span 
class="o">=</span> <span class="nb">id</span><span class="p">(</span><span class="n">client</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">LUA_SCRIPT_STORAGE</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">client_id</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">LUA_SCRIPT_STORAGE</span><span class="p">[</span><span class="n">client_id</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="k">if</span> <span class="n">LUA_SCRIPT_STORAGE</span><span class="p">[</span><span class="n">client_id</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">script</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">LUA_SCRIPT_STORAGE</span><span class="p">[</span><span class="n">client_id</span><span class="p">][</span><span class="n">script</span><span class="p">]</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">register_script</span><span class="p">(</span><span class="n">script</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">LUA_SCRIPT_STORAGE</span><span class="p">[</span><span class="n">client_id</span><span class="p">][</span><span class="n">script</span><span class="p">]</span></div> + + + +<span class="n">PURGE_BY_PREFIX</span> <span class="o">=</span> <span class="s2">"""</span> +<span class="s2">local prefix = tostring(ARGV[1])</span> +<span class="s2">for i, name in ipairs(redis.call('KEYS', prefix .. 
'*')) do</span> +<span class="s2"> redis.call('EXPIRE', name, 0)</span> +<span class="s2">end</span> +<span class="s2">"""</span> + + +<div class="viewcode-block" id="purge_by_prefix"> +<a class="viewcode-back" href="../../src/searx.redislib.html#searx.redislib.purge_by_prefix">[docs]</a> +<span class="k">def</span> <span class="nf">purge_by_prefix</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">prefix</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"SearXNG_"</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Purge all keys with ``prefix`` from database.</span> + +<span class="sd"> Queries all keys in the database by the given prefix and set expire time to</span> +<span class="sd"> zero. The default prefix will drop all keys which has been set by SearXNG</span> +<span class="sd"> (drops SearXNG schema entirely from database).</span> + +<span class="sd"> The implementation is the lua script from string :py:obj:`PURGE_BY_PREFIX`.</span> +<span class="sd"> The lua script uses EXPIRE_ instead of DEL_: if there are a lot keys to</span> +<span class="sd"> delete and/or their values are big, `DEL` could take more time and blocks</span> +<span class="sd"> the command loop while `EXPIRE` turns back immediate.</span> + +<span class="sd"> :param prefix: prefix of the key to delete (default: ``SearXNG_``)</span> +<span class="sd"> :type name: str</span> + +<span class="sd"> .. _EXPIRE: https://redis.io/commands/expire/</span> +<span class="sd"> .. 
_DEL: https://redis.io/commands/del/</span> + +<span class="sd"> """</span> + <span class="n">script</span> <span class="o">=</span> <span class="n">lua_script_storage</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">PURGE_BY_PREFIX</span><span class="p">)</span> + <span class="n">script</span><span class="p">(</span><span class="n">args</span><span class="o">=</span><span class="p">[</span><span class="n">prefix</span><span class="p">])</span></div> + + + +<div class="viewcode-block" id="secret_hash"> +<a class="viewcode-back" href="../../src/searx.redislib.html#searx.redislib.secret_hash">[docs]</a> +<span class="k">def</span> <span class="nf">secret_hash</span><span class="p">(</span><span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Creates a hash of the ``name``.</span> + +<span class="sd"> Combines argument ``name`` with the ``secret_key`` from :ref:`settings</span> +<span class="sd"> server`. 
This function can be used to get a more anonymized name of a Redis</span> +<span class="sd"> KEY.</span> + +<span class="sd"> :param name: the name to create a secret hash for</span> +<span class="sd"> :type name: str</span> +<span class="sd"> """</span> + <span class="n">m</span> <span class="o">=</span> <span class="n">hmac</span><span class="o">.</span><span class="n">new</span><span class="p">(</span><span class="nb">bytes</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">),</span> <span class="n">digestmod</span><span class="o">=</span><span class="s1">'sha256'</span><span class="p">)</span> + <span class="n">m</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="nb">bytes</span><span class="p">(</span><span class="n">get_setting</span><span class="p">(</span><span class="s1">'server.secret_key'</span><span class="p">),</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">))</span> + <span class="k">return</span> <span class="n">m</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span></div> + + + +<span class="n">INCR_COUNTER</span> <span class="o">=</span> <span class="s2">"""</span> +<span class="s2">local limit = tonumber(ARGV[1])</span> +<span class="s2">local expire = tonumber(ARGV[2])</span> +<span class="s2">local c_name = KEYS[1]</span> + +<span class="s2">local c = redis.call('GET', c_name)</span> + +<span class="s2">if not c then</span> +<span class="s2"> c = redis.call('INCR', c_name)</span> +<span class="s2"> if expire > 0 then</span> +<span class="s2"> redis.call('EXPIRE', c_name, expire)</span> +<span class="s2"> end</span> +<span class="s2">else</span> +<span class="s2"> c = tonumber(c)</span> +<span class="s2"> if limit == 0 or c < limit then</span> +<span class="s2"> c = 
redis.call('INCR', c_name)</span> +<span class="s2"> end</span> +<span class="s2">end</span> +<span class="s2">return c</span> +<span class="s2">"""</span> + + +<div class="viewcode-block" id="incr_counter"> +<a class="viewcode-back" href="../../src/searx.redislib.html#searx.redislib.incr_counter">[docs]</a> +<span class="k">def</span> <span class="nf">incr_counter</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">limit</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">expire</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Increment a counter and return the new value.</span> + +<span class="sd"> If counter with redis key ``SearXNG_counter_<name>`` does not exists it is</span> +<span class="sd"> created with initial value 1 returned. 
The replacement ``<name>`` is a</span> +<span class="sd"> *secret hash* of the value from argument ``name`` (see</span> +<span class="sd"> :py:func:`secret_hash`).</span> + +<span class="sd"> The implementation of the redis counter is the lua script from string</span> +<span class="sd"> :py:obj:`INCR_COUNTER`.</span> + +<span class="sd"> :param name: name of the counter</span> +<span class="sd"> :type name: str</span> + +<span class="sd"> :param expire: live-time of the counter in seconds (default ``None`` means</span> +<span class="sd"> infinite).</span> +<span class="sd"> :type expire: int / see EXPIRE_</span> + +<span class="sd"> :param limit: limit where the counter stops to increment (default ``None``)</span> +<span class="sd"> :type limit: int / limit is 2^64 see INCR_</span> + +<span class="sd"> :return: value of the incremented counter</span> +<span class="sd"> :type return: int</span> + +<span class="sd"> .. _EXPIRE: https://redis.io/commands/expire/</span> +<span class="sd"> .. _INCR: https://redis.io/commands/incr/</span> + +<span class="sd"> A simple demo of a counter with expire time and limit::</span> + +<span class="sd"> >>> for i in range(6):</span> +<span class="sd"> ... i, incr_counter(client, "foo", 3, 5) # max 3, duration 5 sec</span> +<span class="sd"> ... 
time.sleep(1) # from the third call on max has been reached</span> +<span class="sd"> ...</span> +<span class="sd"> (0, 1)</span> +<span class="sd"> (1, 2)</span> +<span class="sd"> (2, 3)</span> +<span class="sd"> (3, 3)</span> +<span class="sd"> (4, 3)</span> +<span class="sd"> (5, 1)</span> + +<span class="sd"> """</span> + <span class="n">script</span> <span class="o">=</span> <span class="n">lua_script_storage</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">INCR_COUNTER</span><span class="p">)</span> + <span class="n">name</span> <span class="o">=</span> <span class="s2">"SearXNG_counter_"</span> <span class="o">+</span> <span class="n">secret_hash</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="n">c</span> <span class="o">=</span> <span class="n">script</span><span class="p">(</span><span class="n">args</span><span class="o">=</span><span class="p">[</span><span class="n">limit</span><span class="p">,</span> <span class="n">expire</span><span class="p">],</span> <span class="n">keys</span><span class="o">=</span><span class="p">[</span><span class="n">name</span><span class="p">])</span> + <span class="k">return</span> <span class="n">c</span></div> + + + +<div class="viewcode-block" id="drop_counter"> +<a class="viewcode-back" href="../../src/searx.redislib.html#searx.redislib.drop_counter">[docs]</a> +<span class="k">def</span> <span class="nf">drop_counter</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">name</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Drop counter with redis key ``SearXNG_counter_<name>``</span> + +<span class="sd"> The replacement ``<name>`` is a *secret hash* of the value from argument</span> +<span class="sd"> ``name`` (see :py:func:`incr_counter` and :py:func:`incr_sliding_window`).</span> +<span class="sd"> """</span> + <span class="n">name</span> 
<span class="o">=</span> <span class="s2">"SearXNG_counter_"</span> <span class="o">+</span> <span class="n">secret_hash</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="n">client</span><span class="o">.</span><span class="n">delete</span><span class="p">(</span><span class="n">name</span><span class="p">)</span></div> + + + +<span class="n">INCR_SLIDING_WINDOW</span> <span class="o">=</span> <span class="s2">"""</span> +<span class="s2">local expire = tonumber(ARGV[1])</span> +<span class="s2">local name = KEYS[1]</span> +<span class="s2">local current_time = redis.call('TIME')</span> + +<span class="s2">redis.call('ZREMRANGEBYSCORE', name, 0, current_time[1] - expire)</span> +<span class="s2">redis.call('ZADD', name, current_time[1], current_time[1] .. current_time[2])</span> +<span class="s2">local result = redis.call('ZCOUNT', name, 0, current_time[1] + 1)</span> +<span class="s2">redis.call('EXPIRE', name, expire)</span> +<span class="s2">return result</span> +<span class="s2">"""</span> + + +<div class="viewcode-block" id="incr_sliding_window"> +<a class="viewcode-back" href="../../src/searx.redislib.html#searx.redislib.incr_sliding_window">[docs]</a> +<span class="k">def</span> <span class="nf">incr_sliding_window</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">duration</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Increment a sliding-window counter and return the new value.</span> + +<span class="sd"> If counter with redis key ``SearXNG_counter_<name>`` does not exists it is</span> +<span class="sd"> created with initial value 1 returned. 
The replacement ``<name>`` is a</span> +<span class="sd"> *secret hash* of the value from argument ``name`` (see</span> +<span class="sd"> :py:func:`secret_hash`).</span> + +<span class="sd"> :param name: name of the counter</span> +<span class="sd"> :type name: str</span> + +<span class="sd"> :param duration: live-time of the sliding window in seconds</span> +<span class="sd"> :typeduration: int</span> + +<span class="sd"> :return: value of the incremented counter</span> +<span class="sd"> :type return: int</span> + +<span class="sd"> The implementation of the redis counter is the lua script from string</span> +<span class="sd"> :py:obj:`INCR_SLIDING_WINDOW`. The lua script uses `sorted sets in Redis`_</span> +<span class="sd"> to implement a sliding window for the redis key ``SearXNG_counter_<name>``</span> +<span class="sd"> (ZADD_). The current TIME_ is used to score the items in the sorted set and</span> +<span class="sd"> the time window is moved by removing items with a score lower current time</span> +<span class="sd"> minus *duration* time (ZREMRANGEBYSCORE_).</span> + +<span class="sd"> The EXPIRE_ time (the duration of the sliding window) is refreshed on each</span> +<span class="sd"> call (increment) and if there is no call in this duration, the sorted</span> +<span class="sd"> set expires from the redis DB.</span> + +<span class="sd"> The return value is the amount of items in the sorted set (ZCOUNT_), what</span> +<span class="sd"> means the number of calls in the sliding window.</span> + +<span class="sd"> .. _Sorted sets in Redis:</span> +<span class="sd"> https://redis.com/ebook/part-1-getting-started/chapter-1-getting-to-know-redis/1-2-what-redis-data-structures-look-like/1-2-5-sorted-sets-in-redis/</span> +<span class="sd"> .. _TIME: https://redis.io/commands/time/</span> +<span class="sd"> .. _ZADD: https://redis.io/commands/zadd/</span> +<span class="sd"> .. _EXPIRE: https://redis.io/commands/expire/</span> +<span class="sd"> .. 
_ZREMRANGEBYSCORE: https://redis.io/commands/zremrangebyscore/</span> +<span class="sd"> .. _ZCOUNT: https://redis.io/commands/zcount/</span> + +<span class="sd"> A simple demo of the sliding window::</span> + +<span class="sd"> >>> for i in range(5):</span> +<span class="sd"> ... incr_sliding_window(client, "foo", 3) # duration 3 sec</span> +<span class="sd"> ... time.sleep(1) # from the third call (second) on the window is moved</span> +<span class="sd"> ...</span> +<span class="sd"> 1</span> +<span class="sd"> 2</span> +<span class="sd"> 3</span> +<span class="sd"> 3</span> +<span class="sd"> 3</span> +<span class="sd"> >>> time.sleep(3) # wait until expire</span> +<span class="sd"> >>> incr_sliding_window(client, "foo", 3)</span> +<span class="sd"> 1</span> + +<span class="sd"> """</span> + <span class="n">script</span> <span class="o">=</span> <span class="n">lua_script_storage</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">INCR_SLIDING_WINDOW</span><span class="p">)</span> + <span class="n">name</span> <span class="o">=</span> <span class="s2">"SearXNG_counter_"</span> <span class="o">+</span> <span class="n">secret_hash</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="n">c</span> <span class="o">=</span> <span class="n">script</span><span class="p">(</span><span class="n">args</span><span class="o">=</span><span class="p">[</span><span class="n">duration</span><span class="p">],</span> <span class="n">keys</span><span class="o">=</span><span class="p">[</span><span class="n">name</span><span class="p">])</span> + <span class="k">return</span> <span class="n">c</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" 
src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/search.html b/_modules/searx/search.html new file mode 100644 index 000000000..a37f9334b --- /dev/null +++ b/_modules/searx/search.html @@ -0,0 +1,329 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.search — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.search</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.search</h1><div 
class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="c1"># pylint: disable=missing-module-docstring, too-few-public-methods</span> + +<span class="kn">import</span> <span class="nn">threading</span> +<span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">copy</span> +<span class="kn">from</span> <span class="nn">timeit</span> <span class="kn">import</span> <span class="n">default_timer</span> +<span class="kn">from</span> <span class="nn">uuid</span> <span class="kn">import</span> <span class="n">uuid4</span> + +<span class="kn">import</span> <span class="nn">flask</span> +<span class="kn">from</span> <span class="nn">flask</span> <span class="kn">import</span> <span class="n">copy_current_request_context</span> +<span class="kn">import</span> <span class="nn">babel</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">settings</span> +<span class="kn">from</span> <span class="nn">searx.answerers</span> <span class="kn">import</span> <span class="n">ask</span> +<span class="kn">from</span> <span class="nn">searx.external_bang</span> <span class="kn">import</span> <span class="n">get_bang_url</span> +<span class="kn">from</span> <span class="nn">searx.results</span> <span class="kn">import</span> <span class="n">ResultContainer</span> +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">logger</span> +<span class="kn">from</span> <span class="nn">searx.plugins</span> <span class="kn">import</span> <span class="n">plugins</span> +<span class="kn">from</span> <span class="nn">searx.search.models</span> <span class="kn">import</span> <span class="n">EngineRef</span><span class="p">,</span> <span class="n">SearchQuery</span> +<span class="kn">from</span> <span class="nn">searx.engines</span> <span class="kn">import</span> <span 
class="n">load_engines</span> +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">initialize</span> <span class="k">as</span> <span class="n">initialize_network</span><span class="p">,</span> <span class="n">check_network_configuration</span> +<span class="kn">from</span> <span class="nn">searx.metrics</span> <span class="kn">import</span> <span class="n">initialize</span> <span class="k">as</span> <span class="n">initialize_metrics</span><span class="p">,</span> <span class="n">counter_inc</span><span class="p">,</span> <span class="n">histogram_observe_time</span> +<span class="kn">from</span> <span class="nn">searx.search.processors</span> <span class="kn">import</span> <span class="n">PROCESSORS</span><span class="p">,</span> <span class="n">initialize</span> <span class="k">as</span> <span class="n">initialize_processors</span> +<span class="kn">from</span> <span class="nn">searx.search.checker</span> <span class="kn">import</span> <span class="n">initialize</span> <span class="k">as</span> <span class="n">initialize_checker</span> + + +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'search'</span><span class="p">)</span> + + +<span class="k">def</span> <span class="nf">initialize</span><span class="p">(</span><span class="n">settings_engines</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">enable_checker</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">check_network</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">enable_metrics</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> + <span class="n">settings_engines</span> <span class="o">=</span> <span class="n">settings_engines</span> 
<span class="ow">or</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'engines'</span><span class="p">]</span> + <span class="n">load_engines</span><span class="p">(</span><span class="n">settings_engines</span><span class="p">)</span> + <span class="n">initialize_network</span><span class="p">(</span><span class="n">settings_engines</span><span class="p">,</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'outgoing'</span><span class="p">])</span> + <span class="k">if</span> <span class="n">check_network</span><span class="p">:</span> + <span class="n">check_network_configuration</span><span class="p">()</span> + <span class="n">initialize_metrics</span><span class="p">([</span><span class="n">engine</span><span class="p">[</span><span class="s1">'name'</span><span class="p">]</span> <span class="k">for</span> <span class="n">engine</span> <span class="ow">in</span> <span class="n">settings_engines</span><span class="p">],</span> <span class="n">enable_metrics</span><span class="p">)</span> + <span class="n">initialize_processors</span><span class="p">(</span><span class="n">settings_engines</span><span class="p">)</span> + <span class="k">if</span> <span class="n">enable_checker</span><span class="p">:</span> + <span class="n">initialize_checker</span><span class="p">()</span> + + +<div class="viewcode-block" id="Search"> +<a class="viewcode-back" href="../../src/searx.search.html#searx.search.Search">[docs]</a> +<span class="k">class</span> <span class="nc">Search</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Search information container"""</span> + + <span class="vm">__slots__</span> <span class="o">=</span> <span class="s2">"search_query"</span><span class="p">,</span> <span class="s2">"result_container"</span><span class="p">,</span> <span class="s2">"start_time"</span><span class="p">,</span> <span class="s2">"actual_timeout"</span> + + <span class="k">def</span> <span 
class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">search_query</span><span class="p">:</span> <span class="n">SearchQuery</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Initialize the Search"""</span> + <span class="c1"># init vars</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> + <span class="bp">self</span><span class="o">.</span><span class="n">search_query</span> <span class="o">=</span> <span class="n">search_query</span> + <span class="bp">self</span><span class="o">.</span><span class="n">result_container</span> <span class="o">=</span> <span class="n">ResultContainer</span><span class="p">()</span> + <span class="bp">self</span><span class="o">.</span><span class="n">start_time</span> <span class="o">=</span> <span class="kc">None</span> + <span class="bp">self</span><span class="o">.</span><span class="n">actual_timeout</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="k">def</span> <span class="nf">search_external_bang</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""</span> +<span class="sd"> Check if there is a external bang.</span> +<span class="sd"> If yes, update self.result_container and return True</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">search_query</span><span class="o">.</span><span class="n">external_bang</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">result_container</span><span class="o">.</span><span class="n">redirect_url</span> <span class="o">=</span> <span class="n">get_bang_url</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span 
class="n">search_query</span><span class="p">)</span> + + <span class="c1"># This means there was a valid bang and the</span> + <span class="c1"># rest of the search does not need to be continued</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">result_container</span><span class="o">.</span><span class="n">redirect_url</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> + <span class="k">return</span> <span class="kc">True</span> + <span class="k">return</span> <span class="kc">False</span> + + <span class="k">def</span> <span class="nf">search_answerers</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""</span> +<span class="sd"> Check if an answer return a result.</span> +<span class="sd"> If yes, update self.result_container and return True</span> +<span class="sd"> """</span> + <span class="n">answerers_results</span> <span class="o">=</span> <span class="n">ask</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">search_query</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">answerers_results</span><span class="p">:</span> + <span class="k">for</span> <span class="n">results</span> <span class="ow">in</span> <span class="n">answerers_results</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">result_container</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="s1">'answer'</span><span class="p">,</span> <span class="n">results</span><span class="p">)</span> + <span class="k">return</span> <span class="kc">True</span> + <span class="k">return</span> <span class="kc">False</span> + + <span class="c1"># do search-request</span> + <span class="k">def</span> <span 
class="nf">_get_requests</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="c1"># init vars</span> + <span class="n">requests</span> <span class="o">=</span> <span class="p">[]</span> + + <span class="c1"># max of all selected engine timeout</span> + <span class="n">default_timeout</span> <span class="o">=</span> <span class="mi">0</span> + + <span class="c1"># start search-request for all selected engines</span> + <span class="k">for</span> <span class="n">engineref</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">search_query</span><span class="o">.</span><span class="n">engineref_list</span><span class="p">:</span> + <span class="n">processor</span> <span class="o">=</span> <span class="n">PROCESSORS</span><span class="p">[</span><span class="n">engineref</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> + + <span class="c1"># stop the request now if the engine is suspend</span> + <span class="k">if</span> <span class="n">processor</span><span class="o">.</span><span class="n">extend_container_if_suspended</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">result_container</span><span class="p">):</span> + <span class="k">continue</span> + + <span class="c1"># set default request parameters</span> + <span class="n">request_params</span> <span class="o">=</span> <span class="n">processor</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">search_query</span><span class="p">,</span> <span class="n">engineref</span><span class="o">.</span><span class="n">category</span><span class="p">)</span> + <span class="k">if</span> <span class="n">request_params</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">continue</span> + + <span 
class="n">counter_inc</span><span class="p">(</span><span class="s1">'engine'</span><span class="p">,</span> <span class="n">engineref</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">'search'</span><span class="p">,</span> <span class="s1">'count'</span><span class="p">,</span> <span class="s1">'sent'</span><span class="p">)</span> + + <span class="c1"># append request to list</span> + <span class="n">requests</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">engineref</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">search_query</span><span class="o">.</span><span class="n">query</span><span class="p">,</span> <span class="n">request_params</span><span class="p">))</span> + + <span class="c1"># update default_timeout</span> + <span class="n">default_timeout</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="n">default_timeout</span><span class="p">,</span> <span class="n">processor</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">timeout</span><span class="p">)</span> + + <span class="c1"># adjust timeout</span> + <span class="n">max_request_timeout</span> <span class="o">=</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'outgoing'</span><span class="p">][</span><span class="s1">'max_request_timeout'</span><span class="p">]</span> + <span class="n">actual_timeout</span> <span class="o">=</span> <span class="n">default_timeout</span> + <span class="n">query_timeout</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">search_query</span><span class="o">.</span><span class="n">timeout_limit</span> + + <span class="k">if</span> <span class="n">max_request_timeout</span> <span class="ow">is</span> 
<span class="kc">None</span> <span class="ow">and</span> <span class="n">query_timeout</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="c1"># No max, no user query: default_timeout</span> + <span class="k">pass</span> + <span class="k">elif</span> <span class="n">max_request_timeout</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">query_timeout</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="c1"># No max, but user query: From user query except if above default</span> + <span class="n">actual_timeout</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">default_timeout</span><span class="p">,</span> <span class="n">query_timeout</span><span class="p">)</span> + <span class="k">elif</span> <span class="n">max_request_timeout</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">query_timeout</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="c1"># Max, no user query: Default except if above max</span> + <span class="n">actual_timeout</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">default_timeout</span><span class="p">,</span> <span class="n">max_request_timeout</span><span class="p">)</span> + <span class="k">elif</span> <span class="n">max_request_timeout</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">query_timeout</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="c1"># Max & user query: From user query except if above max</span> + <span class="n">actual_timeout</span> <span 
class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">query_timeout</span><span class="p">,</span> <span class="n">max_request_timeout</span><span class="p">)</span> + + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span> + <span class="s2">"actual_timeout=</span><span class="si">{0}</span><span class="s2"> (default_timeout=</span><span class="si">{1}</span><span class="s2">, ?timeout_limit=</span><span class="si">{2}</span><span class="s2">, max_request_timeout=</span><span class="si">{3}</span><span class="s2">)"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> + <span class="n">actual_timeout</span><span class="p">,</span> <span class="n">default_timeout</span><span class="p">,</span> <span class="n">query_timeout</span><span class="p">,</span> <span class="n">max_request_timeout</span> + <span class="p">)</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">requests</span><span class="p">,</span> <span class="n">actual_timeout</span> + + <span class="k">def</span> <span class="nf">search_multiple_requests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">requests</span><span class="p">):</span> + <span class="c1"># pylint: disable=protected-access</span> + <span class="n">search_id</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">uuid4</span><span class="p">())</span> + + <span class="k">for</span> <span class="n">engine_name</span><span class="p">,</span> <span class="n">query</span><span class="p">,</span> <span class="n">request_params</span> <span class="ow">in</span> <span class="n">requests</span><span class="p">:</span> + <span class="n">_search</span> <span class="o">=</span> <span class="n">copy_current_request_context</span><span class="p">(</span><span class="n">PROCESSORS</span><span 
class="p">[</span><span class="n">engine_name</span><span class="p">]</span><span class="o">.</span><span class="n">search</span><span class="p">)</span> + <span class="n">th</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">Thread</span><span class="p">(</span> <span class="c1"># pylint: disable=invalid-name</span> + <span class="n">target</span><span class="o">=</span><span class="n">_search</span><span class="p">,</span> + <span class="n">args</span><span class="o">=</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">request_params</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">result_container</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">start_time</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">actual_timeout</span><span class="p">),</span> + <span class="n">name</span><span class="o">=</span><span class="n">search_id</span><span class="p">,</span> + <span class="p">)</span> + <span class="n">th</span><span class="o">.</span><span class="n">_timeout</span> <span class="o">=</span> <span class="kc">False</span> + <span class="n">th</span><span class="o">.</span><span class="n">_engine_name</span> <span class="o">=</span> <span class="n">engine_name</span> + <span class="n">th</span><span class="o">.</span><span class="n">start</span><span class="p">()</span> + + <span class="k">for</span> <span class="n">th</span> <span class="ow">in</span> <span class="n">threading</span><span class="o">.</span><span class="n">enumerate</span><span class="p">():</span> <span class="c1"># pylint: disable=invalid-name</span> + <span class="k">if</span> <span class="n">th</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">search_id</span><span class="p">:</span> + <span 
class="n">remaining_time</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="mf">0.0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">actual_timeout</span> <span class="o">-</span> <span class="p">(</span><span class="n">default_timer</span><span class="p">()</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">start_time</span><span class="p">))</span> + <span class="n">th</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">remaining_time</span><span class="p">)</span> + <span class="k">if</span> <span class="n">th</span><span class="o">.</span><span class="n">is_alive</span><span class="p">():</span> + <span class="n">th</span><span class="o">.</span><span class="n">_timeout</span> <span class="o">=</span> <span class="kc">True</span> + <span class="bp">self</span><span class="o">.</span><span class="n">result_container</span><span class="o">.</span><span class="n">add_unresponsive_engine</span><span class="p">(</span><span class="n">th</span><span class="o">.</span><span class="n">_engine_name</span><span class="p">,</span> <span class="s1">'timeout'</span><span class="p">)</span> + <span class="n">PROCESSORS</span><span class="p">[</span><span class="n">th</span><span class="o">.</span><span class="n">_engine_name</span><span class="p">]</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">'engine timeout'</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">search_standard</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""</span> +<span class="sd"> Update self.result_container, self.actual_timeout</span> +<span class="sd"> """</span> + <span class="n">requests</span><span 
class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">actual_timeout</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_requests</span><span class="p">()</span> + + <span class="c1"># send all search-request</span> + <span class="k">if</span> <span class="n">requests</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">search_multiple_requests</span><span class="p">(</span><span class="n">requests</span><span class="p">)</span> + + <span class="c1"># return results, suggestions, answers and infoboxes</span> + <span class="k">return</span> <span class="kc">True</span> + + <span class="c1"># do search-request</span> +<div class="viewcode-block" id="Search.search"> +<a class="viewcode-back" href="../../src/searx.search.html#searx.search.Search.search">[docs]</a> + <span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">ResultContainer</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">start_time</span> <span class="o">=</span> <span class="n">default_timer</span><span class="p">()</span> + <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">search_external_bang</span><span class="p">():</span> + <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">search_answerers</span><span class="p">():</span> + <span class="bp">self</span><span class="o">.</span><span class="n">search_standard</span><span class="p">()</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">result_container</span></div> +</div> + + + +<div class="viewcode-block" id="SearchWithPlugins"> +<a class="viewcode-back" 
href="../../src/searx.search.html#searx.search.SearchWithPlugins">[docs]</a> +<span class="k">class</span> <span class="nc">SearchWithPlugins</span><span class="p">(</span><span class="n">Search</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Inherit from the Search class, add calls to the plugins."""</span> + + <span class="vm">__slots__</span> <span class="o">=</span> <span class="s1">'ordered_plugin_list'</span><span class="p">,</span> <span class="s1">'request'</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">search_query</span><span class="p">:</span> <span class="n">SearchQuery</span><span class="p">,</span> <span class="n">ordered_plugin_list</span><span class="p">,</span> <span class="n">request</span><span class="p">:</span> <span class="n">flask</span><span class="o">.</span><span class="n">Request</span><span class="p">):</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">search_query</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">ordered_plugin_list</span> <span class="o">=</span> <span class="n">ordered_plugin_list</span> + <span class="bp">self</span><span class="o">.</span><span class="n">result_container</span><span class="o">.</span><span class="n">on_result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_on_result</span> + <span class="c1"># pylint: disable=line-too-long</span> + <span class="c1"># get the "real" request to use it outside the Flask context.</span> + <span class="c1"># see</span> + <span class="c1"># * https://github.com/pallets/flask/blob/d01d26e5210e3ee4cbbdef12f05c886e08e92852/src/flask/globals.py#L55</span> + <span class="c1"># * 
https://github.com/pallets/werkzeug/blob/3c5d3c9bd0d9ce64590f0af8997a38f3823b368d/src/werkzeug/local.py#L548-L559</span> + <span class="c1"># * https://werkzeug.palletsprojects.com/en/2.0.x/local/#werkzeug.local.LocalProxy._get_current_object</span> + <span class="c1"># pylint: enable=line-too-long</span> + <span class="bp">self</span><span class="o">.</span><span class="n">request</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">_get_current_object</span><span class="p">()</span> + + <span class="k">def</span> <span class="nf">_on_result</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">):</span> + <span class="k">return</span> <span class="n">plugins</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ordered_plugin_list</span><span class="p">,</span> <span class="s1">'on_result'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">request</span><span class="p">,</span> <span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">)</span> + +<div class="viewcode-block" id="SearchWithPlugins.search"> +<a class="viewcode-back" href="../../src/searx.search.html#searx.search.SearchWithPlugins.search">[docs]</a> + <span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">ResultContainer</span><span class="p">:</span> + <span class="k">if</span> <span class="n">plugins</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ordered_plugin_list</span><span class="p">,</span> <span class="s1">'pre_search'</span><span class="p">,</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">request</span><span class="p">,</span> <span class="bp">self</span><span class="p">):</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">search</span><span class="p">()</span> + + <span class="n">plugins</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ordered_plugin_list</span><span class="p">,</span> <span class="s1">'post_search'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">request</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">result_container</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> + + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">result_container</span></div> +</div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" 
href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/search/models.html b/_modules/searx/search/models.html new file mode 100644 index 000000000..73ed2243b --- /dev/null +++ b/_modules/searx/search/models.html @@ -0,0 +1,246 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.search.models — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../genindex.html" /> + <link rel="search" title="Search" href="../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../search.html" accesskey="U">searx.search</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.search.models</a></li> + </ul> + </div> + + <div 
class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.search.models</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="c1"># pylint: disable=missing-module-docstring</span> + +<span class="kn">import</span> <span class="nn">typing</span> +<span class="kn">import</span> <span class="nn">babel</span> + + +<div class="viewcode-block" id="EngineRef"> +<a class="viewcode-back" href="../../../src/searx.search.html#searx.search.EngineRef">[docs]</a> +<span class="k">class</span> <span class="nc">EngineRef</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Reference by names to an engine and category"""</span> + + <span class="vm">__slots__</span> <span class="o">=</span> <span class="s1">'name'</span><span class="p">,</span> <span class="s1">'category'</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">category</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span> + <span class="bp">self</span><span class="o">.</span><span class="n">category</span> <span class="o">=</span> <span class="n">category</span> + + <span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">"EngineRef(</span><span class="si">{!r}</span><span class="s2">, </span><span class="si">{!r}</span><span class="s2">)"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span 
class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">category</span><span class="p">)</span> + + <span class="k">def</span> <span class="fm">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">name</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">category</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">category</span> + + <span class="k">def</span> <span class="fm">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="nb">hash</span><span class="p">((</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">category</span><span class="p">))</span></div> + + + +<div class="viewcode-block" id="SearchQuery"> +<a class="viewcode-back" href="../../../src/searx.search.html#searx.search.SearchQuery">[docs]</a> +<span class="k">class</span> <span class="nc">SearchQuery</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""container for all the search parameters (query, language, etc...)"""</span> + + <span class="vm">__slots__</span> <span class="o">=</span> <span class="p">(</span> + <span class="s1">'query'</span><span class="p">,</span> + <span class="s1">'engineref_list'</span><span class="p">,</span> + <span class="s1">'lang'</span><span class="p">,</span> + <span class="s1">'locale'</span><span class="p">,</span> + <span 
class="s1">'safesearch'</span><span class="p">,</span> + <span class="s1">'pageno'</span><span class="p">,</span> + <span class="s1">'time_range'</span><span class="p">,</span> + <span class="s1">'timeout_limit'</span><span class="p">,</span> + <span class="s1">'external_bang'</span><span class="p">,</span> + <span class="s1">'engine_data'</span><span class="p">,</span> + <span class="s1">'redirect_to_first_result'</span><span class="p">,</span> + <span class="p">)</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> + <span class="bp">self</span><span class="p">,</span> + <span class="n">query</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> + <span class="n">engineref_list</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">List</span><span class="p">[</span><span class="n">EngineRef</span><span class="p">],</span> + <span class="n">lang</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">'all'</span><span class="p">,</span> + <span class="n">safesearch</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> + <span class="n">pageno</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> + <span class="n">time_range</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> + <span class="n">timeout_limit</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span 
class="kc">None</span><span class="p">,</span> + <span class="n">external_bang</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> + <span class="n">engine_data</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="n">typing</span><span class="o">.</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> + <span class="n">redirect_to_first_result</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> + <span class="p">):</span> <span class="c1"># pylint:disable=too-many-arguments</span> + <span class="bp">self</span><span class="o">.</span><span class="n">query</span> <span class="o">=</span> <span class="n">query</span> + <span class="bp">self</span><span class="o">.</span><span class="n">engineref_list</span> <span class="o">=</span> <span class="n">engineref_list</span> + <span class="bp">self</span><span class="o">.</span><span class="n">lang</span> <span class="o">=</span> <span class="n">lang</span> + <span class="bp">self</span><span class="o">.</span><span class="n">safesearch</span> <span class="o">=</span> <span class="n">safesearch</span> + <span class="bp">self</span><span class="o">.</span><span class="n">pageno</span> <span class="o">=</span> <span class="n">pageno</span> + <span class="bp">self</span><span class="o">.</span><span 
class="n">time_range</span> <span class="o">=</span> <span class="n">time_range</span> + <span class="bp">self</span><span class="o">.</span><span class="n">timeout_limit</span> <span class="o">=</span> <span class="n">timeout_limit</span> + <span class="bp">self</span><span class="o">.</span><span class="n">external_bang</span> <span class="o">=</span> <span class="n">external_bang</span> + <span class="bp">self</span><span class="o">.</span><span class="n">engine_data</span> <span class="o">=</span> <span class="n">engine_data</span> <span class="ow">or</span> <span class="p">{}</span> + <span class="bp">self</span><span class="o">.</span><span class="n">redirect_to_first_result</span> <span class="o">=</span> <span class="n">redirect_to_first_result</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">locale</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">lang</span><span class="p">:</span> + <span class="k">try</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lang</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span> + <span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span> + <span class="k">pass</span> + + <span class="nd">@property</span> + <span class="k">def</span> <span class="nf">categories</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span 
class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">engineref</span><span class="p">:</span> <span class="n">engineref</span><span class="o">.</span><span class="n">category</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">engineref_list</span><span class="p">)))</span> + + <span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s2">"SearchQuery(</span><span class="si">{!r}</span><span class="s2">, </span><span class="si">{!r}</span><span class="s2">, </span><span class="si">{!r}</span><span class="s2">, </span><span class="si">{!r}</span><span class="s2">, </span><span class="si">{!r}</span><span class="s2">, </span><span class="si">{!r}</span><span class="s2">, </span><span class="si">{!r}</span><span class="s2">, </span><span class="si">{!r}</span><span class="s2">, </span><span class="si">{!r}</span><span class="s2">)"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> + <span class="bp">self</span><span class="o">.</span><span class="n">query</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">engineref_list</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">lang</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">safesearch</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">pageno</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">time_range</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span 
class="n">timeout_limit</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">external_bang</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">redirect_to_first_result</span><span class="p">,</span> + <span class="p">)</span> + + <span class="k">def</span> <span class="fm">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span> + <span class="k">return</span> <span class="p">(</span> + <span class="bp">self</span><span class="o">.</span><span class="n">query</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">query</span> + <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">engineref_list</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">engineref_list</span> + <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">lang</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">lang</span> + <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">safesearch</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">safesearch</span> + <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">pageno</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">pageno</span> + <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">time_range</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">time_range</span> + <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span 
class="n">timeout_limit</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">timeout_limit</span> + <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">external_bang</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">external_bang</span> + <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">redirect_to_first_result</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">redirect_to_first_result</span> + <span class="p">)</span> + + <span class="k">def</span> <span class="fm">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span> + <span class="p">(</span> + <span class="bp">self</span><span class="o">.</span><span class="n">query</span><span class="p">,</span> + <span class="nb">tuple</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engineref_list</span><span class="p">),</span> + <span class="bp">self</span><span class="o">.</span><span class="n">lang</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">safesearch</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">pageno</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">time_range</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">timeout_limit</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">external_bang</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">redirect_to_first_result</span><span class="p">,</span> + <span 
class="p">)</span> + <span class="p">)</span> + + <span class="k">def</span> <span class="nf">__copy__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="n">SearchQuery</span><span class="p">(</span> + <span class="bp">self</span><span class="o">.</span><span class="n">query</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">engineref_list</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">lang</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">safesearch</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">pageno</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">time_range</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">timeout_limit</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">external_bang</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">engine_data</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">redirect_to_first_result</span><span class="p">,</span> + <span class="p">)</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../index.html"> + <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li> +<li 
class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../index.html">Overview</a> + <ul> + <li><a href="../../index.html">Module code</a> + <ul> + <li><a href="../search.html">searx.search</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/search/processors/abstract.html b/_modules/searx/search/processors/abstract.html new file mode 100644 index 000000000..665668ce6 --- /dev/null +++ b/_modules/searx/search/processors/abstract.html @@ -0,0 +1,311 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.search.processors.abstract — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../../genindex.html" /> + <link rel="search" title="Search" href="../../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../../search.html" accesskey="U">searx.search</a> »</li> + <li 
class="nav-item nav-item-this"><a href="">searx.search.processors.abstract</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.search.processors.abstract</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Abstract base classes for engine request processors.</span> + +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">threading</span> +<span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">abstractmethod</span><span class="p">,</span> <span class="n">ABC</span> +<span class="kn">from</span> <span class="nn">timeit</span> <span class="kn">import</span> <span class="n">default_timer</span> +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Union</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">settings</span><span class="p">,</span> <span class="n">logger</span> +<span class="kn">from</span> <span class="nn">searx.engines</span> <span class="kn">import</span> <span class="n">engines</span> +<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get_time_for_thread</span><span class="p">,</span> <span class="n">get_network</span> +<span class="kn">from</span> <span class="nn">searx.metrics</span> <span class="kn">import</span> <span class="n">histogram_observe</span><span class="p">,</span> <span class="n">counter_inc</span><span class="p">,</span> <span class="n">count_exception</span><span class="p">,</span> <span class="n">count_error</span> +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span 
class="n">SearxEngineAccessDeniedException</span><span class="p">,</span> <span class="n">SearxEngineResponseException</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">get_engine_from_settings</span> + +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'searx.search.processor'</span><span class="p">)</span> +<span class="n">SUSPENDED_STATUS</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span> <span class="s1">'SuspendedStatus'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + + +<div class="viewcode-block" id="SuspendedStatus"> +<a class="viewcode-back" href="../../../../src/searx.search.processors.html#searx.search.processors.abstract.SuspendedStatus">[docs]</a> +<span class="k">class</span> <span class="nc">SuspendedStatus</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Class to handle suspend state."""</span> + + <span class="vm">__slots__</span> <span class="o">=</span> <span class="s1">'suspend_end_time'</span><span class="p">,</span> <span class="s1">'suspend_reason'</span><span class="p">,</span> <span class="s1">'continuous_errors'</span><span class="p">,</span> <span class="s1">'lock'</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">lock</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">Lock</span><span class="p">()</span> + <span class="bp">self</span><span class="o">.</span><span 
class="n">continuous_errors</span> <span class="o">=</span> <span class="mi">0</span> + <span class="bp">self</span><span class="o">.</span><span class="n">suspend_end_time</span> <span class="o">=</span> <span class="mi">0</span> + <span class="bp">self</span><span class="o">.</span><span class="n">suspend_reason</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="nd">@property</span> + <span class="k">def</span> <span class="nf">is_suspended</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">suspend_end_time</span> <span class="o">>=</span> <span class="n">default_timer</span><span class="p">()</span> + + <span class="k">def</span> <span class="nf">suspend</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">suspended_time</span><span class="p">,</span> <span class="n">suspend_reason</span><span class="p">):</span> + <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">lock</span><span class="p">:</span> + <span class="c1"># update continuous_errors / suspend_end_time</span> + <span class="bp">self</span><span class="o">.</span><span class="n">continuous_errors</span> <span class="o">+=</span> <span class="mi">1</span> + <span class="k">if</span> <span class="n">suspended_time</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">suspended_time</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span> + <span class="n">settings</span><span class="p">[</span><span class="s1">'search'</span><span class="p">][</span><span class="s1">'max_ban_time_on_fail'</span><span class="p">],</span> + <span class="bp">self</span><span class="o">.</span><span class="n">continuous_errors</span> <span class="o">*</span> <span class="n">settings</span><span 
class="p">[</span><span class="s1">'search'</span><span class="p">][</span><span class="s1">'ban_time_on_fail'</span><span class="p">],</span> + <span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">suspend_end_time</span> <span class="o">=</span> <span class="n">default_timer</span><span class="p">()</span> <span class="o">+</span> <span class="n">suspended_time</span> + <span class="bp">self</span><span class="o">.</span><span class="n">suspend_reason</span> <span class="o">=</span> <span class="n">suspend_reason</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'Suspend for </span><span class="si">%i</span><span class="s1"> seconds'</span><span class="p">,</span> <span class="n">suspended_time</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">resume</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">lock</span><span class="p">:</span> + <span class="c1"># reset the suspend variables</span> + <span class="bp">self</span><span class="o">.</span><span class="n">continuous_errors</span> <span class="o">=</span> <span class="mi">0</span> + <span class="bp">self</span><span class="o">.</span><span class="n">suspend_end_time</span> <span class="o">=</span> <span class="mi">0</span> + <span class="bp">self</span><span class="o">.</span><span class="n">suspend_reason</span> <span class="o">=</span> <span class="kc">None</span></div> + + + +<div class="viewcode-block" id="EngineProcessor"> +<a class="viewcode-back" href="../../../../src/searx.search.processors.html#searx.search.processors.abstract.EngineProcessor">[docs]</a> +<span class="k">class</span> <span class="nc">EngineProcessor</span><span class="p">(</span><span class="n">ABC</span><span class="p">):</span> +<span class="w"> 
</span><span class="sd">"""Base classes used for all types of request processors."""</span> + + <span class="vm">__slots__</span> <span class="o">=</span> <span class="s1">'engine'</span><span class="p">,</span> <span class="s1">'engine_name'</span><span class="p">,</span> <span class="s1">'lock'</span><span class="p">,</span> <span class="s1">'suspended_status'</span><span class="p">,</span> <span class="s1">'logger'</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">engine</span><span class="p">,</span> <span class="n">engine_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">engine</span> <span class="o">=</span> <span class="n">engine</span> + <span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span> <span class="o">=</span> <span class="n">engine_name</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span> <span class="o">=</span> <span class="n">engines</span><span class="p">[</span><span class="n">engine_name</span><span class="p">]</span><span class="o">.</span><span class="n">logger</span> + <span class="n">key</span> <span class="o">=</span> <span class="n">get_network</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">)</span> + <span class="n">key</span> <span class="o">=</span> <span class="nb">id</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="k">if</span> <span class="n">key</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span> + <span class="bp">self</span><span class="o">.</span><span class="n">suspended_status</span> <span class="o">=</span> <span 
class="n">SUSPENDED_STATUS</span><span class="o">.</span><span class="n">setdefault</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">SuspendedStatus</span><span class="p">())</span> + + <span class="k">def</span> <span class="nf">initialize</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">try</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">init</span><span class="p">(</span><span class="n">get_engine_from_settings</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">))</span> + <span class="k">except</span> <span class="n">SearxEngineResponseException</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s1">'Fail to initialize // </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">exc</span><span class="p">)</span> + <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="s1">'Fail to initialize'</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'Initialized'</span><span class="p">)</span> + + <span class="nd">@property</span> + <span class="k">def</span> <span 
class="nf">has_initialize_function</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'init'</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">handle_exception</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result_container</span><span class="p">,</span> <span class="n">exception_or_message</span><span class="p">,</span> <span class="n">suspend</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + <span class="c1"># update result_container</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">exception_or_message</span><span class="p">,</span> <span class="ne">BaseException</span><span class="p">):</span> + <span class="n">exception_class</span> <span class="o">=</span> <span class="n">exception_or_message</span><span class="o">.</span><span class="vm">__class__</span> + <span class="n">module_name</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">exception_class</span><span class="p">,</span> <span class="s1">'__module__'</span><span class="p">,</span> <span class="s1">'builtins'</span><span class="p">)</span> + <span class="n">module_name</span> <span class="o">=</span> <span class="s1">''</span> <span class="k">if</span> <span class="n">module_name</span> <span class="o">==</span> <span class="s1">'builtins'</span> <span class="k">else</span> <span class="n">module_name</span> <span class="o">+</span> <span class="s1">'.'</span> + <span class="n">error_message</span> <span class="o">=</span> <span class="n">module_name</span> <span class="o">+</span> <span class="n">exception_class</span><span 
class="o">.</span><span class="vm">__qualname__</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">error_message</span> <span class="o">=</span> <span class="n">exception_or_message</span> + <span class="n">result_container</span><span class="o">.</span><span class="n">add_unresponsive_engine</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="n">error_message</span><span class="p">)</span> + <span class="c1"># metrics</span> + <span class="n">counter_inc</span><span class="p">(</span><span class="s1">'engine'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="s1">'search'</span><span class="p">,</span> <span class="s1">'count'</span><span class="p">,</span> <span class="s1">'error'</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">exception_or_message</span><span class="p">,</span> <span class="ne">BaseException</span><span class="p">):</span> + <span class="n">count_exception</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="n">exception_or_message</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">count_error</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="n">exception_or_message</span><span class="p">)</span> + <span class="c1"># suspend the engine ?</span> + <span class="k">if</span> <span class="n">suspend</span><span class="p">:</span> + <span class="n">suspended_time</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">if</span> <span 
class="nb">isinstance</span><span class="p">(</span><span class="n">exception_or_message</span><span class="p">,</span> <span class="n">SearxEngineAccessDeniedException</span><span class="p">):</span> + <span class="n">suspended_time</span> <span class="o">=</span> <span class="n">exception_or_message</span><span class="o">.</span><span class="n">suspended_time</span> + <span class="bp">self</span><span class="o">.</span><span class="n">suspended_status</span><span class="o">.</span><span class="n">suspend</span><span class="p">(</span><span class="n">suspended_time</span><span class="p">,</span> <span class="n">error_message</span><span class="p">)</span> <span class="c1"># pylint: disable=no-member</span> + + <span class="k">def</span> <span class="nf">_extend_container_basic</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result_container</span><span class="p">,</span> <span class="n">start_time</span><span class="p">,</span> <span class="n">search_results</span><span class="p">):</span> + <span class="c1"># update result_container</span> + <span class="n">result_container</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="n">search_results</span><span class="p">)</span> + <span class="n">engine_time</span> <span class="o">=</span> <span class="n">default_timer</span><span class="p">()</span> <span class="o">-</span> <span class="n">start_time</span> + <span class="n">page_load_time</span> <span class="o">=</span> <span class="n">get_time_for_thread</span><span class="p">()</span> + <span class="n">result_container</span><span class="o">.</span><span class="n">add_timing</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="n">engine_time</span><span 
class="p">,</span> <span class="n">page_load_time</span><span class="p">)</span> + <span class="c1"># metrics</span> + <span class="n">counter_inc</span><span class="p">(</span><span class="s1">'engine'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="s1">'search'</span><span class="p">,</span> <span class="s1">'count'</span><span class="p">,</span> <span class="s1">'successful'</span><span class="p">)</span> + <span class="n">histogram_observe</span><span class="p">(</span><span class="n">engine_time</span><span class="p">,</span> <span class="s1">'engine'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="s1">'time'</span><span class="p">,</span> <span class="s1">'total'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">page_load_time</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">histogram_observe</span><span class="p">(</span><span class="n">page_load_time</span><span class="p">,</span> <span class="s1">'engine'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="s1">'time'</span><span class="p">,</span> <span class="s1">'http'</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">extend_container</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result_container</span><span class="p">,</span> <span class="n">start_time</span><span class="p">,</span> <span class="n">search_results</span><span class="p">):</span> + <span class="k">if</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">threading</span><span class="o">.</span><span 
class="n">current_thread</span><span class="p">(),</span> <span class="s1">'_timeout'</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span> + <span class="c1"># the main thread is not waiting anymore</span> + <span class="bp">self</span><span class="o">.</span><span class="n">handle_exception</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span class="s1">'timeout'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="c1"># check if the engine accepted the request</span> + <span class="k">if</span> <span class="n">search_results</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_extend_container_basic</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span class="n">start_time</span><span class="p">,</span> <span class="n">search_results</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">suspended_status</span><span class="o">.</span><span class="n">resume</span><span class="p">()</span> + + <span class="k">def</span> <span class="nf">extend_container_if_suspended</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result_container</span><span class="p">):</span> + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">suspended_status</span><span class="o">.</span><span class="n">is_suspended</span><span class="p">:</span> + <span class="n">result_container</span><span class="o">.</span><span class="n">add_unresponsive_engine</span><span class="p">(</span> + <span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">suspended_status</span><span class="o">.</span><span class="n">suspend_reason</span><span class="p">,</span> <span class="n">suspended</span><span class="o">=</span><span class="kc">True</span> + <span class="p">)</span> + <span class="k">return</span> <span class="kc">True</span> + <span class="k">return</span> <span class="kc">False</span> + +<div class="viewcode-block" id="EngineProcessor.get_params"> +<a class="viewcode-back" href="../../../../src/searx.search.processors.html#searx.search.processors.abstract.EngineProcessor.get_params">[docs]</a> + <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">search_query</span><span class="p">,</span> <span class="n">engine_category</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns a set of (see :ref:`request params <engine request arguments>`) or</span> +<span class="sd"> ``None`` if request is not supported.</span> + +<span class="sd"> Not supported conditions (``None`` is returned):</span> + +<span class="sd"> - A page-number > 1 when engine does not support paging.</span> +<span class="sd"> - A time range when the engine does not support time range.</span> +<span class="sd"> """</span> + <span class="c1"># if paging is not supported, skip</span> + <span class="k">if</span> <span class="n">search_query</span><span class="o">.</span><span class="n">pageno</span> <span class="o">></span> <span class="mi">1</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">paging</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="c1"># if max page is reached, skip</span> + <span class="n">max_page</span> <span class="o">=</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">max_page</span> <span class="ow">or</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'search'</span><span class="p">][</span><span class="s1">'max_page'</span><span class="p">]</span> + <span class="k">if</span> <span class="n">max_page</span> <span class="ow">and</span> <span class="n">max_page</span> <span class="o"><</span> <span class="n">search_query</span><span class="o">.</span><span class="n">pageno</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="c1"># if time_range is not supported, skip</span> + <span class="k">if</span> <span class="n">search_query</span><span class="o">.</span><span class="n">time_range</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">time_range_support</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">params</span> <span class="o">=</span> <span class="p">{}</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'category'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_category</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_query</span><span class="o">.</span><span class="n">pageno</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_query</span><span class="o">.</span><span class="n">safesearch</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="o">=</span> <span 
class="n">search_query</span><span class="o">.</span><span class="n">time_range</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'engine_data'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_query</span><span class="o">.</span><span class="n">engine_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="p">{})</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_query</span><span class="o">.</span><span class="n">lang</span> + + <span class="c1"># deprecated / vintage --> use params['searxng_locale']</span> + <span class="c1">#</span> + <span class="c1"># Conditions related to engine's traits are implemented in engine.traits</span> + <span class="c1"># module. 
Don't do 'locale' decisions here in the abstract layer of the</span> + <span class="c1"># search processor, just pass the value from user's choice unchanged to</span> + <span class="c1"># the engine request.</span> + + <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'language'</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">language</span><span class="p">:</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">language</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_query</span><span class="o">.</span><span class="n">lang</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + <span class="nd">@abstractmethod</span> + <span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">result_container</span><span class="p">,</span> <span class="n">start_time</span><span class="p">,</span> <span class="n">timeout_limit</span><span class="p">):</span> + <span class="k">pass</span> + + <span class="k">def</span> <span class="nf">get_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="n">tests</span> <span class="o">=</span> <span 
class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'tests'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">tests</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">tests</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'additional_tests'</span><span class="p">,</span> <span class="p">{})</span> + <span class="n">tests</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_default_tests</span><span class="p">())</span> + <span class="k">return</span> <span class="n">tests</span> + + <span class="k">def</span> <span class="nf">get_default_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="p">{}</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../../index.html"> + <img class="logo" src="../../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" 
href="../../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../../index.html">Overview</a> + <ul> + <li><a href="../../../index.html">Module code</a> + <ul> + <li><a href="../../search.html">searx.search</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/search/processors/offline.html b/_modules/searx/search/processors/offline.html new file mode 100644 index 000000000..15aad05a7 --- /dev/null +++ b/_modules/searx/search/processors/offline.html @@ -0,0 +1,137 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.search.processors.offline — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../../genindex.html" /> + <link rel="search" title="Search" href="../../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../../search.html" accesskey="U">searx.search</a> »</li> + <li class="nav-item 
nav-item-this"><a href="">searx.search.processors.offline</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.search.processors.offline</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Processors for engine-type: ``offline``</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">.abstract</span> <span class="kn">import</span> <span class="n">EngineProcessor</span> + + +<div class="viewcode-block" id="OfflineProcessor"> +<a class="viewcode-back" href="../../../../src/searx.search.processors.html#searx.search.processors.offline.OfflineProcessor">[docs]</a> +<span class="k">class</span> <span class="nc">OfflineProcessor</span><span class="p">(</span><span class="n">EngineProcessor</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Processor class used by ``offline`` engines"""</span> + + <span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'offline'</span> + + <span class="k">def</span> <span class="nf">_search_basic</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">result_container</span><span 
class="p">,</span> <span class="n">start_time</span><span class="p">,</span> <span class="n">timeout_limit</span><span class="p">):</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">search_results</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_search_basic</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">extend_container</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span class="n">start_time</span><span class="p">,</span> <span class="n">search_results</span><span class="p">)</span> + <span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="c1"># do not record the error</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="s1">'engine </span><span class="si">{0}</span><span class="s1"> : invalid input : </span><span class="si">{1}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="n">e</span><span class="p">))</span> + <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span> + <span class="bp">self</span><span class="o">.</span><span class="n">handle_exception</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span> + <span class="bp">self</span><span 
class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="s1">'engine </span><span class="si">{0}</span><span class="s1"> : exception : </span><span class="si">{1}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> <span class="n">e</span><span class="p">))</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../../index.html"> + <img class="logo" src="../../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue 
Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../../index.html">Overview</a> + <ul> + <li><a href="../../../index.html">Module code</a> + <ul> + <li><a href="../../search.html">searx.search</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/search/processors/online.html b/_modules/searx/search/processors/online.html new file mode 100644 index 000000000..4dffedd52 --- /dev/null +++ b/_modules/searx/search/processors/online.html @@ -0,0 +1,350 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.search.processors.online — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../../genindex.html" /> + <link rel="search" title="Search" href="../../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../../search.html" accesskey="U">searx.search</a> »</li> + <li class="nav-item 
nav-item-this"><a href="">searx.search.processors.online</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.search.processors.online</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Processors for engine-type: ``online``</span> + +<span class="sd">"""</span> +<span class="c1"># pylint: disable=use-dict-literal</span> + +<span class="kn">from</span> <span class="nn">timeit</span> <span class="kn">import</span> <span class="n">default_timer</span> +<span class="kn">import</span> <span class="nn">asyncio</span> +<span class="kn">import</span> <span class="nn">ssl</span> +<span class="kn">import</span> <span class="nn">httpx</span> + +<span class="kn">import</span> <span class="nn">searx.network</span> +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">gen_useragent</span> +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="p">(</span> + <span class="n">SearxEngineAccessDeniedException</span><span class="p">,</span> + <span class="n">SearxEngineCaptchaException</span><span class="p">,</span> + <span class="n">SearxEngineTooManyRequestsException</span><span class="p">,</span> +<span class="p">)</span> +<span class="kn">from</span> <span class="nn">searx.metrics.error_recorder</span> <span class="kn">import</span> <span class="n">count_error</span> +<span class="kn">from</span> <span class="nn">.abstract</span> <span class="kn">import</span> <span class="n">EngineProcessor</span> + + +<div class="viewcode-block" id="default_request_params"> +<a class="viewcode-back" href="../../../../src/searx.search.processors.html#searx.search.processors.online.default_request_params">[docs]</a> +<span class="k">def</span> <span 
class="nf">default_request_params</span><span class="p">():</span> +<span class="w"> </span><span class="sd">"""Default request parameters for ``online`` engines."""</span> + <span class="k">return</span> <span class="p">{</span> + <span class="c1"># fmt: off</span> + <span class="s1">'method'</span><span class="p">:</span> <span class="s1">'GET'</span><span class="p">,</span> + <span class="s1">'headers'</span><span class="p">:</span> <span class="p">{},</span> + <span class="s1">'data'</span><span class="p">:</span> <span class="p">{},</span> + <span class="s1">'url'</span><span class="p">:</span> <span class="s1">''</span><span class="p">,</span> + <span class="s1">'cookies'</span><span class="p">:</span> <span class="p">{},</span> + <span class="s1">'auth'</span><span class="p">:</span> <span class="kc">None</span> + <span class="c1"># fmt: on</span> + <span class="p">}</span></div> + + + +<div class="viewcode-block" id="OnlineProcessor"> +<a class="viewcode-back" href="../../../../src/searx.search.processors.html#searx.search.processors.online.OnlineProcessor">[docs]</a> +<span class="k">class</span> <span class="nc">OnlineProcessor</span><span class="p">(</span><span class="n">EngineProcessor</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Processor class for ``online`` engines."""</span> + + <span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'online'</span> + + <span class="k">def</span> <span class="nf">initialize</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="c1"># set timeout for all HTTP requests</span> + <span class="n">searx</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">set_timeout_for_thread</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">timeout</span><span class="p">,</span> <span 
class="n">start_time</span><span class="o">=</span><span class="n">default_timer</span><span class="p">())</span> + <span class="c1"># reset the HTTP total time</span> + <span class="n">searx</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">reset_time_for_thread</span><span class="p">()</span> + <span class="c1"># set the network</span> + <span class="n">searx</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">set_context_network_name</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">)</span> + <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span> + +<div class="viewcode-block" id="OnlineProcessor.get_params"> +<a class="viewcode-back" href="../../../../src/searx.search.processors.html#searx.search.processors.online.OnlineProcessor.get_params">[docs]</a> + <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">search_query</span><span class="p">,</span> <span class="n">engine_category</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns a set of :ref:`request params <engine request online>` or ``None``</span> +<span class="sd"> if request is not supported.</span> +<span class="sd"> """</span> + <span class="n">params</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">search_query</span><span class="p">,</span> <span class="n">engine_category</span><span class="p">)</span> + <span class="k">if</span> <span class="n">params</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span 
class="kc">None</span> + + <span class="c1"># add default params</span> + <span class="n">params</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">default_request_params</span><span class="p">())</span> + + <span class="c1"># add an user agent</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'User-Agent'</span><span class="p">]</span> <span class="o">=</span> <span class="n">gen_useragent</span><span class="p">()</span> + + <span class="c1"># add Accept-Language header</span> + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">send_accept_language_header</span> <span class="ow">and</span> <span class="n">search_query</span><span class="o">.</span><span class="n">locale</span><span class="p">:</span> + <span class="n">ac_lang</span> <span class="o">=</span> <span class="n">search_query</span><span class="o">.</span><span class="n">locale</span><span class="o">.</span><span class="n">language</span> + <span class="k">if</span> <span class="n">search_query</span><span class="o">.</span><span class="n">locale</span><span class="o">.</span><span class="n">territory</span><span class="p">:</span> + <span class="n">ac_lang</span> <span class="o">=</span> <span class="s2">"</span><span class="si">%s</span><span class="s2">-</span><span class="si">%s</span><span class="s2">,</span><span class="si">%s</span><span class="s2">;q=0.9,*;q=0.5"</span> <span class="o">%</span> <span class="p">(</span> + <span class="n">search_query</span><span class="o">.</span><span class="n">locale</span><span class="o">.</span><span class="n">language</span><span class="p">,</span> + <span class="n">search_query</span><span class="o">.</span><span class="n">locale</span><span class="o">.</span><span class="n">territory</span><span class="p">,</span> + <span 
class="n">search_query</span><span class="o">.</span><span class="n">locale</span><span class="o">.</span><span class="n">language</span><span class="p">,</span> + <span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Accept-Language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ac_lang</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'HTTP Accept-Language: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Accept-Language'</span><span class="p">,</span> <span class="s1">''</span><span class="p">))</span> + <span class="k">return</span> <span class="n">params</span></div> + + + <span class="k">def</span> <span class="nf">_send_http_request</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + <span class="c1"># create dictionary which contain all</span> + <span class="c1"># information about the request</span> + <span class="n">request_args</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">headers</span><span class="o">=</span><span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">],</span> <span class="n">cookies</span><span class="o">=</span><span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">],</span> <span class="n">auth</span><span class="o">=</span><span class="n">params</span><span class="p">[</span><span class="s1">'auth'</span><span 
class="p">])</span> + + <span class="c1"># verify</span> + <span class="c1"># if not None, it overrides the verify value defined in the network.</span> + <span class="c1"># use False to accept any server certificate</span> + <span class="c1"># use a path to file to specify a server certificate</span> + <span class="n">verify</span> <span class="o">=</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'verify'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">verify</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">request_args</span><span class="p">[</span><span class="s1">'verify'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'verify'</span><span class="p">]</span> + + <span class="c1"># max_redirects</span> + <span class="n">max_redirects</span> <span class="o">=</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'max_redirects'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">max_redirects</span><span class="p">:</span> + <span class="n">request_args</span><span class="p">[</span><span class="s1">'max_redirects'</span><span class="p">]</span> <span class="o">=</span> <span class="n">max_redirects</span> + + <span class="c1"># allow_redirects</span> + <span class="k">if</span> <span class="s1">'allow_redirects'</span> <span class="ow">in</span> <span class="n">params</span><span class="p">:</span> + <span class="n">request_args</span><span class="p">[</span><span class="s1">'allow_redirects'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'allow_redirects'</span><span class="p">]</span> + + <span class="c1"># 
soft_max_redirects</span> + <span class="n">soft_max_redirects</span> <span class="o">=</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'soft_max_redirects'</span><span class="p">,</span> <span class="n">max_redirects</span> <span class="ow">or</span> <span class="mi">0</span><span class="p">)</span> + + <span class="c1"># raise_for_status</span> + <span class="n">request_args</span><span class="p">[</span><span class="s1">'raise_for_httperror'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'raise_for_httperror'</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span> + + <span class="c1"># specific type of request (GET or POST)</span> + <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'method'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'GET'</span><span class="p">:</span> + <span class="n">req</span> <span class="o">=</span> <span class="n">searx</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">get</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">req</span> <span class="o">=</span> <span class="n">searx</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">post</span> + + <span class="n">request_args</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]</span> + + <span class="c1"># send the request</span> + <span class="n">response</span> <span class="o">=</span> <span class="n">req</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span 
class="s1">'url'</span><span class="p">],</span> <span class="o">**</span><span class="n">request_args</span><span class="p">)</span> + + <span class="c1"># check soft limit of the redirect count</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">history</span><span class="p">)</span> <span class="o">></span> <span class="n">soft_max_redirects</span><span class="p">:</span> + <span class="c1"># unexpected redirect : record an error</span> + <span class="c1"># but the engine might still return valid results.</span> + <span class="n">status_code</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="ow">or</span> <span class="s1">''</span><span class="p">)</span> + <span class="n">reason</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">reason_phrase</span> <span class="ow">or</span> <span class="s1">''</span> + <span class="n">hostname</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">url</span><span class="o">.</span><span class="n">host</span> + <span class="n">count_error</span><span class="p">(</span> + <span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">,</span> + <span class="s1">'</span><span class="si">{}</span><span class="s1"> redirects, maximum: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">history</span><span class="p">),</span> <span class="n">soft_max_redirects</span><span class="p">),</span> + <span class="p">(</span><span class="n">status_code</span><span 
class="p">,</span> <span class="n">reason</span><span class="p">,</span> <span class="n">hostname</span><span class="p">),</span> + <span class="n">secondary</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> + <span class="p">)</span> + + <span class="k">return</span> <span class="n">response</span> + + <span class="k">def</span> <span class="nf">_search_basic</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span> + <span class="c1"># update request parameters dependent on</span> + <span class="c1"># search-engine (contained in engines folder)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span> + + <span class="c1"># ignoring empty urls</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="c1"># send request</span> + <span class="n">response</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_send_http_request</span><span class="p">(</span><span class="n">params</span><span class="p">)</span> + + <span class="c1"># parse the response</span> + <span class="n">response</span><span class="o">.</span><span class="n">search_params</span> <span class="o">=</span> <span class="n">params</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">response</span><span class="p">(</span><span class="n">response</span><span class="p">)</span> + + <span 
class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">result_container</span><span class="p">,</span> <span class="n">start_time</span><span class="p">,</span> <span class="n">timeout_limit</span><span class="p">):</span> + <span class="c1"># set timeout for all HTTP requests</span> + <span class="n">searx</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">set_timeout_for_thread</span><span class="p">(</span><span class="n">timeout_limit</span><span class="p">,</span> <span class="n">start_time</span><span class="o">=</span><span class="n">start_time</span><span class="p">)</span> + <span class="c1"># reset the HTTP total time</span> + <span class="n">searx</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">reset_time_for_thread</span><span class="p">()</span> + <span class="c1"># set the network</span> + <span class="n">searx</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">set_context_network_name</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">)</span> + + <span class="k">try</span><span class="p">:</span> + <span class="c1"># send requests and parse the results</span> + <span class="n">search_results</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_search_basic</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">extend_container</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span 
class="n">start_time</span><span class="p">,</span> <span class="n">search_results</span><span class="p">)</span> + <span class="k">except</span> <span class="n">ssl</span><span class="o">.</span><span class="n">SSLError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="c1"># requests timeout (connect or read)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">handle_exception</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span class="n">e</span><span class="p">,</span> <span class="n">suspend</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"SSLError </span><span class="si">{}</span><span class="s2">, verify=</span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="n">searx</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">get_network</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine_name</span><span class="p">)</span><span class="o">.</span><span class="n">verify</span><span class="p">))</span> + <span class="k">except</span> <span class="p">(</span><span class="n">httpx</span><span class="o">.</span><span class="n">TimeoutException</span><span class="p">,</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">TimeoutError</span><span class="p">)</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="c1"># requests timeout (connect or read)</span> + <span class="bp">self</span><span class="o">.</span><span 
class="n">handle_exception</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span class="n">e</span><span class="p">,</span> <span class="n">suspend</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span> + <span class="s2">"HTTP requests timeout (search duration : </span><span class="si">{0}</span><span class="s2"> s, timeout: </span><span class="si">{1}</span><span class="s2"> s) : </span><span class="si">{2}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> + <span class="n">default_timer</span><span class="p">()</span> <span class="o">-</span> <span class="n">start_time</span><span class="p">,</span> <span class="n">timeout_limit</span><span class="p">,</span> <span class="n">e</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span> + <span class="p">)</span> + <span class="p">)</span> + <span class="k">except</span> <span class="p">(</span><span class="n">httpx</span><span class="o">.</span><span class="n">HTTPError</span><span class="p">,</span> <span class="n">httpx</span><span class="o">.</span><span class="n">StreamError</span><span class="p">)</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="c1"># other requests exception</span> + <span class="bp">self</span><span class="o">.</span><span class="n">handle_exception</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span class="n">e</span><span class="p">,</span> <span class="n">suspend</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span 
class="o">.</span><span class="n">exception</span><span class="p">(</span> + <span class="s2">"requests exception (search duration : </span><span class="si">{0}</span><span class="s2"> s, timeout: </span><span class="si">{1}</span><span class="s2"> s) : </span><span class="si">{2}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> + <span class="n">default_timer</span><span class="p">()</span> <span class="o">-</span> <span class="n">start_time</span><span class="p">,</span> <span class="n">timeout_limit</span><span class="p">,</span> <span class="n">e</span> + <span class="p">)</span> + <span class="p">)</span> + <span class="k">except</span> <span class="n">SearxEngineCaptchaException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">handle_exception</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span class="n">e</span><span class="p">,</span> <span class="n">suspend</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="s1">'CAPTCHA'</span><span class="p">)</span> + <span class="k">except</span> <span class="n">SearxEngineTooManyRequestsException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">handle_exception</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span class="n">e</span><span class="p">,</span> <span class="n">suspend</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span 
class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="s1">'Too many requests'</span><span class="p">)</span> + <span class="k">except</span> <span class="n">SearxEngineAccessDeniedException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">handle_exception</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span class="n">e</span><span class="p">,</span> <span class="n">suspend</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="s1">'SearXNG is blocked'</span><span class="p">)</span> + <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span> + <span class="bp">self</span><span class="o">.</span><span class="n">handle_exception</span><span class="p">(</span><span class="n">result_container</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="s1">'exception : </span><span class="si">{0}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">e</span><span class="p">))</span> + + <span class="k">def</span> <span class="nf">get_default_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="n">tests</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="n">tests</span><span class="p">[</span><span 
class="s1">'simple'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'matrix'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'life'</span><span class="p">,</span> <span class="s1">'computer'</span><span class="p">)},</span> + <span class="s1">'result_container'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'not_empty'</span><span class="p">],</span> + <span class="p">}</span> + + <span class="k">if</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'paging'</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span> + <span class="n">tests</span><span class="p">[</span><span class="s1">'paging'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'matrix'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="s1">'time'</span><span class="p">,</span> <span class="s1">'pageno'</span><span class="p">:</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">)},</span> + <span class="s1">'result_container'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'not_empty'</span><span class="p">],</span> + <span class="s1">'test'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'unique_results'</span><span class="p">],</span> + <span class="p">}</span> + <span class="k">if</span> <span class="s1">'general'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span 
class="n">categories</span><span class="p">:</span> + <span class="c1"># avoid documentation about HTML tags (<time> and <input type="time">)</span> + <span class="n">tests</span><span class="p">[</span><span class="s1">'paging'</span><span class="p">][</span><span class="s1">'matrix'</span><span class="p">][</span><span class="s1">'query'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'news'</span> + + <span class="k">if</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'time_range'</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span> + <span class="n">tests</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'matrix'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="s1">'news'</span><span class="p">,</span> <span class="s1">'time_range'</span><span class="p">:</span> <span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="s1">'day'</span><span class="p">)},</span> + <span class="s1">'result_container'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'not_empty'</span><span class="p">],</span> + <span class="s1">'test'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'unique_results'</span><span class="p">],</span> + <span class="p">}</span> + + <span class="k">if</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'traits'</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span> + <span class="n">tests</span><span class="p">[</span><span 
class="s1">'lang_fr'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'matrix'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="s1">'paris'</span><span class="p">,</span> <span class="s1">'lang'</span><span class="p">:</span> <span class="s1">'fr'</span><span class="p">},</span> + <span class="s1">'result_container'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'not_empty'</span><span class="p">,</span> <span class="p">(</span><span class="s1">'has_language'</span><span class="p">,</span> <span class="s1">'fr'</span><span class="p">)],</span> + <span class="p">}</span> + <span class="n">tests</span><span class="p">[</span><span class="s1">'lang_en'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'matrix'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="s1">'paris'</span><span class="p">,</span> <span class="s1">'lang'</span><span class="p">:</span> <span class="s1">'en'</span><span class="p">},</span> + <span class="s1">'result_container'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'not_empty'</span><span class="p">,</span> <span class="p">(</span><span class="s1">'has_language'</span><span class="p">,</span> <span class="s1">'en'</span><span class="p">)],</span> + <span class="p">}</span> + + <span class="k">if</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'safesearch'</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span> + <span class="n">tests</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span 
class="s1">'matrix'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="s1">'porn'</span><span class="p">,</span> <span class="s1">'safesearch'</span><span class="p">:</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">)},</span> <span class="s1">'test'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'unique_results'</span><span class="p">]}</span> + + <span class="k">return</span> <span class="n">tests</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../../index.html"> + <img class="logo" src="../../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + 
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../../index.html">Overview</a> + <ul> + <li><a href="../../../index.html">Module code</a> + <ul> + <li><a href="../../search.html">searx.search</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/search/processors/online_currency.html b/_modules/searx/search/processors/online_currency.html new file mode 100644 index 000000000..7f8f007d1 --- /dev/null +++ b/_modules/searx/search/processors/online_currency.html @@ -0,0 +1,187 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.search.processors.online_currency — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../../genindex.html" /> + <link rel="search" title="Search" href="../../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../../search.html" 
accesskey="U">searx.search</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.search.processors.online_currency</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.search.processors.online_currency</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Processors for engine-type: ``online_currency``</span> + +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">unicodedata</span> +<span class="kn">import</span> <span class="nn">re</span> + +<span class="kn">from</span> <span class="nn">searx.data</span> <span class="kn">import</span> <span class="n">CURRENCIES</span> +<span class="kn">from</span> <span class="nn">.online</span> <span class="kn">import</span> <span class="n">OnlineProcessor</span> + +<span class="n">parser_re</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'.*?(</span><span class="se">\\</span><span class="s1">d+(?:</span><span class="se">\\</span><span class="s1">.</span><span class="se">\\</span><span class="s1">d+)?) 
([^.0-9]+) (?:in|to) ([^.0-9]+)'</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">I</span><span class="p">)</span> + + +<span class="k">def</span> <span class="nf">normalize_name</span><span class="p">(</span><span class="n">name</span><span class="p">):</span> + <span class="n">name</span> <span class="o">=</span> <span class="n">name</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">' '</span><span class="p">)</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s1">'s'</span><span class="p">)</span> + <span class="n">name</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s1">' +'</span><span class="p">,</span> <span class="s1">' '</span><span class="p">,</span> <span class="n">name</span><span class="p">)</span> + <span class="k">return</span> <span class="n">unicodedata</span><span class="o">.</span><span class="n">normalize</span><span class="p">(</span><span class="s1">'NFKD'</span><span class="p">,</span> <span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> + + +<span class="k">def</span> <span class="nf">name_to_iso4217</span><span class="p">(</span><span class="n">name</span><span class="p">):</span> + <span class="n">name</span> <span class="o">=</span> <span class="n">normalize_name</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="n">currency</span> <span class="o">=</span> <span class="n">CURRENCIES</span><span class="p">[</span><span class="s1">'names'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span 
class="n">name</span><span class="p">,</span> <span class="p">[</span><span class="n">name</span><span class="p">])</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">currency</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> + <span class="k">return</span> <span class="n">currency</span> + <span class="k">return</span> <span class="n">currency</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> + + +<span class="k">def</span> <span class="nf">iso4217_to_name</span><span class="p">(</span><span class="n">iso4217</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> + <span class="k">return</span> <span class="n">CURRENCIES</span><span class="p">[</span><span class="s1">'iso4217'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">iso4217</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">language</span><span class="p">,</span> <span class="n">iso4217</span><span class="p">)</span> + + +<div class="viewcode-block" id="OnlineCurrencyProcessor"> +<a class="viewcode-back" href="../../../../src/searx.search.processors.html#searx.search.processors.online_currency.OnlineCurrencyProcessor">[docs]</a> +<span class="k">class</span> <span class="nc">OnlineCurrencyProcessor</span><span class="p">(</span><span class="n">OnlineProcessor</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Processor class used by ``online_currency`` engines."""</span> + + <span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'online_currency'</span> + +<div class="viewcode-block" id="OnlineCurrencyProcessor.get_params"> +<a class="viewcode-back" 
href="../../../../src/searx.search.processors.html#searx.search.processors.online_currency.OnlineCurrencyProcessor.get_params">[docs]</a> + <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">search_query</span><span class="p">,</span> <span class="n">engine_category</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns a set of :ref:`request params <engine request online_currency>`</span> +<span class="sd"> or ``None`` if search query does not match to :py:obj:`parser_re`."""</span> + + <span class="n">params</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">search_query</span><span class="p">,</span> <span class="n">engine_category</span><span class="p">)</span> + <span class="k">if</span> <span class="n">params</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">m</span> <span class="o">=</span> <span class="n">parser_re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">search_query</span><span class="o">.</span><span class="n">query</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">m</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">amount_str</span><span class="p">,</span> <span class="n">from_currency</span><span class="p">,</span> <span class="n">to_currency</span> <span class="o">=</span> <span class="n">m</span><span class="o">.</span><span class="n">groups</span><span class="p">()</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">amount</span> <span class="o">=</span> <span 
class="nb">float</span><span class="p">(</span><span class="n">amount_str</span><span class="p">)</span> + <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + <span class="n">from_currency</span> <span class="o">=</span> <span class="n">name_to_iso4217</span><span class="p">(</span><span class="n">from_currency</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span> + <span class="n">to_currency</span> <span class="o">=</span> <span class="n">name_to_iso4217</span><span class="p">(</span><span class="n">to_currency</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'amount'</span><span class="p">]</span> <span class="o">=</span> <span class="n">amount</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'from'</span><span class="p">]</span> <span class="o">=</span> <span class="n">from_currency</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'to'</span><span class="p">]</span> <span class="o">=</span> <span class="n">to_currency</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'from_name'</span><span class="p">]</span> <span class="o">=</span> <span class="n">iso4217_to_name</span><span class="p">(</span><span class="n">from_currency</span><span class="p">,</span> <span class="s1">'en'</span><span class="p">)</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'to_name'</span><span class="p">]</span> <span class="o">=</span> <span class="n">iso4217_to_name</span><span class="p">(</span><span class="n">to_currency</span><span class="p">,</span> <span class="s1">'en'</span><span class="p">)</span> + <span class="k">return</span> <span class="n">params</span></div> + + + <span class="k">def</span> <span 
class="nf">get_default_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="n">tests</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="n">tests</span><span class="p">[</span><span class="s1">'currency'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'matrix'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="s1">'1337 usd in rmb'</span><span class="p">},</span> + <span class="s1">'result_container'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'has_answer'</span><span class="p">],</span> + <span class="p">}</span> + + <span class="k">return</span> <span class="n">tests</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../../index.html"> + <img class="logo" src="../../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../src/index.html">Source-Code</a></li> +</ul> + + 
<h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../../index.html">Overview</a> + <ul> + <li><a href="../../../index.html">Module code</a> + <ul> + <li><a href="../../search.html">searx.search</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/search/processors/online_dictionary.html b/_modules/searx/search/processors/online_dictionary.html new file mode 100644 index 000000000..8430d4b31 --- /dev/null +++ b/_modules/searx/search/processors/online_dictionary.html @@ -0,0 +1,174 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.search.processors.online_dictionary — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../../genindex.html" /> + <link rel="search" title="Search" href="../../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../../search.html" 
accesskey="U">searx.search</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.search.processors.online_dictionary</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.search.processors.online_dictionary</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Processors for engine-type: ``online_dictionary``</span> + +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">re</span> + +<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">is_valid_lang</span> +<span class="kn">from</span> <span class="nn">.online</span> <span class="kn">import</span> <span class="n">OnlineProcessor</span> + +<span class="n">parser_re</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'.*?([a-z]+)-([a-z]+) (.+)$'</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">I</span><span class="p">)</span> + + +<div class="viewcode-block" id="OnlineDictionaryProcessor"> +<a class="viewcode-back" href="../../../../src/searx.search.processors.html#searx.search.processors.online_dictionary.OnlineDictionaryProcessor">[docs]</a> +<span class="k">class</span> <span class="nc">OnlineDictionaryProcessor</span><span class="p">(</span><span class="n">OnlineProcessor</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Processor class used by ``online_dictionary`` engines."""</span> + + <span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'online_dictionary'</span> + +<div class="viewcode-block" id="OnlineDictionaryProcessor.get_params"> +<a class="viewcode-back" 
href="../../../../src/searx.search.processors.html#searx.search.processors.online_dictionary.OnlineDictionaryProcessor.get_params">[docs]</a> + <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">search_query</span><span class="p">,</span> <span class="n">engine_category</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns a set of :ref:`request params <engine request online_dictionary>` or</span> +<span class="sd"> ``None`` if search query does not match to :py:obj:`parser_re`.</span> +<span class="sd"> """</span> + <span class="n">params</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">search_query</span><span class="p">,</span> <span class="n">engine_category</span><span class="p">)</span> + <span class="k">if</span> <span class="n">params</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">m</span> <span class="o">=</span> <span class="n">parser_re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">search_query</span><span class="o">.</span><span class="n">query</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">m</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">from_lang</span><span class="p">,</span> <span class="n">to_lang</span><span class="p">,</span> <span class="n">query</span> <span class="o">=</span> <span class="n">m</span><span class="o">.</span><span class="n">groups</span><span class="p">()</span> + + <span class="n">from_lang</span> <span class="o">=</span> <span class="n">is_valid_lang</span><span 
class="p">(</span><span class="n">from_lang</span><span class="p">)</span> + <span class="n">to_lang</span> <span class="o">=</span> <span class="n">is_valid_lang</span><span class="p">(</span><span class="n">to_lang</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">from_lang</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">to_lang</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'from_lang'</span><span class="p">]</span> <span class="o">=</span> <span class="n">from_lang</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'to_lang'</span><span class="p">]</span> <span class="o">=</span> <span class="n">to_lang</span> + <span class="n">params</span><span class="p">[</span><span class="s1">'query'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query</span> + + <span class="k">return</span> <span class="n">params</span></div> + + + <span class="k">def</span> <span class="nf">get_default_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="n">tests</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="k">if</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">engine</span><span class="p">,</span> <span class="s1">'paging'</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span> + <span class="n">tests</span><span class="p">[</span><span class="s1">'translation_paging'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'matrix'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="s1">'en-es house'</span><span class="p">,</span> 
<span class="s1">'pageno'</span><span class="p">:</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">)},</span> + <span class="s1">'result_container'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'not_empty'</span><span class="p">,</span> <span class="p">(</span><span class="s1">'one_title_contains'</span><span class="p">,</span> <span class="s1">'house'</span><span class="p">)],</span> + <span class="s1">'test'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'unique_results'</span><span class="p">],</span> + <span class="p">}</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">tests</span><span class="p">[</span><span class="s1">'translation'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'matrix'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="s1">'en-es house'</span><span class="p">},</span> + <span class="s1">'result_container'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'not_empty'</span><span class="p">,</span> <span class="p">(</span><span class="s1">'one_title_contains'</span><span class="p">,</span> <span class="s1">'house'</span><span class="p">)],</span> + <span class="p">}</span> + + <span class="k">return</span> <span class="n">tests</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../../index.html"> + <img class="logo" src="../../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../../index.html">Table of Contents</a></h3> +<ul> +<li 
class="toctree-l1"><a class="reference internal" href="../../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../../index.html">Overview</a> + <ul> + <li><a href="../../../index.html">Module code</a> + <ul> + <li><a href="../../search.html">searx.search</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/search/processors/online_url_search.html b/_modules/searx/search/processors/online_url_search.html new file mode 100644 index 000000000..4961f5c68 --- /dev/null +++ b/_modules/searx/search/processors/online_url_search.html @@ -0,0 +1,159 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.search.processors.online_url_search — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../../../_static/searxng.css?v=52e4ff28" /> + <script src="../../../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../../../genindex.html" /> + <link rel="search" title="Search" href="../../../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../../search.html" 
accesskey="U">searx.search</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.search.processors.online_url_search</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.search.processors.online_url_search</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Processors for engine-type: ``online_url_search``</span> + +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">re</span> +<span class="kn">from</span> <span class="nn">.online</span> <span class="kn">import</span> <span class="n">OnlineProcessor</span> + +<span class="n">re_search_urls</span> <span class="o">=</span> <span class="p">{</span> + <span class="s1">'http'</span><span class="p">:</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'https?:\/\/[^ ]*'</span><span class="p">),</span> + <span class="s1">'ftp'</span><span class="p">:</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'ftps?:\/\/[^ ]*'</span><span class="p">),</span> + <span class="s1">'data:image'</span><span class="p">:</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'data:image/[^; ]*;base64,[^ ]*'</span><span class="p">),</span> +<span class="p">}</span> + + +<div class="viewcode-block" id="OnlineUrlSearchProcessor"> +<a class="viewcode-back" href="../../../../src/searx.search.processors.html#searx.search.processors.online_url_search.OnlineUrlSearchProcessor">[docs]</a> +<span class="k">class</span> <span class="nc">OnlineUrlSearchProcessor</span><span class="p">(</span><span class="n">OnlineProcessor</span><span 
class="p">):</span> +<span class="w"> </span><span class="sd">"""Processor class used by ``online_url_search`` engines."""</span> + + <span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'online_url_search'</span> + +<div class="viewcode-block" id="OnlineUrlSearchProcessor.get_params"> +<a class="viewcode-back" href="../../../../src/searx.search.processors.html#searx.search.processors.online_url_search.OnlineUrlSearchProcessor.get_params">[docs]</a> + <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">search_query</span><span class="p">,</span> <span class="n">engine_category</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns a set of :ref:`request params <engine request online>` or ``None`` if</span> +<span class="sd"> search query does not match to :py:obj:`re_search_urls`.</span> +<span class="sd"> """</span> + + <span class="n">params</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">search_query</span><span class="p">,</span> <span class="n">engine_category</span><span class="p">)</span> + <span class="k">if</span> <span class="n">params</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">url_match</span> <span class="o">=</span> <span class="kc">False</span> + <span class="n">search_urls</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">re_search_urls</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="n">m</span> <span class="o">=</span> <span 
class="n">v</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">search_query</span><span class="o">.</span><span class="n">query</span><span class="p">)</span> + <span class="n">v</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">if</span> <span class="n">m</span><span class="p">:</span> + <span class="n">url_match</span> <span class="o">=</span> <span class="kc">True</span> + <span class="n">v</span> <span class="o">=</span> <span class="n">m</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">search_urls</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">v</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">url_match</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + + <span class="n">params</span><span class="p">[</span><span class="s1">'search_urls'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_urls</span> + <span class="k">return</span> <span class="n">params</span></div> +</div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../../../index.html"> + <img class="logo" src="../../../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../admin/index.html">Administrator 
documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../../../index.html">Overview</a> + <ul> + <li><a href="../../../index.html">Module code</a> + <ul> + <li><a href="../../search.html">searx.search</a> + + + </ul> + </li></ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/settings_loader.html b/_modules/searx/settings_loader.html new file mode 100644 index 000000000..e7e1fb470 --- /dev/null +++ b/_modules/searx/settings_loader.html @@ -0,0 +1,337 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.settings_loader — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.settings_loader</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + 
<h1>Source code for searx.settings_loader</h1><div class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Implementations for loading configurations from YAML files. This essentially</span> +<span class="sd">includes the configuration of the (:ref:`SearXNG appl <searxng settings.yml>`)</span> +<span class="sd">server. The default configuration for the application server is loaded from the</span> +<span class="sd">:origin:`DEFAULT_SETTINGS_FILE <searx/settings.yml>`. This default</span> +<span class="sd">configuration can be completely replaced or :ref:`customized individually</span> +<span class="sd"><use_default_settings.yml>` and the ``SEARXNG_SETTINGS_PATH`` environment</span> +<span class="sd">variable can be used to set the location from which the local customizations are</span> +<span class="sd">to be loaded. The rules used for this can be found in the</span> +<span class="sd">:py:obj:`get_user_cfg_folder` function.</span> + +<span class="sd">- By default, local configurations are expected in folder ``/etc/searxng`` from</span> +<span class="sd"> where applications can load them with the :py:obj:`get_yaml_cfg` function.</span> + +<span class="sd">- By default, customized :ref:`SearXNG appl <searxng settings.yml>` settings are</span> +<span class="sd"> expected in a file named ``settings.yml``.</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="kn">import</span> <span class="nn">os.path</span> +<span class="kn">from</span> <span class="nn">collections.abc</span> <span class="kn">import</span> <span class="n">Mapping</span> +<span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">filterfalse</span> +<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span 
class="n">Path</span> + +<span class="kn">import</span> <span class="nn">yaml</span> + +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxSettingsException</span> + +<span class="n">searx_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="vm">__file__</span><span class="p">))</span> + +<span class="n">SETTINGS_YAML</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="s2">"settings.yml"</span><span class="p">)</span> +<span class="n">DEFAULT_SETTINGS_FILE</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="n">searx_dir</span><span class="p">)</span> <span class="o">/</span> <span class="n">SETTINGS_YAML</span> +<span class="sd">"""The :origin:`searx/settings.yml` file with all the default settings."""</span> + + +<div class="viewcode-block" id="load_yaml"> +<a class="viewcode-back" href="../../src/searx.settings.html#searx.settings_loader.load_yaml">[docs]</a> +<span class="k">def</span> <span class="nf">load_yaml</span><span class="p">(</span><span class="n">file_name</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="n">Path</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Load YAML config from a file."""</span> + <span class="k">try</span><span class="p">:</span> + <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">file_name</span><span class="p">,</span> <span class="s1">'r'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span 
class="s1">'utf-8'</span><span class="p">)</span> <span class="k">as</span> <span class="n">settings_yaml</span><span class="p">:</span> + <span class="k">return</span> <span class="n">yaml</span><span class="o">.</span><span class="n">safe_load</span><span class="p">(</span><span class="n">settings_yaml</span><span class="p">)</span> <span class="ow">or</span> <span class="p">{}</span> + <span class="k">except</span> <span class="ne">IOError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="k">raise</span> <span class="n">SearxSettingsException</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">file_name</span><span class="p">))</span> <span class="kn">from</span> <span class="nn">e</span> + <span class="k">except</span> <span class="n">yaml</span><span class="o">.</span><span class="n">YAMLError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="k">raise</span> <span class="n">SearxSettingsException</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">file_name</span><span class="p">))</span> <span class="kn">from</span> <span class="nn">e</span></div> + + + +<div class="viewcode-block" id="get_yaml_cfg"> +<a class="viewcode-back" href="../../src/searx.settings.html#searx.settings_loader.get_yaml_cfg">[docs]</a> +<span class="k">def</span> <span class="nf">get_yaml_cfg</span><span class="p">(</span><span class="n">file_name</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="n">Path</span><span class="p">)</span> <span class="o">-></span> <span class="nb">dict</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Shortcut to load a YAML config from a file, located in the</span> + +<span class="sd"> - 
:py:obj:`get_user_cfg_folder` or</span> +<span class="sd"> - in the ``searx`` folder of the SearXNG installation</span> +<span class="sd"> """</span> + + <span class="n">folder</span> <span class="o">=</span> <span class="n">get_user_cfg_folder</span><span class="p">()</span> <span class="ow">or</span> <span class="n">Path</span><span class="p">(</span><span class="n">searx_dir</span><span class="p">)</span> + <span class="n">fname</span> <span class="o">=</span> <span class="n">folder</span> <span class="o">/</span> <span class="n">file_name</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">fname</span><span class="o">.</span><span class="n">is_file</span><span class="p">():</span> + <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"File </span><span class="si">{</span><span class="n">fname</span><span class="si">}</span><span class="s2"> does not exist!"</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">load_yaml</span><span class="p">(</span><span class="n">fname</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="get_user_cfg_folder"> +<a class="viewcode-back" href="../../src/searx.settings.html#searx.settings_loader.get_user_cfg_folder">[docs]</a> +<span class="k">def</span> <span class="nf">get_user_cfg_folder</span><span class="p">()</span> <span class="o">-></span> <span class="n">Path</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Returns folder where the local configurations are located.</span> + +<span class="sd"> 1. If the ``SEARXNG_SETTINGS_PATH`` environment is set and points to a</span> +<span class="sd"> folder (e.g. ``/etc/mysxng/``), all local configurations are expected in</span> +<span class="sd"> this folder. 
The settings of the :ref:`SearXNG appl <searxng</span> +<span class="sd"> settings.yml>` then expected in ``settings.yml``</span> +<span class="sd"> (e.g. ``/etc/mysxng/settings.yml``).</span> + +<span class="sd"> 2. If the ``SEARXNG_SETTINGS_PATH`` environment is set and points to a file</span> +<span class="sd"> (e.g. ``/etc/mysxng/myinstance.yml``), this file contains the settings of</span> +<span class="sd"> the :ref:`SearXNG appl <searxng settings.yml>` and the folder</span> +<span class="sd"> (e.g. ``/etc/mysxng/``) is used for all other configurations.</span> + +<span class="sd"> This type (``SEARXNG_SETTINGS_PATH`` points to a file) is suitable for</span> +<span class="sd"> use cases in which different profiles of the :ref:`SearXNG appl <searxng</span> +<span class="sd"> settings.yml>` are to be managed, such as in test scenarios.</span> + +<span class="sd"> 3. If folder ``/etc/searxng`` exists, it is used.</span> + +<span class="sd"> In case none of the above path exists, ``None`` is returned. 
In case of</span> +<span class="sd"> environment ``SEARXNG_SETTINGS_PATH`` is set, but the (folder or file) does</span> +<span class="sd"> not exists, a :py:obj:`EnvironmentError` is raised.</span> + +<span class="sd"> """</span> + + <span class="n">folder</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">settings_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"SEARXNG_SETTINGS_PATH"</span><span class="p">)</span> + + <span class="c1"># Disable default /etc/searxng is intended exclusively for internal testing purposes</span> + <span class="c1"># and is therefore not documented!</span> + <span class="n">disable_etc</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'SEARXNG_DISABLE_ETC_SETTINGS'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'1'</span><span class="p">,</span> <span class="s1">'true'</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">settings_path</span><span class="p">:</span> + <span class="c1"># rule 1. 
and 2.</span> + <span class="n">settings_path</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="n">settings_path</span><span class="p">)</span> + <span class="k">if</span> <span class="n">settings_path</span><span class="o">.</span><span class="n">is_dir</span><span class="p">():</span> + <span class="n">folder</span> <span class="o">=</span> <span class="n">settings_path</span> + <span class="k">elif</span> <span class="n">settings_path</span><span class="o">.</span><span class="n">is_file</span><span class="p">():</span> + <span class="n">folder</span> <span class="o">=</span> <span class="n">settings_path</span><span class="o">.</span><span class="n">parent</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">EnvironmentError</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">settings_path</span><span class="si">}</span><span class="s2"> not exists!"</span><span class="p">,</span> <span class="n">settings_path</span><span class="p">)</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">folder</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">disable_etc</span><span class="p">:</span> + <span class="c1"># default: rule 3.</span> + <span class="n">folder</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="s2">"/etc/searxng"</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">folder</span><span class="o">.</span><span class="n">is_dir</span><span class="p">():</span> + <span class="n">folder</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="k">return</span> <span class="n">folder</span></div> + + + +<span class="k">def</span> <span class="nf">update_dict</span><span 
class="p">(</span><span class="n">default_dict</span><span class="p">,</span> <span class="n">user_dict</span><span class="p">):</span> + <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">user_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">Mapping</span><span class="p">):</span> + <span class="n">default_dict</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">update_dict</span><span class="p">(</span><span class="n">default_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="p">{}),</span> <span class="n">v</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">default_dict</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">v</span> + <span class="k">return</span> <span class="n">default_dict</span> + + +<span class="k">def</span> <span class="nf">update_settings</span><span class="p">(</span><span class="n">default_settings</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">user_settings</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span> + <span class="c1"># pylint: disable=too-many-branches</span> + + <span class="c1"># merge everything except the engines</span> + <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">user_settings</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span 
class="k">if</span> <span class="n">k</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'use_default_settings'</span><span class="p">,</span> <span class="s1">'engines'</span><span class="p">):</span> + <span class="k">if</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">default_settings</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">Mapping</span><span class="p">):</span> + <span class="n">update_dict</span><span class="p">(</span><span class="n">default_settings</span><span class="p">[</span><span class="n">k</span><span class="p">],</span> <span class="n">v</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">default_settings</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">v</span> + + <span class="n">categories_as_tabs</span> <span class="o">=</span> <span class="n">user_settings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'categories_as_tabs'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">categories_as_tabs</span><span class="p">:</span> + <span class="n">default_settings</span><span class="p">[</span><span class="s1">'categories_as_tabs'</span><span class="p">]</span> <span class="o">=</span> <span class="n">categories_as_tabs</span> + + <span class="c1"># parse the engines</span> + <span class="n">remove_engines</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">keep_only_engines</span> <span class="o">=</span> <span class="kc">None</span> + <span class="n">use_default_settings</span> <span class="o">=</span> <span class="n">user_settings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span 
class="s1">'use_default_settings'</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">use_default_settings</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> + <span class="n">remove_engines</span> <span class="o">=</span> <span class="n">use_default_settings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'engines'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'remove'</span><span class="p">)</span> + <span class="n">keep_only_engines</span> <span class="o">=</span> <span class="n">use_default_settings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'engines'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'keep_only'</span><span class="p">)</span> + + <span class="k">if</span> <span class="s1">'engines'</span> <span class="ow">in</span> <span class="n">user_settings</span> <span class="ow">or</span> <span class="n">remove_engines</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">keep_only_engines</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">engines</span> <span class="o">=</span> <span class="n">default_settings</span><span class="p">[</span><span class="s1">'engines'</span><span class="p">]</span> + + <span class="c1"># parse "use_default_settings.engines.remove"</span> + <span class="k">if</span> <span class="n">remove_engines</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">engines</span> <span 
class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">filterfalse</span><span class="p">(</span><span class="k">lambda</span> <span class="n">engine</span><span class="p">:</span> <span class="p">(</span><span class="n">engine</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'name'</span><span class="p">))</span> <span class="ow">in</span> <span class="n">remove_engines</span><span class="p">,</span> <span class="n">engines</span><span class="p">))</span> + + <span class="c1"># parse "use_default_settings.engines.keep_only"</span> + <span class="k">if</span> <span class="n">keep_only_engines</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">engines</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">engine</span><span class="p">:</span> <span class="p">(</span><span class="n">engine</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'name'</span><span class="p">))</span> <span class="ow">in</span> <span class="n">keep_only_engines</span><span class="p">,</span> <span class="n">engines</span><span class="p">))</span> + + <span class="c1"># parse "engines"</span> + <span class="n">user_engines</span> <span class="o">=</span> <span class="n">user_settings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'engines'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">user_engines</span><span class="p">:</span> + <span class="n">engines_dict</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">((</span><span class="n">definition</span><span class="p">[</span><span class="s1">'name'</span><span class="p">],</span> <span 
class="n">definition</span><span class="p">)</span> <span class="k">for</span> <span class="n">definition</span> <span class="ow">in</span> <span class="n">engines</span><span class="p">)</span> + <span class="k">for</span> <span class="n">user_engine</span> <span class="ow">in</span> <span class="n">user_engines</span><span class="p">:</span> + <span class="n">default_engine</span> <span class="o">=</span> <span class="n">engines_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">user_engine</span><span class="p">[</span><span class="s1">'name'</span><span class="p">])</span> + <span class="k">if</span> <span class="n">default_engine</span><span class="p">:</span> + <span class="n">update_dict</span><span class="p">(</span><span class="n">default_engine</span><span class="p">,</span> <span class="n">user_engine</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">engines</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">user_engine</span><span class="p">)</span> + + <span class="c1"># store the result</span> + <span class="n">default_settings</span><span class="p">[</span><span class="s1">'engines'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engines</span> + + <span class="k">return</span> <span class="n">default_settings</span> + + +<span class="k">def</span> <span class="nf">is_use_default_settings</span><span class="p">(</span><span class="n">user_settings</span><span class="p">):</span> + + <span class="n">use_default_settings</span> <span class="o">=</span> <span class="n">user_settings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'use_default_settings'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">use_default_settings</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span> + 
<span class="k">return</span> <span class="kc">True</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">use_default_settings</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> + <span class="k">return</span> <span class="kc">True</span> + <span class="k">if</span> <span class="n">use_default_settings</span> <span class="ow">is</span> <span class="kc">False</span> <span class="ow">or</span> <span class="n">use_default_settings</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">False</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Invalid value for use_default_settings'</span><span class="p">)</span> + + +<div class="viewcode-block" id="load_settings"> +<a class="viewcode-back" href="../../src/searx.settings.html#searx.settings_loader.load_settings">[docs]</a> +<span class="k">def</span> <span class="nf">load_settings</span><span class="p">(</span><span class="n">load_user_settings</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">dict</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Function for loading the settings of the SearXNG application</span> +<span class="sd"> (:ref:`settings.yml <searxng settings.yml>`)."""</span> + + <span class="n">msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"load the default settings from </span><span class="si">{</span><span class="n">DEFAULT_SETTINGS_FILE</span><span class="si">}</span><span class="s2">"</span> + <span class="n">cfg</span> <span class="o">=</span> <span class="n">load_yaml</span><span class="p">(</span><span 
class="n">DEFAULT_SETTINGS_FILE</span><span class="p">)</span> + <span class="n">cfg_folder</span> <span class="o">=</span> <span class="n">get_user_cfg_folder</span><span class="p">()</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">load_user_settings</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">cfg_folder</span><span class="p">:</span> + <span class="k">return</span> <span class="n">cfg</span><span class="p">,</span> <span class="n">msg</span> + + <span class="n">settings_yml</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"SEARXNG_SETTINGS_PATH"</span><span class="p">)</span> + <span class="k">if</span> <span class="n">settings_yml</span> <span class="ow">and</span> <span class="n">Path</span><span class="p">(</span><span class="n">settings_yml</span><span class="p">)</span><span class="o">.</span><span class="n">is_file</span><span class="p">():</span> + <span class="c1"># see get_user_cfg_folder() --> SEARXNG_SETTINGS_PATH points to a file</span> + <span class="n">settings_yml</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="n">settings_yml</span><span class="p">)</span><span class="o">.</span><span class="n">name</span> + <span class="k">else</span><span class="p">:</span> + <span class="c1"># see get_user_cfg_folder() --> SEARXNG_SETTINGS_PATH points to a folder</span> + <span class="n">settings_yml</span> <span class="o">=</span> <span class="n">SETTINGS_YAML</span> + + <span class="n">cfg_file</span> <span class="o">=</span> <span class="n">cfg_folder</span> <span class="o">/</span> <span class="n">settings_yml</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">cfg_file</span><span class="o">.</span><span class="n">exists</span><span class="p">():</span> + <span 
class="k">return</span> <span class="n">cfg</span><span class="p">,</span> <span class="n">msg</span> + + <span class="n">msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"load the user settings from </span><span class="si">{</span><span class="n">cfg_file</span><span class="si">}</span><span class="s2">"</span> + <span class="n">user_cfg</span> <span class="o">=</span> <span class="n">load_yaml</span><span class="p">(</span><span class="n">cfg_file</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">is_use_default_settings</span><span class="p">(</span><span class="n">user_cfg</span><span class="p">):</span> + <span class="c1"># the user settings are merged with the default configuration</span> + <span class="n">msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"merge the default settings ( </span><span class="si">{</span><span class="n">DEFAULT_SETTINGS_FILE</span><span class="si">}</span><span class="s2"> ) and the user settings ( </span><span class="si">{</span><span class="n">cfg_file</span><span class="si">}</span><span class="s2"> )"</span> + <span class="n">update_settings</span><span class="p">(</span><span class="n">cfg</span><span class="p">,</span> <span class="n">user_cfg</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">cfg</span> <span class="o">=</span> <span class="n">user_cfg</span> + + <span class="k">return</span> <span class="n">cfg</span><span class="p">,</span> <span class="n">msg</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> 
+<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/sqlitedb.html b/_modules/searx/sqlitedb.html new file mode 100644 index 000000000..4f486bd5c --- /dev/null +++ b/_modules/searx/sqlitedb.html @@ -0,0 +1,454 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.sqlitedb — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.sqlitedb</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.sqlitedb</h1><div 
class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Implementations to make access to SQLite databases a little more convenient.</span> + +<span class="sd">:py:obj:`SQLiteAppl`</span> +<span class="sd"> Abstract class with which DB applications can be implemented.</span> + +<span class="sd">:py:obj:`SQLiteProperties`:</span> +<span class="sd"> Class to manage properties stored in a database.</span> + +<span class="sd">----</span> + +<span class="sd">"""</span> +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="kn">import</span> <span class="nn">sys</span> +<span class="kn">import</span> <span class="nn">re</span> +<span class="kn">import</span> <span class="nn">sqlite3</span> +<span class="kn">import</span> <span class="nn">threading</span> +<span class="kn">import</span> <span class="nn">abc</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">logger</span> + +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'sqlitedb'</span><span class="p">)</span> + + +<div class="viewcode-block" id="SQLiteAppl"> +<a class="viewcode-back" href="../../src/searx.sqlitedb.html#searx.sqlitedb.SQLiteAppl">[docs]</a> +<span class="k">class</span> <span class="nc">SQLiteAppl</span><span class="p">(</span><span class="n">abc</span><span class="o">.</span><span class="n">ABC</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Abstract base class for implementing convenient DB access in SQLite</span> +<span class="sd"> applications. 
In the constructor, a :py:obj:`SQLiteProperties` instance is</span> +<span class="sd"> already aggregated under ``self.properties``."""</span> + + <span class="n">DDL_CREATE_TABLES</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> + + <span class="n">DB_SCHEMA</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span> +<span class="w"> </span><span class="sd">"""As soon as changes are made to the DB schema, the version number must be</span> +<span class="sd"> increased. Changes to the version number require the DB to be recreated (or</span> +<span class="sd"> migrated / if an migration path exists and is implemented)."""</span> + + <span class="n">SQLITE_THREADING_MODE</span> <span class="o">=</span> <span class="p">{</span> + <span class="mi">0</span><span class="p">:</span> <span class="s2">"single-thread"</span><span class="p">,</span> + <span class="mi">1</span><span class="p">:</span> <span class="s2">"multi-thread"</span><span class="p">,</span> + <span class="mi">3</span><span class="p">:</span> <span class="s2">"serialized"</span><span class="p">}[</span><span class="n">sqlite3</span><span class="o">.</span><span class="n">threadsafety</span><span class="p">]</span> <span class="c1"># fmt:skip</span> +<span class="w"> </span><span class="sd">"""Threading mode of the SQLite library. Depends on the options used at</span> +<span class="sd"> compile time and is different for different distributions and architectures.</span> + +<span class="sd"> Possible values are 0:``single-thread``, 1:``multi-thread``,</span> +<span class="sd"> 3:``serialized`` (see :py:obj:`sqlite3.threadsafety`). 
Pre- Python 3.11</span> +<span class="sd"> this value was hard coded to 1.</span> + +<span class="sd"> Depending on this value, optimizations are made, e.g. in “serialized” mode</span> +<span class="sd"> it is not necessary to create a separate DB connector for each thread.</span> +<span class="sd"> """</span> + + <span class="n">SQLITE_JOURNAL_MODE</span> <span class="o">=</span> <span class="s2">"WAL"</span> + <span class="n">SQLITE_CONNECT_ARGS</span> <span class="o">=</span> <span class="p">{</span> + <span class="c1"># "timeout": 5.0,</span> + <span class="c1"># "detect_types": 0,</span> + <span class="s2">"check_same_thread"</span><span class="p">:</span> <span class="nb">bool</span><span class="p">(</span><span class="n">SQLITE_THREADING_MODE</span> <span class="o">!=</span> <span class="s2">"serialized"</span><span class="p">),</span> + <span class="s2">"cached_statements"</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> <span class="c1"># https://github.com/python/cpython/issues/118172</span> + <span class="c1"># "uri": False,</span> + <span class="s2">"autocommit"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> + <span class="p">}</span> <span class="c1"># fmt:skip</span> +<span class="w"> </span><span class="sd">"""Connection arguments (:py:obj:`sqlite3.connect`)</span> + +<span class="sd"> ``check_same_thread``:</span> +<span class="sd"> Is disabled by default when :py:obj:`SQLITE_THREADING_MODE` is</span> +<span class="sd"> ``serialized``. The check is more of a hindrance in this case because it</span> +<span class="sd"> would prevent a DB connector from being used in multiple threads.</span> + +<span class="sd"> ``autocommit``:</span> +<span class="sd"> Is disabled by default. Note: autocommit option has been added in Python</span> +<span class="sd"> 3.12.</span> + +<span class="sd"> ``cached_statements``:</span> +<span class="sd"> Is set to ``0`` by default. 
Note: Python 3.12+ fetch result are not</span> +<span class="sd"> consistent in multi-threading application and causing an API misuse error.</span> + +<span class="sd"> The multithreading use in SQLiteAppl is intended and supported if</span> +<span class="sd"> threadsafety is set to 3 (aka "serialized"). CPython supports “serialized”</span> +<span class="sd"> from version 3.12 on, but unfortunately only with errors:</span> + +<span class="sd"> - https://github.com/python/cpython/issues/118172</span> +<span class="sd"> - https://github.com/python/cpython/issues/123873</span> + +<span class="sd"> The workaround for SQLite3 multithreading cache inconsistency ist to set</span> +<span class="sd"> option ``cached_statements`` to ``0`` by default.</span> +<span class="sd"> """</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">db_url</span><span class="p">):</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">db_url</span> <span class="o">=</span> <span class="n">db_url</span> + <span class="bp">self</span><span class="o">.</span><span class="n">properties</span> <span class="o">=</span> <span class="n">SQLiteProperties</span><span class="p">(</span><span class="n">db_url</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">thread_local</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">local</span><span class="p">()</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_init_done</span> <span class="o">=</span> <span class="kc">False</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_compatibility</span><span class="p">()</span> + + <span class="k">def</span> <span class="nf">_compatibility</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + + <span 
class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLITE_THREADING_MODE</span> <span class="o">==</span> <span class="s2">"serialized"</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_DB</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">msg</span> <span class="o">=</span> <span class="p">(</span> + <span class="sa">f</span><span class="s2">"SQLite library is compiled with </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">SQLITE_THREADING_MODE</span><span class="si">}</span><span class="s2"> mode,"</span> + <span class="s2">" read https://docs.python.org/3/library/sqlite3.html#sqlite3.threadsafety"</span> + <span class="p">)</span> + <span class="k">if</span> <span class="n">threading</span><span class="o">.</span><span class="n">active_count</span><span class="p">()</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span> + + <span class="k">if</span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">sqlite_version_info</span> <span class="o"><=</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">35</span><span class="p">):</span> + <span class="c1"># See "Generalize UPSERT:" in https://sqlite.org/releaselog/3_35_0.html</span> + <span class="n">logger</span><span class="o">.</span><span class="n">critical</span><span class="p">(</span> + <span class="s2">"SQLite runtime library version 
</span><span class="si">%s</span><span class="s2"> is not supported (require >= 3.35)"</span><span class="p">,</span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">sqlite_version</span> + <span class="p">)</span> + +<div class="viewcode-block" id="SQLiteAppl.connect"> +<a class="viewcode-back" href="../../src/searx.sqlitedb.html#searx.sqlitedb.SQLiteAppl.connect">[docs]</a> + <span class="k">def</span> <span class="nf">connect</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">Connection</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Creates a new DB connection (:py:obj:`SQLITE_CONNECT_ARGS`). If not</span> +<span class="sd"> already done, the DB schema is set up</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">version_info</span> <span class="o"><</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">12</span><span class="p">):</span> + <span class="c1"># Prior Python 3.12 there is no "autocommit" option</span> + <span class="bp">self</span><span class="o">.</span><span class="n">SQLITE_CONNECT_ARGS</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">"autocommit"</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">init</span><span class="p">()</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"</span><span class="si">%s</span><span class="s2">: connect to DB: </span><span class="si">%s</span><span class="s2"> // </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="bp">self</span><span 
class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">db_url</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">SQLITE_CONNECT_ARGS</span><span class="p">)</span> + <span class="n">conn</span> <span class="o">=</span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">Connection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">db_url</span><span class="p">,</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">SQLITE_CONNECT_ARGS</span><span class="p">)</span> <span class="c1"># type: ignore</span> + <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="sa">f</span><span class="s2">"PRAGMA journal_mode=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">SQLITE_JOURNAL_MODE</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">register_functions</span><span class="p">(</span><span class="n">conn</span><span class="p">)</span> + <span class="k">return</span> <span class="n">conn</span></div> + + +<div class="viewcode-block" id="SQLiteAppl.register_functions"> +<a class="viewcode-back" href="../../src/searx.sqlitedb.html#searx.sqlitedb.SQLiteAppl.register_functions">[docs]</a> + <span class="k">def</span> <span class="nf">register_functions</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">conn</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Create user-defined_ SQL functions.</span> + +<span class="sd"> ``REGEXP(<pattern>, <field>)`` : 0 | 1</span> +<span class="sd"> `re.search`_ 
returns (int) 1 for a match and 0 for none match of</span> +<span class="sd"> ``<pattern>`` in ``<field>``.</span> + +<span class="sd"> .. code:: sql</span> + +<span class="sd"> SELECT '12' AS field WHERE REGEXP('^[0-9][0-9]$', field)</span> +<span class="sd"> -- 12</span> + +<span class="sd"> SELECT REGEXP('[0-9][0-9]', 'X12Y')</span> +<span class="sd"> -- 1</span> +<span class="sd"> SELECT REGEXP('[0-9][0-9]', 'X1Y')</span> +<span class="sd"> -- 0</span> + +<span class="sd"> .. _user-defined: https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.create_function</span> +<span class="sd"> .. _deterministic: https://sqlite.org/deterministic.html</span> +<span class="sd"> .. _re.search: https://docs.python.org/3/library/re.html#re.search</span> +<span class="sd"> """</span> + + <span class="n">conn</span><span class="o">.</span><span class="n">create_function</span><span class="p">(</span><span class="s1">'regexp'</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> <span class="k">else</span> <span class="mi">0</span><span class="p">,</span> <span class="n">deterministic</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div> + + + <span class="nd">@property</span> + <span class="k">def</span> <span class="nf">DB</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">Connection</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Provides a DB connection. 
The connection is a *singleton* and</span> +<span class="sd"> therefore well suited for read access. If</span> +<span class="sd"> :py:obj:`SQLITE_THREADING_MODE` is ``serialized`` only one DB connection</span> +<span class="sd"> is created for all threads.</span> + +<span class="sd"> .. note::</span> + +<span class="sd"> For dedicated `transaction control`_, it is recommended to create a</span> +<span class="sd"> new connection (:py:obj:`SQLiteAppl.connect`).</span> + +<span class="sd"> .. _transaction control:</span> +<span class="sd"> https://docs.python.org/3/library/sqlite3.html#sqlite3-controlling-transactions</span> +<span class="sd"> """</span> + + <span class="k">if</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">thread_local</span><span class="p">,</span> <span class="s1">'DB'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">thread_local</span><span class="o">.</span><span class="n">DB</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">connect</span><span class="p">()</span> + + <span class="c1"># Theoretically it is possible to reuse the DB cursor across threads as</span> + <span class="c1"># of Python 3.12, in practice the threading of the cursor seems to me to</span> + <span class="c1"># be so faulty that I prefer to establish one connection per thread</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">thread_local</span><span class="o">.</span><span class="n">DB</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">thread_local</span><span class="o">.</span><span class="n">DB</span> + + 
<span class="c1"># In "serialized" mode, SQLite can be safely used by multiple threads</span> + <span class="c1"># with no restriction.</span> + <span class="c1">#</span> + <span class="c1"># if self.SQLITE_THREADING_MODE != "serialized":</span> + <span class="c1"># if getattr(self.thread_local, 'DB', None) is None:</span> + <span class="c1"># self.thread_local.DB = self.connect()</span> + <span class="c1"># return self.thread_local.DB</span> + <span class="c1">#</span> + <span class="c1"># if self._DB is None:</span> + <span class="c1"># self._DB = self.connect() # pylint: disable=attribute-defined-outside-init</span> + <span class="c1"># return self._DB</span> + +<div class="viewcode-block" id="SQLiteAppl.init"> +<a class="viewcode-back" href="../../src/searx.sqlitedb.html#searx.sqlitedb.SQLiteAppl.init">[docs]</a> + <span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Initializes the DB schema and properties, is only executed once even</span> +<span class="sd"> if called several times."""</span> + + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_init_done</span><span class="p">:</span> + <span class="k">return</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_init_done</span> <span class="o">=</span> <span class="kc">True</span> + + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"init DB: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">db_url</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">init</span><span class="p">()</span> + <span class="n">ver</span> <span class="o">=</span> 
<span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">(</span><span class="s2">"DB_SCHEMA"</span><span class="p">)</span> + <span class="k">if</span> <span class="n">ver</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">DB</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">create_schema</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">DB</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">ver</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">ver</span><span class="p">)</span> + <span class="k">if</span> <span class="n">ver</span> <span class="o">!=</span> <span class="bp">self</span><span class="o">.</span><span class="n">DB_SCHEMA</span><span class="p">:</span> + <span class="k">raise</span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">DatabaseError</span><span class="p">(</span><span class="s2">"Expected DB schema v</span><span class="si">%s</span><span class="s2">, DB schema is v</span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">DB_SCHEMA</span><span class="p">,</span> <span class="n">ver</span><span class="p">))</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"DB_SCHEMA = </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">ver</span><span class="p">)</span></div> + + + <span class="k">def</span> <span 
class="nf">create_schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">conn</span><span class="p">):</span> + + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"create schema .."</span><span class="p">)</span> + <span class="k">with</span> <span class="n">conn</span><span class="p">:</span> + <span class="k">for</span> <span class="n">table_name</span><span class="p">,</span> <span class="n">sql</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">DDL_CREATE_TABLES</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> + <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">sql</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Table </span><span class="si">{</span><span class="n">table_name</span><span class="si">}</span><span class="s2"> created"</span><span class="p">,</span> <span class="n">table_name</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">"DB_SCHEMA"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">DB_SCHEMA</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="s2">"LAST_MAINTENANCE"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="SQLiteProperties"> +<a 
class="viewcode-back" href="../../src/searx.sqlitedb.html#searx.sqlitedb.SQLiteProperties">[docs]</a> +<span class="k">class</span> <span class="nc">SQLiteProperties</span><span class="p">(</span><span class="n">SQLiteAppl</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Simple class to manage properties of a DB application in the DB. The</span> +<span class="sd"> object has its own DB connection and transaction area.</span> + +<span class="sd"> .. code:: sql</span> + +<span class="sd"> CREATE TABLE IF NOT EXISTS properties (</span> +<span class="sd"> name TEXT,</span> +<span class="sd"> value TEXT,</span> +<span class="sd"> m_time INTEGER DEFAULT (strftime('%s', 'now')),</span> +<span class="sd"> PRIMARY KEY (name))</span> + +<span class="sd"> """</span> + + <span class="n">SQLITE_JOURNAL_MODE</span> <span class="o">=</span> <span class="s2">"WAL"</span> + + <span class="n">DDL_PROPERTIES</span> <span class="o">=</span> <span class="s2">"""</span><span class="se">\</span> +<span class="s2">CREATE TABLE IF NOT EXISTS properties (</span> +<span class="s2"> name TEXT,</span> +<span class="s2"> value TEXT,</span> +<span class="s2"> m_time INTEGER DEFAULT (strftime('</span><span class="si">%s</span><span class="s2">', 'now')), -- last modified (unix epoch) time in sec.</span> +<span class="s2"> PRIMARY KEY (name))"""</span> + +<span class="w"> </span><span class="sd">"""Table to store properties of the DB application"""</span> + + <span class="n">SQL_GET</span> <span class="o">=</span> <span class="s2">"SELECT value FROM properties WHERE name = ?"</span> + <span class="n">SQL_M_TIME</span> <span class="o">=</span> <span class="s2">"SELECT m_time FROM properties WHERE name = ?"</span> + <span class="n">SQL_SET</span> <span class="o">=</span> <span class="p">(</span> + <span class="s2">"INSERT INTO properties (name, value) VALUES (?, ?)"</span> + <span class="s2">" ON CONFLICT(name) DO UPDATE"</span> + <span class="s2">" SET 
value=excluded.value, m_time=strftime('</span><span class="si">%s</span><span class="s2">', 'now')"</span> + <span class="p">)</span> + <span class="n">SQL_TABLE_EXISTS</span> <span class="o">=</span> <span class="p">(</span> + <span class="s2">"SELECT name FROM sqlite_master"</span> + <span class="s2">" WHERE type='table' AND name='properties'"</span> + <span class="p">)</span> <span class="c1"># fmt:skip</span> + <span class="n">SQLITE_CONNECT_ARGS</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">SQLiteAppl</span><span class="o">.</span><span class="n">SQLITE_CONNECT_ARGS</span><span class="p">)</span> + <span class="n">SQLITE_CONNECT_ARGS</span><span class="p">[</span><span class="s2">"autocommit"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span> <span class="c1"># This option has no effect before Python 3.12</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">db_url</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span> <span class="c1"># pylint: disable=super-init-not-called</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">db_url</span> <span class="o">=</span> <span class="n">db_url</span> + <span class="bp">self</span><span class="o">.</span><span class="n">thread_local</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">local</span><span class="p">()</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_init_done</span> <span class="o">=</span> <span class="kc">False</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_compatibility</span><span class="p">()</span> + +<div class="viewcode-block" id="SQLiteProperties.init"> +<a class="viewcode-back" 
href="../../src/searx.sqlitedb.html#searx.sqlitedb.SQLiteProperties.init">[docs]</a> + <span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Initializes DB schema of the properties in the DB."""</span> + + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_init_done</span><span class="p">:</span> + <span class="k">return</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_init_done</span> <span class="o">=</span> <span class="kc">True</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"init properties of DB: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">db_url</span><span class="p">)</span> + <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">DB</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span> + <span class="n">res</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">SQL_TABLE_EXISTS</span><span class="p">)</span> + <span class="k">if</span> <span class="n">res</span><span class="o">.</span><span class="n">fetchone</span><span class="p">()</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> <span class="c1"># DB schema needs to be be created</span> + <span class="bp">self</span><span class="o">.</span><span class="n">create_schema</span><span class="p">(</span><span class="n">conn</span><span class="p">)</span></div> + + + <span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span 
class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns the value of the property ``name`` or ``default`` if property</span> +<span class="sd"> not exists in DB."""</span> + + <span class="n">res</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">DB</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">SQL_GET</span><span class="p">,</span> <span class="p">(</span><span class="n">name</span><span class="p">,))</span><span class="o">.</span><span class="n">fetchone</span><span class="p">()</span> + <span class="k">if</span> <span class="n">res</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">default</span> + <span class="k">return</span> <span class="n">res</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + +<div class="viewcode-block" id="SQLiteProperties.set"> +<a class="viewcode-back" href="../../src/searx.sqlitedb.html#searx.sqlitedb.SQLiteProperties.set">[docs]</a> + <span class="k">def</span> <span class="nf">set</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Set ``value`` of property ``name`` in DB. 
If property already</span> +<span class="sd"> exists, update the ``m_time`` (and the value)."""</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">DB</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">SQL_SET</span><span class="p">,</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> + + <span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">version_info</span> <span class="o"><=</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">12</span><span class="p">):</span> + <span class="c1"># Prior Python 3.12 there is no "autocommit" option / lets commit</span> + <span class="c1"># explicitely.</span> + <span class="bp">self</span><span class="o">.</span><span class="n">DB</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span></div> + + +<div class="viewcode-block" id="SQLiteProperties.row"> +<a class="viewcode-back" href="../../src/searx.sqlitedb.html#searx.sqlitedb.SQLiteProperties.row">[docs]</a> + <span class="k">def</span> <span class="nf">row</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Returns the DB row of property ``name`` or ``default`` if property</span> +<span class="sd"> not exists in DB."""</span> + + <span class="n">cur</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">DB</span><span class="o">.</span><span class="n">cursor</span><span class="p">()</span> + <span class="n">cur</span><span class="o">.</span><span class="n">execute</span><span 
class="p">(</span><span class="s2">"SELECT * FROM properties WHERE name = ?"</span><span class="p">,</span> <span class="p">(</span><span class="n">name</span><span class="p">,))</span> + <span class="n">res</span> <span class="o">=</span> <span class="n">cur</span><span class="o">.</span><span class="n">fetchone</span><span class="p">()</span> + <span class="k">if</span> <span class="n">res</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">default</span> + <span class="n">col_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">column</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">column</span> <span class="ow">in</span> <span class="n">cur</span><span class="o">.</span><span class="n">description</span><span class="p">]</span> + <span class="k">return</span> <span class="nb">dict</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">col_names</span><span class="p">,</span> <span class="n">res</span><span class="p">))</span></div> + + +<div class="viewcode-block" id="SQLiteProperties.m_time"> +<a class="viewcode-back" href="../../src/searx.sqlitedb.html#searx.sqlitedb.SQLiteProperties.m_time">[docs]</a> + <span class="k">def</span> <span class="nf">m_time</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">default</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Last modification time of this property."""</span> + <span class="n">res</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span 
class="n">DB</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">SQL_M_TIME</span><span class="p">,</span> <span class="p">(</span><span class="n">name</span><span class="p">,))</span><span class="o">.</span><span class="n">fetchone</span><span class="p">()</span> + <span class="k">if</span> <span class="n">res</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">return</span> <span class="n">default</span> + <span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="n">res</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span></div> + + + <span class="k">def</span> <span class="nf">create_schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">conn</span><span class="p">):</span> + <span class="k">with</span> <span class="n">conn</span><span class="p">:</span> + <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">DDL_PROPERTIES</span><span class="p">)</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" 
href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file diff --git a/_modules/searx/utils.html b/_modules/searx/utils.html new file mode 100644 index 000000000..362d1ec67 --- /dev/null +++ b/_modules/searx/utils.html @@ -0,0 +1,978 @@ +<!DOCTYPE html> + +<html lang="en" data-content_root="../../"> + <head> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>searx.utils — SearXNG Documentation (2025.1.10+94a0b415e)</title> + <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=4f649999" /> + <link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=52e4ff28" /> + <script src="../../_static/documentation_options.js?v=532e341d"></script> + <script src="../../_static/doctools.js?v=9a2dae69"></script> + <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> + <script data-project="searxng" data-version="2025.1.10+94a0b415e" src="../../_static/describe_version.js?v=fa7f30d0"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> + </head><body> + <div class="related" role="navigation" aria-label="Related"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="../../genindex.html" title="General Index" + accesskey="I">index</a></li> + <li class="right" > + <a href="../../py-modindex.html" title="Python Module Index" + >modules</a> |</li> + <li class="nav-item nav-item-0"><a href="../../index.html">SearXNG Documentation (2025.1.10+94a0b415e)</a> »</li> + <li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> »</li> + <li class="nav-item nav-item-this"><a href="">searx.utils</a></li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <h1>Source code for searx.utils</h1><div 
class="highlight"><pre> +<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span> +<span class="sd">"""Utility functions for the engines</span> + +<span class="sd">"""</span> + +<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span> + +<span class="kn">import</span> <span class="nn">re</span> +<span class="kn">import</span> <span class="nn">importlib</span> +<span class="kn">import</span> <span class="nn">importlib.util</span> +<span class="kn">import</span> <span class="nn">json</span> +<span class="kn">import</span> <span class="nn">types</span> + +<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Set</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">MutableMapping</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">Callable</span> +<span class="kn">from</span> <span class="nn">numbers</span> <span class="kn">import</span> <span class="n">Number</span> +<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">splitext</span><span class="p">,</span> <span class="n">join</span> +<span class="kn">from</span> <span class="nn">random</span> <span class="kn">import</span> <span class="n">choice</span> +<span class="kn">from</span> <span class="nn">html.parser</span> <span class="kn">import</span> <span class="n">HTMLParser</span> +<span class="kn">from</span> <span class="nn">html</span> <span class="kn">import</span> <span class="n">escape</span> +<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span 
class="n">urljoin</span><span class="p">,</span> <span class="n">urlparse</span><span class="p">,</span> <span class="n">parse_qs</span><span class="p">,</span> <span class="n">urlencode</span> +<span class="kn">from</span> <span class="nn">markdown_it</span> <span class="kn">import</span> <span class="n">MarkdownIt</span> + +<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span> +<span class="kn">from</span> <span class="nn">lxml.etree</span> <span class="kn">import</span> <span class="n">ElementBase</span><span class="p">,</span> <span class="n">XPath</span><span class="p">,</span> <span class="n">XPathError</span><span class="p">,</span> <span class="n">XPathSyntaxError</span> + +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">settings</span> +<span class="kn">from</span> <span class="nn">searx.data</span> <span class="kn">import</span> <span class="n">USER_AGENTS</span><span class="p">,</span> <span class="n">data_dir</span> +<span class="kn">from</span> <span class="nn">searx.version</span> <span class="kn">import</span> <span class="n">VERSION_TAG</span> +<span class="kn">from</span> <span class="nn">searx.sxng_locales</span> <span class="kn">import</span> <span class="n">sxng_locales</span> +<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxXPathSyntaxException</span><span class="p">,</span> <span class="n">SearxEngineXPathException</span> +<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">logger</span> + + +<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'utils'</span><span class="p">)</span> + +<span class="n">XPathSpecType</span> <span class="o">=</span> <span 
class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">XPath</span><span class="p">]</span> + +<span class="n">_BLOCKED_TAGS</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'script'</span><span class="p">,</span> <span class="s1">'style'</span><span class="p">)</span> + +<span class="n">_ECMA_UNESCAPE4_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'</span><span class="si">%u</span><span class="s1">([0-9a-fA-F]</span><span class="si">{4}</span><span class="s1">)'</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">UNICODE</span><span class="p">)</span> +<span class="n">_ECMA_UNESCAPE2_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'%([0-9a-fA-F]</span><span class="si">{2}</span><span class="s1">)'</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">UNICODE</span><span class="p">)</span> + +<span class="n">_JS_QUOTE_KEYS_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'([\{\s,])(\w+)(:)'</span><span class="p">)</span> +<span class="n">_JS_VOID_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'void\s+[0-9]+|void\s*\([0-9]+\)'</span><span class="p">)</span> +<span class="n">_JS_DECIMAL_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span 
class="s2">":\s*\."</span><span class="p">)</span> + +<span class="n">_XPATH_CACHE</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">XPath</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> +<span class="n">_LANG_TO_LC_CACHE</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="p">{}</span> + +<span class="n">_FASTTEXT_MODEL</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">"fasttext.FastText._FastText"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> <span class="c1"># type: ignore</span> +<span class="sd">"""fasttext model to predict language of a search term"""</span> + +<span class="n">SEARCH_LANGUAGE_CODES</span> <span class="o">=</span> <span class="nb">frozenset</span><span class="p">([</span><span class="n">searxng_locale</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">searxng_locale</span> <span class="ow">in</span> <span class="n">sxng_locales</span><span class="p">])</span> +<span class="sd">"""Languages supported by most searxng engines (:py:obj:`searx.sxng_locales.sxng_locales`)."""</span> + + +<span class="k">class</span> <span class="nc">_NotSetClass</span><span class="p">:</span> <span class="c1"># pylint: disable=too-few-public-methods</span> +<span class="w"> </span><span class="sd">"""Internal class for this 
module, do not create instance of this class.</span> +<span class="sd"> Replace the None value, allow explicitly pass None as a function argument"""</span> + + +<span class="n">_NOTSET</span> <span class="o">=</span> <span class="n">_NotSetClass</span><span class="p">()</span> + + +<div class="viewcode-block" id="searx_useragent"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.searx_useragent">[docs]</a> +<span class="k">def</span> <span class="nf">searx_useragent</span><span class="p">()</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Return the searx User Agent"""</span> + <span class="k">return</span> <span class="s1">'searx/</span><span class="si">{searx_version}</span><span class="s1"> </span><span class="si">{suffix}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> + <span class="n">searx_version</span><span class="o">=</span><span class="n">VERSION_TAG</span><span class="p">,</span> <span class="n">suffix</span><span class="o">=</span><span class="n">settings</span><span class="p">[</span><span class="s1">'outgoing'</span><span class="p">][</span><span class="s1">'useragent_suffix'</span><span class="p">]</span> + <span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span></div> + + + +<div class="viewcode-block" id="gen_useragent"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.gen_useragent">[docs]</a> +<span class="k">def</span> <span class="nf">gen_useragent</span><span class="p">(</span><span class="n">os_string</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> 
</span><span class="sd">"""Return a random browser User Agent</span> + +<span class="sd"> See searx/data/useragents.json</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="n">USER_AGENTS</span><span class="p">[</span><span class="s1">'ua'</span><span class="p">]</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">os</span><span class="o">=</span><span class="n">os_string</span> <span class="ow">or</span> <span class="n">choice</span><span class="p">(</span><span class="n">USER_AGENTS</span><span class="p">[</span><span class="s1">'os'</span><span class="p">]),</span> <span class="n">version</span><span class="o">=</span><span class="n">choice</span><span class="p">(</span><span class="n">USER_AGENTS</span><span class="p">[</span><span class="s1">'versions'</span><span class="p">]))</span></div> + + + +<span class="k">class</span> <span class="nc">_HTMLTextExtractorException</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Internal exception raised when the HTML is invalid"""</span> + + +<span class="k">class</span> <span class="nc">_HTMLTextExtractor</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Internal class to extract text from HTML"""</span> + + <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="n">HTMLParser</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">result</span> <span class="o">=</span> <span class="p">[]</span> + <span class="bp">self</span><span class="o">.</span><span class="n">tags</span> <span class="o">=</span> <span class="p">[]</span> + + 
<span class="k">def</span> <span class="nf">handle_starttag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tag</span><span class="p">,</span> <span class="n">attrs</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">tags</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tag</span><span class="p">)</span> + <span class="k">if</span> <span class="n">tag</span> <span class="o">==</span> <span class="s1">'br'</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">' '</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">handle_endtag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tag</span><span class="p">):</span> + <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">tags</span><span class="p">:</span> + <span class="k">return</span> + + <span class="k">if</span> <span class="n">tag</span> <span class="o">!=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tags</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]:</span> + <span class="k">raise</span> <span class="n">_HTMLTextExtractorException</span><span class="p">()</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">tags</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span> + + <span class="k">def</span> <span class="nf">is_valid_tag</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span 
class="n">tags</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">tags</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">_BLOCKED_TAGS</span> + + <span class="k">def</span> <span class="nf">handle_data</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span> + <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_valid_tag</span><span class="p">():</span> + <span class="k">return</span> + <span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">data</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">handle_charref</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">):</span> + <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_valid_tag</span><span class="p">():</span> + <span class="k">return</span> + <span class="k">if</span> <span class="n">name</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'x'</span><span class="p">,</span> <span class="s1">'X'</span><span class="p">):</span> + <span class="n">codepoint</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">name</span><span class="p">[</span><span class="mi">1</span><span class="p">:],</span> <span class="mi">16</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">codepoint</span> <span class="o">=</span> <span 
class="nb">int</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">chr</span><span class="p">(</span><span class="n">codepoint</span><span class="p">))</span> + + <span class="k">def</span> <span class="nf">handle_entityref</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">):</span> + <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_valid_tag</span><span class="p">():</span> + <span class="k">return</span> + <span class="c1"># codepoint = htmlentitydefs.name2codepoint[name]</span> + <span class="c1"># self.result.append(chr(codepoint))</span> + <span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">get_text</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> + + <span class="k">def</span> <span class="nf">error</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span> + <span class="c1"># error handle is needed in <py3.10</span> + <span class="c1"># https://github.com/python/cpython/pull/8562/files</span> + <span class="k">raise</span> <span class="ne">AssertionError</span><span 
class="p">(</span><span class="n">message</span><span class="p">)</span> + + +<div class="viewcode-block" id="html_to_text"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.html_to_text">[docs]</a> +<span class="k">def</span> <span class="nf">html_to_text</span><span class="p">(</span><span class="n">html_str</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Extract text from a HTML string</span> + +<span class="sd"> Args:</span> +<span class="sd"> * html_str (str): string HTML</span> + +<span class="sd"> Returns:</span> +<span class="sd"> * str: extracted text</span> + +<span class="sd"> Examples:</span> +<span class="sd"> >>> html_to_text('Example <span id="42">#2</span>')</span> +<span class="sd"> 'Example #2'</span> + +<span class="sd"> >>> html_to_text('<style>.span { color: red; }</style><span>Example</span>')</span> +<span class="sd"> 'Example'</span> + +<span class="sd"> >>> html_to_text(r'regexp: (?<![a-zA-Z]')</span> +<span class="sd"> 'regexp: (?<![a-zA-Z]'</span> +<span class="sd"> """</span> + <span class="n">html_str</span> <span class="o">=</span> <span class="n">html_str</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">,</span> <span class="s1">' '</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="se">\r</span><span class="s1">'</span><span class="p">,</span> <span class="s1">' '</span><span class="p">)</span> + <span class="n">html_str</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">html_str</span><span class="o">.</span><span class="n">split</span><span 
class="p">())</span> + <span class="n">s</span> <span class="o">=</span> <span class="n">_HTMLTextExtractor</span><span class="p">()</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">s</span><span class="o">.</span><span class="n">feed</span><span class="p">(</span><span class="n">html_str</span><span class="p">)</span> + <span class="k">except</span> <span class="ne">AssertionError</span><span class="p">:</span> + <span class="n">s</span> <span class="o">=</span> <span class="n">_HTMLTextExtractor</span><span class="p">()</span> + <span class="n">s</span><span class="o">.</span><span class="n">feed</span><span class="p">(</span><span class="n">escape</span><span class="p">(</span><span class="n">html_str</span><span class="p">,</span> <span class="n">quote</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span> + <span class="k">except</span> <span class="n">_HTMLTextExtractorException</span><span class="p">:</span> + <span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"HTMLTextExtractor: invalid HTML</span><span class="se">\n</span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">html_str</span><span class="p">)</span> + <span class="k">return</span> <span class="n">s</span><span class="o">.</span><span class="n">get_text</span><span class="p">()</span></div> + + + +<div class="viewcode-block" id="markdown_to_text"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.markdown_to_text">[docs]</a> +<span class="k">def</span> <span class="nf">markdown_to_text</span><span class="p">(</span><span class="n">markdown_str</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Extract text from a Markdown string</span> + +<span class="sd"> 
Args:</span> +<span class="sd"> * markdown_str (str): string Markdown</span> + +<span class="sd"> Returns:</span> +<span class="sd"> * str: extracted text</span> + +<span class="sd"> Examples:</span> +<span class="sd"> >>> markdown_to_text('[example](https://example.com)')</span> +<span class="sd"> 'example'</span> + +<span class="sd"> >>> markdown_to_text('## Headline')</span> +<span class="sd"> 'Headline'</span> +<span class="sd"> """</span> + + <span class="n">html_str</span> <span class="o">=</span> <span class="p">(</span> + <span class="n">MarkdownIt</span><span class="p">(</span><span class="s2">"commonmark"</span><span class="p">,</span> <span class="p">{</span><span class="s2">"typographer"</span><span class="p">:</span> <span class="kc">True</span><span class="p">})</span><span class="o">.</span><span class="n">enable</span><span class="p">([</span><span class="s2">"replacements"</span><span class="p">,</span> <span class="s2">"smartquotes"</span><span class="p">])</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">markdown_str</span><span class="p">)</span> + <span class="p">)</span> + <span class="k">return</span> <span class="n">html_to_text</span><span class="p">(</span><span class="n">html_str</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="extract_text"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.extract_text">[docs]</a> +<span class="k">def</span> <span class="nf">extract_text</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">,</span> <span class="n">allow_none</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Extract text from a lxml result</span> + 
+<span class="sd"> * if xpath_results is list, extract the text from each result and concat the list</span> +<span class="sd"> * if xpath_results is a xml element, extract all the text node from it</span> +<span class="sd"> ( text_content() method from lxml )</span> +<span class="sd"> * if xpath_results is a string element, then it's already done</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span> + <span class="c1"># it's list of result : concat everything using recursive call</span> + <span class="n">result</span> <span class="o">=</span> <span class="s1">''</span> + <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">xpath_results</span><span class="p">:</span> + <span class="n">result</span> <span class="o">=</span> <span class="n">result</span> <span class="o">+</span> <span class="p">(</span><span class="n">extract_text</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="ow">or</span> <span class="s1">''</span><span class="p">)</span> + <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">,</span> <span class="n">ElementBase</span><span class="p">):</span> + <span class="c1"># it's a element</span> + <span class="n">text</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">tostring</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'unicode'</span><span class="p">,</span> 
<span class="n">method</span><span class="o">=</span><span class="s1">'text'</span><span class="p">,</span> <span class="n">with_tail</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> + <span class="n">text</span> <span class="o">=</span> <span class="n">text</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">,</span> <span class="s1">' '</span><span class="p">)</span> + <span class="k">return</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">text</span><span class="o">.</span><span class="n">split</span><span class="p">())</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">,</span> <span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">Number</span><span class="p">,</span> <span class="nb">bool</span><span class="p">)):</span> + <span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">)</span> + <span class="k">if</span> <span class="n">xpath_results</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">allow_none</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + <span class="k">if</span> <span class="n">xpath_results</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">allow_none</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'extract_text(None, allow_none=False)'</span><span 
class="p">)</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'unsupported type'</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="normalize_url"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.normalize_url">[docs]</a> +<span class="k">def</span> <span class="nf">normalize_url</span><span class="p">(</span><span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">base_url</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Normalize URL: add protocol, join URL with base_url, add trailing slash if there is no path</span> + +<span class="sd"> Args:</span> +<span class="sd"> * url (str): Relative URL</span> +<span class="sd"> * base_url (str): Base URL, it must be an absolute URL.</span> + +<span class="sd"> Example:</span> +<span class="sd"> >>> normalize_url('https://example.com', 'http://example.com/')</span> +<span class="sd"> 'https://example.com/'</span> +<span class="sd"> >>> normalize_url('//example.com', 'http://example.com/')</span> +<span class="sd"> 'http://example.com/'</span> +<span class="sd"> >>> normalize_url('//example.com', 'https://example.com/')</span> +<span class="sd"> 'https://example.com/'</span> +<span class="sd"> >>> normalize_url('/path?a=1', 'https://example.com')</span> +<span class="sd"> 'https://example.com/path?a=1'</span> +<span class="sd"> >>> normalize_url('', 'https://example.com')</span> +<span class="sd"> 'https://example.com/'</span> +<span class="sd"> >>> normalize_url('/test', '/path')</span> +<span class="sd"> raise ValueError</span> + +<span class="sd"> Raises:</span> +<span class="sd"> * lxml.etree.ParserError</span> + +<span class="sd"> Returns:</span> +<span class="sd"> * str: normalized URL</span> 
+<span class="sd"> """</span> + <span class="k">if</span> <span class="n">url</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'//'</span><span class="p">):</span> + <span class="c1"># add http or https to this kind of url //example.com/</span> + <span class="n">parsed_search_url</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">base_url</span><span class="p">)</span> + <span class="n">url</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{0}</span><span class="s1">:</span><span class="si">{1}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">parsed_search_url</span><span class="o">.</span><span class="n">scheme</span> <span class="ow">or</span> <span class="s1">'http'</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span> + <span class="k">elif</span> <span class="n">url</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/'</span><span class="p">):</span> + <span class="c1"># fix relative url to the search engine</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">urljoin</span><span class="p">(</span><span class="n">base_url</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span> + + <span class="c1"># fix relative urls that fall through the crack</span> + <span class="k">if</span> <span class="s1">'://'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">url</span><span class="p">:</span> + <span class="n">url</span> <span class="o">=</span> <span class="n">urljoin</span><span class="p">(</span><span class="n">base_url</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span> + + <span class="n">parsed_url</span> <span class="o">=</span> <span class="n">urlparse</span><span 
class="p">(</span><span class="n">url</span><span class="p">)</span> + + <span class="c1"># add a / at this end of the url if there is no path</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Cannot parse url'</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="p">:</span> + <span class="n">url</span> <span class="o">+=</span> <span class="s1">'/'</span> + + <span class="k">return</span> <span class="n">url</span></div> + + + +<div class="viewcode-block" id="extract_url"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.extract_url">[docs]</a> +<span class="k">def</span> <span class="nf">extract_url</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">,</span> <span class="n">base_url</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Extract and normalize URL from lxml Element</span> + +<span class="sd"> Args:</span> +<span class="sd"> * xpath_results (Union[List[html.HtmlElement], html.HtmlElement]): lxml Element(s)</span> +<span class="sd"> * base_url (str): Base URL</span> + +<span class="sd"> Example:</span> +<span class="sd"> >>> def f(s, search_url):</span> +<span class="sd"> >>> return searx.utils.extract_url(html.fromstring(s), search_url)</span> +<span class="sd"> >>> f('<span id="42">https://example.com</span>', 'http://example.com/')</span> +<span class="sd"> 'https://example.com/'</span> +<span class="sd"> >>> f('https://example.com', 'http://example.com/')</span> +<span class="sd"> 'https://example.com/'</span> +<span class="sd"> >>> 
f('//example.com', 'http://example.com/')</span> +<span class="sd"> 'http://example.com/'</span> +<span class="sd"> >>> f('//example.com', 'https://example.com/')</span> +<span class="sd"> 'https://example.com/'</span> +<span class="sd"> >>> f('/path?a=1', 'https://example.com')</span> +<span class="sd"> 'https://example.com/path?a=1'</span> +<span class="sd"> >>> f('', 'https://example.com')</span> +<span class="sd"> raise lxml.etree.ParserError</span> +<span class="sd"> >>> searx.utils.extract_url([], 'https://example.com')</span> +<span class="sd"> raise ValueError</span> + +<span class="sd"> Raises:</span> +<span class="sd"> * ValueError</span> +<span class="sd"> * lxml.etree.ParserError</span> + +<span class="sd"> Returns:</span> +<span class="sd"> * str: normalized URL</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="n">xpath_results</span> <span class="o">==</span> <span class="p">[]:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Empty url resultset'</span><span class="p">)</span> + + <span class="n">url</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">)</span> + <span class="k">if</span> <span class="n">url</span><span class="p">:</span> + <span class="k">return</span> <span class="n">normalize_url</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">base_url</span><span class="p">)</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'URL not found'</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="dict_subset"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.dict_subset">[docs]</a> +<span class="k">def</span> <span class="nf">dict_subset</span><span class="p">(</span><span class="n">dictionary</span><span 
class="p">:</span> <span class="n">MutableMapping</span><span class="p">,</span> <span class="n">properties</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Extract a subset of a dict</span> + +<span class="sd"> Examples:</span> +<span class="sd"> >>> dict_subset({'A': 'a', 'B': 'b', 'C': 'c'}, ['A', 'C'])</span> +<span class="sd"> {'A': 'a', 'C': 'c'}</span> +<span class="sd"> >>> >> dict_subset({'A': 'a', 'B': 'b', 'C': 'c'}, ['A', 'D'])</span> +<span class="sd"> {'A': 'a'}</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">dictionary</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">properties</span> <span class="k">if</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">dictionary</span><span class="p">}</span></div> + + + +<div class="viewcode-block" id="humanize_bytes"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.humanize_bytes">[docs]</a> +<span class="k">def</span> <span class="nf">humanize_bytes</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="n">precision</span><span class="o">=</span><span class="mi">2</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Determine the *human readable* value of bytes on 1024 base (1KB=1024B)."""</span> + <span class="n">s</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'B '</span><span class="p">,</span> <span class="s1">'KB'</span><span class="p">,</span> <span class="s1">'MB'</span><span class="p">,</span> <span class="s1">'GB'</span><span 
class="p">,</span> <span class="s1">'TB'</span><span class="p">]</span> + + <span class="n">x</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> + <span class="n">p</span> <span class="o">=</span> <span class="mi">0</span> + <span class="k">while</span> <span class="n">size</span> <span class="o">></span> <span class="mi">1024</span> <span class="ow">and</span> <span class="n">p</span> <span class="o"><</span> <span class="n">x</span><span class="p">:</span> + <span class="n">p</span> <span class="o">+=</span> <span class="mi">1</span> + <span class="n">size</span> <span class="o">=</span> <span class="n">size</span> <span class="o">/</span> <span class="mf">1024.0</span> + <span class="k">return</span> <span class="s2">"</span><span class="si">%.*f</span><span class="s2"> </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">precision</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">s</span><span class="p">[</span><span class="n">p</span><span class="p">])</span></div> + + + +<div class="viewcode-block" id="humanize_number"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.humanize_number">[docs]</a> +<span class="k">def</span> <span class="nf">humanize_number</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="n">precision</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Determine the *human readable* value of a decimal number."""</span> + <span class="n">s</span> <span class="o">=</span> <span class="p">[</span><span class="s1">''</span><span class="p">,</span> <span class="s1">'K'</span><span class="p">,</span> <span class="s1">'M'</span><span class="p">,</span> <span class="s1">'B'</span><span class="p">,</span> <span 
class="s1">'T'</span><span class="p">]</span> + + <span class="n">x</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> + <span class="n">p</span> <span class="o">=</span> <span class="mi">0</span> + <span class="k">while</span> <span class="n">size</span> <span class="o">></span> <span class="mi">1000</span> <span class="ow">and</span> <span class="n">p</span> <span class="o"><</span> <span class="n">x</span><span class="p">:</span> + <span class="n">p</span> <span class="o">+=</span> <span class="mi">1</span> + <span class="n">size</span> <span class="o">=</span> <span class="n">size</span> <span class="o">/</span> <span class="mf">1000.0</span> + <span class="k">return</span> <span class="s2">"</span><span class="si">%.*f%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">precision</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">s</span><span class="p">[</span><span class="n">p</span><span class="p">])</span></div> + + + +<div class="viewcode-block" id="convert_str_to_int"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.convert_str_to_int">[docs]</a> +<span class="k">def</span> <span class="nf">convert_str_to_int</span><span class="p">(</span><span class="n">number_str</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Convert number_str to int or 0 if number_str is not a number."""</span> + <span class="k">if</span> <span class="n">number_str</span><span class="o">.</span><span class="n">isdigit</span><span class="p">():</span> + <span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="n">number_str</span><span class="p">)</span> + <span class="k">return</span> <span 
class="mi">0</span></div> + + + +<div class="viewcode-block" id="extr"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.extr">[docs]</a> +<span class="k">def</span> <span class="nf">extr</span><span class="p">(</span><span class="n">txt</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">begin</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">end</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">default</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Extract the string between ``begin`` and ``end`` from ``txt``</span> + +<span class="sd"> :param txt: String to search in</span> +<span class="sd"> :param begin: First string to be searched for</span> +<span class="sd"> :param end: Second string to be searched for after ``begin``</span> +<span class="sd"> :param default: Default value if one of ``begin`` or ``end`` is not</span> +<span class="sd"> found. 
Defaults to an empty string.</span> +<span class="sd"> :return: The string between the two search-strings ``begin`` and ``end``.</span> +<span class="sd"> If at least one of ``begin`` or ``end`` is not found, the value of</span> +<span class="sd"> ``default`` is returned.</span> + +<span class="sd"> Examples:</span> +<span class="sd"> >>> extr("abcde", "a", "e")</span> +<span class="sd"> "bcd"</span> +<span class="sd"> >>> extr("abcde", "a", "z", deafult="nothing")</span> +<span class="sd"> "nothing"</span> + +<span class="sd"> """</span> + + <span class="c1"># From https://github.com/mikf/gallery-dl/blob/master/gallery_dl/text.py#L129</span> + + <span class="k">try</span><span class="p">:</span> + <span class="n">first</span> <span class="o">=</span> <span class="n">txt</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">begin</span><span class="p">)</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">begin</span><span class="p">)</span> + <span class="k">return</span> <span class="n">txt</span><span class="p">[</span><span class="n">first</span> <span class="p">:</span> <span class="n">txt</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">end</span><span class="p">,</span> <span class="n">first</span><span class="p">)]</span> + <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> + <span class="k">return</span> <span class="n">default</span></div> + + + +<div class="viewcode-block" id="int_or_zero"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.int_or_zero">[docs]</a> +<span class="k">def</span> <span class="nf">int_or_zero</span><span class="p">(</span><span class="n">num</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span 
class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Convert num to int or 0. num can be either a str or a list.</span> +<span class="sd"> If num is a list, the first element is converted to int (or return 0 if the list is empty).</span> +<span class="sd"> If num is a str, see convert_str_to_int</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">num</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">num</span><span class="p">)</span> <span class="o"><</span> <span class="mi">1</span><span class="p">:</span> + <span class="k">return</span> <span class="mi">0</span> + <span class="n">num</span> <span class="o">=</span> <span class="n">num</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + <span class="k">return</span> <span class="n">convert_str_to_int</span><span class="p">(</span><span class="n">num</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="is_valid_lang"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.is_valid_lang">[docs]</a> +<span class="k">def</span> <span class="nf">is_valid_lang</span><span class="p">(</span><span class="n">lang</span><span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]:</span> +<span class="w"> </span><span class="sd">"""Return language code and name if lang describe a language.</span> + +<span class="sd"> Examples:</span> +<span class="sd"> >>> is_valid_lang('zz')</span> 
+<span class="sd"> None</span> +<span class="sd"> >>> is_valid_lang('uk')</span> +<span class="sd"> (True, 'uk', 'ukrainian')</span> +<span class="sd"> >>> is_valid_lang(b'uk')</span> +<span class="sd"> (True, 'uk', 'ukrainian')</span> +<span class="sd"> >>> is_valid_lang('en')</span> +<span class="sd"> (True, 'en', 'english')</span> +<span class="sd"> >>> searx.utils.is_valid_lang('Español')</span> +<span class="sd"> (True, 'es', 'spanish')</span> +<span class="sd"> >>> searx.utils.is_valid_lang('Spanish')</span> +<span class="sd"> (True, 'es', 'spanish')</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">lang</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">):</span> + <span class="n">lang</span> <span class="o">=</span> <span class="n">lang</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span> + <span class="n">is_abbr</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">lang</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span> + <span class="n">lang</span> <span class="o">=</span> <span class="n">lang</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> + <span class="k">if</span> <span class="n">is_abbr</span><span class="p">:</span> + <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="n">sxng_locales</span><span class="p">:</span> + <span class="k">if</span> <span class="n">l</span><span class="p">[</span><span class="mi">0</span><span class="p">][:</span><span class="mi">2</span><span class="p">]</span> <span class="o">==</span> <span class="n">lang</span><span class="p">:</span> + <span class="k">return</span> <span class="p">(</span><span class="kc">True</span><span class="p">,</span> <span class="n">l</span><span class="p">[</span><span class="mi">0</span><span 
class="p">][:</span><span class="mi">2</span><span class="p">],</span> <span class="n">l</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">())</span> + <span class="k">return</span> <span class="kc">None</span> + <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="n">sxng_locales</span><span class="p">:</span> + <span class="k">if</span> <span class="n">l</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">==</span> <span class="n">lang</span> <span class="ow">or</span> <span class="n">l</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">==</span> <span class="n">lang</span><span class="p">:</span> + <span class="k">return</span> <span class="p">(</span><span class="kc">True</span><span class="p">,</span> <span class="n">l</span><span class="p">[</span><span class="mi">0</span><span class="p">][:</span><span class="mi">2</span><span class="p">],</span> <span class="n">l</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">())</span> + <span class="k">return</span> <span class="kc">None</span></div> + + + +<span class="k">def</span> <span class="nf">load_module</span><span class="p">(</span><span class="n">filename</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">module_dir</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">types</span><span class="o">.</span><span class="n">ModuleType</span><span class="p">:</span> + <span class="n">modname</span> <span class="o">=</span> <span 
class="n">splitext</span><span class="p">(</span><span class="n">filename</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">modpath</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">module_dir</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span> + <span class="c1"># and https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly</span> + <span class="n">spec</span> <span class="o">=</span> <span class="n">importlib</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">spec_from_file_location</span><span class="p">(</span><span class="n">modname</span><span class="p">,</span> <span class="n">modpath</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">spec</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Error loading '</span><span class="si">{</span><span class="n">modpath</span><span class="si">}</span><span class="s2">' module"</span><span class="p">)</span> + <span class="n">module</span> <span class="o">=</span> <span class="n">importlib</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">module_from_spec</span><span class="p">(</span><span class="n">spec</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">spec</span><span class="o">.</span><span class="n">loader</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Error loading '</span><span class="si">{</span><span class="n">modpath</span><span class="si">}</span><span class="s2">' module"</span><span class="p">)</span> + <span 
class="n">spec</span><span class="o">.</span><span class="n">loader</span><span class="o">.</span><span class="n">exec_module</span><span class="p">(</span><span class="n">module</span><span class="p">)</span> + <span class="k">return</span> <span class="n">module</span> + + +<div class="viewcode-block" id="to_string"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.to_string">[docs]</a> +<span class="k">def</span> <span class="nf">to_string</span><span class="p">(</span><span class="n">obj</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Convert obj to its string representation."""</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> + <span class="k">return</span> <span class="n">obj</span> + <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="s1">'__str__'</span><span class="p">):</span> + <span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span> + <span class="k">return</span> <span class="nb">repr</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="ecma_unescape"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.ecma_unescape">[docs]</a> +<span class="k">def</span> <span class="nf">ecma_unescape</span><span class="p">(</span><span class="n">string</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Python implementation of the 
unescape javascript function</span> + +<span class="sd"> https://www.ecma-international.org/ecma-262/6.0/#sec-unescape-string</span> +<span class="sd"> https://developer.mozilla.org/fr/docs/Web/JavaScript/Reference/Objets_globaux/unescape</span> + +<span class="sd"> Examples:</span> +<span class="sd"> >>> ecma_unescape('%u5409')</span> +<span class="sd"> '吉'</span> +<span class="sd"> >>> ecma_unescape('%20')</span> +<span class="sd"> ' '</span> +<span class="sd"> >>> ecma_unescape('%F3')</span> +<span class="sd"> 'ó'</span> +<span class="sd"> """</span> + <span class="c1"># "%u5409" becomes "吉"</span> + <span class="n">string</span> <span class="o">=</span> <span class="n">_ECMA_UNESCAPE4_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="k">lambda</span> <span class="n">e</span><span class="p">:</span> <span class="nb">chr</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">16</span><span class="p">)),</span> <span class="n">string</span><span class="p">)</span> + <span class="c1"># "%20" becomes " ", "%F3" becomes "ó"</span> + <span class="n">string</span> <span class="o">=</span> <span class="n">_ECMA_UNESCAPE2_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="k">lambda</span> <span class="n">e</span><span class="p">:</span> <span class="nb">chr</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">16</span><span class="p">)),</span> <span class="n">string</span><span class="p">)</span> + <span class="k">return</span> <span class="n">string</span></div> + + + +<span class="k">def</span> 
<span class="nf">get_string_replaces_function</span><span class="p">(</span><span class="n">replaces</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="n">Callable</span><span class="p">[[</span><span class="nb">str</span><span class="p">],</span> <span class="nb">str</span><span class="p">]:</span> + <span class="n">rep</span> <span class="o">=</span> <span class="p">{</span><span class="n">re</span><span class="o">.</span><span class="n">escape</span><span class="p">(</span><span class="n">k</span><span class="p">):</span> <span class="n">v</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">replaces</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span> + <span class="n">pattern</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s2">"|"</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">rep</span><span class="o">.</span><span class="n">keys</span><span class="p">()))</span> + + <span class="k">def</span> <span class="nf">func</span><span class="p">(</span><span class="n">text</span><span class="p">):</span> + <span class="k">return</span> <span class="n">pattern</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="k">lambda</span> <span class="n">m</span><span class="p">:</span> <span class="n">rep</span><span class="p">[</span><span class="n">re</span><span class="o">.</span><span class="n">escape</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">0</span><span class="p">))],</span> <span 
class="n">text</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">func</span> + + +<div class="viewcode-block" id="get_engine_from_settings"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.get_engine_from_settings">[docs]</a> +<span class="k">def</span> <span class="nf">get_engine_from_settings</span><span class="p">(</span><span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Return engine configuration from settings.yml of a given engine name"""</span> + + <span class="k">if</span> <span class="s1">'engines'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">settings</span><span class="p">:</span> + <span class="k">return</span> <span class="p">{}</span> + + <span class="k">for</span> <span class="n">engine</span> <span class="ow">in</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'engines'</span><span class="p">]:</span> + <span class="k">if</span> <span class="s1">'name'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine</span><span class="p">:</span> + <span class="k">continue</span> + <span class="k">if</span> <span class="n">name</span> <span class="o">==</span> <span class="n">engine</span><span class="p">[</span><span class="s1">'name'</span><span class="p">]:</span> + <span class="k">return</span> <span class="n">engine</span> + + <span class="k">return</span> <span class="p">{}</span></div> + + + +<div class="viewcode-block" id="get_xpath"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.get_xpath">[docs]</a> +<span class="k">def</span> <span class="nf">get_xpath</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">:</span> <span class="n">XPathSpecType</span><span class="p">)</span> <span 
class="o">-></span> <span class="n">XPath</span><span class="p">:</span> +<span class="w"> </span><span class="sd">"""Return cached compiled XPath</span> + +<span class="sd"> There is no thread lock.</span> +<span class="sd"> Worst case scenario, xpath_str is compiled more than one time.</span> + +<span class="sd"> Args:</span> +<span class="sd"> * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath</span> + +<span class="sd"> Returns:</span> +<span class="sd"> * result (bool, float, list, str): Results.</span> + +<span class="sd"> Raises:</span> +<span class="sd"> * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath</span> +<span class="sd"> * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> + <span class="n">result</span> <span class="o">=</span> <span class="n">_XPATH_CACHE</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> + <span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">result</span> <span class="o">=</span> <span class="n">XPath</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">)</span> + <span class="k">except</span> <span class="n">XPathSyntaxError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="k">raise</span> <span class="n">SearxXPathSyntaxException</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="nb">str</span><span 
class="p">(</span><span class="n">e</span><span class="o">.</span><span class="n">msg</span><span class="p">))</span> <span class="kn">from</span> <span class="nn">e</span> + <span class="n">_XPATH_CACHE</span><span class="p">[</span><span class="n">xpath_spec</span><span class="p">]</span> <span class="o">=</span> <span class="n">result</span> + <span class="k">return</span> <span class="n">result</span> + + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="n">XPath</span><span class="p">):</span> + <span class="k">return</span> <span class="n">xpath_spec</span> + + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'xpath_spec must be either a str or a lxml.etree.XPath'</span><span class="p">)</span></div> + + + +<div class="viewcode-block" id="eval_xpath"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.eval_xpath">[docs]</a> +<span class="k">def</span> <span class="nf">eval_xpath</span><span class="p">(</span><span class="n">element</span><span class="p">:</span> <span class="n">ElementBase</span><span class="p">,</span> <span class="n">xpath_spec</span><span class="p">:</span> <span class="n">XPathSpecType</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Equivalent of element.xpath(xpath_str) but compile xpath_str once for all.</span> +<span class="sd"> See https://lxml.de/xpathxslt.html#xpath-return-values</span> + +<span class="sd"> Args:</span> +<span class="sd"> * element (ElementBase): [description]</span> +<span class="sd"> * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath</span> + +<span class="sd"> Returns:</span> +<span class="sd"> * result (bool, float, list, str): Results.</span> + +<span class="sd"> Raises:</span> +<span class="sd"> * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath</span> +<span 
class="sd"> * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath</span> +<span class="sd"> * SearxEngineXPathException: Raise when the XPath can't be evaluated.</span> +<span class="sd"> """</span> + <span class="n">xpath</span> <span class="o">=</span> <span class="n">get_xpath</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">)</span> + <span class="k">try</span><span class="p">:</span> + <span class="k">return</span> <span class="n">xpath</span><span class="p">(</span><span class="n">element</span><span class="p">)</span> + <span class="k">except</span> <span class="n">XPathError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="n">arg</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">e</span><span class="o">.</span><span class="n">args</span><span class="p">])</span> + <span class="k">raise</span> <span class="n">SearxEngineXPathException</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="n">arg</span><span class="p">)</span> <span class="kn">from</span> <span class="nn">e</span></div> + + + +<div class="viewcode-block" id="eval_xpath_list"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.eval_xpath_list">[docs]</a> +<span class="k">def</span> <span class="nf">eval_xpath_list</span><span class="p">(</span><span class="n">element</span><span class="p">:</span> <span class="n">ElementBase</span><span class="p">,</span> <span class="n">xpath_spec</span><span class="p">:</span> <span class="n">XPathSpecType</span><span class="p">,</span> <span class="n">min_len</span><span class="p">:</span> <span 
class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Same as eval_xpath, check if the result is a list</span> + +<span class="sd"> Args:</span> +<span class="sd"> * element (ElementBase): [description]</span> +<span class="sd"> * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath</span> +<span class="sd"> * min_len (int, optional): [description]. Defaults to None.</span> + +<span class="sd"> Raises:</span> +<span class="sd"> * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath</span> +<span class="sd"> * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath</span> +<span class="sd"> * SearxEngineXPathException: raise if the result is not a list</span> + +<span class="sd"> Returns:</span> +<span class="sd"> * result (bool, float, list, str): Results.</span> +<span class="sd"> """</span> + <span class="n">result</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">element</span><span class="p">,</span> <span class="n">xpath_spec</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span> + <span class="k">raise</span> <span class="n">SearxEngineXPathException</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="s1">'the result is not a list'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">min_len</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">min_len</span> <span class="o">></span> <span class="nb">len</span><span class="p">(</span><span 
class="n">result</span><span class="p">):</span> + <span class="k">raise</span> <span class="n">SearxEngineXPathException</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="s1">'len(xpath_str) < '</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">min_len</span><span class="p">))</span> + <span class="k">return</span> <span class="n">result</span></div> + + + +<div class="viewcode-block" id="eval_xpath_getindex"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.eval_xpath_getindex">[docs]</a> +<span class="k">def</span> <span class="nf">eval_xpath_getindex</span><span class="p">(</span><span class="n">elements</span><span class="p">:</span> <span class="n">ElementBase</span><span class="p">,</span> <span class="n">xpath_spec</span><span class="p">:</span> <span class="n">XPathSpecType</span><span class="p">,</span> <span class="n">index</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="n">_NOTSET</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Call eval_xpath_list then get one element using the index parameter.</span> +<span class="sd"> If the index does not exist, either raise an exception is default is not set,</span> +<span class="sd"> other return the default value (can be None).</span> + +<span class="sd"> Args:</span> +<span class="sd"> * elements (ElementBase): lxml element to apply the xpath.</span> +<span class="sd"> * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath.</span> +<span class="sd"> * index (int): index to get</span> +<span class="sd"> * default (Object, optional): Defaults if index doesn't exist.</span> + +<span class="sd"> Raises:</span> +<span class="sd"> * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath</span> +<span class="sd"> * 
SearxXPathSyntaxException: Raise when there is a syntax error in the XPath</span> +<span class="sd"> * SearxEngineXPathException: if the index is not found. Also see eval_xpath.</span> + +<span class="sd"> Returns:</span> +<span class="sd"> * result (bool, float, list, str): Results.</span> +<span class="sd"> """</span> + <span class="n">result</span> <span class="o">=</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">elements</span><span class="p">,</span> <span class="n">xpath_spec</span><span class="p">)</span> + <span class="k">if</span> <span class="o">-</span><span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">)</span> <span class="o"><=</span> <span class="n">index</span> <span class="o"><</span> <span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">):</span> + <span class="k">return</span> <span class="n">result</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> + <span class="k">if</span> <span class="n">default</span> <span class="o">==</span> <span class="n">_NOTSET</span><span class="p">:</span> + <span class="c1"># raise an SearxEngineXPathException instead of IndexError</span> + <span class="c1"># to record xpath_spec</span> + <span class="k">raise</span> <span class="n">SearxEngineXPathException</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="s1">'index '</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">index</span><span class="p">)</span> <span class="o">+</span> <span class="s1">' not found'</span><span class="p">)</span> + <span class="k">return</span> <span class="n">default</span></div> + + + +<span class="k">def</span> <span class="nf">_get_fasttext_model</span><span class="p">()</span> <span class="o">-></span> <span class="s2">"fasttext.FastText._FastText"</span><span class="p">:</span> 
<span class="c1"># type: ignore</span> + <span class="k">global</span> <span class="n">_FASTTEXT_MODEL</span> <span class="c1"># pylint: disable=global-statement</span> + <span class="k">if</span> <span class="n">_FASTTEXT_MODEL</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> + <span class="kn">import</span> <span class="nn">fasttext</span> <span class="c1"># pylint: disable=import-outside-toplevel</span> + + <span class="c1"># Monkey patch: prevent fasttext from showing a (useless) warning when loading a model.</span> + <span class="n">fasttext</span><span class="o">.</span><span class="n">FastText</span><span class="o">.</span><span class="n">eprint</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="kc">None</span> + <span class="n">_FASTTEXT_MODEL</span> <span class="o">=</span> <span class="n">fasttext</span><span class="o">.</span><span class="n">load_model</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">data_dir</span> <span class="o">/</span> <span class="s1">'lid.176.ftz'</span><span class="p">))</span> + <span class="k">return</span> <span class="n">_FASTTEXT_MODEL</span> + + +<div class="viewcode-block" id="get_embeded_stream_url"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.get_embeded_stream_url">[docs]</a> +<span class="k">def</span> <span class="nf">get_embeded_stream_url</span><span class="p">(</span><span class="n">url</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""</span> +<span class="sd"> Converts a standard video URL into its embed format. 
Supported services include Youtube,</span> +<span class="sd"> Facebook, Instagram, TikTok, and Dailymotion.</span> +<span class="sd"> """</span> + <span class="n">parsed_url</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">url</span><span class="p">)</span> + <span class="n">iframe_src</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="c1"># YouTube</span> + <span class="k">if</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'www.youtube.com'</span><span class="p">,</span> <span class="s1">'youtube.com'</span><span class="p">]</span> <span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span> <span class="o">==</span> <span class="s1">'/watch'</span> <span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">query</span><span class="p">:</span> + <span class="n">video_id</span> <span class="o">=</span> <span class="n">parse_qs</span><span class="p">(</span><span class="n">parsed_url</span><span class="o">.</span><span class="n">query</span><span class="p">)</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'v'</span><span class="p">,</span> <span class="p">[])</span> + <span class="k">if</span> <span class="n">video_id</span><span class="p">:</span> + <span class="n">iframe_src</span> <span class="o">=</span> <span class="s1">'https://www.youtube-nocookie.com/embed/'</span> <span class="o">+</span> <span class="n">video_id</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> + + <span class="c1"># Facebook</span> + <span class="k">elif</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="ow">in</span> <span class="p">[</span><span 
class="s1">'www.facebook.com'</span><span class="p">,</span> <span class="s1">'facebook.com'</span><span class="p">]:</span> + <span class="n">encoded_href</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'href'</span><span class="p">:</span> <span class="n">url</span><span class="p">})</span> + <span class="n">iframe_src</span> <span class="o">=</span> <span class="s1">'https://www.facebook.com/plugins/video.php?allowfullscreen=true&'</span> <span class="o">+</span> <span class="n">encoded_href</span> + + <span class="c1"># Instagram</span> + <span class="k">elif</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'www.instagram.com'</span><span class="p">,</span> <span class="s1">'instagram.com'</span><span class="p">]</span> <span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/p/'</span><span class="p">):</span> + <span class="k">if</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">'/'</span><span class="p">):</span> + <span class="n">iframe_src</span> <span class="o">=</span> <span class="n">url</span> <span class="o">+</span> <span class="s1">'embed'</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">iframe_src</span> <span class="o">=</span> <span class="n">url</span> <span class="o">+</span> <span class="s1">'/embed'</span> + + <span class="c1"># TikTok</span> + <span class="k">elif</span> <span class="p">(</span> + <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="ow">in</span> <span class="p">[</span><span 
class="s1">'www.tiktok.com'</span><span class="p">,</span> <span class="s1">'tiktok.com'</span><span class="p">]</span> + <span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/@'</span><span class="p">)</span> + <span class="ow">and</span> <span class="s1">'/video/'</span> <span class="ow">in</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span> + <span class="p">):</span> + <span class="n">path_parts</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/video/'</span><span class="p">)</span> + <span class="n">video_id</span> <span class="o">=</span> <span class="n">path_parts</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> + <span class="n">iframe_src</span> <span class="o">=</span> <span class="s1">'https://www.tiktok.com/embed/'</span> <span class="o">+</span> <span class="n">video_id</span> + + <span class="c1"># Dailymotion</span> + <span class="k">elif</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'www.dailymotion.com'</span><span class="p">,</span> <span class="s1">'dailymotion.com'</span><span class="p">]</span> <span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/video/'</span><span class="p">):</span> + <span class="n">path_parts</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span 
class="p">(</span><span class="s1">'/'</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">path_parts</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span> + <span class="n">video_id</span> <span class="o">=</span> <span class="n">path_parts</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> + <span class="n">iframe_src</span> <span class="o">=</span> <span class="s1">'https://www.dailymotion.com/embed/video/'</span> <span class="o">+</span> <span class="n">video_id</span> + + <span class="k">return</span> <span class="n">iframe_src</span></div> + + + +<div class="viewcode-block" id="detect_language"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.detect_language">[docs]</a> +<span class="k">def</span> <span class="nf">detect_language</span><span class="p">(</span><span class="n">text</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">threshold</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.3</span><span class="p">,</span> <span class="n">only_search_languages</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> +<span class="w"> </span><span class="sd">"""Detect the language of the ``text`` parameter.</span> + +<span class="sd"> :param str text: The string whose language is to be detected.</span> + +<span class="sd"> :param float threshold: Threshold filters the returned labels by a threshold</span> +<span class="sd"> on probability. 
A choice of 0.3 will return labels with at least 0.3</span> +<span class="sd"> probability.</span> + +<span class="sd"> :param bool only_search_languages: If ``True``, returns only supported</span> +<span class="sd"> SearXNG search languages. see :py:obj:`searx.languages`</span> + +<span class="sd"> :rtype: str, None</span> +<span class="sd"> :returns:</span> +<span class="sd"> The detected language code or ``None``. See below.</span> + +<span class="sd"> :raises ValueError: If ``text`` is not a string.</span> + +<span class="sd"> The language detection is done by using `a fork`_ of the fastText_ library</span> +<span class="sd"> (`python fasttext`_). fastText_ distributes the `language identification</span> +<span class="sd"> model`_, for reference:</span> + +<span class="sd"> - `FastText.zip: Compressing text classification models`_</span> +<span class="sd"> - `Bag of Tricks for Efficient Text Classification`_</span> + +<span class="sd"> The `language identification model`_ support the language codes</span> +<span class="sd"> (ISO-639-3)::</span> + +<span class="sd"> af als am an ar arz as ast av az azb ba bar bcl be bg bh bn bo bpy br bs</span> +<span class="sd"> bxr ca cbk ce ceb ckb co cs cv cy da de diq dsb dty dv el eml en eo es</span> +<span class="sd"> et eu fa fi fr frr fy ga gd gl gn gom gu gv he hi hif hr hsb ht hu hy ia</span> +<span class="sd"> id ie ilo io is it ja jbo jv ka kk km kn ko krc ku kv kw ky la lb lez li</span> +<span class="sd"> lmo lo lrc lt lv mai mg mhr min mk ml mn mr mrj ms mt mwl my myv mzn nah</span> +<span class="sd"> nap nds ne new nl nn no oc or os pa pam pfl pl pms pnb ps pt qu rm ro ru</span> +<span class="sd"> rue sa sah sc scn sco sd sh si sk sl so sq sr su sv sw ta te tg th tk tl</span> +<span class="sd"> tr tt tyv ug uk ur uz vec vep vi vls vo wa war wuu xal xmf yi yo yue zh</span> + +<span class="sd"> By using ``only_search_languages=True`` the `language identification model`_</span> +<span class="sd"> is harmonized with 
the SearXNG's language (locale) model. General</span> +<span class="sd"> conditions of SearXNG's locale model are:</span> + +<span class="sd"> a. SearXNG's locale of a query is passed to the</span> +<span class="sd"> :py:obj:`searx.locales.get_engine_locale` to get a language and/or region</span> +<span class="sd"> code that is used by an engine.</span> + +<span class="sd"> b. Most of SearXNG's engines do not support all the languages from `language</span> +<span class="sd"> identification model`_ and there is also a discrepancy in the ISO-639-3</span> +<span class="sd"> (fasttext) and ISO-639-2 (SearXNG)handling. Further more, in SearXNG the</span> +<span class="sd"> locales like ``zh-TH`` (``zh-CN``) are mapped to ``zh_Hant``</span> +<span class="sd"> (``zh_Hans``) while the `language identification model`_ reduce both to</span> +<span class="sd"> ``zh``.</span> + +<span class="sd"> .. _a fork: https://github.com/searxng/fasttext-predict</span> +<span class="sd"> .. _fastText: https://fasttext.cc/</span> +<span class="sd"> .. _python fasttext: https://pypi.org/project/fasttext/</span> +<span class="sd"> .. _language identification model: https://fasttext.cc/docs/en/language-identification.html</span> +<span class="sd"> .. _Bag of Tricks for Efficient Text Classification: https://arxiv.org/abs/1607.01759</span> +<span class="sd"> .. 
_`FastText.zip: Compressing text classification models`: https://arxiv.org/abs/1612.03651</span> + +<span class="sd"> """</span> + <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> + <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'text must a str'</span><span class="p">)</span> + <span class="n">r</span> <span class="o">=</span> <span class="n">_get_fasttext_model</span><span class="p">()</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">text</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">,</span> <span class="s1">' '</span><span class="p">),</span> <span class="n">k</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">threshold</span><span class="o">=</span><span class="n">threshold</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">r</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">r</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">r</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">r</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="o">></span> <span 
class="mi">0</span><span class="p">:</span> + <span class="n">language</span> <span class="o">=</span> <span class="n">r</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'__label__'</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span> + <span class="k">if</span> <span class="n">only_search_languages</span> <span class="ow">and</span> <span class="n">language</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">SEARCH_LANGUAGE_CODES</span><span class="p">:</span> + <span class="k">return</span> <span class="kc">None</span> + <span class="k">return</span> <span class="n">language</span> + <span class="k">return</span> <span class="kc">None</span></div> + + + +<div class="viewcode-block" id="js_variable_to_python"> +<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.js_variable_to_python">[docs]</a> +<span class="k">def</span> <span class="nf">js_variable_to_python</span><span class="p">(</span><span class="n">js_variable</span><span class="p">):</span> +<span class="w"> </span><span class="sd">"""Convert a javascript variable into JSON and then load the value</span> + +<span class="sd"> It does not deal with all cases, but it is good enough for now.</span> +<span class="sd"> chompjs has a better implementation.</span> +<span class="sd"> """</span> + <span class="c1"># when in_string is not None, it contains the character that has opened the string</span> + <span class="c1"># either simple quote or double quote</span> + <span class="n">in_string</span> <span class="o">=</span> <span class="kc">None</span> + <span class="c1"># cut the string:</span> + <span class="c1"># r"""{ a:"f\"irst", c:'sec"ond'}"""</span> + <span class="c1"># becomes</span> + <span class="c1"># ['{ a:', '"', 'f\\', '"', 'irst', '"', ', c:', "'", 'sec', 
'"', 'ond', "'", '}']</span> + <span class="n">parts</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="sa">r</span><span class="s1">'(["</span><span class="se">\'</span><span class="s1">])'</span><span class="p">,</span> <span class="n">js_variable</span><span class="p">)</span> + <span class="c1"># previous part (to check the escape character antislash)</span> + <span class="n">previous_p</span> <span class="o">=</span> <span class="s2">""</span> + <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">p</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">parts</span><span class="p">):</span> + <span class="c1"># parse characters inside a ECMA string</span> + <span class="k">if</span> <span class="n">in_string</span><span class="p">:</span> + <span class="c1"># we are in a JS string: replace the colon by a temporary character</span> + <span class="c1"># so quote_keys_regex doesn't have to deal with colon inside the JS strings</span> + <span class="n">parts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">parts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">':'</span><span class="p">,</span> <span class="nb">chr</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span> + <span class="k">if</span> <span class="n">in_string</span> <span class="o">==</span> <span class="s2">"'"</span><span class="p">:</span> + <span class="c1"># the JS string is delimited by simple quote.</span> + <span class="c1"># This is not supported by JSON.</span> + <span class="c1"># simple quote delimited string are converted to double quote delimited string</span> + <span class="c1"># here, 
inside a JS string, we escape the double quote</span> + <span class="n">parts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">parts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'"'</span><span class="p">,</span> <span class="sa">r</span><span class="s1">'\"'</span><span class="p">)</span> + + <span class="c1"># deal with delimiters and escape character</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">in_string</span> <span class="ow">and</span> <span class="n">p</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'"'</span><span class="p">,</span> <span class="s2">"'"</span><span class="p">):</span> + <span class="c1"># we are not in string</span> + <span class="c1"># but p is double or simple quote</span> + <span class="c1"># that's the start of a new string</span> + <span class="c1"># replace simple quote by double quote</span> + <span class="c1"># (JSON doesn't support simple quote)</span> + <span class="n">parts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'"'</span> + <span class="n">in_string</span> <span class="o">=</span> <span class="n">p</span> + <span class="k">continue</span> + <span class="k">if</span> <span class="n">p</span> <span class="o">==</span> <span class="n">in_string</span><span class="p">:</span> + <span class="c1"># we are in a string and the current part MAY close the string</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">previous_p</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">previous_p</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span 
class="p">]</span> <span class="o">==</span> <span class="s1">'</span><span class="se">\\</span><span class="s1">'</span><span class="p">:</span> + <span class="c1"># there is an antislash just before: the ECMA string continue</span> + <span class="k">continue</span> + <span class="c1"># the current p close the string</span> + <span class="c1"># replace simple quote by double quote</span> + <span class="n">parts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'"'</span> + <span class="n">in_string</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="k">if</span> <span class="ow">not</span> <span class="n">in_string</span><span class="p">:</span> + <span class="c1"># replace void 0 by null</span> + <span class="c1"># https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/void</span> + <span class="c1"># we are sure there is no string in p</span> + <span class="n">parts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">_JS_VOID_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">"null"</span><span class="p">,</span> <span class="n">p</span><span class="p">)</span> + <span class="c1"># update previous_p</span> + <span class="n">previous_p</span> <span class="o">=</span> <span class="n">p</span> + <span class="c1"># join the string</span> + <span class="n">s</span> <span class="o">=</span> <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">parts</span><span class="p">)</span> + <span class="c1"># add quote around the key</span> + <span class="c1"># { a: 12 }</span> + <span class="c1"># becomes</span> + <span class="c1"># { "a": 12 }</span> + <span class="n">s</span> <span class="o">=</span> <span class="n">_JS_QUOTE_KEYS_RE</span><span class="o">.</span><span 
class="n">sub</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\1"\2"\3'</span><span class="p">,</span> <span class="n">s</span><span class="p">)</span> + <span class="n">s</span> <span class="o">=</span> <span class="n">_JS_DECIMAL_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">":0."</span><span class="p">,</span> <span class="n">s</span><span class="p">)</span> + <span class="c1"># replace the surogate character by colon</span> + <span class="n">s</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="nb">chr</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="s1">':'</span><span class="p">)</span> + <span class="c1"># load the JSON and return the result</span> + <span class="k">return</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">s</span><span class="p">)</span></div> + +</pre></div> + + <div class="clearer"></div> + </div> + </div> + </div> + <span id="sidebar-top"></span> + <div class="sphinxsidebar" role="navigation" aria-label="Main"> + <div class="sphinxsidebarwrapper"> + + + <p class="logo"><a href="../../index.html"> + <img class="logo" src="../../_static/searxng-wordmark.svg" alt="Logo of SearXNG"/> + </a></p> + + +<h3><a href="../../index.html">Table of Contents</a></h3> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../user/index.html">User information</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../dev/index.html">Developer documentation</a></li> +<li class="toctree-l1"><a 
class="reference internal" href="../../utils/index.html">DevOps tooling box</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../src/index.html">Source-Code</a></li> +</ul> + + <h3>Project Links</h3> + <ul> + <li><a href="https://github.com/searxng/searxng/tree/master">Source</a> + + <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a> + + <li><a href="https://searx.space">Public instances</a> + + <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a> + </ul><h3>Navigation</h3> +<ul> + <li><a href="../../index.html">Overview</a> + <ul> + <li><a href="../index.html">Module code</a> + + + </ul> + </li> + </ul> + </li> +</ul> +<search id="searchbox" style="display: none" role="search"> + <h3 id="searchlabel">Quick search</h3> + <div class="searchformwrapper"> + <form class="search" action="../../search.html" method="get"> + <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> + <input type="submit" value="Go" /> + </form> + </div> +</search> +<script>document.getElementById('searchbox').style.display = "block"</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="footer" role="contentinfo"> + © Copyright SearXNG team. + </div> + </body> +</html>
\ No newline at end of file |