diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2022-01-03 12:40:06 +0100 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2022-01-05 16:09:40 +0100 |
commit | ffea5d8ef5540bc4be08b2b26e1819d5401f854d (patch) | |
tree | b2285d42d9ed6debec82ebd25fca31358a452794 /searxng_extra | |
parent | b630c5d7bc0bf5a4281ad402bb32adc7f6ab257f (diff) | |
download | searxng-ffea5d8ef5540bc4be08b2b26e1819d5401f854d.tar.gz searxng-ffea5d8ef5540bc4be08b2b26e1819d5401f854d.zip |
[docs] add documentation for the scripts in searxng_extra/update
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searxng_extra')
| Mode | File | Lines changed |
|---|---|---|
| -rwxr-xr-x | searxng_extra/update/update_ahmia_blacklist.py | 17 |
| -rwxr-xr-x | searxng_extra/update/update_currencies.py | 6 |
| -rwxr-xr-x | searxng_extra/update/update_engine_descriptions.py | 7 |
| -rwxr-xr-x | searxng_extra/update/update_external_bangs.py | 13 |
| -rwxr-xr-x | searxng_extra/update/update_firefox_version.py | 14 |
| -rwxr-xr-x | searxng_extra/update/update_languages.py | 10 |
| -rwxr-xr-x | searxng_extra/update/update_osm_keys_tags.py | 5 |
| -rwxr-xr-x | searxng_extra/update/update_wikidata_units.py | 12 |

8 files changed, 64 insertions, 20 deletions
diff --git a/searxng_extra/update/update_ahmia_blacklist.py b/searxng_extra/update/update_ahmia_blacklist.py index f7695deae..57fb78b34 100755 --- a/searxng_extra/update/update_ahmia_blacklist.py +++ b/searxng_extra/update/update_ahmia_blacklist.py @@ -1,10 +1,14 @@ #!/usr/bin/env python # SPDX-License-Identifier: AGPL-3.0-or-later +"""This script saves `Ahmia's blacklist`_ for onion sites. -# This script saves Ahmia's blacklist for onion sites. -# More info in https://ahmia.fi/blacklist/ +Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data +... <.github/workflows/data-update.yml>`). + +.. _Ahmia's blacklist: https://ahmia.fi/blacklist/ + +""" -# set path from os.path import join import requests @@ -26,6 +30,7 @@ def get_ahmia_blacklist_filename(): return join(join(searx_dir, "data"), "ahmia_blacklist.txt") -blacklist = fetch_ahmia_blacklist() -with open(get_ahmia_blacklist_filename(), "w") as f: - f.write('\n'.join(blacklist)) +if __name__ == '__main__': + blacklist = fetch_ahmia_blacklist() + with open(get_ahmia_blacklist_filename(), "w") as f: + f.write('\n'.join(blacklist)) diff --git a/searxng_extra/update/update_currencies.py b/searxng_extra/update/update_currencies.py index 3373e2455..cdff4cbc9 100755 --- a/searxng_extra/update/update_currencies.py +++ b/searxng_extra/update/update_currencies.py @@ -1,6 +1,12 @@ #!/usr/bin/env python # SPDX-License-Identifier: AGPL-3.0-or-later +"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine. + +Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). 
+ +""" import re import unicodedata import json diff --git a/searxng_extra/update/update_engine_descriptions.py b/searxng_extra/update/update_engine_descriptions.py index 51cfc7cc2..bab1a0349 100755 --- a/searxng_extra/update/update_engine_descriptions.py +++ b/searxng_extra/update/update_engine_descriptions.py @@ -1,6 +1,13 @@ #!/usr/bin/env python # SPDX-License-Identifier: AGPL-3.0-or-later +"""Fetch website description from websites and from +:origin:`searx/engines/wikidata.py` engine. + +Output file: :origin:`searx/data/engine_descriptions.json`. + +""" + import json from urllib.parse import urlparse from os.path import join diff --git a/searxng_extra/update/update_external_bangs.py b/searxng_extra/update/update_external_bangs.py index d5c6b585a..be3aade0f 100755 --- a/searxng_extra/update/update_external_bangs.py +++ b/searxng_extra/update/update_external_bangs.py @@ -1,17 +1,20 @@ #!/usr/bin/env python # lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later -""" -Update searx/data/external_bangs.json using the duckduckgo bangs. +"""Update :origin:`searx/data/external_bangs.json` using the duckduckgo bangs +(:origin:`CI Update data ... <.github/workflows/data-update.yml>`). + +https://duckduckgo.com/newbang loads: -https://duckduckgo.com/newbang loads * a javascript which provides the bang version ( https://duckduckgo.com/bv1.js ) * a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example ) This script loads the javascript, then the bangs. 
-The javascript URL may change in the future ( for example https://duckduckgo.com/bv2.js ), -but most probably it will requires to update RE_BANG_VERSION +The javascript URL may change in the future ( for example +https://duckduckgo.com/bv2.js ), but most probably it will requires to update +RE_BANG_VERSION + """ # pylint: disable=C0116 diff --git a/searxng_extra/update/update_firefox_version.py b/searxng_extra/update/update_firefox_version.py index 750e955fd..163982b16 100755 --- a/searxng_extra/update/update_firefox_version.py +++ b/searxng_extra/update/update_firefox_version.py @@ -1,6 +1,13 @@ #!/usr/bin/env python # SPDX-License-Identifier: AGPL-3.0-or-later +"""Fetch firefox useragent signatures + +Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). + +""" + import json import requests import re @@ -66,6 +73,7 @@ def get_useragents_filename(): return join(join(searx_dir, "data"), "useragents.json") -useragents["versions"] = fetch_firefox_last_versions() -with open(get_useragents_filename(), "w") as f: - json.dump(useragents, f, indent=4, ensure_ascii=False) +if __name__ == '__main__': + useragents["versions"] = fetch_firefox_last_versions() + with open(get_useragents_filename(), "w", encoding='utf-8') as f: + json.dump(useragents, f, indent=4, ensure_ascii=False) diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py index f37345808..9a71566a9 100755 --- a/searxng_extra/update/update_languages.py +++ b/searxng_extra/update/update_languages.py @@ -1,9 +1,13 @@ #!/usr/bin/env python # SPDX-License-Identifier: AGPL-3.0-or-later +"""This script generates languages.py from intersecting each engine's supported +languages. -# This script generates languages.py from intersecting each engine's supported languages. 
-# -# Output files: searx/data/engines_languages.json and searx/languages.py +Output files: :origin:`searx/data/engines_languages.json` and +:origin:`searx/languages.py` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). + +""" import json from pathlib import Path diff --git a/searxng_extra/update/update_osm_keys_tags.py b/searxng_extra/update/update_osm_keys_tags.py index 2916cbff1..1d691c194 100755 --- a/searxng_extra/update/update_osm_keys_tags.py +++ b/searxng_extra/update/update_osm_keys_tags.py @@ -5,7 +5,10 @@ To get the i18n names, the scripts uses `Wikidata Query Service`_ instead of for example `OSM tags API`_ (sidenote: the actual change log from -map.atownsend.org.uk_ might be useful to normalize OSM tags) +map.atownsend.org.uk_ might be useful to normalize OSM tags). + +Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). .. _Wikidata Query Service: https://query.wikidata.org/ .. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc diff --git a/searxng_extra/update/update_wikidata_units.py b/searxng_extra/update/update_wikidata_units.py index 43a872b1b..e999b6cfd 100755 --- a/searxng_extra/update/update_wikidata_units.py +++ b/searxng_extra/update/update_wikidata_units.py @@ -3,6 +3,13 @@ # lint: pylint # pylint: disable=missing-module-docstring +"""Fetch units from :origin:`searx/engines/wikidata.py` engine. + +Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data +... <.github/workflows/data-update.yml>`). + +""" + import json import collections @@ -54,5 +61,6 @@ def get_wikidata_units_filename(): return join(join(searx_dir, "data"), "wikidata_units.json") -with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f: - json.dump(get_data(), f, indent=4, ensure_ascii=False) +if __name__ == '__main__': + with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f: + json.dump(get_data(), f, indent=4, ensure_ascii=False) |