diff options
author | rachmadani haryono <rachmadaniHaryono@users.noreply.github.com> | 2020-11-04 20:38:54 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-11-04 12:38:54 +0000 |
commit | c03e4c86bc49d6ef4664c038066d9f1c16e7dafc (patch) | |
tree | 48853358314a01448dd651f6f94620cad7dfc88f /utils | |
parent | 1b42d42695070f4a93f4df1605dcdff70f4d4d10 (diff) | |
download | searxng-c03e4c86bc49d6ef4664c038066d9f1c16e7dafc.tar.gz searxng-c03e4c86bc49d6ef4664c038066d9f1c16e7dafc.zip |
Feature/standalone searx update (#1591)
* chg: dev: update standalone_searx
parent d8a5df721b33dd8a7cc9e21dba4060f21d629f69
author rachmadaniHaryono <foreturiga@gmail.com> 1603896594 +0800
committer rachmadaniHaryono <foreturiga@gmail.com> 1603896619 +0800
chg: dev: debug engine_shortcuts
chg: dev: only initialize if engine is given
chg: dev: split main
chg: dev: standalone_searx
chg: dev: update standalone_searx
chg: doc: remove unnecessary log
chg: test: differentiate travis
chg: test: disable shortcut
chg: test: use default engine settings
fix: dev: category choices
fix: dev: duplicate engine shortcut
fix: dev: travis python3
fix: test: use empty string as shortcut
fix: test: apkm
fix: test: engine shortcut
fix: test: mypy
fix: test: parameter
fix: test: pep8
fix: test: py2 compatibilities
fix: test: searx settings
fix: test: travis engines
new: dev: deduplicate engine
new: dev: main receive engines parameter
new: dev: parse_argument accept engines parameter
new: dev: split search query from get_result func
new: test: basic result case
Suggestions: use RawTextQuery to make the suggestions URLs. Update all themes accordingly.
* new: doc: searx import and init
* chg: dev: parse_argument
- doc
- run on __main__
- simple parse_args
* chg: doc: module
* chg: dev: import section
- remove unused python path modification
- new required package
* chg: dev: script run
- parse_argument func returns the parsed results directly
- main func returns a dict instead of json text
- dump directly on sys.stdout.write
* chg: dev: get_search_query and get_search_query func
* chg: dev: main func
- move inner function outside
- return dict instead of json text
* new: dev: add utils to doc sys path
* new: doc: standalone_searx
* fix: doc: run script
* chg: dev: mypy type hint
* chg: dev: SearchQuery doesn't have attr engines
* chg: dev: reset engines __init__
* chg: test: unit test update
* chg: dev: pylint and flake8
* new: test: standalone_searx
* chg: dev: main func and doc
* chg: dev: import and type hint
* new: dev: main func
- remove get_result func
- single func which just translates the result to a dict
* chg: test: put mypy on dev requirement
* chg: doc: update
* new: doc: add standalone_searx module member
* chg: doc: shell command line
* chg: dev: remove mypy
* chg: doc: module docstring
Diffstat (limited to 'utils')
-rwxr-xr-x | utils/standalone_searx.py | 255 |
1 files changed, 184 insertions, 71 deletions
diff --git a/utils/standalone_searx.py b/utils/standalone_searx.py index 3aab7a6cc..0a35cc4a2 100755 --- a/utils/standalone_searx.py +++ b/utils/standalone_searx.py @@ -1,5 +1,63 @@ #!/usr/bin/env python +"""Script to run searx from terminal. +Getting categories without initiate the engine will only return `['general']` + +>>> import searx.engines +... list(searx.engines.categories.keys()) +['general'] +>>> import searx +... searx.engines.initialize_engines(searx.settings['engines']) +... list(searx.engines.categories.keys()) +['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map'] + +Example to use this script: + +.. code:: bash + + $ SEARX_DEBUG=1 python3 utils/standalone_searx.py rain + +Example to run it from python: + +>>> import importlib +... import json +... import sys +... import searx +... import searx.engines +... search_query = 'rain' +... # initialize engines +... searx.engines.initialize_engines(searx.settings['engines']) +... # load engines categories once instead of each time the function called +... engine_cs = list(searx.engines.categories.keys()) +... # load module +... spec = importlib.util.spec_from_file_location( +... 'utils.standalone_searx', 'utils/standalone_searx.py') +... sas = importlib.util.module_from_spec(spec) +... spec.loader.exec_module(sas) +... # use function from module +... prog_args = sas.parse_argument([search_query], category_choices=engine_cs) +... search_q = sas.get_search_query(prog_args, engine_categories=engine_cs) +... res_dict = sas.to_dict(search_q) +... sys.stdout.write(json.dumps( +... res_dict, sort_keys=True, indent=4, ensure_ascii=False, +... default=sas.json_serial)) +{ + "answers": [], + "infoboxes": [ {...} ], + "paging": true, + "results": [... ], + "results_number": 820000000.0, + "search": { + "lang": "all", + "pageno": 1, + "q": "rain", + "safesearch": 0, + "timerange": null + }, + "suggestions": [...] 
+} +""" # noqa: E501 +# pylint: disable=pointless-string-statement ''' searx is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by @@ -16,90 +74,145 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2016- by Alexandre Flament, <alex@al-f.net> ''' - -# set path -from sys import path -from os.path import realpath, dirname -path.append(realpath(dirname(realpath(__file__)) + '/../')) - -# initialization -from json import dumps -from searx import settings +# pylint: disable=wrong-import-position +import argparse import sys -import codecs -import searx.query -import searx.search +from datetime import datetime +from json import dumps +from typing import Any, Dict, List, Optional + +import searx import searx.engines -import searx.webapdater import searx.preferences +import searx.query +import searx.search import searx.webadapter -import argparse -searx.engines.initialize_engines(settings['engines']) - -# command line parsing -parser = argparse.ArgumentParser(description='Standalone searx.') -parser.add_argument('query', type=str, - help='Text query') -parser.add_argument('--category', type=str, nargs='?', - choices=searx.engines.categories.keys(), - default='general', - help='Search category') -parser.add_argument('--lang', type=str, nargs='?',default='all', - help='Search language') -parser.add_argument('--pageno', type=int, nargs='?', default=1, - help='Page number starting from 1') -parser.add_argument('--safesearch', type=str, nargs='?', choices=['0', '1', '2'], default='0', - help='Safe content filter from none to strict') -parser.add_argument('--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'], - help='Filter by time range') -args = parser.parse_args() - -# search results for the query -form = { - "q":args.query, - "categories":args.category.decode(), - "pageno":str(args.pageno), - "language":args.lang, - "time_range":args.timerange -} 
-preferences = searx.preferences.Preferences(['oscar'], searx.engines.categories.keys(), searx.engines.engines, []) -preferences.key_value_settings['safesearch'].parse(args.safesearch) +EngineCategoriesVar = Optional[List[str]] -search_query, raw_text_query, _, _ = searx.webadapter.get_search_query_from_webapp(preferences, form) -search = searx.search.Search(search_query) -result_container = search.search() -# output -from datetime import datetime +def get_search_query( + args: argparse.Namespace, engine_categories: EngineCategoriesVar = None +) -> searx.search.SearchQuery: + """Get search results for the query""" + if engine_categories is None: + engine_categories = list(searx.engines.categories.keys()) + try: + category = args.category.decode('utf-8') + except AttributeError: + category = args.category + form = { + "q": args.query, + "categories": category, + "pageno": str(args.pageno), + "language": args.lang, + "time_range": args.timerange + } + preferences = searx.preferences.Preferences( + ['oscar'], engine_categories, searx.engines.engines, []) + preferences.key_value_settings['safesearch'].parse(args.safesearch) + + search_query = searx.webadapter.get_search_query_from_webapp( + preferences, form)[0] + return search_query -def no_parsed_url(results): + +def no_parsed_url(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Remove parsed url from dict.""" for result in results: del result['parsed_url'] return results -def json_serial(obj): - """JSON serializer for objects not serializable by default json code""" + +def json_serial(obj: Any) -> Any: + """JSON serializer for objects not serializable by default json code. 
+ + :raise TypeError: raised when **obj** is not serializable + """ if isinstance(obj, datetime): serial = obj.isoformat() return serial - raise TypeError ("Type not serializable") + if isinstance(obj, bytes): + return obj.decode('utf8') + if isinstance(obj, set): + return list(obj) + raise TypeError("Type ({}) not serializable".format(type(obj))) -result_container_json = { - "search": { - "q": search_query.query, - "pageno": search_query.pageno, - "lang": search_query.lang, - "safesearch": search_query.safesearch, - "timerange": search_query.time_range, - "engines": search_query.engines - }, - "results": no_parsed_url(result_container.get_ordered_results()), - "infoboxes": result_container.infoboxes, - "suggestions": list(result_container.suggestions), - "answers": list(result_container.answers), - "paging": result_container.paging, - "results_number": result_container.results_number() -} -sys.stdout = codecs.getwriter("UTF-8")(sys.stdout) -sys.stdout.write(dumps(result_container_json, sort_keys=True, indent=4, ensure_ascii=False, encoding="utf-8", default=json_serial)) + +def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]: + """Get result from parsed arguments.""" + result_container = searx.search.Search(search_query).search() + result_container_json = { + "search": { + "q": search_query.query, + "pageno": search_query.pageno, + "lang": search_query.lang, + "safesearch": search_query.safesearch, + "timerange": search_query.time_range, + }, + "results": no_parsed_url(result_container.get_ordered_results()), + "infoboxes": result_container.infoboxes, + "suggestions": list(result_container.suggestions), + "answers": list(result_container.answers), + "paging": result_container.paging, + "results_number": result_container.results_number() + } + return result_container_json + + +def parse_argument( + args: Optional[List[str]]=None, + category_choices: EngineCategoriesVar=None +) -> argparse.Namespace: + """Parse command line. 
+ + :raise SystemExit: Query argument required on `args` + + Examples: + + >>> import importlib + ... # load module + ... spec = importlib.util.spec_from_file_location( + ... 'utils.standalone_searx', 'utils/standalone_searx.py') + ... sas = importlib.util.module_from_spec(spec) + ... spec.loader.exec_module(sas) + ... sas.parse_argument() + usage: ptipython [-h] [--category [{general}]] [--lang [LANG]] [--pageno [PAGENO]] [--safesearch [{0,1,2}]] [--timerange [{day,week,month,year}]] + query + SystemExit: 2 + >>> sas.parse_argument(['rain']) + Namespace(category='general', lang='all', pageno=1, query='rain', safesearch='0', timerange=None) + """ # noqa: E501 + if not category_choices: + category_choices = list(searx.engines.categories.keys()) + parser = argparse.ArgumentParser(description='Standalone searx.') + parser.add_argument('query', type=str, + help='Text query') + parser.add_argument('--category', type=str, nargs='?', + choices=category_choices, + default='general', + help='Search category') + parser.add_argument('--lang', type=str, nargs='?', default='all', + help='Search language') + parser.add_argument('--pageno', type=int, nargs='?', default=1, + help='Page number starting from 1') + parser.add_argument( + '--safesearch', type=str, nargs='?', + choices=['0', '1', '2'], default='0', + help='Safe content filter from none to strict') + parser.add_argument( + '--timerange', type=str, + nargs='?', choices=['day', 'week', 'month', 'year'], + help='Filter by time range') + return parser.parse_args(args) + + +if __name__ == '__main__': + searx.engines.initialize_engines(searx.settings['engines']) + engine_cs = list(searx.engines.categories.keys()) + prog_args = parse_argument(category_choices=engine_cs) + search_q = get_search_query(prog_args, engine_categories=engine_cs) + res_dict = to_dict(search_q) + sys.stdout.write(dumps( + res_dict, sort_keys=True, indent=4, ensure_ascii=False, + default=json_serial)) |