diff options
author | Bnyro <bnyro@tutanota.com> | 2023-08-05 20:25:10 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2023-08-08 16:21:45 +0200 |
commit | cae06f278136cdd10073b431a48969238a7ccbdb (patch) | |
tree | 71cb0016377b22ccb176023f7ea4d27e19f8f569 /searx | |
parent | 73364e158ec88a56383bd3b56023e4fb768d0b9b (diff) | |
download | searxng-cae06f278136cdd10073b431a48969238a7ccbdb.tar.gz searxng-cae06f278136cdd10073b431a48969238a7ccbdb.zip |
[feat] engine: brave - support for videos
Diffstat (limited to 'searx')
-rw-r--r-- | searx/engines/brave.py | 68 | ||||
-rw-r--r-- | searx/settings.yml | 6 |
2 files changed, 51 insertions, 23 deletions
```diff
diff --git a/searx/engines/brave.py b/searx/engines/brave.py
index e2acf64f4..61601faf5 100644
--- a/searx/engines/brave.py
+++ b/searx/engines/brave.py
@@ -4,10 +4,8 @@
 """
 
 from urllib.parse import urlencode
-from lxml import html
-from searx.utils import extract_text, eval_xpath, eval_xpath_list
-import chompjs, json
-import re
+import chompjs
+import json
 
 about = {
     "website": 'https://search.brave.com/',
@@ -19,7 +17,8 @@ about = {
 }
 base_url = "https://search.brave.com/"
 paging = False
-categories = ['images', 'videos', 'news'] # images, videos, news
+categories = ['images', 'videos', 'news']  # images, videos, news
+
 
 def request(query, params):
     args = {
@@ -28,38 +27,61 @@ def request(query, params):
     }
     params["url"] = f"{base_url}{categories[0]}?{urlencode(args)}"
 
-def get_image_results(text):
-    results = []
-    datastr = ""
-    for line in text.split("\n"):
-        if "const data = " in line:
-            datastr = line.replace("const data = ", "").strip()[:-1]
-            break
-
-    json_data = chompjs.parse_js_object(datastr)
+def get_video_results(json_data):
+    results = []
 
-    for result in json_data[1]["data"]["body"]["response"]["results"]:
+    for result in json_data:
         results.append(
             {
-                'template': 'images.html',
+                'template': 'videos.html',
                 'url': result['url'],
                 'thumbnail_src': result['thumbnail']['src'],
                 'img_src': result['properties']['url'],
                 'content': result['description'],
                 'title': result['title'],
                 'source': result['source'],
-                'img_format': result['properties']['format'],
+                'duration': result['video']['duration'],
             }
         )
 
 
     return results
+
 
 def response(resp):
-    dom = html.fromstring(resp.text)
+    results = []
 
-    match categories[0]:
-        case 'images':
-            return get_image_results(resp.text)
-        case _:
-            return []
\ No newline at end of file
+    datastr = ""
+    for line in resp.text.split("\n"):
+        if "const data = " in line:
+            datastr = line.replace("const data = ", "").strip()[:-1]
+            break
+
+    json_data = chompjs.parse_js_object(datastr)
+    json_results = json_data[1]["data"]["body"]["response"]["results"]
+
+    with open("outfile.json", "w") as f:
+        json.dump(json_data, f)
+
+    for result in json_results:
+        item = {
+            'url': result['url'],
+            'title': result['title'],
+            'content': result['description'],
+        }
+        if result['thumbnail'] != "null":
+            item['thumbnail'] = result['thumbnail']['src']
+
+        match categories[0]:
+            case 'images':
+                item['template'] = 'images.html'
+                item['img_format'] = result['properties']['format']
+                item['source'] = result['source']
+                item['img_src'] = result['properties']['url']
+            case 'videos':
+                item['template'] = 'videos.html'
+                item['length'] = result['video']['duration']
+
+        results.append(item)
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index d41b1edfd..a903785e8 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1849,6 +1849,12 @@ engines:
     categories: images
     disabled: false
 
+  - name: brave.videos
+    shortcut: bravevid
+    engine: brave
+    categories: videos
+    disabled: false
+
   - name: petalsearch
     shortcut: pts
     engine: xpath
```