summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alexandre Flament <alexandre.flament@hesge.ch> 2024-02-28 19:22:00 +0100
committer Markus Heiser <markus.heiser@darmarIT.de> 2024-02-29 07:48:44 +0100
commit d58760ef751a2df47fa7f118c04d0502d1b37d16 (patch)
tree d3cd2485941e4952682a79a589ce2f46e0664236
parent 35873b5a1cd450963b7b5379b5bcb1c9d1677672 (diff)
download searxng-d58760ef751a2df47fa7f118c04d0502d1b37d16.tar.gz
searxng-d58760ef751a2df47fa7f118c04d0502d1b37d16.zip
[mod] pypi engine: use packages.html
-rw-r--r-- searx/engines/pypi.py 68
-rw-r--r-- searx/settings.yml 18
2 files changed, 69 insertions, 17 deletions
diff --git a/searx/engines/pypi.py b/searx/engines/pypi.py
new file mode 100644
index 000000000..e49de11e5
--- /dev/null
+++ b/searx/engines/pypi.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""pypi.org
+
+"""
+
+from urllib.parse import urlencode
+from dateutil import parser
+
+from lxml import html
+from searx.utils import (
+ eval_xpath_getindex,
+ eval_xpath_list,
+ extract_text,
+)
+
+# about
+about = {
+    "website": "https://pypi.org",
+    "wikidata_id": "Q2984686",
+    "official_api_documentation": "https://warehouse.readthedocs.io/api-reference/index.html",
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "HTML",
+}
+
+categories = ['it', 'packages']
+
+# engine dependent config
+paging = True  # request() forwards params['pageno']; the replaced xpath entry had `paging: true`
+first_page_num = 1
+base_url = "https://pypi.org"
+search_url = base_url + '/search/?{query}'
+
+
+def request(query, params):
+    """Store the pypi.org search URL for ``query`` in ``params['url']``.
+
+    The page sent to pypi.org is ``params['pageno']``; returns ``params``.
+    """
+    params['url'] = search_url.format(query=urlencode({"q": query, "page": params['pageno']}))
+    return params
+
+
+def response(resp):
+    """Parse a pypi.org search page into a list of ``packages.html`` results."""
+    snippets_xpath = '/html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"]'
+    created_xpath = './h3/span[@class="package-snippet__created"]/time/@datetime'
+
+    def first_text(node, xpath):
+        # text of the first match of ``xpath`` evaluated on ``node``
+        return extract_text(eval_xpath_getindex(node, xpath, 0))
+
+    dom = html.fromstring(resp.text)
+    packages = []
+    for snippet in eval_xpath_list(dom, snippets_xpath):
+        link = base_url + first_text(snippet, './@href')  # type: ignore
+        name = first_text(snippet, './h3/span[@class="package-snippet__name"]')
+        release = first_text(snippet, './h3/span[@class="package-snippet__version"]')
+        created = first_text(snippet, created_xpath)
+        summary = first_text(snippet, './p')
+        # the package name doubles as the result title
+        item = {"template": "packages.html", "url": link, "title": name, 'package_name': name}
+        item.update({"content": summary, "version": release})
+        item['publishedDate'] = parser.parse(created)  # type: ignore
+        packages.append(item)
+
+    return packages
diff --git a/searx/settings.yml b/searx/settings.yml
index 7d1eeb190..6c1f7bdd0 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1457,23 +1457,7 @@ engines:
- name: pypi
shortcut: pypi
- engine: xpath
- paging: true
- search_url: https://pypi.org/search/?q={query}&page={pageno}
- results_xpath: /html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"]
- url_xpath: ./@href
- title_xpath: ./h3/span[@class="package-snippet__name"]
- content_xpath: ./p
- suggestion_xpath: /html/body/main/div/div/div/form/div/div[@class="callout-block"]/p/span/a[@class="link"]
- first_page_num: 1
- categories: [it, packages]
- about:
- website: https://pypi.org
- wikidata_id: Q2984686
- official_api_documentation: https://warehouse.readthedocs.io/api-reference/index.html
- use_official_api: false
- require_api_key: false
- results: HTML
+ engine: pypi
- name: qwant
qwant_categ: web