summary | refs | log | tree | commit | diff
path: root/searx/engines/qwant.py
diff options
context:
space:
mode:
author Markus Heiser <markus.heiser@darmarit.de> 2023-09-19 12:55:21 +0200
committer Markus Heiser <markus.heiser@darmarIT.de> 2023-09-19 17:06:56 +0200
commit 043dcbf7c5dbd0778a60c75bb2636cb72ea5793b (patch)
tree eefdebe1832bb1c35cece6fda1e254ac7d68c9d6 /searx/engines/qwant.py
parent 3ac7c40b6ad16ebdc448e54e22bd0d735dd7b8c0 (diff)
download searxng-043dcbf7c5dbd0778a60c75bb2636cb72ea5793b.tar.gz
searxng-043dcbf7c5dbd0778a60c75bb2636cb72ea5793b.zip
[fix] engine qwant (web-lite) - ignore advertising ads
Closes: https://github.com/searxng/searxng/issues/2812
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/engines/qwant.py')
-rw-r--r-- searx/engines/qwant.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index 654a76337..168eb860e 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -159,9 +159,12 @@ def parse_web_lite(resp):
dom = lxml.html.fromstring(resp.text)
for item in eval_xpath_list(dom, '//section/article'):
+ if eval_xpath(item, "./span[contains(@class, 'tooltip')]"):
+ # ignore randomly interspersed advertising adds
+ continue
results.append(
{
- 'url': extract_text(eval_xpath(item, './span')),
+ 'url': extract_text(eval_xpath(item, "./span[contains(@class, 'url partner')]")),
'title': extract_text(eval_xpath(item, './h2/a')),
'content': extract_text(eval_xpath(item, './p')),
}