author | Ahmad Alkadri <ahmad.alkadri@outlook.com> | 2023-01-08 19:12:52 +0100 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2023-01-09 15:08:24 +0100 |
commit | 7fc8d72889742399abb0d327e355ab350ddb395b (patch) | |
tree | 557b602a3e5f1e6e6528511d2cf369238916693b /searx/engines | |
parent | a90ed481ed28ca35bdc9e694c355120ec2ccebb2 (diff) | |
[fix] bing: parsing result; check to see if the element contains links
This patch hardens the parsing of the Bing response (both steps are also illustrated in a standalone sketch after the issue links below):
1. To fix [2087], check whether the selected result item contains a link; if it
does not, skip the item and continue with the next one in the result loop.
Increment the result pointer only when a result has actually been added: the
``enumerate`` counter also counts skipped items and is therefore no longer
valid once result items can be skipped.
To test the bugfix use: ``!bi :all cerbot``
2. Limit the XPath selection of result items to the direct child nodes (list
items ``li``) of the ordered list (``ol``).
To test the selector use: ``!bi :en pontiac aztek wiki``
.. in the result list the Wikipedia entry should appear on top,
compare [2068]
[2087] https://github.com/searxng/searxng/issues/2087
[2068] https://github.com/searxng/searxng/issues/2068
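
To see both hardening steps outside the engine, here is a minimal, self-contained sketch using plain lxml instead of the ``searx.utils`` helpers; ``SAMPLE_HTML``, ``parse_results`` and the simplified result dicts are illustrative only, not engine code:

```python
# Minimal sketch of the hardened parsing loop (plain lxml, illustrative names).
from lxml import html

# Hypothetical response body: one real result and one ad-like item without a link.
SAMPLE_HTML = """
<ol id="b_results">
  <li class="b_algo"><h2><a href="https://en.wikipedia.org/wiki/Pontiac_Aztek">Pontiac Aztek - Wikipedia</a></h2></li>
  <li class="b_algo"><h2>sponsored block without an anchor</h2></li>
</ol>
"""

def parse_results(text):
    dom = html.fromstring(text)
    results = []
    i = 0  # counts only results that were actually appended
    # step 2: select only direct <li> children of the results <ol>
    for result in dom.xpath('//ol[@id="b_results"]/li[contains(@class, "b_algo")]'):
        links = result.xpath('.//h2/a')
        if not links:
            # step 1: no link in this item -> skip it, leave the counter untouched
            continue
        link = links[0]
        results.append({'url': link.get('href'), 'title': link.text_content()})
        i += 1  # increment only after a result has been added
    return results

print(parse_results(SAMPLE_HTML))
# [{'url': 'https://en.wikipedia.org/wiki/Pontiac_Aztek', 'title': 'Pontiac Aztek - Wikipedia'}]
```

In the engine this counter appears to index the appended results (see ``url_to_resolve_index`` in the diff below), which is why skipped items must not advance it.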
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/bing.py | 11 |
1 file changed, 8 insertions, 3 deletions
```diff
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 5c4681cd8..783c0056a 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -9,7 +9,7 @@
 import re
 from urllib.parse import urlencode, urlparse, parse_qs
 from lxml import html
-from searx.utils import eval_xpath, extract_text, eval_xpath_list, match_language
+from searx.utils import eval_xpath, extract_text, eval_xpath_list, match_language, eval_xpath_getindex
 from searx.network import multi_requests, Request
 
 about = {
@@ -84,9 +84,12 @@ def response(resp):
     url_to_resolve = []
     url_to_resolve_index = []
-    for i, result in enumerate(eval_xpath_list(dom, '//li[contains(@class, "b_algo")]')):
+    i = 0
+    for result in eval_xpath_list(dom, '//ol[@id="b_results"]/li[contains(@class, "b_algo")]'):
 
-        link = eval_xpath(result, './/h2/a')[0]
+        link = eval_xpath_getindex(result, './/h2/a', 0, None)
+        if link is None:
+            continue
         url = link.attrib.get('href')
         title = extract_text(link)
@@ -119,6 +122,8 @@ def response(resp):
         # append result
         results.append({'url': url, 'title': title, 'content': content})
+        # increment result pointer for the next iteration in this loop
+        i += 1
 
     # resolve all Bing redirections in parallel
     request_list = [
```
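
The core of step 1 is swapping the bare ``[0]`` index for ``eval_xpath_getindex(result, './/h2/a', 0, None)``: indexing an empty XPath result raises ``IndexError`` and makes the whole ``response()`` call fail, whereas a get-index-with-default returns ``None`` so the loop can skip just that one item. A small illustration (``xpath_getindex`` below is an illustrative stand-in, not ``searx.utils.eval_xpath_getindex`` itself):

```python
# Illustration of why the patch avoids bare [0] indexing on an XPath result.
from lxml import html

# A result item without an <a> inside its <h2>, like the items issue 2087 trips over.
node = html.fromstring('<li class="b_algo"><h2>no anchor here</h2></li>')

try:
    link = node.xpath('.//h2/a')[0]               # old pattern
except IndexError:
    print('old pattern: IndexError would abort the whole response() parsing')

def xpath_getindex(element, xpath, index, default):
    """Illustrative stand-in for an XPath get-index helper with a default."""
    matches = element.xpath(xpath)
    return matches[index] if len(matches) > index else default

link = xpath_getindex(node, './/h2/a', 0, None)   # new pattern
if link is None:
    print('new pattern: got None -> skip this result item and continue the loop')
```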