diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2020-10-01 09:44:29 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2020-10-01 09:44:29 +0200 |
commit | 8162d7aff4a61801615d8044b71c7c477b2b2816 (patch) | |
tree | cceb122f6428e9b69182ccc67dedf5c0c1fe0bec /searx | |
parent | fd5fe369844e481aecc0d731b08ee8b29c9b47e6 (diff) | |
download | searxng-8162d7aff4a61801615d8044b71c7c477b2b2816.tar.gz searxng-8162d7aff4a61801615d8044b71c7c477b2b2816.zip |
[fix] google engine - div classes have been renamed in HTML result
As of 1 October 2020, Google has changed the 'class' attributes of the HTML
result page.
Fix the XPath expressions and ignore <div class="g" ../> sections which do not
match the title's XPath expression.
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx')
-rw-r--r-- | searx/engines/google.py | 17 |
1 files changed, 11 insertions, 6 deletions
diff --git a/searx/engines/google.py b/searx/engines/google.py index 7dc6b44ac..c9faadb6e 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -116,12 +116,12 @@ g_section_with_header = './g-section-with-header' # the title is a h3 tag relative to the result group title_xpath = './/h3[1]' -# in the result group there is <div class="r" ../> it's first child is a <a -# href=...> (on some results, the <a> is the first "descendant", not ""child") -href_xpath = './/div[@class="r"]//a/@href' +# in the result group there is <div class="yuRUbf" ../> it's first child is a <a +# href=...> +href_xpath = './/div[@class="yuRUbf"]//a/@href' -# in the result group there is <div class="s" ../> containing he *content* -content_xpath = './/div[@class="s"]' +# in the result group there is <div class="IsZvec" ../> containing he *content* +content_xpath = './/div[@class="IsZvec"]' # Suggestions are links placed in a *card-section*, we extract only the text # from the links not the links itself. @@ -249,7 +249,12 @@ def response(resp): continue try: - title = extract_text(eval_xpath(result, title_xpath)[0]) + title_tag = eval_xpath(result, title_xpath) + if not title_tag: + # this not one of the common google results *section* + logger.debug('ingoring <div class="g" ../> section: missing title') + continue + title = extract_text(title_tag[0]) url = eval_xpath(result, href_xpath)[0] content = extract_text_from_dom(result, content_xpath) results.append({ |