diff options
author | Markus Heiser <markus.heiser@darmarIT.de> | 2019-12-10 13:10:51 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-12-10 13:10:51 +0000 |
commit | 7beb49b1fb6f4bea5f4b99b853ab30ac47505790 (patch) | |
tree | f9756e0a1304d07bd6e90a23d19c128ed39086e5 | |
parent | 1b90e1403ba832672d8bde02e15a20834ee02e0e (diff) | |
parent | e9311ee77658607ad6d607950850e83c5af38d64 (diff) | |
download | searxng-7beb49b1fb6f4bea5f4b99b853ab30ac47505790.tar.gz searxng-7beb49b1fb6f4bea5f4b99b853ab30ac47505790.zip |
Merge branch 'master' into boilerplate
-rw-r--r-- | AUTHORS.rst | 4 | ||||
-rw-r--r-- | searx/engines/google.py | 21 | ||||
-rw-r--r-- | tests/unit/engines/test_google.py | 117 |
3 files changed, 47 insertions, 95 deletions
diff --git a/AUTHORS.rst b/AUTHORS.rst index 674bfd758..2a2f19219 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -1,4 +1,4 @@ -Searx was created by Adam Tauber and is maintained by Adam Tauber, Alexandre Flament and Noémi Ványi. +Searx was created by Adam Tauber and is maintained by Adam Tauber, Alexandre Flament, Noémi Ványi, @pofilo and Markus Heiser. Major contributing authors: @@ -9,6 +9,8 @@ Major contributing authors: - @Cqoicebordel - Noémi Ványi - Marc Abonce Seguin @a01200356 +- @pofilo +- Markus Heiser @return42 People who have submitted patches/translates, reported bugs, consulted features or generally made searx better: diff --git a/searx/engines/google.py b/searx/engines/google.py index 19bde710d..eed3a044e 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -107,13 +107,12 @@ images_path = '/images' supported_languages_url = 'https://www.google.com/preferences?#languages' # specific xpath variables -results_xpath = '//div[@class="g"]' -url_xpath = './/h3/a/@href' -title_xpath = './/h3' -content_xpath = './/span[@class="st"]' -content_misc_xpath = './/div[@class="f slp"]' -suggestion_xpath = '//p[@class="_Bmc"]' -spelling_suggestion_xpath = '//a[@class="spell"]' +results_xpath = '//div[contains(@class, "ZINbbc")]' +url_xpath = './/div[@class="kCrYT"][1]/a/@href' +title_xpath = './/div[@class="kCrYT"][1]/a/div[1]' +content_xpath = './/div[@class="kCrYT"][2]//div[contains(@class, "BNeawe")]//div[contains(@class, "BNeawe")]' +suggestion_xpath = '//div[contains(@class, "ZINbbc")][last()]//div[@class="rVLSBd"]/a//div[contains(@class, "BNeawe")]' +spelling_suggestion_xpath = '//div[@id="scc"]//a' # map : detail location map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()' @@ -199,10 +198,6 @@ def request(query, params): params['headers']['Accept-Language'] = language + ',' + language + '-' + country params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' - # Force Safari 3.1 on Mac OS X (Leopard) user agent to avoid loading the new UI that Searx can't parse - params['headers']['User-Agent'] = ("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4)" - "AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1") - params['google_hostname'] = google_hostname return params @@ -274,9 +269,7 @@ def response(resp): content = extract_text_from_dom(result, content_xpath) if content is None: continue - content_misc = extract_text_from_dom(result, content_misc_xpath) - if content_misc is not None: - content = content_misc + "<br />" + content + # append result results.append({'url': url, 'title': title, diff --git a/tests/unit/engines/test_google.py b/tests/unit/engines/test_google.py index a73e9d2be..9d0edd439 100644 --- a/tests/unit/engines/test_google.py +++ b/tests/unit/engines/test_google.py @@ -58,93 +58,50 @@ class TestGoogleEngine(SearxTestCase): self.assertEqual(google.response(response), []) html = """ - <div class="g"> - <h3 class="r"> - <a href="http://this.should.be.the.link/"> - <b>This</b> is <b>the</b> title - </a> - </h3> - <div class="s"> - <div class="kv" style="margin-bottom:2px"> - <cite> - <b>test</b>.psychologies.com/ - </cite> - <div class="_nBb"> - <div style="display:inline" onclick="google.sham(this);" aria-expanded="false" - aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA"> - <span class="_O0"> - </span> + <div class="ZINbbc xpd O9g5cc uUPGi"> + <div> + <div class="kCrYT"> + <a href="/url?q=http://this.should.be.the.link/"> + <div class="BNeawe"> + <b>This</b> is <b>the</b> title </div> - <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1"> - <ul> - <li class="_Ykb"> - <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent - .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/"> - En cache - </a> - </li> - <li class="_Ykb"> - <a class="_Zkb" href="/search?safe=off&q=related:test.psy.com/"> - Pages similaires - </a> - </li> - </ul> + <div class="BNeawe"> + http://website + </div> + </a> + </div> + <div class="kCrYT"> + <div> + <div class="BNeawe"> + <div> + <div class="BNeawe"> + This should be the content. + </div> + </div> </div> </div> </div> - <span class="st"> - This should be the content. - </span> - <br> - <div class="osl"> - <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/"> - Test Personnalité - </a> - - <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/"> - Tests - Moi - </a> - - <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple"> - Test Couple - </a> - - - <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour"> - Test Amour + </div> + </p> + <div class="ZINbbc xpd O9g5cc uUPGi"> + <div> + <div class="kCrYT"> + <span> + <div class="BNeawe"> + Related searches + </div> + </span> + </div> + <div class="rVLSBd"> + <a> + <div> + <div class="BNeawe"> + suggestion title + </div> + </div> </a> </div> </div> - </div> - <div class="g"> - <h3 class="r"> - <a href="http://www.google.com/images?q=toto"> - <b>This</b> - </a> - </h3> - </div> - <div class="g"> - <h3 class="r"> - <a href="http://www.google.com/search?q=toto"> - <b>This</b> is - </a> - </h3> - </div> - <div class="g"> - <h3 class="r"> - <a href="€"> - <b>This</b> is <b>the</b> - </a> - </h3> - </div> - <div class="g"> - <h3 class="r"> - <a href="/url?q=url"> - <b>This</b> is <b>the</b> - </a> - </h3> - </div> - <p class="_Bmc" style="margin:3px 8px"> - <a href="/search?num=20&safe=off&q=t&revid=1754833769&sa=X&ei=-&ved="> - suggestion <b>title</b> - </a> </p> """ response = self.mock_response(html) |