summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarIT.de> 2019-12-10 13:10:51 +0000
committer: GitHub <noreply@github.com> 2019-12-10 13:10:51 +0000
commit: 7beb49b1fb6f4bea5f4b99b853ab30ac47505790 (patch)
tree: f9756e0a1304d07bd6e90a23d19c128ed39086e5
parent: 1b90e1403ba832672d8bde02e15a20834ee02e0e (diff)
parent: e9311ee77658607ad6d607950850e83c5af38d64 (diff)
download: searxng-7beb49b1fb6f4bea5f4b99b853ab30ac47505790.tar.gz
download: searxng-7beb49b1fb6f4bea5f4b99b853ab30ac47505790.zip
Merge branch 'master' into boilerplate
-rw-r--r-- AUTHORS.rst 4
-rw-r--r-- searx/engines/google.py 21
-rw-r--r-- tests/unit/engines/test_google.py 117
3 files changed, 47 insertions, 95 deletions
diff --git a/AUTHORS.rst b/AUTHORS.rst
index 674bfd758..2a2f19219 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -1,4 +1,4 @@
-Searx was created by Adam Tauber and is maintained by Adam Tauber, Alexandre Flament and Noémi Ványi.
+Searx was created by Adam Tauber and is maintained by Adam Tauber, Alexandre Flament, Noémi Ványi, @pofilo and Markus Heiser.
Major contributing authors:
@@ -9,6 +9,8 @@ Major contributing authors:
- @Cqoicebordel
- Noémi Ványi
- Marc Abonce Seguin @a01200356
+- @pofilo
+- Markus Heiser @return42
People who have submitted patches/translates, reported bugs, consulted features or
generally made searx better:
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 19bde710d..eed3a044e 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -107,13 +107,12 @@ images_path = '/images'
supported_languages_url = 'https://www.google.com/preferences?#languages'
# specific xpath variables
-results_xpath = '//div[@class="g"]'
-url_xpath = './/h3/a/@href'
-title_xpath = './/h3'
-content_xpath = './/span[@class="st"]'
-content_misc_xpath = './/div[@class="f slp"]'
-suggestion_xpath = '//p[@class="_Bmc"]'
-spelling_suggestion_xpath = '//a[@class="spell"]'
+results_xpath = '//div[contains(@class, "ZINbbc")]'
+url_xpath = './/div[@class="kCrYT"][1]/a/@href'
+title_xpath = './/div[@class="kCrYT"][1]/a/div[1]'
+content_xpath = './/div[@class="kCrYT"][2]//div[contains(@class, "BNeawe")]//div[contains(@class, "BNeawe")]'
+suggestion_xpath = '//div[contains(@class, "ZINbbc")][last()]//div[@class="rVLSBd"]/a//div[contains(@class, "BNeawe")]'
+spelling_suggestion_xpath = '//div[@id="scc"]//a'
# map : detail location
map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()'
@@ -199,10 +198,6 @@ def request(query, params):
params['headers']['Accept-Language'] = language + ',' + language + '-' + country
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
- # Force Safari 3.1 on Mac OS X (Leopard) user agent to avoid loading the new UI that Searx can't parse
- params['headers']['User-Agent'] = ("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4)"
- "AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1")
-
params['google_hostname'] = google_hostname
return params
@@ -274,9 +269,7 @@ def response(resp):
content = extract_text_from_dom(result, content_xpath)
if content is None:
continue
- content_misc = extract_text_from_dom(result, content_misc_xpath)
- if content_misc is not None:
- content = content_misc + "<br />" + content
+
# append result
results.append({'url': url,
'title': title,
diff --git a/tests/unit/engines/test_google.py b/tests/unit/engines/test_google.py
index a73e9d2be..9d0edd439 100644
--- a/tests/unit/engines/test_google.py
+++ b/tests/unit/engines/test_google.py
@@ -58,93 +58,50 @@ class TestGoogleEngine(SearxTestCase):
self.assertEqual(google.response(response), [])
html = """
- <div class="g">
- <h3 class="r">
- <a href="http://this.should.be.the.link/">
- <b>This</b> is <b>the</b> title
- </a>
- </h3>
- <div class="s">
- <div class="kv" style="margin-bottom:2px">
- <cite>
- <b>test</b>.psychologies.com/
- </cite>
- <div class="_nBb">‎
- <div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
- aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
- <span class="_O0">
- </span>
+ <div class="ZINbbc xpd O9g5cc uUPGi">
+ <div>
+ <div class="kCrYT">
+ <a href="/url?q=http://this.should.be.the.link/">
+ <div class="BNeawe">
+ <b>This</b> is <b>the</b> title
</div>
- <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
- <ul>
- <li class="_Ykb">
- <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
- .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
- En cache
- </a>
- </li>
- <li class="_Ykb">
- <a class="_Zkb" href="/search?safe=off&amp;q=related:test.psy.com/">
- Pages similaires
- </a>
- </li>
- </ul>
+ <div class="BNeawe">
+ http://website
+ </div>
+ </a>
+ </div>
+ <div class="kCrYT">
+ <div>
+ <div class="BNeawe">
+ <div>
+ <div class="BNeawe">
+ This should be the content.
+ </div>
+ </div>
</div>
</div>
</div>
- <span class="st">
- This should be the content.
- </span>
- <br>
- <div class="osl">‎
- <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
- Test Personnalité
- </a> - ‎
- <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
- Tests - Moi
- </a> - ‎
- <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
- Test Couple
- </a>
- - ‎
- <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
- Test Amour
+ </div>
+ </p>
+ <div class="ZINbbc xpd O9g5cc uUPGi">
+ <div>
+ <div class="kCrYT">
+ <span>
+ <div class="BNeawe">
+ Related searches
+ </div>
+ </span>
+ </div>
+ <div class="rVLSBd">
+ <a>
+ <div>
+ <div class="BNeawe">
+ suggestion title
+ </div>
+ </div>
</a>
</div>
</div>
- </div>
- <div class="g">
- <h3 class="r">
- <a href="http://www.google.com/images?q=toto">
- <b>This</b>
- </a>
- </h3>
- </div>
- <div class="g">
- <h3 class="r">
- <a href="http://www.google.com/search?q=toto">
- <b>This</b> is
- </a>
- </h3>
- </div>
- <div class="g">
- <h3 class="r">
- <a href="€">
- <b>This</b> is <b>the</b>
- </a>
- </h3>
- </div>
- <div class="g">
- <h3 class="r">
- <a href="/url?q=url">
- <b>This</b> is <b>the</b>
- </a>
- </h3>
- </div>
- <p class="_Bmc" style="margin:3px 8px">
- <a href="/search?num=20&amp;safe=off&amp;q=t&amp;revid=1754833769&amp;sa=X&amp;ei=-&amp;ved=">
- suggestion <b>title</b>
- </a>
</p>
"""
response = self.mock_response(html)