diff options
author | Adam Tauber <asciimoo@gmail.com> | 2016-08-13 00:43:21 +0200 |
---|---|---|
committer | Adam Tauber <asciimoo@gmail.com> | 2016-08-13 01:13:41 +0200 |
commit | eb57481450e950cb057f97858d29dd3b01960120 (patch) | |
tree | f31d3d1e99375e27079497bb824bc06797d2ab6d /searx | |
parent | d6fe37f9ff0f4ea291869d540b220de6f02b5e33 (diff) | |
download | searxng-eb57481450e950cb057f97858d29dd3b01960120.tar.gz searxng-eb57481450e950cb057f97858d29dd3b01960120.zip |
[fix] google images paging - closes #571
Diffstat (limited to 'searx')
-rw-r--r-- | searx/engines/google_images.py | 34 |
1 file changed, 23 insertions, 11 deletions
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 090d44704..77bdc13b2 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -19,13 +19,16 @@ categories = ['images'] paging = True safesearch = True time_range_support = True +number_of_results = 100 search_url = 'https://www.google.com/search'\ '?{query}'\ + '&asearch=ichunk'\ + '&async=_id:rg_s,_pms:s'\ '&tbm=isch'\ - '&ijn=1'\ - '&start={offset}' -time_range_search = "&tbs=qdr:{range}" + '&yv=2'\ + '&{search_options}' +time_range_attr = "qdr:{range}" time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm'} @@ -33,16 +36,20 @@ time_range_dict = {'day': 'd', # do search-request def request(query, params): - offset = (params['pageno'] - 1) * 100 - params['url'] = search_url.format(query=urlencode({'q': query}), - offset=offset, - safesearch=safesearch) + search_options = { + 'ijn': params['pageno'] - 1, + 'start': (params['pageno'] - 1) * number_of_results + } + if params['time_range'] in time_range_dict: - params['url'] += time_range_search.format(range=time_range_dict[params['time_range']]) + search_options['tbs'] = time_range_attr.format(range=time_range_dict[params['time_range']]) if safesearch and params['safesearch']: - params['url'] += '&' + urlencode({'safe': 'active'}) + search_options['safe'] = 'on' + + params['url'] = search_url.format(query=urlencode({'q': query}), + search_options=urlencode(search_options)) return params @@ -51,12 +58,17 @@ def request(query, params): def response(resp): results = [] - dom = html.fromstring(resp.text) + g_result = loads(resp.text) + + dom = html.fromstring(g_result[1][1]) # parse results for result in dom.xpath('//div[@data-ved]'): - metadata = loads(result.xpath('./div[@class="rg_meta"]/text()')[0]) + try: + metadata = loads(''.join(result.xpath('./div[@class="rg_meta"]/text()'))) + except: + continue thumbnail_src = metadata['tu'] |