diff options
author | Noemi Vanyi <sitbackandwait@gmail.com> | 2016-07-17 18:42:30 +0200 |
---|---|---|
committer | Noemi Vanyi <sitbackandwait@gmail.com> | 2016-07-25 23:19:46 +0200 |
commit | 93c0c49e9aba719c8c8e6b171e0dd515a586d32b (patch) | |
tree | 061d9394011fc36a805f5e04c227962fc1372dfb /searx/engines | |
parent | 3a9c3fbd68486ad8c5d98038f16f29f8f233f7a3 (diff) | |
download | searxng-93c0c49e9aba719c8c8e6b171e0dd515a586d32b.tar.gz searxng-93c0c49e9aba719c8c8e6b171e0dd515a586d32b.zip |
add time range search with yahoo
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/__init__.py | 3 | ||||
-rw-r--r-- | searx/engines/yahoo.py | 33 |
2 files changed, 27 insertions, 9 deletions
```diff
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 099baa587..2c735a188 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -42,7 +42,8 @@ engine_default_args = {'paging': False,
                        'shortcut': '-',
                        'disabled': False,
                        'suspend_end_time': 0,
-                       'continuous_errors': 0}
+                       'continuous_errors': 0,
+                       'time_range_support': False}
 
 
 def load_module(filename):
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index b8b40e4aa..2334614cb 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -20,10 +20,12 @@ from searx.engines.xpath import extract_text, extract_url
 categories = ['general']
 paging = True
 language_support = True
+time_range_support = True
 
 # search-url
 base_url = 'https://search.yahoo.com/'
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
+search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
 
 # specific xpath variables
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
@@ -32,6 +34,9 @@ title_xpath = './/h3/a'
 content_xpath = './/div[@class="compText aAbs"]'
 suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
 
+time_range_dict = {'day': ['1d', 'd'],
+                   'week': ['1w', 'w'],
+                   'month': ['1m', 'm']}
 
 # remove yahoo-specific tracking-url
 def parse_url(url_string):
@@ -51,18 +56,30 @@ def parse_url(url_string):
     return unquote(url_string[start:end])
 
 
+def _get_url(query, offset, language, time_range):
+    if time_range:
+        return base_url + search_url_with_time.format(offset=offset,
+                                                      query=urlencode({'p': query}),
+                                                      lang=language,
+                                                      age=time_range_dict[time_range][0],
+                                                      btf=time_range_dict[time_range][1])
+    return base_url + search_url.format(offset=offset,
+                                        query=urlencode({'p': query}),
+                                        lang=language)
+
+
+def _get_language(params):
+    if params['language'] == 'all':
+        return 'en'
+    return params['language'].split('_')[0]
+
+
 # do search-request
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
+    language = _get_language(params)
 
-    if params['language'] == 'all':
-        language = 'en'
-    else:
-        language = params['language'].split('_')[0]
-
-    params['url'] = base_url + search_url.format(offset=offset,
-                                                 query=urlencode({'p': query}),
-                                                 lang=language)
+    params['url'] = _get_url(query, offset, language, params['time_range'])
 
     # TODO required?
     params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
```