| field | value | date |
|---|---|---|
| author | Adam Tauber <asciimoo@gmail.com> | 2014-10-19 12:41:04 +0200 |
| committer | Adam Tauber <asciimoo@gmail.com> | 2014-10-19 12:41:04 +0200 |
| commit | 5740cfbf1cb468af74d0e7e1c9358ce702eb4f59 (patch) | |
| tree | c93752a16cd24ce1b72e0f0520ed04d0e0f25c9e /searx/search.py | |
| parent | b0fd71b7b3463864e6c60156610d383af2ac9709 (diff) | |
| download | searxng-5740cfbf1cb468af74d0e7e1c9358ce702eb4f59.tar.gz, searxng-5740cfbf1cb468af74d0e7e1c9358ce702eb4f59.zip | |
[fix] pep8 part II.
Diffstat (limited to 'searx/search.py')

    -rw-r--r--  searx/search.py | 104

1 file changed, 67 insertions, 37 deletions
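The change is almost entirely mechanical PEP 8 cleanup: long lines are wrapped under the 79-column limit (E501), `== None` / `!= None` comparisons become `is None` / `is not None` (E711), and `not x in y` membership tests become `x not in y` (E713). The following minimal sketch of those idioms is illustrative only; the names in it are made up and it is not code from searx:

```python
# Illustrative PEP 8 idioms applied throughout this commit.

value = None
if value is None:          # E711 fix: identity check, not "value == None"
    value = []

blocked_engines = {'engine_a'}
name = 'engine_b'
if name not in blocked_engines:   # E713 fix: "not in", not "not name in ..."
    print('allowed: %s' % name)
```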
```diff
diff --git a/searx/search.py b/searx/search.py
index 064c68844..f051d6df2 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -22,7 +22,7 @@ from datetime import datetime
 from operator import itemgetter
 from urlparse import urlparse, unquote
 from searx.engines import (
-    categories, engines, engine_shortcuts
+    categories, engines
 )
 from searx.languages import language_codes
 from searx.utils import gen_useragent
@@ -39,7 +39,13 @@ def default_request_params():
 
 
 # create a callback wrapper for the search engine results
-def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params):
+def make_callback(engine_name,
+                  results,
+                  suggestions,
+                  answers,
+                  infoboxes,
+                  callback,
+                  params):
 
     # creating a callback wrapper for the search engine results
     def process_callback(response, **kwargs):
@@ -95,7 +101,7 @@ def make_callback(engine_name, results, suggestions, answers, infoboxes, callbac
 def content_result_len(content):
     if isinstance(content, basestring):
         content = re.sub('[,;:!?\./\\\\ ()-_]', '', content)
-        return len(content)
+        return len(content)
     else:
         return 0
 
@@ -126,7 +132,8 @@ def score_results(results):
 
         # strip multiple spaces and cariage returns from content
         if 'content' in res:
-            res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
+            res['content'] = re.sub(' +', ' ',
+                                    res['content'].strip().replace('\n', ''))
 
         # get weight of this engine if possible
         if hasattr(engines[res['engine']], 'weight'):
@@ -139,8 +146,12 @@ def score_results(results):
         duplicated = False
         for new_res in results:
             # remove / from the end of the url if required
-            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
-            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
+            p1 = res['parsed_url'].path[:-1]\
+                if res['parsed_url'].path.endswith('/')\
+                else res['parsed_url'].path
+            p2 = new_res['parsed_url'].path[:-1]\
+                if new_res['parsed_url'].path.endswith('/')\
+                else new_res['parsed_url'].path
 
             # check if that result is a duplicate
             if res['host'] == new_res['host'] and\
@@ -153,7 +164,8 @@ def score_results(results):
         # merge duplicates together
         if duplicated:
             # using content with more text
-            if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')):
+            if content_result_len(res.get('content', '')) >\
+                    content_result_len(duplicated.get('content', '')):
                 duplicated['content'] = res['content']
 
             # increase result-score
@@ -182,17 +194,25 @@ def score_results(results):
     for i, res in enumerate(results):
 
         # FIXME : handle more than one category per engine
-        category = engines[res['engine']].categories[0] + ':' + '' if 'template' not in res else res['template']
-
-        current = None if category not in categoryPositions else categoryPositions[category]
-
-        # group with previous results using the same category if the group can accept more result and is not too far from the current position
-        if current != None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
-            # group with the previous results using the same category with this one
+        category = engines[res['engine']].categories[0] + ':' + ''\
+            if 'template' not in res\
+            else res['template']
+
+        current = None if category not in categoryPositions\
+            else categoryPositions[category]
+
+        # group with previous results using the same category
+        # if the group can accept more result and is not too far
+        # from the current position
+        if current is not None and (current['count'] > 0)\
+                and (len(gresults) - current['index'] < 20):
+            # group with the previous results using
+            # the same category with this one
             index = current['index']
             gresults.insert(index, res)
 
-            # update every index after the current one (including the current one)
+            # update every index after the current one
+            # (including the current one)
             for k in categoryPositions:
                 v = categoryPositions[k]['index']
                 if v >= index:
@@ -206,7 +226,7 @@ def score_results(results):
             gresults.append(res)
 
             # update categoryIndex
-            categoryPositions[category] = { 'index' : len(gresults), 'count' : 8 }
+            categoryPositions[category] = {'index': len(gresults), 'count': 8}
 
     # return gresults
     return gresults
@@ -215,21 +235,21 @@ def score_results(results):
 def merge_two_infoboxes(infobox1, infobox2):
     if 'urls' in infobox2:
         urls1 = infobox1.get('urls', None)
-        if urls1 == None:
+        if urls1 is None:
             urls1 = []
             infobox1.set('urls', urls1)
 
         urlSet = set()
         for url in infobox1.get('urls', []):
             urlSet.add(url.get('url', None))
-
+
         for url in infobox2.get('urls', []):
             if url.get('url', None) not in urlSet:
                 urls1.append(url)
 
     if 'attributes' in infobox2:
         attributes1 = infobox1.get('attributes', None)
-        if attributes1 == None:
+        if attributes1 is None:
             attributes1 = []
             infobox1.set('attributes', attributes1)
@@ -237,14 +257,14 @@ def merge_two_infoboxes(infobox1, infobox2):
         for attribute in infobox1.get('attributes', []):
             if attribute.get('label', None) not in attributeSet:
                 attributeSet.add(attribute.get('label', None))
-
+
         for attribute in infobox2.get('attributes', []):
             attributes1.append(attribute)
 
     if 'content' in infobox2:
         content1 = infobox1.get('content', None)
         content2 = infobox2.get('content', '')
-        if content1 != None:
+        if content1 is not None:
             if content_result_len(content2) > content_result_len(content1):
                 infobox1['content'] = content2
         else:
@@ -257,12 +277,12 @@ def merge_infoboxes(infoboxes):
     for infobox in infoboxes:
         add_infobox = True
         infobox_id = infobox.get('id', None)
-        if infobox_id != None:
+        if infobox_id is not None:
             existingIndex = infoboxes_id.get(infobox_id, None)
-            if existingIndex != None:
+            if existingIndex is not None:
                 merge_two_infoboxes(results[existingIndex], infobox)
-                add_infobox=False
-
+                add_infobox = False
+
         if add_infobox:
             results.append(infobox)
             infoboxes_id[infobox_id] = len(results)-1
@@ -318,7 +338,8 @@ class Search(object):
 
         self.pageno = int(pageno_param)
 
-        # parse query, if tags are set, which change the serch engine or search-language
+        # parse query, if tags are set, which change
+        # the serch engine or search-language
         query_obj = Query(self.request_data['q'], self.blocked_engines)
         query_obj.parse_query()
 
@@ -334,25 +355,29 @@ class Search(object):
 
         self.categories = []
 
-        # if engines are calculated from query, set categories by using that informations
+        # if engines are calculated from query,
+        # set categories by using that informations
         if self.engines:
             self.categories = list(set(engine['category']
                                        for engine in self.engines))
 
-        # otherwise, using defined categories to calculate which engines should be used
+        # otherwise, using defined categories to
+        # calculate which engines should be used
         else:
             # set used categories
             for pd_name, pd in self.request_data.items():
                 if pd_name.startswith('category_'):
                     category = pd_name[9:]
                     # if category is not found in list, skip
-                    if not category in categories:
+                    if category not in categories:
                         continue
                     # add category to list
                     self.categories.append(category)
 
-            # if no category is specified for this search, using user-defined default-configuration which (is stored in cookie)
+            # if no category is specified for this search,
+            # using user-defined default-configuration which
+            # (is stored in cookie)
            if not self.categories:
                 cookie_categories = request.cookies.get('categories', '')
                 cookie_categories = cookie_categories.split(',')
@@ -360,16 +385,18 @@ class Search(object):
                 if ccateg in categories:
                     self.categories.append(ccateg)
 
-        # if still no category is specified, using general as default-category
+        # if still no category is specified, using general
+        # as default-category
         if not self.categories:
             self.categories = ['general']
 
-        # using all engines for that search, which are declared under the specific categories
+        # using all engines for that search, which are
+        # declared under the specific categories
         for categ in self.categories:
             self.engines.extend({'category': categ,
                                  'name': x.name}
                                 for x in categories[categ]
-                                if not x.name in self.blocked_engines)
+                                if x.name not in self.blocked_engines)
 
     # do search-request
     def search(self, request):
@@ -386,7 +413,7 @@ class Search(object):
         number_of_searches += 1
 
         # set default useragent
-        #user_agent = request.headers.get('User-Agent', '')
+        # user_agent = request.headers.get('User-Agent', '')
         user_agent = gen_useragent()
 
         # start search-reqest for all selected engines
@@ -400,7 +427,8 @@ class Search(object):
             if self.pageno > 1 and not engine.paging:
                 continue
 
-            # if search-language is set and engine does not provide language-support, skip
+            # if search-language is set and engine does not
+            # provide language-support, skip
             if self.lang != 'all' and not engine.language_support:
                 continue
 
@@ -412,7 +440,8 @@ class Search(object):
             request_params['pageno'] = self.pageno
             request_params['language'] = self.lang
 
-            # update request parameters dependent on search-engine (contained in engines folder)
+            # update request parameters dependent on
+            # search-engine (contained in engines folder)
             request_params = engine.request(self.query.encode('utf-8'),
                                             request_params)
 
@@ -431,7 +460,8 @@ class Search(object):
                 request_params
             )
 
-            # create dictionary which contain all informations about the request
+            # create dictionary which contain all
+            # informations about the request
             request_args = dict(
                 headers=request_params['headers'],
                 hooks=dict(response=callback),
```
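Two line-wrapping styles recur in this diff: backslash continuations for long conditional expressions (the `p1`/`p2` and `category` assignments) and implicit continuation inside parentheses (the reflowed `make_callback` signature). The sketch below is illustrative, not searx code; `strip_trailing_slash` and `describe` are hypothetical names that mirror the patterns in the commit:

```python
# Backslash-continued conditional expression, as in the p1/p2 lines of
# score_results(): a path with and without its trailing '/' compares equal.
def strip_trailing_slash(path):
    return path[:-1]\
        if path.endswith('/')\
        else path


# Implicit continuation inside parentheses -- the style PEP 8 generally
# prefers, and the one used for the reflowed make_callback() signature.
def describe(engine_name,
             results,
             suggestions):
    return '%s: %d results, %d suggestions' % (engine_name,
                                               len(results),
                                               len(suggestions))


print(strip_trailing_slash('/about/') == strip_trailing_slash('/about'))  # True
print(describe('example', [1, 2], []))
```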