path: root/searx/search.py
author     Adam Tauber <asciimoo@gmail.com>    2014-10-19 12:41:04 +0200
committer  Adam Tauber <asciimoo@gmail.com>    2014-10-19 12:41:04 +0200
commit     5740cfbf1cb468af74d0e7e1c9358ce702eb4f59 (patch)
tree       c93752a16cd24ce1b72e0f0520ed04d0e0f25c9e /searx/search.py
parent     b0fd71b7b3463864e6c60156610d383af2ac9709 (diff)
download   searxng-5740cfbf1cb468af74d0e7e1c9358ce702eb4f59.tar.gz
           searxng-5740cfbf1cb468af74d0e7e1c9358ce702eb4f59.zip
[fix] pep8 part II.
Diffstat (limited to 'searx/search.py')
-rw-r--r--    searx/search.py    104
1 file changed, 67 insertions(+), 37 deletions(-)
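
This commit continues the PEP8 cleanup of part I. The changes fall into a few recurring categories, all visible in the hunks below: long lines (E501) wrapped with backslash continuations, equality tests against None (E711) replaced by identity tests, "not x in y" (E713) rewritten as "x not in y", missing spaces around assignments (E225) added, stray whitespace inside dict literals (E201/E202/E203) removed, and block comments normalized to start with "# " (E265). A condensed recap, using fragments from the hunks below:

# E501: wrap a long conditional expression
p1 = res['parsed_url'].path[:-1]\
    if res['parsed_url'].path.endswith('/')\
    else res['parsed_url'].path

# E711: identity comparison with None
if urls1 is None:                 # was: if urls1 == None:
    urls1 = []

# E713: membership test
if category not in categories:    # was: if not category in categories:
    continue

# E201/E202/E203/E225: whitespace in dict literals and around '='
categoryPositions[category] = {'index': len(gresults), 'count': 8}
add_infobox = False
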
diff --git a/searx/search.py b/searx/search.py
index 064c68844..f051d6df2 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -22,7 +22,7 @@ from datetime import datetime
from operator import itemgetter
from urlparse import urlparse, unquote
from searx.engines import (
- categories, engines, engine_shortcuts
+ categories, engines
)
from searx.languages import language_codes
from searx.utils import gen_useragent
@@ -39,7 +39,13 @@ def default_request_params():
# create a callback wrapper for the search engine results
-def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params):
+def make_callback(engine_name,
+ results,
+ suggestions,
+ answers,
+ infoboxes,
+ callback,
+ params):
# creating a callback wrapper for the search engine results
def process_callback(response, **kwargs):
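
For context: make_callback returns a per-engine process_callback closure, so every engine's HTTP response handler appends into the same shared containers. The body of process_callback lies outside this hunk; the sketch below is a hypothetical reduction of the pattern, not the patch's actual code:

def make_callback(engine_name,
                  results,
                  suggestions,
                  answers,
                  infoboxes,
                  callback,
                  params):
    # the closure captures the shared result containers
    def process_callback(response, **kwargs):
        # hypothetical: let the engine-specific callback parse the
        # response, then tag and collect whatever it produced
        for result in callback(response):
            result['engine'] = engine_name
            results.append(result)
    return process_callback
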
@@ -95,7 +101,7 @@ def make_callback(engine_name, results, suggestions, answers, infoboxes, callbac
def content_result_len(content):
if isinstance(content, basestring):
content = re.sub('[,;:!?\./\\\\ ()-_]', '', content)
- return len(content)
+ return len(content)
else:
return 0
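
After the re-indent, the helper reads as follows (reconstructed from this hunk; basestring is the Python 2 built-in and re is imported at the top of the module):

def content_result_len(content):
    if isinstance(content, basestring):
        # strip punctuation and whitespace before measuring
        content = re.sub('[,;:!?\./\\\\ ()-_]', '', content)
        return len(content)
    else:
        return 0
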
@@ -126,7 +132,8 @@ def score_results(results):
# strip multiple spaces and cariage returns from content
if 'content' in res:
- res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
+ res['content'] = re.sub(' +', ' ',
+ res['content'].strip().replace('\n', ''))
# get weight of this engine if possible
if hasattr(engines[res['engine']], 'weight'):
@@ -139,8 +146,12 @@ def score_results(results):
duplicated = False
for new_res in results:
# remove / from the end of the url if required
- p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
- p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa
+ p1 = res['parsed_url'].path[:-1]\
+ if res['parsed_url'].path.endswith('/')\
+ else res['parsed_url'].path
+ p2 = new_res['parsed_url'].path[:-1]\
+ if new_res['parsed_url'].path.endswith('/')\
+ else new_res['parsed_url'].path
# check if that result is a duplicate
if res['host'] == new_res['host'] and\
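
The duplicate check compares hosts plus slash-normalized paths. The same normalization can be written without the backslash continuations, e.g. via a small helper (a readability sketch, not part of the patch):

def strip_trailing_slash(path):
    # '/foo/' and '/foo' should compare equal for duplicate detection
    return path[:-1] if path.endswith('/') else path

p1 = strip_trailing_slash(res['parsed_url'].path)
p2 = strip_trailing_slash(new_res['parsed_url'].path)
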
@@ -153,7 +164,8 @@ def score_results(results):
# merge duplicates together
if duplicated:
# using content with more text
- if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')):
+ if content_result_len(res.get('content', '')) >\
+ content_result_len(duplicated.get('content', '')):
duplicated['content'] = res['content']
# increase result-score
@@ -182,17 +194,25 @@ def score_results(results):
for i, res in enumerate(results):
# FIXME : handle more than one category per engine
- category = engines[res['engine']].categories[0] + ':' + '' if 'template' not in res else res['template']
-
- current = None if category not in categoryPositions else categoryPositions[category]
-
- # group with previous results using the same category if the group can accept more result and is not too far from the current position
- if current != None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
- # group with the previous results using the same category with this one
+ category = engines[res['engine']].categories[0] + ':' + ''\
+ if 'template' not in res\
+ else res['template']
+
+ current = None if category not in categoryPositions\
+ else categoryPositions[category]
+
+ # group with previous results using the same category
+ # if the group can accept more result and is not too far
+ # from the current position
+ if current is not None and (current['count'] > 0)\
+ and (len(gresults) - current['index'] < 20):
+ # group with the previous results using
+ # the same category with this one
index = current['index']
gresults.insert(index, res)
- # update every index after the current one (including the current one)
+ # update every index after the current one
+ # (including the current one)
for k in categoryPositions:
v = categoryPositions[k]['index']
if v >= index:
@@ -206,7 +226,7 @@ def score_results(results):
gresults.append(res)
# update categoryIndex
- categoryPositions[category] = { 'index' : len(gresults), 'count' : 8 }
+ categoryPositions[category] = {'index': len(gresults), 'count': 8}
# return gresults
return gresults
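
To follow the grouping code above: categoryPositions maps a category key to a window dict, 'index' being the insert position and 'count' the remaining capacity. A result joins an existing window only while capacity is left and the window trails the end of gresults by fewer than 20 positions; otherwise it is appended and opens a fresh window with a budget of 8. Condensed (the count decrement is assumed; it sits outside the shown hunks):

if current is not None and current['count'] > 0 \
        and len(gresults) - current['index'] < 20:
    gresults.insert(current['index'], res)
    # shift every remembered window at or past the insert position
    for k in categoryPositions:
        if categoryPositions[k]['index'] >= current['index']:
            categoryPositions[k]['index'] += 1
    current['count'] -= 1          # assumption: budget shrinks on each join
else:
    gresults.append(res)
    categoryPositions[category] = {'index': len(gresults), 'count': 8}
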
@@ -215,21 +235,21 @@ def score_results(results):
def merge_two_infoboxes(infobox1, infobox2):
if 'urls' in infobox2:
urls1 = infobox1.get('urls', None)
- if urls1 == None:
+ if urls1 is None:
urls1 = []
infobox1.set('urls', urls1)
urlSet = set()
for url in infobox1.get('urls', []):
urlSet.add(url.get('url', None))
-
+
for url in infobox2.get('urls', []):
if url.get('url', None) not in urlSet:
urls1.append(url)
if 'attributes' in infobox2:
attributes1 = infobox1.get('attributes', None)
- if attributes1 == None:
+ if attributes1 is None:
attributes1 = []
infobox1.set('attributes', attributes1)
@@ -237,14 +257,14 @@ def merge_two_infoboxes(infobox1, infobox2):
for attribute in infobox1.get('attributes', []):
if attribute.get('label', None) not in attributeSet:
attributeSet.add(attribute.get('label', None))
-
+
for attribute in infobox2.get('attributes', []):
attributes1.append(attribute)
if 'content' in infobox2:
content1 = infobox1.get('content', None)
content2 = infobox2.get('content', '')
- if content1 != None:
+ if content1 is not None:
if content_result_len(content2) > content_result_len(content1):
infobox1['content'] = content2
else:
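
The URL merge collects the urls already present in infobox1 into a set, then appends only unseen entries from infobox2; attributes and content are merged along the same lines. A standalone sketch of the set-based URL merge (plain dicts and setdefault assumed, unlike the .set call in the patch):

def merge_urls(infobox1, infobox2):
    urls1 = infobox1.setdefault('urls', [])
    seen = set(u.get('url') for u in urls1)
    for url in infobox2.get('urls', []):
        if url.get('url') not in seen:
            urls1.append(url)
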
@@ -257,12 +277,12 @@ def merge_infoboxes(infoboxes):
for infobox in infoboxes:
add_infobox = True
infobox_id = infobox.get('id', None)
- if infobox_id != None:
+ if infobox_id is not None:
existingIndex = infoboxes_id.get(infobox_id, None)
- if existingIndex != None:
+ if existingIndex is not None:
merge_two_infoboxes(results[existingIndex], infobox)
- add_infobox=False
-
+ add_infobox = False
+
if add_infobox:
results.append(infobox)
infoboxes_id[infobox_id] = len(results)-1
@@ -318,7 +338,8 @@ class Search(object):
self.pageno = int(pageno_param)
- # parse query, if tags are set, which change the serch engine or search-language
+ # parse query, if tags are set, which change
+ # the serch engine or search-language
query_obj = Query(self.request_data['q'], self.blocked_engines)
query_obj.parse_query()
@@ -334,25 +355,29 @@ class Search(object):
self.categories = []
- # if engines are calculated from query, set categories by using that informations
+ # if engines are calculated from query,
+ # set categories by using that informations
if self.engines:
self.categories = list(set(engine['category']
for engine in self.engines))
- # otherwise, using defined categories to calculate which engines should be used
+ # otherwise, using defined categories to
+ # calculate which engines should be used
else:
# set used categories
for pd_name, pd in self.request_data.items():
if pd_name.startswith('category_'):
category = pd_name[9:]
# if category is not found in list, skip
- if not category in categories:
+ if category not in categories:
continue
# add category to list
self.categories.append(category)
- # if no category is specified for this search, using user-defined default-configuration which (is stored in cookie)
+ # if no category is specified for this search,
+ # using user-defined default-configuration which
+ # (is stored in cookie)
if not self.categories:
cookie_categories = request.cookies.get('categories', '')
cookie_categories = cookie_categories.split(',')
@@ -360,16 +385,18 @@ class Search(object):
if ccateg in categories:
self.categories.append(ccateg)
- # if still no category is specified, using general as default-category
+ # if still no category is specified, using general
+ # as default-category
if not self.categories:
self.categories = ['general']
- # using all engines for that search, which are declared under the specific categories
+ # using all engines for that search, which are
+ # declared under the specific categories
for categ in self.categories:
self.engines.extend({'category': categ,
'name': x.name}
for x in categories[categ]
- if not x.name in self.blocked_engines)
+ if x.name not in self.blocked_engines)
# do search-request
def search(self, request):
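
After category resolution, the engine list is filled per category while skipping blocked engines. The comprehension at the end of the hunk above is equivalent to this loop form (a readability sketch, not part of the patch):

for categ in self.categories:
    for x in categories[categ]:
        if x.name not in self.blocked_engines:
            self.engines.append({'category': categ, 'name': x.name})
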
@@ -386,7 +413,7 @@ class Search(object):
number_of_searches += 1
# set default useragent
- #user_agent = request.headers.get('User-Agent', '')
+ # user_agent = request.headers.get('User-Agent', '')
user_agent = gen_useragent()
# start search-reqest for all selected engines
@@ -400,7 +427,8 @@ class Search(object):
if self.pageno > 1 and not engine.paging:
continue
- # if search-language is set and engine does not provide language-support, skip
+ # if search-language is set and engine does not
+ # provide language-support, skip
if self.lang != 'all' and not engine.language_support:
continue
@@ -412,7 +440,8 @@ class Search(object):
request_params['pageno'] = self.pageno
request_params['language'] = self.lang
- # update request parameters dependent on search-engine (contained in engines folder)
+ # update request parameters dependent on
+ # search-engine (contained in engines folder)
request_params = engine.request(self.query.encode('utf-8'),
request_params)
@@ -431,7 +460,8 @@ class Search(object):
request_params
)
- # create dictionary which contain all informations about the request
+ # create dictionary which contain all
+ # informations about the request
request_args = dict(
headers=request_params['headers'],
hooks=dict(response=callback),
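
The request_args dict feeds a requests-style call: headers and the hooks mapping are standard requests keyword arguments, so each engine's process_callback fires as soon as its HTTP response arrives. A minimal standalone illustration of the same hook mechanism (URL, header value, and callback body are placeholders, not searx code):

import requests

def process_callback(response, **kwargs):
    # invoked by requests once the response object is ready
    print(response.status_code)

request_args = dict(
    headers={'User-Agent': 'example-agent'},
    hooks=dict(response=process_callback),
)
requests.get('https://example.org/search', **request_args)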