summary | refs | log | tree | commit | diff
path: root/searx
diff options
context:
space:
mode:
author Adam Tauber <asciimoo@gmail.com> 2014-05-20 01:16:49 +0200
committer Adam Tauber <asciimoo@gmail.com> 2014-05-20 01:16:49 +0200
commit b226e6462b1a8fa18ee670f3f2738145426b6f41 (patch)
tree 393bb0879d6e90b6391f31debf194b3c4386f3b0 /searx
parent 78d42f094cd17c4152518a592cbc28c432a1ef22 (diff)
download searxng-b226e6462b1a8fa18ee670f3f2738145426b6f41.tar.gz
searxng-b226e6462b1a8fa18ee670f3f2738145426b6f41.zip
[fix] www. domain duplications
Diffstat (limited to 'searx')
-rw-r--r-- searx/engines/__init__.py 10
1 files changed, 9 insertions, 1 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 72e537423..31e28216c 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -154,16 +154,24 @@ def score_results(results):
# deduplication + scoring
for i, res in enumerate(flat_res):
res['parsed_url'] = urlparse(res['url'])
+ res['host'] = res['parsed_url'].netloc
+
+ if res['host'].startswith('www.'):
+ res['host'] = res['host'].replace('www.', '', 1)
+
res['engines'] = [res['engine']]
weight = 1.0
+
if hasattr(engines[res['engine']], 'weight'):
weight = float(engines[res['engine']].weight)
+
score = int((flat_len - i) / engines_len) * weight + 1
duplicated = False
+
for new_res in results:
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa
- if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
+ if res['host'] == new_res['host'] and\
p1 == p2 and\
res['parsed_url'].query == new_res['parsed_url'].query and\
res.get('template') == new_res.get('template'):