summaryrefslogtreecommitdiff
path: root/searx/plugins/oa_doi_rewrite.py
diff options
context:
space:
mode:
authorjibe-b <user701@orange.fr>2017-09-22 23:43:05 +0200
committerNoémi Ványi <sitbackandwait@gmail.com>2017-11-01 14:22:26 +0100
commit575159b194440052d7b48aa073d7e03c80799c90 (patch)
tree693e1db791842058d11c6ddb2cb181bb64b10da3 /searx/plugins/oa_doi_rewrite.py
parent7de8b43eb2081853ae15b2a52cc0cae43647320b (diff)
downloadsearxng-575159b194440052d7b48aa073d7e03c80799c90.tar.gz
searxng-575159b194440052d7b48aa073d7e03c80799c90.zip
[enh] oa_doi_rewrite plugin broadens doai_rewrite
Diffstat (limited to 'searx/plugins/oa_doi_rewrite.py')
-rw-r--r--searx/plugins/oa_doi_rewrite.py45
1 files changed, 45 insertions, 0 deletions
diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py
new file mode 100644
index 000000000..e952c86f5
--- /dev/null
+++ b/searx/plugins/oa_doi_rewrite.py
@@ -0,0 +1,45 @@
+from flask_babel import gettext
+import re
+from searx.url_utils import urlparse, parse_qsl
+from flask import request
+from searx import settings
+
+
+regex = re.compile(r'10\.\d{4,9}/[^\s]+')  # DOI-like token: "10.", 4-9 digits, "/", then a run of non-whitespace
+
+name = gettext('Open Access DOI rewrite')  # plugin title, passed through gettext for translation
+description = gettext('Avoid paywalls by redirecting to open-access versions of publications when available')
+default_on = False  # presumably read by the plugin loader: plugin is opt-in -- TODO confirm
+preference_section = 'privacy'  # presumably the preferences tab this plugin is listed under -- TODO confirm
+
+doi_resolvers = settings['doi_resolvers']  # NOTE(review): functions below re-read settings; verify external users before removing
+
+
+def extract_doi(url):
+ match = regex.search(url.path)
+ if match:
+ return match.group(0)
+ for _, v in parse_qsl(url.query):
+ match = regex.search(v)
+ if match:
+ return match.group(0)
+ return None
+
+
+def get_doi_resolver():
+ doi_resolvers = settings['doi_resolvers']
+ doi_resolver = request.args.get('doi_resolver', request.preferences.get_value('doi_resolver'))[0]
+ if doi_resolver not in doi_resolvers:
+ doi_resolvers = settings['default_doi_resolver']
+ return doi_resolvers[doi_resolver]
+
+
+def on_result(request, search, result):
+ doi = extract_doi(result['parsed_url'])
+ if doi and len(doi) < 50:
+ for suffix in ('/', '.pdf', '/full', '/meta', '/abstract'):
+ if doi.endswith(suffix):
+ doi = doi[:-len(suffix)]
+ result['url'] = get_doi_resolver() + doi
+ result['parsed_url'] = urlparse(result['url'])
+ return True