about | summary | refs | log | tree | commit | diff
path: root/gen-ignores.py
diff options
context:
space:
mode:
author: Jordan <me@jordan.im> 2022-02-10 16:53:48 -0700
committer: Jordan <me@jordan.im> 2022-02-10 16:53:48 -0700
commit: 3897d5bbdcc9aa52d88b6602e3542e690ee74f6c (patch)
tree: 4ec03fcd6ee70964e0f032762f98f9d484d6e775 /gen-ignores.py
parent: 07f4f6e08341ba60a7c49ed55c8e2682147b5156 (diff)
download: crawl-3897d5bbdcc9aa52d88b6602e3542e690ee74f6c.tar.gz
crawl-3897d5bbdcc9aa52d88b6602e3542e690ee74f6c.zip
gen-ignores, ignore_patterns: update to exclude unsupported Perl syntax, backreferences
Diffstat (limited to 'gen-ignores.py')
-rwxr-xr-x gen-ignores.py 6
1 files changed, 5 insertions, 1 deletions
diff --git a/gen-ignores.py b/gen-ignores.py
index 25b3cac..ede0529 100755
--- a/gen-ignores.py
+++ b/gen-ignores.py
@@ -13,13 +13,16 @@ import os
import sys
archivebot_ignore_path = sys.argv[1]
+unsupported = ['\\1', '\\2', '(?!', '(?=']
+
print 'package crawl\n\nvar defaultIgnorePatterns = []string{'
+
for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')):
try:
with open(fn) as fd:
print '\n\t// %s' % os.path.basename(fn)
for p in json.load(fd)['patterns']:
- if '\\\\1' in p or '(?!' in p:
+ if any(x in p for x in unsupported):
# RE2 does not support backreferences or other
# fancy PCRE constructs. This excludes <10
# patterns from the ignore list.
@@ -27,5 +30,6 @@ for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')):
print '\t%s,' % json.dumps(p)
except Exception, e:
print >>sys.stderr, 'error in %s: %s' % (fn, e)
+
print '}'