From 3897d5bbdcc9aa52d88b6602e3542e690ee74f6c Mon Sep 17 00:00:00 2001 From: Jordan Date: Thu, 10 Feb 2022 16:53:48 -0700 Subject: gen-ignores, ignore_patterns: update to exclude unsupported Perl syntax, backreferences --- gen-ignores.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'gen-ignores.py') diff --git a/gen-ignores.py b/gen-ignores.py index 25b3cac..ede0529 100755 --- a/gen-ignores.py +++ b/gen-ignores.py @@ -13,13 +13,16 @@ import os import sys archivebot_ignore_path = sys.argv[1] +unsupported = ['\\1', '\\2', '(?!', '(?='] + print 'package crawl\n\nvar defaultIgnorePatterns = []string{' + for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')): try: with open(fn) as fd: print '\n\t// %s' % os.path.basename(fn) for p in json.load(fd)['patterns']: - if '\\\\1' in p or '(?!' in p: + if any(x in p for x in unsupported): # RE2 does not support backreferences or other # fancy PCRE constructs. This excludes <10 # patterns from the ignore list. @@ -27,5 +30,6 @@ for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')): print '\t%s,' % json.dumps(p) except Exception, e: print >>sys.stderr, 'error in %s: %s' % (fn, e) + print '}' -- cgit v1.2.3-54-g00ecf