diff options
author | ale <ale@incal.net> | 2018-09-02 11:16:49 +0100 |
---|---|---|
committer | ale <ale@incal.net> | 2018-09-02 11:16:49 +0100 |
commit | 66ce654d5be9c26ba69cc75ac12ff6662410c69d (patch) | |
tree | 484ce9f4d6444dbfe0a9595e8cfae5e6bf4e337c /scope.go | |
parent | a5d20a9a30397cf2ddc900fc58f66ce8f515f769 (diff) | |
download | crawl-66ce654d5be9c26ba69cc75ac12ff6662410c69d.tar.gz crawl-66ce654d5be9c26ba69cc75ac12ff6662410c69d.zip |
Add --exclude and --exclude-file options
Allow users to add to the exclude regexp lists easily.
Diffstat (limited to 'scope.go')
-rw-r--r-- | scope.go | 22 |
1 files changed, 12 insertions, 10 deletions
@@ -115,19 +115,21 @@ func (s *regexpIgnoreScope) Check(link Outlink, depth int) bool { return true } +func compileDefaultIgnorePatterns() []*regexp.Regexp { + out := make([]*regexp.Regexp, 0, len(defaultIgnorePatterns)) + for _, p := range defaultIgnorePatterns { + out = append(out, regexp.MustCompile(p)) + } + return out +} + // NewRegexpIgnoreScope returns a Scope that filters out URLs // according to a list of regular expressions. -func NewRegexpIgnoreScope(ignores []string) Scope { - if ignores == nil { - ignores = defaultIgnorePatterns - } - r := regexpIgnoreScope{ - ignores: make([]*regexp.Regexp, 0, len(ignores)), - } - for _, i := range ignores { - r.ignores = append(r.ignores, regexp.MustCompile(i)) +func NewRegexpIgnoreScope(ignores []*regexp.Regexp) Scope { + ignores = append(compileDefaultIgnorePatterns(), ignores...) + return &regexpIgnoreScope{ + ignores: ignores, } - return &r } // NewIncludeRelatedScope always includes resources with TagRelated. |