about summary refs log tree commit diff
path: root/scope.go
diff options
context:
space:
mode:
Diffstat (limited to 'scope.go')
-rw-r--r-- scope.go 28
1 files changed, 28 insertions, 0 deletions
diff --git a/scope.go b/scope.go
index a2c06b6..ccba5f5 100644
--- a/scope.go
+++ b/scope.go
@@ -3,6 +3,7 @@ package crawl
import (
"fmt"
"net/url"
+ "regexp"
"strings"
)
@@ -95,3 +96,30 @@ func NewSeedScope(seeds []*url.URL) Scope {
}
return NewURLPrefixScope(pfx)
}
+
+// regexpIgnoreScope is a scope backed by a list of compiled regular
+// expressions; its Check method rejects any URL matched by one of them.
+type regexpIgnoreScope struct {
+ // ignores holds the compiled patterns tested against each URL string.
+ ignores []*regexp.Regexp
+}
+
+// Check reports whether uri is in scope. It renders the URL with String and
+// returns false as soon as one of the ignore patterns matches that text; if
+// no pattern matches it returns true. The depth argument is not consulted.
+func (s *regexpIgnoreScope) Check(uri *url.URL, depth int) bool {
+	target := uri.String()
+	for _, pattern := range s.ignores {
+		if pattern.MatchString(target) {
+			return false
+		}
+	}
+	return true
+}
+
+// NewRegexpIgnoreScope returns a Scope that rejects every URL matching at
+// least one of the given regular expressions. A nil ignores slice selects
+// defaultIgnorePatterns; an empty, non-nil slice is used as given and so
+// produces a scope with no patterns. Each pattern is compiled with
+// regexp.MustCompile, which panics if a pattern is not a valid expression.
+func NewRegexpIgnoreScope(ignores []string) Scope {
+	if ignores == nil {
+		ignores = defaultIgnorePatterns
+	}
+	compiled := make([]*regexp.Regexp, len(ignores))
+	for n, expr := range ignores {
+		compiled[n] = regexp.MustCompile(expr)
+	}
+	return &regexpIgnoreScope{ignores: compiled}
+}