aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ale <ale@incal.net> 2014-12-20 11:58:12 +0000
committer ale <ale@incal.net> 2014-12-20 11:58:12 +0000
commit c31b2d91e508e7ec1d9640f92fa07ee6575c46e6 (patch)
tree 2787a4a40f798eb262113bcdf83bd506da345994
parent 4c82422d2e75b9b8f4d034b1f43fda566416d6af (diff)
download crawl-c31b2d91e508e7ec1d9640f92fa07ee6575c46e6.tar.gz
crawl-c31b2d91e508e7ec1d9640f92fa07ee6575c46e6.zip
add tests to scope.go
-rw-r--r-- scope.go 3
-rw-r--r-- scope_test.go 66
2 files changed, 69 insertions, 0 deletions
diff --git a/scope.go b/scope.go
index de909f4..a2c06b6 100644
--- a/scope.go
+++ b/scope.go
@@ -57,6 +57,9 @@ func (m URLPrefixMap) Add(uri *url.URL) {
func (m URLPrefixMap) Contains(uri *url.URL) bool {
s := strings.TrimPrefix(uri.Host, "www.")
+ if _, ok := m[s]; ok {
+ return true
+ }
for _, p := range strings.Split(uri.Path, "/") {
if p == "" {
continue
diff --git a/scope_test.go b/scope_test.go
new file mode 100644
index 0000000..bccf93c
--- /dev/null
+++ b/scope_test.go
@@ -0,0 +1,66 @@
+package crawl
+
+import (
+ "net/url"
+ "testing"
+)
+
+// mustParseURL parses s and returns the resulting URL.
+//
+// It panics when url.Parse reports an error. The helper is only meant for
+// hard-coded test fixtures, where a malformed URL is a mistake in the test
+// itself; panicking with the parse error points at the bad fixture instead
+// of handing a nil *url.URL to the code under test.
+func mustParseURL(s string) *url.URL {
+	u, err := url.Parse(s)
+	if err != nil {
+		panic("mustParseURL(" + s + "): " + err.Error())
+	}
+	return u
+}
+
+// testScopeEntry is one row of a table-driven scope test.
+type testScopeEntry struct {
+ // uri is the URL text; runScopeTest parses it with mustParseURL.
+ uri string
+ // depth is passed unchanged as the second argument to Scope.Check.
+ depth int
+ // expected is the result Check must return for the entry to pass.
+ expected bool
+}
+
+// runScopeTest checks sc against every entry of testdata.
+//
+// Each entry's uri is parsed with mustParseURL and handed to sc.Check
+// together with the entry's depth. A result that differs from the entry's
+// expected value is reported through t.Errorf, so the remaining entries are
+// still checked after a mismatch.
+func runScopeTest(t *testing.T, sc Scope, testdata []testScopeEntry) {
+	for i := range testdata {
+		entry := testdata[i]
+		got := sc.Check(mustParseURL(entry.uri), entry.depth)
+		if got == entry.expected {
+			continue
+		}
+		t.Errorf("Check(%s, %d) -> got %v, want %v", entry.uri, entry.depth, got, entry.expected)
+	}
+}
+
+// TestDepthScope runs a scope built with NewDepthScope(10) against the same
+// URL at depths 1, 10 and 100; only depth 1 is expected to be accepted.
+func TestDepthScope(t *testing.T) {
+	sc := NewDepthScope(10)
+	cases := []testScopeEntry{
+		{uri: "http://example.com", depth: 1, expected: true},
+		{uri: "http://example.com", depth: 10, expected: false},
+		{uri: "http://example.com", depth: 100, expected: false},
+	}
+	runScopeTest(t, sc, cases)
+}
+
+// TestSchemeScope runs a scope built with NewSchemeScope for the single
+// scheme "http"; the http URL is expected to be accepted while the https
+// and ftp URLs are expected to be rejected.
+func TestSchemeScope(t *testing.T) {
+	sc := NewSchemeScope([]string{"http"})
+	cases := []testScopeEntry{
+		{uri: "http://example.com", depth: 0, expected: true},
+		{uri: "https://example.com", depth: 0, expected: false},
+		{uri: "ftp://example.com", depth: 0, expected: false},
+	}
+	runScopeTest(t, sc, cases)
+}
+
+// TestURLPrefixScope runs a scope built with NewURLPrefixScope over a
+// URLPrefixMap holding two entries: http://example1.com (no path) and
+// http://example2.com/allowed/path/.
+func TestURLPrefixScope(t *testing.T) {
+	prefixes := make(URLPrefixMap)
+	for _, seed := range []string{
+		"http://example1.com",
+		"http://example2.com/allowed/path/",
+	} {
+		prefixes.Add(mustParseURL(seed))
+	}
+
+	cases := []testScopeEntry{
+		// example1.com was added without a path: the bare host, any path
+		// under it and the "www." form are expected to match, while a
+		// different subdomain is not.
+		{uri: "http://example1.com", depth: 0, expected: true},
+		{uri: "http://example1.com/", depth: 0, expected: true},
+		{uri: "http://example1.com/some/path/", depth: 0, expected: true},
+		{uri: "http://www.example1.com", depth: 0, expected: true},
+		{uri: "http://subdomain.example1.com", depth: 0, expected: false},
+
+		// example2.com was added with /allowed/path/: only URLs at or
+		// below that path are expected to match.
+		{uri: "http://example2.com", depth: 0, expected: false},
+		{uri: "http://example2.com/", depth: 0, expected: false},
+		{uri: "http://www.example2.com", depth: 0, expected: false},
+		{uri: "http://example2.com/allowed/path/is/ok", depth: 0, expected: true},
+		{uri: "http://example2.com/allowed/path", depth: 0, expected: true},
+		{uri: "http://example2.com/another/path/is/not/ok", depth: 0, expected: false},
+	}
+	runScopeTest(t, NewURLPrefixScope(prefixes), cases)
+}