1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
package analysis
import (
"fmt"
"io"
"io/ioutil"
"net/http"
"net/url"
"os"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
)
// makeResponse builds a minimal *http.Response for use in tests.
//
// The response carries a single Content-Type header set to ctype, a body
// that reads back the given string, and a Request whose URL is
// https://example.com/.
func makeResponse(ctype, body string) *http.Response {
	// The URL is a fixed literal; the parse error is discarded.
	base, _ := url.Parse("https://example.com/")

	hdr := make(http.Header)
	hdr.Set("Content-Type", ctype)

	return &http.Response{
		Header:  hdr,
		Body:    ioutil.NopCloser(strings.NewReader(body)),
		Request: &http.Request{URL: base},
	}
}
// testdata describes a single link-extraction test case.
type testdata struct {
	// ctype is the Content-Type of the simulated response and body is its
	// payload.
	ctype, body string
	// expectedLinks holds the URL strings the case expects to be extracted.
	expectedLinks []string
}
// runTestCase runs GetLinks over the test case's body and compares the
// extracted link URLs with td.expectedLinks.
//
// The body is written to a temporary file through one handle (w) and passed
// to GetLinks through a second handle (r) on the same file. The file is
// closed and removed before the function returns.
//
// It returns nil when the extracted links match, and otherwise an error
// describing the I/O failure, the GetLinks failure, or the diff between the
// expected and actual links.
func (td *testdata) runTestCase() error {
	// An empty dir makes TempFile fall back to os.TempDir(), so the test
	// does not depend on a "temp" directory existing in the working
	// directory.
	r, err := ioutil.TempFile("", "crawl")
	if err != nil {
		return err
	}
	// Deferred calls run last-in-first-out, so both handles are closed
	// before the file is removed.
	defer os.Remove(r.Name())
	defer r.Close()

	w, err := os.OpenFile(r.Name(), os.O_RDWR, 0777)
	if err != nil {
		return err
	}
	defer w.Close()

	resp := makeResponse(td.ctype, td.body)
	if _, err := io.Copy(w, resp.Body); err != nil {
		return err
	}
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return err
	}

	links, err := GetLinks(resp, r)
	if err != nil {
		return fmt.Errorf("GetLinks() error: %v", err)
	}

	// Left as a nil slice when no links are found: cmp.Diff distinguishes
	// nil from empty slices, so pre-allocating would change the comparison.
	var linkStr []string
	for _, l := range links {
		linkStr = append(linkStr, l.URL.String())
	}
	if diff := cmp.Diff(td.expectedLinks, linkStr); diff != "" {
		return fmt.Errorf("unexpected result:\n%s", diff)
	}
	return nil
}
// tests is the table of link-extraction cases run by TestLinks.
var tests = []testdata{
	// A root-relative href in an anchor tag is expected as an absolute
	// URL on the example.com base.
	{
		ctype: "text/html",
		body: `
<html><body>
<a href="/link1">link</a>
</body></html>
`,
		expectedLinks: []string{"https://example.com/link1"},
	},
	// A url() reference inside an inline <style> element is expected to be
	// reported as a link as well.
	{
		ctype: "text/html",
		body: `
<html><head><style type="text/css">
body { background: url('/link1'); }
</style></head>
<body></body></html>
`,
		expectedLinks: []string{"https://example.com/link1"},
	},
}
// TestLinks runs every case in tests. Failures are reported with t.Error,
// so one failing case does not stop the remaining cases from running.
func TestLinks(t *testing.T) {
	for i := range tests {
		tc := &tests[i]
		err := tc.runTestCase()
		if err == nil {
			continue
		}
		t.Error(err)
	}
}
|