aboutsummaryrefslogtreecommitdiff
path: root/urlesc
diff options
context:
space:
mode:
authorJordan <me@jordan.im>2022-01-15 10:09:56 -0700
committerJordan <me@jordan.im>2022-01-15 10:09:56 -0700
commit4a92c26fbc739734ce5227c1be8efd440306cebf (patch)
tree3a009e9ae3543725a597b77a838d0b41690b2e0e /urlesc
parent995a8fad1461734108b5266f51a3c913936c7203 (diff)
downloadkeep-4a92c26fbc739734ce5227c1be8efd440306cebf.tar.gz
keep-4a92c26fbc739734ce5227c1be8efd440306cebf.zip
keep, archive: normalize URLs (RFC 3986), improve logging
Diffstat (limited to 'urlesc')
-rw-r--r--urlesc/LICENSE27
-rw-r--r--urlesc/urlesc.go180
-rw-r--r--urlesc/urlesc_test.go641
3 files changed, 848 insertions, 0 deletions
diff --git a/urlesc/LICENSE b/urlesc/LICENSE
new file mode 100644
index 0000000..7448756
--- /dev/null
+++ b/urlesc/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2012 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/urlesc/urlesc.go b/urlesc/urlesc.go
new file mode 100644
index 0000000..1b84624
--- /dev/null
+++ b/urlesc/urlesc.go
@@ -0,0 +1,180 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package urlesc implements query escaping as per RFC 3986.
+// It contains some parts of the net/url package, modified so as to allow
+// some reserved characters incorrectly escaped by net/url.
+// See https://github.com/golang/go/issues/5684
+package urlesc
+
+import (
+ "bytes"
+ "net/url"
+ "strings"
+)
+
+type encoding int
+
+const (
+ encodePath encoding = 1 + iota
+ encodeUserPassword
+ encodeQueryComponent
+ encodeFragment
+)
+
+// Return true if the specified character should be escaped when
+// appearing in a URL string, according to RFC 3986.
+func shouldEscape(c byte, mode encoding) bool {
+ // §2.3 Unreserved characters (alphanum)
+ if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
+ return false
+ }
+
+ switch c {
+ case '-', '.', '_', '~': // §2.3 Unreserved characters (mark)
+ return false
+
+ // §2.2 Reserved characters (reserved)
+ case ':', '/', '?', '#', '[', ']', '@', // gen-delims
+ '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // sub-delims
+ // Different sections of the URL allow a few of
+ // the reserved characters to appear unescaped.
+ switch mode {
+ case encodePath: // §3.3
+ // The RFC allows sub-delims and : @.
+ // '/', '[' and ']' can be used to assign meaning to individual path
+ // segments. This package only manipulates the path as a whole,
+ // so we allow those as well. That leaves only ? and # to escape.
+ return c == '?' || c == '#'
+
+ case encodeUserPassword: // §3.2.1
+ // The RFC allows : and sub-delims in
+ // userinfo. The parsing of userinfo treats ':' as special so we must escape
+ // all the gen-delims.
+ return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || c == ']' || c == '@'
+
+ case encodeQueryComponent: // §3.4
+ // The RFC allows / and ?.
+ return c != '/' && c != '?'
+
+ case encodeFragment: // §4.1
+ // The RFC text is silent but the grammar allows
+ // everything, so escape nothing but #
+ return c == '#'
+ }
+ }
+
+ // Everything else must be escaped.
+ return true
+}
+
+// QueryEscape escapes the string so it can be safely placed
+// inside a URL query.
+func QueryEscape(s string) string {
+ return escape(s, encodeQueryComponent)
+}
+
+func escape(s string, mode encoding) string {
+ spaceCount, hexCount := 0, 0
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if shouldEscape(c, mode) {
+ if c == ' ' && mode == encodeQueryComponent {
+ spaceCount++
+ } else {
+ hexCount++
+ }
+ }
+ }
+
+ if spaceCount == 0 && hexCount == 0 {
+ return s
+ }
+
+ t := make([]byte, len(s)+2*hexCount)
+ j := 0
+ for i := 0; i < len(s); i++ {
+ switch c := s[i]; {
+ case c == ' ' && mode == encodeQueryComponent:
+ t[j] = '+'
+ j++
+ case shouldEscape(c, mode):
+ t[j] = '%'
+ t[j+1] = "0123456789ABCDEF"[c>>4]
+ t[j+2] = "0123456789ABCDEF"[c&15]
+ j += 3
+ default:
+ t[j] = s[i]
+ j++
+ }
+ }
+ return string(t)
+}
+
+var uiReplacer = strings.NewReplacer(
+ "%21", "!",
+ "%27", "'",
+ "%28", "(",
+ "%29", ")",
+ "%2A", "*",
+)
+
+// unescapeUserinfo unescapes some characters that need not to be escaped as per RFC3986.
+func unescapeUserinfo(s string) string {
+ return uiReplacer.Replace(s)
+}
+
+// Escape reassembles the URL into a valid URL string.
+// The general form of the result is one of:
+//
+// scheme:opaque
+// scheme://userinfo@host/path?query#fragment
+//
+// If u.Opaque is non-empty, String uses the first form;
+// otherwise it uses the second form.
+//
+// In the second form, the following rules apply:
+// - if u.Scheme is empty, scheme: is omitted.
+// - if u.User is nil, userinfo@ is omitted.
+// - if u.Host is empty, host/ is omitted.
+// - if u.Scheme and u.Host are empty and u.User is nil,
+// the entire scheme://userinfo@host/ is omitted.
+// - if u.Host is non-empty and u.Path begins with a /,
+// the form host/path does not add its own /.
+// - if u.RawQuery is empty, ?query is omitted.
+// - if u.Fragment is empty, #fragment is omitted.
+func Escape(u *url.URL) string {
+ var buf bytes.Buffer
+ if u.Scheme != "" {
+ buf.WriteString(u.Scheme)
+ buf.WriteByte(':')
+ }
+ if u.Opaque != "" {
+ buf.WriteString(u.Opaque)
+ } else {
+ if u.Scheme != "" || u.Host != "" || u.User != nil {
+ buf.WriteString("//")
+ if ui := u.User; ui != nil {
+ buf.WriteString(unescapeUserinfo(ui.String()))
+ buf.WriteByte('@')
+ }
+ if h := u.Host; h != "" {
+ buf.WriteString(h)
+ }
+ }
+ if u.Path != "" && u.Path[0] != '/' && u.Host != "" {
+ buf.WriteByte('/')
+ }
+ buf.WriteString(escape(u.Path, encodePath))
+ }
+ if u.RawQuery != "" {
+ buf.WriteByte('?')
+ buf.WriteString(u.RawQuery)
+ }
+ if u.Fragment != "" {
+ buf.WriteByte('#')
+ buf.WriteString(escape(u.Fragment, encodeFragment))
+ }
+ return buf.String()
+}
diff --git a/urlesc/urlesc_test.go b/urlesc/urlesc_test.go
new file mode 100644
index 0000000..45202e1
--- /dev/null
+++ b/urlesc/urlesc_test.go
@@ -0,0 +1,641 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package urlesc
+
+import (
+ "net/url"
+ "testing"
+)
+
+type URLTest struct {
+ in string
+ out *url.URL
+ roundtrip string // expected result of reserializing the URL; empty means same as "in".
+}
+
+var urltests = []URLTest{
+ // no path
+ {
+ "http://www.google.com",
+ &url.URL{
+ Scheme: "http",
+ Host: "www.google.com",
+ },
+ "",
+ },
+ // path
+ {
+ "http://www.google.com/",
+ &url.URL{
+ Scheme: "http",
+ Host: "www.google.com",
+ Path: "/",
+ },
+ "",
+ },
+ // path with hex escaping
+ {
+ "http://www.google.com/file%20one%26two",
+ &url.URL{
+ Scheme: "http",
+ Host: "www.google.com",
+ Path: "/file one&two",
+ },
+ "http://www.google.com/file%20one&two",
+ },
+ // user
+ {
+ "ftp://webmaster@www.google.com/",
+ &url.URL{
+ Scheme: "ftp",
+ User: url.User("webmaster"),
+ Host: "www.google.com",
+ Path: "/",
+ },
+ "",
+ },
+ // escape sequence in username
+ {
+ "ftp://john%20doe@www.google.com/",
+ &url.URL{
+ Scheme: "ftp",
+ User: url.User("john doe"),
+ Host: "www.google.com",
+ Path: "/",
+ },
+ "ftp://john%20doe@www.google.com/",
+ },
+ // query
+ {
+ "http://www.google.com/?q=go+language",
+ &url.URL{
+ Scheme: "http",
+ Host: "www.google.com",
+ Path: "/",
+ RawQuery: "q=go+language",
+ },
+ "",
+ },
+ // query with hex escaping: NOT parsed
+ {
+ "http://www.google.com/?q=go%20language",
+ &url.URL{
+ Scheme: "http",
+ Host: "www.google.com",
+ Path: "/",
+ RawQuery: "q=go%20language",
+ },
+ "",
+ },
+ // %20 outside query
+ {
+ "http://www.google.com/a%20b?q=c+d",
+ &url.URL{
+ Scheme: "http",
+ Host: "www.google.com",
+ Path: "/a b",
+ RawQuery: "q=c+d",
+ },
+ "",
+ },
+ // path without leading /, so no parsing
+ {
+ "http:www.google.com/?q=go+language",
+ &url.URL{
+ Scheme: "http",
+ Opaque: "www.google.com/",
+ RawQuery: "q=go+language",
+ },
+ "http:www.google.com/?q=go+language",
+ },
+ // path without leading /, so no parsing
+ {
+ "http:%2f%2fwww.google.com/?q=go+language",
+ &url.URL{
+ Scheme: "http",
+ Opaque: "%2f%2fwww.google.com/",
+ RawQuery: "q=go+language",
+ },
+ "http:%2f%2fwww.google.com/?q=go+language",
+ },
+ // non-authority with path
+ {
+ "mailto:/webmaster@golang.org",
+ &url.URL{
+ Scheme: "mailto",
+ Path: "/webmaster@golang.org",
+ },
+ "mailto:///webmaster@golang.org", // unfortunate compromise
+ },
+ // non-authority
+ {
+ "mailto:webmaster@golang.org",
+ &url.URL{
+ Scheme: "mailto",
+ Opaque: "webmaster@golang.org",
+ },
+ "",
+ },
+ // unescaped :// in query should not create a scheme
+ {
+ "/foo?query=http://bad",
+ &url.URL{
+ Path: "/foo",
+ RawQuery: "query=http://bad",
+ },
+ "",
+ },
+ // leading // without scheme should create an authority
+ {
+ "//foo",
+ &url.URL{
+ Host: "foo",
+ },
+ "",
+ },
+ // leading // without scheme, with userinfo, path, and query
+ {
+ "//user@foo/path?a=b",
+ &url.URL{
+ User: url.User("user"),
+ Host: "foo",
+ Path: "/path",
+ RawQuery: "a=b",
+ },
+ "",
+ },
+ // Three leading slashes isn't an authority, but doesn't return an error.
+ // (We can't return an error, as this code is also used via
+ // ServeHTTP -> ReadRequest -> Parse, which is arguably a
+ // different URL parsing context, but currently shares the
+ // same codepath)
+ {
+ "///threeslashes",
+ &url.URL{
+ Path: "///threeslashes",
+ },
+ "",
+ },
+ {
+ "http://user:password@google.com",
+ &url.URL{
+ Scheme: "http",
+ User: url.UserPassword("user", "password"),
+ Host: "google.com",
+ },
+ "http://user:password@google.com",
+ },
+ // unescaped @ in username should not confuse host
+ {
+ "http://j@ne:password@google.com",
+ &url.URL{
+ Scheme: "http",
+ User: url.UserPassword("j@ne", "password"),
+ Host: "google.com",
+ },
+ "http://j%40ne:password@google.com",
+ },
+ // unescaped @ in password should not confuse host
+ {
+ "http://jane:p@ssword@google.com",
+ &url.URL{
+ Scheme: "http",
+ User: url.UserPassword("jane", "p@ssword"),
+ Host: "google.com",
+ },
+ "http://jane:p%40ssword@google.com",
+ },
+ {
+ "http://j@ne:password@google.com/p@th?q=@go",
+ &url.URL{
+ Scheme: "http",
+ User: url.UserPassword("j@ne", "password"),
+ Host: "google.com",
+ Path: "/p@th",
+ RawQuery: "q=@go",
+ },
+ "http://j%40ne:password@google.com/p@th?q=@go",
+ },
+ {
+ "http://www.google.com/?q=go+language#foo",
+ &url.URL{
+ Scheme: "http",
+ Host: "www.google.com",
+ Path: "/",
+ RawQuery: "q=go+language",
+ Fragment: "foo",
+ },
+ "",
+ },
+ {
+ "http://www.google.com/?q=go+language#foo%26bar",
+ &url.URL{
+ Scheme: "http",
+ Host: "www.google.com",
+ Path: "/",
+ RawQuery: "q=go+language",
+ Fragment: "foo&bar",
+ },
+ "http://www.google.com/?q=go+language#foo&bar",
+ },
+ {
+ "file:///home/adg/rabbits",
+ &url.URL{
+ Scheme: "file",
+ Host: "",
+ Path: "/home/adg/rabbits",
+ },
+ "file:///home/adg/rabbits",
+ },
+ // "Windows" paths are no exception to the rule.
+ // See golang.org/issue/6027, especially comment #9.
+ {
+ "file:///C:/FooBar/Baz.txt",
+ &url.URL{
+ Scheme: "file",
+ Host: "",
+ Path: "/C:/FooBar/Baz.txt",
+ },
+ "file:///C:/FooBar/Baz.txt",
+ },
+ // case-insensitive scheme
+ {
+ "MaIlTo:webmaster@golang.org",
+ &url.URL{
+ Scheme: "mailto",
+ Opaque: "webmaster@golang.org",
+ },
+ "mailto:webmaster@golang.org",
+ },
+ // Relative path
+ {
+ "a/b/c",
+ &url.URL{
+ Path: "a/b/c",
+ },
+ "a/b/c",
+ },
+ // escaped '?' in username and password
+ {
+ "http://%3Fam:pa%3Fsword@google.com",
+ &url.URL{
+ Scheme: "http",
+ User: url.UserPassword("?am", "pa?sword"),
+ Host: "google.com",
+ },
+ "",
+ },
+ // escaped '?' and '#' in path
+ {
+ "http://example.com/%3F%23",
+ &url.URL{
+ Scheme: "http",
+ Host: "example.com",
+ Path: "?#",
+ },
+ "",
+ },
+ // unescaped [ ] ! ' ( ) * in path
+ {
+ "http://example.com/[]!'()*",
+ &url.URL{
+ Scheme: "http",
+ Host: "example.com",
+ Path: "[]!'()*",
+ },
+ "http://example.com/[]!'()*",
+ },
+ // escaped : / ? # [ ] @ in username and password
+ {
+ "http://%3A%2F%3F:%23%5B%5D%40@example.com",
+ &url.URL{
+ Scheme: "http",
+ User: url.UserPassword(":/?", "#[]@"),
+ Host: "example.com",
+ },
+ "",
+ },
+ // unescaped ! $ & ' ( ) * + , ; = in username and password
+ {
+ "http://!$&'():*+,;=@example.com",
+ &url.URL{
+ Scheme: "http",
+ User: url.UserPassword("!$&'()", "*+,;="),
+ Host: "example.com",
+ },
+ "",
+ },
+ // unescaped = : / . ? = in query component
+ {
+ "http://example.com/?q=http://google.com/?q=",
+ &url.URL{
+ Scheme: "http",
+ Host: "example.com",
+ Path: "/",
+ RawQuery: "q=http://google.com/?q=",
+ },
+ "",
+ },
+ // unescaped : / ? [ ] @ ! $ & ' ( ) * + , ; = in fragment
+ {
+ "http://example.com/#:/?%23[]@!$&'()*+,;=",
+ &url.URL{
+ Scheme: "http",
+ Host: "example.com",
+ Path: "/",
+ Fragment: ":/?#[]@!$&'()*+,;=",
+ },
+ "",
+ },
+}
+
+func DoTestString(t *testing.T, parse func(string) (*url.URL, error), name string, tests []URLTest) {
+ for _, tt := range tests {
+ u, err := parse(tt.in)
+ if err != nil {
+ t.Errorf("%s(%q) returned error %s", name, tt.in, err)
+ continue
+ }
+ expected := tt.in
+ if len(tt.roundtrip) > 0 {
+ expected = tt.roundtrip
+ }
+ s := Escape(u)
+ if s != expected {
+ t.Errorf("Escape(%s(%q)) == %q (expected %q)", name, tt.in, s, expected)
+ }
+ }
+}
+
+func TestURLString(t *testing.T) {
+ DoTestString(t, url.Parse, "Parse", urltests)
+
+ // no leading slash on path should prepend
+ // slash on String() call
+ noslash := URLTest{
+ "http://www.google.com/search",
+ &url.URL{
+ Scheme: "http",
+ Host: "www.google.com",
+ Path: "search",
+ },
+ "",
+ }
+ s := Escape(noslash.out)
+ if s != noslash.in {
+ t.Errorf("Expected %s; go %s", noslash.in, s)
+ }
+}
+
+type EscapeTest struct {
+ in string
+ out string
+ err error
+}
+
+var escapeTests = []EscapeTest{
+ {
+ "",
+ "",
+ nil,
+ },
+ {
+ "abc",
+ "abc",
+ nil,
+ },
+ {
+ "one two",
+ "one+two",
+ nil,
+ },
+ {
+ "10%",
+ "10%25",
+ nil,
+ },
+ {
+ " ?&=#+%!<>#\"{}|\\^[]`☺\t:/@$'()*,;",
+ "+?%26%3D%23%2B%25%21%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09%3A/%40%24%27%28%29%2A%2C%3B",
+ nil,
+ },
+}
+
+func TestEscape(t *testing.T) {
+ for _, tt := range escapeTests {
+ actual := QueryEscape(tt.in)
+ if tt.out != actual {
+ t.Errorf("QueryEscape(%q) = %q, want %q", tt.in, actual, tt.out)
+ }
+
+ // for bonus points, verify that escape:unescape is an identity.
+ roundtrip, err := url.QueryUnescape(actual)
+ if roundtrip != tt.in || err != nil {
+ t.Errorf("QueryUnescape(%q) = %q, %s; want %q, %s", actual, roundtrip, err, tt.in, "[no error]")
+ }
+ }
+}
+
+var resolveReferenceTests = []struct {
+ base, rel, expected string
+}{
+ // Absolute URL references
+ {"http://foo.com?a=b", "https://bar.com/", "https://bar.com/"},
+ {"http://foo.com/", "https://bar.com/?a=b", "https://bar.com/?a=b"},
+ {"http://foo.com/bar", "mailto:foo@example.com", "mailto:foo@example.com"},
+
+ // Path-absolute references
+ {"http://foo.com/bar", "/baz", "http://foo.com/baz"},
+ {"http://foo.com/bar?a=b#f", "/baz", "http://foo.com/baz"},
+ {"http://foo.com/bar?a=b", "/baz?c=d", "http://foo.com/baz?c=d"},
+
+ // Scheme-relative
+ {"https://foo.com/bar?a=b", "//bar.com/quux", "https://bar.com/quux"},
+
+ // Path-relative references:
+
+ // ... current directory
+ {"http://foo.com", ".", "http://foo.com/"},
+ {"http://foo.com/bar", ".", "http://foo.com/"},
+ {"http://foo.com/bar/", ".", "http://foo.com/bar/"},
+
+ // ... going down
+ {"http://foo.com", "bar", "http://foo.com/bar"},
+ {"http://foo.com/", "bar", "http://foo.com/bar"},
+ {"http://foo.com/bar/baz", "quux", "http://foo.com/bar/quux"},
+
+ // ... going up
+ {"http://foo.com/bar/baz", "../quux", "http://foo.com/quux"},
+ {"http://foo.com/bar/baz", "../../../../../quux", "http://foo.com/quux"},
+ {"http://foo.com/bar", "..", "http://foo.com/"},
+ {"http://foo.com/bar/baz", "./..", "http://foo.com/"},
+ // ".." in the middle (issue 3560)
+ {"http://foo.com/bar/baz", "quux/dotdot/../tail", "http://foo.com/bar/quux/tail"},
+ {"http://foo.com/bar/baz", "quux/./dotdot/../tail", "http://foo.com/bar/quux/tail"},
+ {"http://foo.com/bar/baz", "quux/./dotdot/.././tail", "http://foo.com/bar/quux/tail"},
+ {"http://foo.com/bar/baz", "quux/./dotdot/./../tail", "http://foo.com/bar/quux/tail"},
+ {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/././../../tail", "http://foo.com/bar/quux/tail"},
+ {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/./.././../tail", "http://foo.com/bar/quux/tail"},
+ {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/dotdot/./../../.././././tail", "http://foo.com/bar/quux/tail"},
+ {"http://foo.com/bar/baz", "quux/./dotdot/../dotdot/../dot/./tail/..", "http://foo.com/bar/quux/dot/"},
+
+ // Remove any dot-segments prior to forming the target URI.
+ // http://tools.ietf.org/html/rfc3986#section-5.2.4
+ {"http://foo.com/dot/./dotdot/../foo/bar", "../baz", "http://foo.com/dot/baz"},
+
+ // Triple dot isn't special
+ {"http://foo.com/bar", "...", "http://foo.com/..."},
+
+ // Fragment
+ {"http://foo.com/bar", ".#frag", "http://foo.com/#frag"},
+
+ // RFC 3986: Normal Examples
+ // http://tools.ietf.org/html/rfc3986#section-5.4.1
+ {"http://a/b/c/d;p?q", "g:h", "g:h"},
+ {"http://a/b/c/d;p?q", "g", "http://a/b/c/g"},
+ {"http://a/b/c/d;p?q", "./g", "http://a/b/c/g"},
+ {"http://a/b/c/d;p?q", "g/", "http://a/b/c/g/"},
+ {"http://a/b/c/d;p?q", "/g", "http://a/g"},
+ {"http://a/b/c/d;p?q", "//g", "http://g"},
+ {"http://a/b/c/d;p?q", "?y", "http://a/b/c/d;p?y"},
+ {"http://a/b/c/d;p?q", "g?y", "http://a/b/c/g?y"},
+ {"http://a/b/c/d;p?q", "#s", "http://a/b/c/d;p?q#s"},
+ {"http://a/b/c/d;p?q", "g#s", "http://a/b/c/g#s"},
+ {"http://a/b/c/d;p?q", "g?y#s", "http://a/b/c/g?y#s"},
+ {"http://a/b/c/d;p?q", ";x", "http://a/b/c/;x"},
+ {"http://a/b/c/d;p?q", "g;x", "http://a/b/c/g;x"},
+ {"http://a/b/c/d;p?q", "g;x?y#s", "http://a/b/c/g;x?y#s"},
+ {"http://a/b/c/d;p?q", "", "http://a/b/c/d;p?q"},
+ {"http://a/b/c/d;p?q", ".", "http://a/b/c/"},
+ {"http://a/b/c/d;p?q", "./", "http://a/b/c/"},
+ {"http://a/b/c/d;p?q", "..", "http://a/b/"},
+ {"http://a/b/c/d;p?q", "../", "http://a/b/"},
+ {"http://a/b/c/d;p?q", "../g", "http://a/b/g"},
+ {"http://a/b/c/d;p?q", "../..", "http://a/"},
+ {"http://a/b/c/d;p?q", "../../", "http://a/"},
+ {"http://a/b/c/d;p?q", "../../g", "http://a/g"},
+
+ // RFC 3986: Abnormal Examples
+ // http://tools.ietf.org/html/rfc3986#section-5.4.2
+ {"http://a/b/c/d;p?q", "../../../g", "http://a/g"},
+ {"http://a/b/c/d;p?q", "../../../../g", "http://a/g"},
+ {"http://a/b/c/d;p?q", "/./g", "http://a/g"},
+ {"http://a/b/c/d;p?q", "/../g", "http://a/g"},
+ {"http://a/b/c/d;p?q", "g.", "http://a/b/c/g."},
+ {"http://a/b/c/d;p?q", ".g", "http://a/b/c/.g"},
+ {"http://a/b/c/d;p?q", "g..", "http://a/b/c/g.."},
+ {"http://a/b/c/d;p?q", "..g", "http://a/b/c/..g"},
+ {"http://a/b/c/d;p?q", "./../g", "http://a/b/g"},
+ {"http://a/b/c/d;p?q", "./g/.", "http://a/b/c/g/"},
+ {"http://a/b/c/d;p?q", "g/./h", "http://a/b/c/g/h"},
+ {"http://a/b/c/d;p?q", "g/../h", "http://a/b/c/h"},
+ {"http://a/b/c/d;p?q", "g;x=1/./y", "http://a/b/c/g;x=1/y"},
+ {"http://a/b/c/d;p?q", "g;x=1/../y", "http://a/b/c/y"},
+ {"http://a/b/c/d;p?q", "g?y/./x", "http://a/b/c/g?y/./x"},
+ {"http://a/b/c/d;p?q", "g?y/../x", "http://a/b/c/g?y/../x"},
+ {"http://a/b/c/d;p?q", "g#s/./x", "http://a/b/c/g#s/./x"},
+ {"http://a/b/c/d;p?q", "g#s/../x", "http://a/b/c/g#s/../x"},
+
+ // Extras.
+ {"https://a/b/c/d;p?q", "//g?q", "https://g?q"},
+ {"https://a/b/c/d;p?q", "//g#s", "https://g#s"},
+ {"https://a/b/c/d;p?q", "//g/d/e/f?y#s", "https://g/d/e/f?y#s"},
+ {"https://a/b/c/d;p#s", "?y", "https://a/b/c/d;p?y"},
+ {"https://a/b/c/d;p?q#s", "?y", "https://a/b/c/d;p?y"},
+}
+
+func TestResolveReference(t *testing.T) {
+ mustParse := func(url_ string) *url.URL {
+ u, err := url.Parse(url_)
+ if err != nil {
+ t.Fatalf("Expected URL to parse: %q, got error: %v", url_, err)
+ }
+ return u
+ }
+ opaque := &url.URL{Scheme: "scheme", Opaque: "opaque"}
+ for _, test := range resolveReferenceTests {
+ base := mustParse(test.base)
+ rel := mustParse(test.rel)
+ url := base.ResolveReference(rel)
+ if Escape(url) != test.expected {
+ t.Errorf("URL(%q).ResolveReference(%q) == %q, got %q", test.base, test.rel, test.expected, Escape(url))
+ }
+ // Ensure that new instances are returned.
+ if base == url {
+ t.Errorf("Expected URL.ResolveReference to return new URL instance.")
+ }
+ // Test the convenience wrapper too.
+ url, err := base.Parse(test.rel)
+ if err != nil {
+ t.Errorf("URL(%q).Parse(%q) failed: %v", test.base, test.rel, err)
+ } else if Escape(url) != test.expected {
+ t.Errorf("URL(%q).Parse(%q) == %q, got %q", test.base, test.rel, test.expected, Escape(url))
+ } else if base == url {
+ // Ensure that new instances are returned for the wrapper too.
+ t.Errorf("Expected URL.Parse to return new URL instance.")
+ }
+ // Ensure Opaque resets the URL.
+ url = base.ResolveReference(opaque)
+ if *url != *opaque {
+ t.Errorf("ResolveReference failed to resolve opaque URL: want %#v, got %#v", url, opaque)
+ }
+ // Test the convenience wrapper with an opaque URL too.
+ url, err = base.Parse("scheme:opaque")
+ if err != nil {
+ t.Errorf(`URL(%q).Parse("scheme:opaque") failed: %v`, test.base, err)
+ } else if *url != *opaque {
+ t.Errorf("Parse failed to resolve opaque URL: want %#v, got %#v", url, opaque)
+ } else if base == url {
+ // Ensure that new instances are returned, again.
+ t.Errorf("Expected URL.Parse to return new URL instance.")
+ }
+ }
+}
+
+type shouldEscapeTest struct {
+ in byte
+ mode encoding
+ escape bool
+}
+
+var shouldEscapeTests = []shouldEscapeTest{
+ // Unreserved characters (§2.3)
+ {'a', encodePath, false},
+ {'a', encodeUserPassword, false},
+ {'a', encodeQueryComponent, false},
+ {'a', encodeFragment, false},
+ {'z', encodePath, false},
+ {'A', encodePath, false},
+ {'Z', encodePath, false},
+ {'0', encodePath, false},
+ {'9', encodePath, false},
+ {'-', encodePath, false},
+ {'-', encodeUserPassword, false},
+ {'-', encodeQueryComponent, false},
+ {'-', encodeFragment, false},
+ {'.', encodePath, false},
+ {'_', encodePath, false},
+ {'~', encodePath, false},
+
+ // User information (§3.2.1)
+ {':', encodeUserPassword, true},
+ {'/', encodeUserPassword, true},
+ {'?', encodeUserPassword, true},
+ {'@', encodeUserPassword, true},
+ {'$', encodeUserPassword, false},
+ {'&', encodeUserPassword, false},
+ {'+', encodeUserPassword, false},
+ {',', encodeUserPassword, false},
+ {';', encodeUserPassword, false},
+ {'=', encodeUserPassword, false},
+}
+
+func TestShouldEscape(t *testing.T) {
+ for _, tt := range shouldEscapeTests {
+ if shouldEscape(tt.in, tt.mode) != tt.escape {
+ t.Errorf("shouldEscape(%q, %v) returned %v; expected %v", tt.in, tt.mode, !tt.escape, tt.escape)
+ }
+ }
+}