From 4a92c26fbc739734ce5227c1be8efd440306cebf Mon Sep 17 00:00:00 2001 From: Jordan Date: Sat, 15 Jan 2022 10:09:56 -0700 Subject: keep, archive: normalize URLs (RFC 3986), improve logging --- urlesc/LICENSE | 27 +++ urlesc/urlesc.go | 180 ++++++++++++++ urlesc/urlesc_test.go | 641 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 848 insertions(+) create mode 100644 urlesc/LICENSE create mode 100644 urlesc/urlesc.go create mode 100644 urlesc/urlesc_test.go (limited to 'urlesc') diff --git a/urlesc/LICENSE b/urlesc/LICENSE new file mode 100644 index 0000000..7448756 --- /dev/null +++ b/urlesc/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2012 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/urlesc/urlesc.go b/urlesc/urlesc.go new file mode 100644 index 0000000..1b84624 --- /dev/null +++ b/urlesc/urlesc.go @@ -0,0 +1,180 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package urlesc implements query escaping as per RFC 3986. +// It contains some parts of the net/url package, modified so as to allow +// some reserved characters incorrectly escaped by net/url. +// See https://github.com/golang/go/issues/5684 +package urlesc + +import ( + "bytes" + "net/url" + "strings" +) + +type encoding int + +const ( + encodePath encoding = 1 + iota + encodeUserPassword + encodeQueryComponent + encodeFragment +) + +// Return true if the specified character should be escaped when +// appearing in a URL string, according to RFC 3986. +func shouldEscape(c byte, mode encoding) bool { + // §2.3 Unreserved characters (alphanum) + if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { + return false + } + + switch c { + case '-', '.', '_', '~': // §2.3 Unreserved characters (mark) + return false + + // §2.2 Reserved characters (reserved) + case ':', '/', '?', '#', '[', ']', '@', // gen-delims + '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // sub-delims + // Different sections of the URL allow a few of + // the reserved characters to appear unescaped. + switch mode { + case encodePath: // §3.3 + // The RFC allows sub-delims and : @. + // '/', '[' and ']' can be used to assign meaning to individual path + // segments. This package only manipulates the path as a whole, + // so we allow those as well. That leaves only ? and # to escape. + return c == '?' || c == '#' + + case encodeUserPassword: // §3.2.1 + // The RFC allows : and sub-delims in + // userinfo. The parsing of userinfo treats ':' as special so we must escape + // all the gen-delims. + return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || c == ']' || c == '@' + + case encodeQueryComponent: // §3.4 + // The RFC allows / and ?. + return c != '/' && c != '?' + + case encodeFragment: // §4.1 + // The RFC text is silent but the grammar allows + // everything, so escape nothing but # + return c == '#' + } + } + + // Everything else must be escaped. + return true +} + +// QueryEscape escapes the string so it can be safely placed +// inside a URL query. +func QueryEscape(s string) string { + return escape(s, encodeQueryComponent) +} + +func escape(s string, mode encoding) string { + spaceCount, hexCount := 0, 0 + for i := 0; i < len(s); i++ { + c := s[i] + if shouldEscape(c, mode) { + if c == ' ' && mode == encodeQueryComponent { + spaceCount++ + } else { + hexCount++ + } + } + } + + if spaceCount == 0 && hexCount == 0 { + return s + } + + t := make([]byte, len(s)+2*hexCount) + j := 0 + for i := 0; i < len(s); i++ { + switch c := s[i]; { + case c == ' ' && mode == encodeQueryComponent: + t[j] = '+' + j++ + case shouldEscape(c, mode): + t[j] = '%' + t[j+1] = "0123456789ABCDEF"[c>>4] + t[j+2] = "0123456789ABCDEF"[c&15] + j += 3 + default: + t[j] = s[i] + j++ + } + } + return string(t) +} + +var uiReplacer = strings.NewReplacer( + "%21", "!", + "%27", "'", + "%28", "(", + "%29", ")", + "%2A", "*", +) + +// unescapeUserinfo unescapes some characters that need not to be escaped as per RFC3986. +func unescapeUserinfo(s string) string { + return uiReplacer.Replace(s) +} + +// Escape reassembles the URL into a valid URL string. +// The general form of the result is one of: +// +// scheme:opaque +// scheme://userinfo@host/path?query#fragment +// +// If u.Opaque is non-empty, String uses the first form; +// otherwise it uses the second form. +// +// In the second form, the following rules apply: +// - if u.Scheme is empty, scheme: is omitted. +// - if u.User is nil, userinfo@ is omitted. +// - if u.Host is empty, host/ is omitted. +// - if u.Scheme and u.Host are empty and u.User is nil, +// the entire scheme://userinfo@host/ is omitted. +// - if u.Host is non-empty and u.Path begins with a /, +// the form host/path does not add its own /. +// - if u.RawQuery is empty, ?query is omitted. +// - if u.Fragment is empty, #fragment is omitted. +func Escape(u *url.URL) string { + var buf bytes.Buffer + if u.Scheme != "" { + buf.WriteString(u.Scheme) + buf.WriteByte(':') + } + if u.Opaque != "" { + buf.WriteString(u.Opaque) + } else { + if u.Scheme != "" || u.Host != "" || u.User != nil { + buf.WriteString("//") + if ui := u.User; ui != nil { + buf.WriteString(unescapeUserinfo(ui.String())) + buf.WriteByte('@') + } + if h := u.Host; h != "" { + buf.WriteString(h) + } + } + if u.Path != "" && u.Path[0] != '/' && u.Host != "" { + buf.WriteByte('/') + } + buf.WriteString(escape(u.Path, encodePath)) + } + if u.RawQuery != "" { + buf.WriteByte('?') + buf.WriteString(u.RawQuery) + } + if u.Fragment != "" { + buf.WriteByte('#') + buf.WriteString(escape(u.Fragment, encodeFragment)) + } + return buf.String() +} diff --git a/urlesc/urlesc_test.go b/urlesc/urlesc_test.go new file mode 100644 index 0000000..45202e1 --- /dev/null +++ b/urlesc/urlesc_test.go @@ -0,0 +1,641 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package urlesc + +import ( + "net/url" + "testing" +) + +type URLTest struct { + in string + out *url.URL + roundtrip string // expected result of reserializing the URL; empty means same as "in". +} + +var urltests = []URLTest{ + // no path + { + "http://www.google.com", + &url.URL{ + Scheme: "http", + Host: "www.google.com", + }, + "", + }, + // path + { + "http://www.google.com/", + &url.URL{ + Scheme: "http", + Host: "www.google.com", + Path: "/", + }, + "", + }, + // path with hex escaping + { + "http://www.google.com/file%20one%26two", + &url.URL{ + Scheme: "http", + Host: "www.google.com", + Path: "/file one&two", + }, + "http://www.google.com/file%20one&two", + }, + // user + { + "ftp://webmaster@www.google.com/", + &url.URL{ + Scheme: "ftp", + User: url.User("webmaster"), + Host: "www.google.com", + Path: "/", + }, + "", + }, + // escape sequence in username + { + "ftp://john%20doe@www.google.com/", + &url.URL{ + Scheme: "ftp", + User: url.User("john doe"), + Host: "www.google.com", + Path: "/", + }, + "ftp://john%20doe@www.google.com/", + }, + // query + { + "http://www.google.com/?q=go+language", + &url.URL{ + Scheme: "http", + Host: "www.google.com", + Path: "/", + RawQuery: "q=go+language", + }, + "", + }, + // query with hex escaping: NOT parsed + { + "http://www.google.com/?q=go%20language", + &url.URL{ + Scheme: "http", + Host: "www.google.com", + Path: "/", + RawQuery: "q=go%20language", + }, + "", + }, + // %20 outside query + { + "http://www.google.com/a%20b?q=c+d", + &url.URL{ + Scheme: "http", + Host: "www.google.com", + Path: "/a b", + RawQuery: "q=c+d", + }, + "", + }, + // path without leading /, so no parsing + { + "http:www.google.com/?q=go+language", + &url.URL{ + Scheme: "http", + Opaque: "www.google.com/", + RawQuery: "q=go+language", + }, + "http:www.google.com/?q=go+language", + }, + // path without leading /, so no parsing + { + "http:%2f%2fwww.google.com/?q=go+language", + &url.URL{ + Scheme: "http", + Opaque: "%2f%2fwww.google.com/", + RawQuery: "q=go+language", + }, + "http:%2f%2fwww.google.com/?q=go+language", + }, + // non-authority with path + { + "mailto:/webmaster@golang.org", + &url.URL{ + Scheme: "mailto", + Path: "/webmaster@golang.org", + }, + "mailto:///webmaster@golang.org", // unfortunate compromise + }, + // non-authority + { + "mailto:webmaster@golang.org", + &url.URL{ + Scheme: "mailto", + Opaque: "webmaster@golang.org", + }, + "", + }, + // unescaped :// in query should not create a scheme + { + "/foo?query=http://bad", + &url.URL{ + Path: "/foo", + RawQuery: "query=http://bad", + }, + "", + }, + // leading // without scheme should create an authority + { + "//foo", + &url.URL{ + Host: "foo", + }, + "", + }, + // leading // without scheme, with userinfo, path, and query + { + "//user@foo/path?a=b", + &url.URL{ + User: url.User("user"), + Host: "foo", + Path: "/path", + RawQuery: "a=b", + }, + "", + }, + // Three leading slashes isn't an authority, but doesn't return an error. + // (We can't return an error, as this code is also used via + // ServeHTTP -> ReadRequest -> Parse, which is arguably a + // different URL parsing context, but currently shares the + // same codepath) + { + "///threeslashes", + &url.URL{ + Path: "///threeslashes", + }, + "", + }, + { + "http://user:password@google.com", + &url.URL{ + Scheme: "http", + User: url.UserPassword("user", "password"), + Host: "google.com", + }, + "http://user:password@google.com", + }, + // unescaped @ in username should not confuse host + { + "http://j@ne:password@google.com", + &url.URL{ + Scheme: "http", + User: url.UserPassword("j@ne", "password"), + Host: "google.com", + }, + "http://j%40ne:password@google.com", + }, + // unescaped @ in password should not confuse host + { + "http://jane:p@ssword@google.com", + &url.URL{ + Scheme: "http", + User: url.UserPassword("jane", "p@ssword"), + Host: "google.com", + }, + "http://jane:p%40ssword@google.com", + }, + { + "http://j@ne:password@google.com/p@th?q=@go", + &url.URL{ + Scheme: "http", + User: url.UserPassword("j@ne", "password"), + Host: "google.com", + Path: "/p@th", + RawQuery: "q=@go", + }, + "http://j%40ne:password@google.com/p@th?q=@go", + }, + { + "http://www.google.com/?q=go+language#foo", + &url.URL{ + Scheme: "http", + Host: "www.google.com", + Path: "/", + RawQuery: "q=go+language", + Fragment: "foo", + }, + "", + }, + { + "http://www.google.com/?q=go+language#foo%26bar", + &url.URL{ + Scheme: "http", + Host: "www.google.com", + Path: "/", + RawQuery: "q=go+language", + Fragment: "foo&bar", + }, + "http://www.google.com/?q=go+language#foo&bar", + }, + { + "file:///home/adg/rabbits", + &url.URL{ + Scheme: "file", + Host: "", + Path: "/home/adg/rabbits", + }, + "file:///home/adg/rabbits", + }, + // "Windows" paths are no exception to the rule. + // See golang.org/issue/6027, especially comment #9. + { + "file:///C:/FooBar/Baz.txt", + &url.URL{ + Scheme: "file", + Host: "", + Path: "/C:/FooBar/Baz.txt", + }, + "file:///C:/FooBar/Baz.txt", + }, + // case-insensitive scheme + { + "MaIlTo:webmaster@golang.org", + &url.URL{ + Scheme: "mailto", + Opaque: "webmaster@golang.org", + }, + "mailto:webmaster@golang.org", + }, + // Relative path + { + "a/b/c", + &url.URL{ + Path: "a/b/c", + }, + "a/b/c", + }, + // escaped '?' in username and password + { + "http://%3Fam:pa%3Fsword@google.com", + &url.URL{ + Scheme: "http", + User: url.UserPassword("?am", "pa?sword"), + Host: "google.com", + }, + "", + }, + // escaped '?' and '#' in path + { + "http://example.com/%3F%23", + &url.URL{ + Scheme: "http", + Host: "example.com", + Path: "?#", + }, + "", + }, + // unescaped [ ] ! ' ( ) * in path + { + "http://example.com/[]!'()*", + &url.URL{ + Scheme: "http", + Host: "example.com", + Path: "[]!'()*", + }, + "http://example.com/[]!'()*", + }, + // escaped : / ? # [ ] @ in username and password + { + "http://%3A%2F%3F:%23%5B%5D%40@example.com", + &url.URL{ + Scheme: "http", + User: url.UserPassword(":/?", "#[]@"), + Host: "example.com", + }, + "", + }, + // unescaped ! $ & ' ( ) * + , ; = in username and password + { + "http://!$&'():*+,;=@example.com", + &url.URL{ + Scheme: "http", + User: url.UserPassword("!$&'()", "*+,;="), + Host: "example.com", + }, + "", + }, + // unescaped = : / . ? = in query component + { + "http://example.com/?q=http://google.com/?q=", + &url.URL{ + Scheme: "http", + Host: "example.com", + Path: "/", + RawQuery: "q=http://google.com/?q=", + }, + "", + }, + // unescaped : / ? [ ] @ ! $ & ' ( ) * + , ; = in fragment + { + "http://example.com/#:/?%23[]@!$&'()*+,;=", + &url.URL{ + Scheme: "http", + Host: "example.com", + Path: "/", + Fragment: ":/?#[]@!$&'()*+,;=", + }, + "", + }, +} + +func DoTestString(t *testing.T, parse func(string) (*url.URL, error), name string, tests []URLTest) { + for _, tt := range tests { + u, err := parse(tt.in) + if err != nil { + t.Errorf("%s(%q) returned error %s", name, tt.in, err) + continue + } + expected := tt.in + if len(tt.roundtrip) > 0 { + expected = tt.roundtrip + } + s := Escape(u) + if s != expected { + t.Errorf("Escape(%s(%q)) == %q (expected %q)", name, tt.in, s, expected) + } + } +} + +func TestURLString(t *testing.T) { + DoTestString(t, url.Parse, "Parse", urltests) + + // no leading slash on path should prepend + // slash on String() call + noslash := URLTest{ + "http://www.google.com/search", + &url.URL{ + Scheme: "http", + Host: "www.google.com", + Path: "search", + }, + "", + } + s := Escape(noslash.out) + if s != noslash.in { + t.Errorf("Expected %s; go %s", noslash.in, s) + } +} + +type EscapeTest struct { + in string + out string + err error +} + +var escapeTests = []EscapeTest{ + { + "", + "", + nil, + }, + { + "abc", + "abc", + nil, + }, + { + "one two", + "one+two", + nil, + }, + { + "10%", + "10%25", + nil, + }, + { + " ?&=#+%!<>#\"{}|\\^[]`☺\t:/@$'()*,;", + "+?%26%3D%23%2B%25%21%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09%3A/%40%24%27%28%29%2A%2C%3B", + nil, + }, +} + +func TestEscape(t *testing.T) { + for _, tt := range escapeTests { + actual := QueryEscape(tt.in) + if tt.out != actual { + t.Errorf("QueryEscape(%q) = %q, want %q", tt.in, actual, tt.out) + } + + // for bonus points, verify that escape:unescape is an identity. + roundtrip, err := url.QueryUnescape(actual) + if roundtrip != tt.in || err != nil { + t.Errorf("QueryUnescape(%q) = %q, %s; want %q, %s", actual, roundtrip, err, tt.in, "[no error]") + } + } +} + +var resolveReferenceTests = []struct { + base, rel, expected string +}{ + // Absolute URL references + {"http://foo.com?a=b", "https://bar.com/", "https://bar.com/"}, + {"http://foo.com/", "https://bar.com/?a=b", "https://bar.com/?a=b"}, + {"http://foo.com/bar", "mailto:foo@example.com", "mailto:foo@example.com"}, + + // Path-absolute references + {"http://foo.com/bar", "/baz", "http://foo.com/baz"}, + {"http://foo.com/bar?a=b#f", "/baz", "http://foo.com/baz"}, + {"http://foo.com/bar?a=b", "/baz?c=d", "http://foo.com/baz?c=d"}, + + // Scheme-relative + {"https://foo.com/bar?a=b", "//bar.com/quux", "https://bar.com/quux"}, + + // Path-relative references: + + // ... current directory + {"http://foo.com", ".", "http://foo.com/"}, + {"http://foo.com/bar", ".", "http://foo.com/"}, + {"http://foo.com/bar/", ".", "http://foo.com/bar/"}, + + // ... going down + {"http://foo.com", "bar", "http://foo.com/bar"}, + {"http://foo.com/", "bar", "http://foo.com/bar"}, + {"http://foo.com/bar/baz", "quux", "http://foo.com/bar/quux"}, + + // ... going up + {"http://foo.com/bar/baz", "../quux", "http://foo.com/quux"}, + {"http://foo.com/bar/baz", "../../../../../quux", "http://foo.com/quux"}, + {"http://foo.com/bar", "..", "http://foo.com/"}, + {"http://foo.com/bar/baz", "./..", "http://foo.com/"}, + // ".." in the middle (issue 3560) + {"http://foo.com/bar/baz", "quux/dotdot/../tail", "http://foo.com/bar/quux/tail"}, + {"http://foo.com/bar/baz", "quux/./dotdot/../tail", "http://foo.com/bar/quux/tail"}, + {"http://foo.com/bar/baz", "quux/./dotdot/.././tail", "http://foo.com/bar/quux/tail"}, + {"http://foo.com/bar/baz", "quux/./dotdot/./../tail", "http://foo.com/bar/quux/tail"}, + {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/././../../tail", "http://foo.com/bar/quux/tail"}, + {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/./.././../tail", "http://foo.com/bar/quux/tail"}, + {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/dotdot/./../../.././././tail", "http://foo.com/bar/quux/tail"}, + {"http://foo.com/bar/baz", "quux/./dotdot/../dotdot/../dot/./tail/..", "http://foo.com/bar/quux/dot/"}, + + // Remove any dot-segments prior to forming the target URI. + // http://tools.ietf.org/html/rfc3986#section-5.2.4 + {"http://foo.com/dot/./dotdot/../foo/bar", "../baz", "http://foo.com/dot/baz"}, + + // Triple dot isn't special + {"http://foo.com/bar", "...", "http://foo.com/..."}, + + // Fragment + {"http://foo.com/bar", ".#frag", "http://foo.com/#frag"}, + + // RFC 3986: Normal Examples + // http://tools.ietf.org/html/rfc3986#section-5.4.1 + {"http://a/b/c/d;p?q", "g:h", "g:h"}, + {"http://a/b/c/d;p?q", "g", "http://a/b/c/g"}, + {"http://a/b/c/d;p?q", "./g", "http://a/b/c/g"}, + {"http://a/b/c/d;p?q", "g/", "http://a/b/c/g/"}, + {"http://a/b/c/d;p?q", "/g", "http://a/g"}, + {"http://a/b/c/d;p?q", "//g", "http://g"}, + {"http://a/b/c/d;p?q", "?y", "http://a/b/c/d;p?y"}, + {"http://a/b/c/d;p?q", "g?y", "http://a/b/c/g?y"}, + {"http://a/b/c/d;p?q", "#s", "http://a/b/c/d;p?q#s"}, + {"http://a/b/c/d;p?q", "g#s", "http://a/b/c/g#s"}, + {"http://a/b/c/d;p?q", "g?y#s", "http://a/b/c/g?y#s"}, + {"http://a/b/c/d;p?q", ";x", "http://a/b/c/;x"}, + {"http://a/b/c/d;p?q", "g;x", "http://a/b/c/g;x"}, + {"http://a/b/c/d;p?q", "g;x?y#s", "http://a/b/c/g;x?y#s"}, + {"http://a/b/c/d;p?q", "", "http://a/b/c/d;p?q"}, + {"http://a/b/c/d;p?q", ".", "http://a/b/c/"}, + {"http://a/b/c/d;p?q", "./", "http://a/b/c/"}, + {"http://a/b/c/d;p?q", "..", "http://a/b/"}, + {"http://a/b/c/d;p?q", "../", "http://a/b/"}, + {"http://a/b/c/d;p?q", "../g", "http://a/b/g"}, + {"http://a/b/c/d;p?q", "../..", "http://a/"}, + {"http://a/b/c/d;p?q", "../../", "http://a/"}, + {"http://a/b/c/d;p?q", "../../g", "http://a/g"}, + + // RFC 3986: Abnormal Examples + // http://tools.ietf.org/html/rfc3986#section-5.4.2 + {"http://a/b/c/d;p?q", "../../../g", "http://a/g"}, + {"http://a/b/c/d;p?q", "../../../../g", "http://a/g"}, + {"http://a/b/c/d;p?q", "/./g", "http://a/g"}, + {"http://a/b/c/d;p?q", "/../g", "http://a/g"}, + {"http://a/b/c/d;p?q", "g.", "http://a/b/c/g."}, + {"http://a/b/c/d;p?q", ".g", "http://a/b/c/.g"}, + {"http://a/b/c/d;p?q", "g..", "http://a/b/c/g.."}, + {"http://a/b/c/d;p?q", "..g", "http://a/b/c/..g"}, + {"http://a/b/c/d;p?q", "./../g", "http://a/b/g"}, + {"http://a/b/c/d;p?q", "./g/.", "http://a/b/c/g/"}, + {"http://a/b/c/d;p?q", "g/./h", "http://a/b/c/g/h"}, + {"http://a/b/c/d;p?q", "g/../h", "http://a/b/c/h"}, + {"http://a/b/c/d;p?q", "g;x=1/./y", "http://a/b/c/g;x=1/y"}, + {"http://a/b/c/d;p?q", "g;x=1/../y", "http://a/b/c/y"}, + {"http://a/b/c/d;p?q", "g?y/./x", "http://a/b/c/g?y/./x"}, + {"http://a/b/c/d;p?q", "g?y/../x", "http://a/b/c/g?y/../x"}, + {"http://a/b/c/d;p?q", "g#s/./x", "http://a/b/c/g#s/./x"}, + {"http://a/b/c/d;p?q", "g#s/../x", "http://a/b/c/g#s/../x"}, + + // Extras. + {"https://a/b/c/d;p?q", "//g?q", "https://g?q"}, + {"https://a/b/c/d;p?q", "//g#s", "https://g#s"}, + {"https://a/b/c/d;p?q", "//g/d/e/f?y#s", "https://g/d/e/f?y#s"}, + {"https://a/b/c/d;p#s", "?y", "https://a/b/c/d;p?y"}, + {"https://a/b/c/d;p?q#s", "?y", "https://a/b/c/d;p?y"}, +} + +func TestResolveReference(t *testing.T) { + mustParse := func(url_ string) *url.URL { + u, err := url.Parse(url_) + if err != nil { + t.Fatalf("Expected URL to parse: %q, got error: %v", url_, err) + } + return u + } + opaque := &url.URL{Scheme: "scheme", Opaque: "opaque"} + for _, test := range resolveReferenceTests { + base := mustParse(test.base) + rel := mustParse(test.rel) + url := base.ResolveReference(rel) + if Escape(url) != test.expected { + t.Errorf("URL(%q).ResolveReference(%q) == %q, got %q", test.base, test.rel, test.expected, Escape(url)) + } + // Ensure that new instances are returned. + if base == url { + t.Errorf("Expected URL.ResolveReference to return new URL instance.") + } + // Test the convenience wrapper too. + url, err := base.Parse(test.rel) + if err != nil { + t.Errorf("URL(%q).Parse(%q) failed: %v", test.base, test.rel, err) + } else if Escape(url) != test.expected { + t.Errorf("URL(%q).Parse(%q) == %q, got %q", test.base, test.rel, test.expected, Escape(url)) + } else if base == url { + // Ensure that new instances are returned for the wrapper too. + t.Errorf("Expected URL.Parse to return new URL instance.") + } + // Ensure Opaque resets the URL. + url = base.ResolveReference(opaque) + if *url != *opaque { + t.Errorf("ResolveReference failed to resolve opaque URL: want %#v, got %#v", url, opaque) + } + // Test the convenience wrapper with an opaque URL too. + url, err = base.Parse("scheme:opaque") + if err != nil { + t.Errorf(`URL(%q).Parse("scheme:opaque") failed: %v`, test.base, err) + } else if *url != *opaque { + t.Errorf("Parse failed to resolve opaque URL: want %#v, got %#v", url, opaque) + } else if base == url { + // Ensure that new instances are returned, again. + t.Errorf("Expected URL.Parse to return new URL instance.") + } + } +} + +type shouldEscapeTest struct { + in byte + mode encoding + escape bool +} + +var shouldEscapeTests = []shouldEscapeTest{ + // Unreserved characters (§2.3) + {'a', encodePath, false}, + {'a', encodeUserPassword, false}, + {'a', encodeQueryComponent, false}, + {'a', encodeFragment, false}, + {'z', encodePath, false}, + {'A', encodePath, false}, + {'Z', encodePath, false}, + {'0', encodePath, false}, + {'9', encodePath, false}, + {'-', encodePath, false}, + {'-', encodeUserPassword, false}, + {'-', encodeQueryComponent, false}, + {'-', encodeFragment, false}, + {'.', encodePath, false}, + {'_', encodePath, false}, + {'~', encodePath, false}, + + // User information (§3.2.1) + {':', encodeUserPassword, true}, + {'/', encodeUserPassword, true}, + {'?', encodeUserPassword, true}, + {'@', encodeUserPassword, true}, + {'$', encodeUserPassword, false}, + {'&', encodeUserPassword, false}, + {'+', encodeUserPassword, false}, + {',', encodeUserPassword, false}, + {';', encodeUserPassword, false}, + {'=', encodeUserPassword, false}, +} + +func TestShouldEscape(t *testing.T) { + for _, tt := range shouldEscapeTests { + if shouldEscape(tt.in, tt.mode) != tt.escape { + t.Errorf("shouldEscape(%q, %v) returned %v; expected %v", tt.in, tt.mode, !tt.escape, tt.escape) + } + } +} -- cgit v1.2.3-54-g00ecf