// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package urlesc import ( "net/url" "testing" ) type URLTest struct { in string out *url.URL roundtrip string // expected result of reserializing the URL; empty means same as "in". } var urltests = []URLTest{ // no path { "http://www.google.com", &url.URL{ Scheme: "http", Host: "www.google.com", }, "", }, // path { "http://www.google.com/", &url.URL{ Scheme: "http", Host: "www.google.com", Path: "/", }, "", }, // path with hex escaping { "http://www.google.com/file%20one%26two", &url.URL{ Scheme: "http", Host: "www.google.com", Path: "/file one&two", }, "http://www.google.com/file%20one&two", }, // user { "ftp://webmaster@www.google.com/", &url.URL{ Scheme: "ftp", User: url.User("webmaster"), Host: "www.google.com", Path: "/", }, "", }, // escape sequence in username { "ftp://john%20doe@www.google.com/", &url.URL{ Scheme: "ftp", User: url.User("john doe"), Host: "www.google.com", Path: "/", }, "ftp://john%20doe@www.google.com/", }, // query { "http://www.google.com/?q=go+language", &url.URL{ Scheme: "http", Host: "www.google.com", Path: "/", RawQuery: "q=go+language", }, "", }, // query with hex escaping: NOT parsed { "http://www.google.com/?q=go%20language", &url.URL{ Scheme: "http", Host: "www.google.com", Path: "/", RawQuery: "q=go%20language", }, "", }, // %20 outside query { "http://www.google.com/a%20b?q=c+d", &url.URL{ Scheme: "http", Host: "www.google.com", Path: "/a b", RawQuery: "q=c+d", }, "", }, // path without leading /, so no parsing { "http:www.google.com/?q=go+language", &url.URL{ Scheme: "http", Opaque: "www.google.com/", RawQuery: "q=go+language", }, "http:www.google.com/?q=go+language", }, // path without leading /, so no parsing { "http:%2f%2fwww.google.com/?q=go+language", &url.URL{ Scheme: "http", Opaque: "%2f%2fwww.google.com/", RawQuery: "q=go+language", }, "http:%2f%2fwww.google.com/?q=go+language", }, // non-authority with path { "mailto:/webmaster@golang.org", &url.URL{ Scheme: "mailto", Path: "/webmaster@golang.org", }, "mailto:///webmaster@golang.org", // unfortunate compromise }, // non-authority { "mailto:webmaster@golang.org", &url.URL{ Scheme: "mailto", Opaque: "webmaster@golang.org", }, "", }, // unescaped :// in query should not create a scheme { "/foo?query=http://bad", &url.URL{ Path: "/foo", RawQuery: "query=http://bad", }, "", }, // leading // without scheme should create an authority { "//foo", &url.URL{ Host: "foo", }, "", }, // leading // without scheme, with userinfo, path, and query { "//user@foo/path?a=b", &url.URL{ User: url.User("user"), Host: "foo", Path: "/path", RawQuery: "a=b", }, "", }, // Three leading slashes isn't an authority, but doesn't return an error. // (We can't return an error, as this code is also used via // ServeHTTP -> ReadRequest -> Parse, which is arguably a // different URL parsing context, but currently shares the // same codepath) { "///threeslashes", &url.URL{ Path: "///threeslashes", }, "", }, { "http://user:password@google.com", &url.URL{ Scheme: "http", User: url.UserPassword("user", "password"), Host: "google.com", }, "http://user:password@google.com", }, // unescaped @ in username should not confuse host { "http://j@ne:password@google.com", &url.URL{ Scheme: "http", User: url.UserPassword("j@ne", "password"), Host: "google.com", }, "http://j%40ne:password@google.com", }, // unescaped @ in password should not confuse host { "http://jane:p@ssword@google.com", &url.URL{ Scheme: "http", User: url.UserPassword("jane", "p@ssword"), Host: "google.com", }, "http://jane:p%40ssword@google.com", }, { "http://j@ne:password@google.com/p@th?q=@go", &url.URL{ Scheme: "http", User: url.UserPassword("j@ne", "password"), Host: "google.com", Path: "/p@th", RawQuery: "q=@go", }, "http://j%40ne:password@google.com/p@th?q=@go", }, { "http://www.google.com/?q=go+language#foo", &url.URL{ Scheme: "http", Host: "www.google.com", Path: "/", RawQuery: "q=go+language", Fragment: "foo", }, "", }, { "http://www.google.com/?q=go+language#foo%26bar", &url.URL{ Scheme: "http", Host: "www.google.com", Path: "/", RawQuery: "q=go+language", Fragment: "foo&bar", }, "http://www.google.com/?q=go+language#foo&bar", }, { "file:///home/adg/rabbits", &url.URL{ Scheme: "file", Host: "", Path: "/home/adg/rabbits", }, "file:///home/adg/rabbits", }, // "Windows" paths are no exception to the rule. // See golang.org/issue/6027, especially comment #9. { "file:///C:/FooBar/Baz.txt", &url.URL{ Scheme: "file", Host: "", Path: "/C:/FooBar/Baz.txt", }, "file:///C:/FooBar/Baz.txt", }, // case-insensitive scheme { "MaIlTo:webmaster@golang.org", &url.URL{ Scheme: "mailto", Opaque: "webmaster@golang.org", }, "mailto:webmaster@golang.org", }, // Relative path { "a/b/c", &url.URL{ Path: "a/b/c", }, "a/b/c", }, // escaped '?' in username and password { "http://%3Fam:pa%3Fsword@google.com", &url.URL{ Scheme: "http", User: url.UserPassword("?am", "pa?sword"), Host: "google.com", }, "", }, // escaped '?' and '#' in path { "http://example.com/%3F%23", &url.URL{ Scheme: "http", Host: "example.com", Path: "?#", }, "", }, // unescaped [ ] ! ' ( ) * in path { "http://example.com/[]!'()*", &url.URL{ Scheme: "http", Host: "example.com", Path: "[]!'()*", }, "http://example.com/[]!'()*", }, // escaped : / ? # [ ] @ in username and password { "http://%3A%2F%3F:%23%5B%5D%40@example.com", &url.URL{ Scheme: "http", User: url.UserPassword(":/?", "#[]@"), Host: "example.com", }, "", }, // unescaped ! $ & ' ( ) * + , ; = in username and password { "http://!$&'():*+,;=@example.com", &url.URL{ Scheme: "http", User: url.UserPassword("!$&'()", "*+,;="), Host: "example.com", }, "", }, // unescaped = : / . ? = in query component { "http://example.com/?q=http://google.com/?q=", &url.URL{ Scheme: "http", Host: "example.com", Path: "/", RawQuery: "q=http://google.com/?q=", }, "", }, // unescaped : / ? [ ] @ ! $ & ' ( ) * + , ; = in fragment { "http://example.com/#:/?%23[]@!$&'()*+,;=", &url.URL{ Scheme: "http", Host: "example.com", Path: "/", Fragment: ":/?#[]@!$&'()*+,;=", }, "", }, } func DoTestString(t *testing.T, parse func(string) (*url.URL, error), name string, tests []URLTest) { for _, tt := range tests { u, err := parse(tt.in) if err != nil { t.Errorf("%s(%q) returned error %s", name, tt.in, err) continue } expected := tt.in if len(tt.roundtrip) > 0 { expected = tt.roundtrip } s := Escape(u) if s != expected { t.Errorf("Escape(%s(%q)) == %q (expected %q)", name, tt.in, s, expected) } } } func TestURLString(t *testing.T) { DoTestString(t, url.Parse, "Parse", urltests) // no leading slash on path should prepend // slash on String() call noslash := URLTest{ "http://www.google.com/search", &url.URL{ Scheme: "http", Host: "www.google.com", Path: "search", }, "", } s := Escape(noslash.out) if s != noslash.in { t.Errorf("Expected %s; go %s", noslash.in, s) } } type EscapeTest struct { in string out string err error } var escapeTests = []EscapeTest{ { "", "", nil, }, { "abc", "abc", nil, }, { "one two", "one+two", nil, }, { "10%", "10%25", nil, }, { " ?&=#+%!<>#\"{}|\\^[]`☺\t:/@$'()*,;", "+?%26%3D%23%2B%25%21%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09%3A/%40%24%27%28%29%2A%2C%3B", nil, }, } func TestEscape(t *testing.T) { for _, tt := range escapeTests { actual := QueryEscape(tt.in) if tt.out != actual { t.Errorf("QueryEscape(%q) = %q, want %q", tt.in, actual, tt.out) } // for bonus points, verify that escape:unescape is an identity. roundtrip, err := url.QueryUnescape(actual) if roundtrip != tt.in || err != nil { t.Errorf("QueryUnescape(%q) = %q, %s; want %q, %s", actual, roundtrip, err, tt.in, "[no error]") } } } var resolveReferenceTests = []struct { base, rel, expected string }{ // Absolute URL references {"http://foo.com?a=b", "https://bar.com/", "https://bar.com/"}, {"http://foo.com/", "https://bar.com/?a=b", "https://bar.com/?a=b"}, {"http://foo.com/bar", "mailto:foo@example.com", "mailto:foo@example.com"}, // Path-absolute references {"http://foo.com/bar", "/baz", "http://foo.com/baz"}, {"http://foo.com/bar?a=b#f", "/baz", "http://foo.com/baz"}, {"http://foo.com/bar?a=b", "/baz?c=d", "http://foo.com/baz?c=d"}, // Scheme-relative {"https://foo.com/bar?a=b", "//bar.com/quux", "https://bar.com/quux"}, // Path-relative references: // ... current directory {"http://foo.com", ".", "http://foo.com/"}, {"http://foo.com/bar", ".", "http://foo.com/"}, {"http://foo.com/bar/", ".", "http://foo.com/bar/"}, // ... going down {"http://foo.com", "bar", "http://foo.com/bar"}, {"http://foo.com/", "bar", "http://foo.com/bar"}, {"http://foo.com/bar/baz", "quux", "http://foo.com/bar/quux"}, // ... going up {"http://foo.com/bar/baz", "../quux", "http://foo.com/quux"}, {"http://foo.com/bar/baz", "../../../../../quux", "http://foo.com/quux"}, {"http://foo.com/bar", "..", "http://foo.com/"}, {"http://foo.com/bar/baz", "./..", "http://foo.com/"}, // ".." in the middle (issue 3560) {"http://foo.com/bar/baz", "quux/dotdot/../tail", "http://foo.com/bar/quux/tail"}, {"http://foo.com/bar/baz", "quux/./dotdot/../tail", "http://foo.com/bar/quux/tail"}, {"http://foo.com/bar/baz", "quux/./dotdot/.././tail", "http://foo.com/bar/quux/tail"}, {"http://foo.com/bar/baz", "quux/./dotdot/./../tail", "http://foo.com/bar/quux/tail"}, {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/././../../tail", "http://foo.com/bar/quux/tail"}, {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/./.././../tail", "http://foo.com/bar/quux/tail"}, {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/dotdot/./../../.././././tail", "http://foo.com/bar/quux/tail"}, {"http://foo.com/bar/baz", "quux/./dotdot/../dotdot/../dot/./tail/..", "http://foo.com/bar/quux/dot/"}, // Remove any dot-segments prior to forming the target URI. // http://tools.ietf.org/html/rfc3986#section-5.2.4 {"http://foo.com/dot/./dotdot/../foo/bar", "../baz", "http://foo.com/dot/baz"}, // Triple dot isn't special {"http://foo.com/bar", "...", "http://foo.com/..."}, // Fragment {"http://foo.com/bar", ".#frag", "http://foo.com/#frag"}, // RFC 3986: Normal Examples // http://tools.ietf.org/html/rfc3986#section-5.4.1 {"http://a/b/c/d;p?q", "g:h", "g:h"}, {"http://a/b/c/d;p?q", "g", "http://a/b/c/g"}, {"http://a/b/c/d;p?q", "./g", "http://a/b/c/g"}, {"http://a/b/c/d;p?q", "g/", "http://a/b/c/g/"}, {"http://a/b/c/d;p?q", "/g", "http://a/g"}, {"http://a/b/c/d;p?q", "//g", "http://g"}, {"http://a/b/c/d;p?q", "?y", "http://a/b/c/d;p?y"}, {"http://a/b/c/d;p?q", "g?y", "http://a/b/c/g?y"}, {"http://a/b/c/d;p?q", "#s", "http://a/b/c/d;p?q#s"}, {"http://a/b/c/d;p?q", "g#s", "http://a/b/c/g#s"}, {"http://a/b/c/d;p?q", "g?y#s", "http://a/b/c/g?y#s"}, {"http://a/b/c/d;p?q", ";x", "http://a/b/c/;x"}, {"http://a/b/c/d;p?q", "g;x", "http://a/b/c/g;x"}, {"http://a/b/c/d;p?q", "g;x?y#s", "http://a/b/c/g;x?y#s"}, {"http://a/b/c/d;p?q", "", "http://a/b/c/d;p?q"}, {"http://a/b/c/d;p?q", ".", "http://a/b/c/"}, {"http://a/b/c/d;p?q", "./", "http://a/b/c/"}, {"http://a/b/c/d;p?q", "..", "http://a/b/"}, {"http://a/b/c/d;p?q", "../", "http://a/b/"}, {"http://a/b/c/d;p?q", "../g", "http://a/b/g"}, {"http://a/b/c/d;p?q", "../..", "http://a/"}, {"http://a/b/c/d;p?q", "../../", "http://a/"}, {"http://a/b/c/d;p?q", "../../g", "http://a/g"}, // RFC 3986: Abnormal Examples // http://tools.ietf.org/html/rfc3986#section-5.4.2 {"http://a/b/c/d;p?q", "../../../g", "http://a/g"}, {"http://a/b/c/d;p?q", "../../../../g", "http://a/g"}, {"http://a/b/c/d;p?q", "/./g", "http://a/g"}, {"http://a/b/c/d;p?q", "/../g", "http://a/g"}, {"http://a/b/c/d;p?q", "g.", "http://a/b/c/g."}, {"http://a/b/c/d;p?q", ".g", "http://a/b/c/.g"}, {"http://a/b/c/d;p?q", "g..", "http://a/b/c/g.."}, {"http://a/b/c/d;p?q", "..g", "http://a/b/c/..g"}, {"http://a/b/c/d;p?q", "./../g", "http://a/b/g"}, {"http://a/b/c/d;p?q", "./g/.", "http://a/b/c/g/"}, {"http://a/b/c/d;p?q", "g/./h", "http://a/b/c/g/h"}, {"http://a/b/c/d;p?q", "g/../h", "http://a/b/c/h"}, {"http://a/b/c/d;p?q", "g;x=1/./y", "http://a/b/c/g;x=1/y"}, {"http://a/b/c/d;p?q", "g;x=1/../y", "http://a/b/c/y"}, {"http://a/b/c/d;p?q", "g?y/./x", "http://a/b/c/g?y/./x"}, {"http://a/b/c/d;p?q", "g?y/../x", "http://a/b/c/g?y/../x"}, {"http://a/b/c/d;p?q", "g#s/./x", "http://a/b/c/g#s/./x"}, {"http://a/b/c/d;p?q", "g#s/../x", "http://a/b/c/g#s/../x"}, // Extras. {"https://a/b/c/d;p?q", "//g?q", "https://g?q"}, {"https://a/b/c/d;p?q", "//g#s", "https://g#s"}, {"https://a/b/c/d;p?q", "//g/d/e/f?y#s", "https://g/d/e/f?y#s"}, {"https://a/b/c/d;p#s", "?y", "https://a/b/c/d;p?y"}, {"https://a/b/c/d;p?q#s", "?y", "https://a/b/c/d;p?y"}, } func TestResolveReference(t *testing.T) { mustParse := func(url_ string) *url.URL { u, err := url.Parse(url_) if err != nil { t.Fatalf("Expected URL to parse: %q, got error: %v", url_, err) } return u } opaque := &url.URL{Scheme: "scheme", Opaque: "opaque"} for _, test := range resolveReferenceTests { base := mustParse(test.base) rel := mustParse(test.rel) url := base.ResolveReference(rel) if Escape(url) != test.expected { t.Errorf("URL(%q).ResolveReference(%q) == %q, got %q", test.base, test.rel, test.expected, Escape(url)) } // Ensure that new instances are returned. if base == url { t.Errorf("Expected URL.ResolveReference to return new URL instance.") } // Test the convenience wrapper too. url, err := base.Parse(test.rel) if err != nil { t.Errorf("URL(%q).Parse(%q) failed: %v", test.base, test.rel, err) } else if Escape(url) != test.expected { t.Errorf("URL(%q).Parse(%q) == %q, got %q", test.base, test.rel, test.expected, Escape(url)) } else if base == url { // Ensure that new instances are returned for the wrapper too. t.Errorf("Expected URL.Parse to return new URL instance.") } // Ensure Opaque resets the URL. url = base.ResolveReference(opaque) if *url != *opaque { t.Errorf("ResolveReference failed to resolve opaque URL: want %#v, got %#v", url, opaque) } // Test the convenience wrapper with an opaque URL too. url, err = base.Parse("scheme:opaque") if err != nil { t.Errorf(`URL(%q).Parse("scheme:opaque") failed: %v`, test.base, err) } else if *url != *opaque { t.Errorf("Parse failed to resolve opaque URL: want %#v, got %#v", url, opaque) } else if base == url { // Ensure that new instances are returned, again. t.Errorf("Expected URL.Parse to return new URL instance.") } } } type shouldEscapeTest struct { in byte mode encoding escape bool } var shouldEscapeTests = []shouldEscapeTest{ // Unreserved characters (§2.3) {'a', encodePath, false}, {'a', encodeUserPassword, false}, {'a', encodeQueryComponent, false}, {'a', encodeFragment, false}, {'z', encodePath, false}, {'A', encodePath, false}, {'Z', encodePath, false}, {'0', encodePath, false}, {'9', encodePath, false}, {'-', encodePath, false}, {'-', encodeUserPassword, false}, {'-', encodeQueryComponent, false}, {'-', encodeFragment, false}, {'.', encodePath, false}, {'_', encodePath, false}, {'~', encodePath, false}, // User information (§3.2.1) {':', encodeUserPassword, true}, {'/', encodeUserPassword, true}, {'?', encodeUserPassword, true}, {'@', encodeUserPassword, true}, {'$', encodeUserPassword, false}, {'&', encodeUserPassword, false}, {'+', encodeUserPassword, false}, {',', encodeUserPassword, false}, {';', encodeUserPassword, false}, {'=', encodeUserPassword, false}, } func TestShouldEscape(t *testing.T) { for _, tt := range shouldEscapeTests { if shouldEscape(tt.in, tt.mode) != tt.escape { t.Errorf("shouldEscape(%q, %v) returned %v; expected %v", tt.in, tt.mode, !tt.escape, tt.escape) } } }