diff options
-rw-r--r-- | src/go/build/deps_test.go | 6 | ||||
-rw-r--r-- | src/html/template/context.go | 14 | ||||
-rw-r--r-- | src/html/template/escape.go | 26 | ||||
-rw-r--r-- | src/html/template/escape_test.go | 47 | ||||
-rw-r--r-- | src/html/template/transition.go | 15 |
5 files changed, 104 insertions, 4 deletions
diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go index 08452c7b1d..3e8fb9ea11 100644 --- a/src/go/build/deps_test.go +++ b/src/go/build/deps_test.go @@ -236,15 +236,15 @@ var depsRules = ` < text/template < internal/lazytemplate; - encoding/json, html, text/template - < html/template; - # regexp FMT < regexp/syntax < regexp < internal/lazyregexp; + encoding/json, html, text/template, regexp + < html/template; + # suffix array encoding/binary, regexp < index/suffixarray; diff --git a/src/html/template/context.go b/src/html/template/context.go index e07a0c4a02..7987713c65 100644 --- a/src/html/template/context.go +++ b/src/html/template/context.go @@ -174,6 +174,20 @@ func isInTag(s state) bool { return false } +// isInScriptLiteral returns true if s is one of the literal states within a +// <script> tag, and as such occurrences of "<!--", "<script", and "</script" +// need to be treated specially. +func isInScriptLiteral(s state) bool { + // Ignore the comment states (stateJSBlockCmt, stateJSLineCmt, + // stateJSHTMLOpenCmt, stateJSHTMLCloseCmt) because their content is already + // omitted from the output. + switch s { + case stateJSDqStr, stateJSSqStr, stateJSBqStr, stateJSRegexp: + return true + } + return false +} + // delim is the delimiter that will end the current HTML attribute. type delim uint8 diff --git a/src/html/template/escape.go b/src/html/template/escape.go index 36021c0a8d..ba898b8677 100644 --- a/src/html/template/escape.go +++ b/src/html/template/escape.go @@ -10,6 +10,7 @@ import ( "html" "internal/godebug" "io" + "regexp" "text/template" "text/template/parse" ) @@ -728,6 +729,26 @@ var delimEnds = [...]string{ delimSpaceOrTagEnd: " \t\n\f\r>", } +var ( + // Per WHATWG HTML specification, section 4.12.1.3, there are extremely + // complicated rules for how to handle the set of opening tags <!--, + // <script, and </script when they appear in JS literals (i.e. strings, + // regexes, and comments). 
The specification suggests a simple solution, + // rather than implementing the arcane ABNF, which involves simply escaping + // the opening bracket with \x3C. We use the below regex for this, since it + // makes doing the case-insensitive find-replace much simpler. + specialScriptTagRE = regexp.MustCompile("(?i)<(script|/script|!--)") + specialScriptTagReplacement = []byte("\\x3C$1") +) + +func containsSpecialScriptTag(s []byte) bool { + return specialScriptTagRE.Match(s) +} + +func escapeSpecialScriptTags(s []byte) []byte { + return specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement) +} + var doctypeBytes = []byte("<!DOCTYPE") // escapeText escapes a text template node. @@ -786,6 +807,11 @@ func (e *escaper) escapeText(c context, n *parse.TextNode) context { b.Write(s[written:cs]) written = i1 } + if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) { + b.Write(s[written:i]) + b.Write(escapeSpecialScriptTags(s[i:i1])) + written = i1 + } if i == i1 && c.state == c1.state { panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:])) } diff --git a/src/html/template/escape_test.go b/src/html/template/escape_test.go index f60c875927..8a4f62e92f 100644 --- a/src/html/template/escape_test.go +++ b/src/html/template/escape_test.go @@ -514,6 +514,21 @@ func TestEscape(t *testing.T) { "<script>\n</script>", }, { + "Special tags in <script> string literals", + `<script>var a = "asd < 123 <!-- 456 < fgh <script jkl < 789 </script"</script>`, + `<script>var a = "asd < 123 \x3C!-- 456 < fgh \x3Cscript jkl < 789 \x3C/script"</script>`, + }, + { + "Special tags in <script> string literals (mixed case)", + `<script>var a = "<!-- <ScripT </ScripT"</script>`, + `<script>var a = "\x3C!-- \x3CScripT \x3C/ScripT"</script>`, + }, + { + "Special tags in <script> regex literals (mixed case)", + `<script>var a = /<!-- <ScripT </ScripT/</script>`, + `<script>var a = /\x3C!-- \x3CScripT \x3C/ScripT/</script>`, + }, + { "CSS comments", 
"<style>p// paragraph\n" + `{border: 1px/* color */{{"#00f"}}}</style>`, @@ -1533,8 +1548,38 @@ func TestEscapeText(t *testing.T) { context{state: stateJS, element: elementScript}, }, { + // <script and </script tags are escaped, so </script> should not + // cause us to exit the JS state. `<script>document.write("<script>alert(1)</script>");`, - context{state: stateText}, + context{state: stateJS, element: elementScript}, + }, + { + `<script>document.write("<script>`, + context{state: stateJSDqStr, element: elementScript}, + }, + { + `<script>document.write("<script>alert(1)</script>`, + context{state: stateJSDqStr, element: elementScript}, + }, + { + `<script>document.write("<script>alert(1)<!--`, + context{state: stateJSDqStr, element: elementScript}, + }, + { + `<script>document.write("<script>alert(1)</Script>");`, + context{state: stateJS, element: elementScript}, + }, + { + `<script>document.write("<!--");`, + context{state: stateJS, element: elementScript}, + }, + { + `<script>let a = /</script`, + context{state: stateJSRegexp, element: elementScript}, + }, + { + `<script>let a = /</script/`, + context{state: stateJS, element: elementScript, jsCtx: jsCtxDivOp}, }, { `<script type="text/template">`, diff --git a/src/html/template/transition.go b/src/html/template/transition.go index 12aa4c41fe..3d2a37cdd9 100644 --- a/src/html/template/transition.go +++ b/src/html/template/transition.go @@ -214,6 +214,11 @@ var ( // element states. func tSpecialTagEnd(c context, s []byte) (context, int) { if c.element != elementNone { + // script end tags ("</script") within script literals are ignored, so that + // we can properly escape them. 
+ if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) { + return c, len(s) + } if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 { return context{}, i } @@ -353,6 +358,16 @@ func tJSDelimited(c context, s []byte) (context, int) { inCharset = true case ']': inCharset = false + case '/': + // If "</script" appears in a regex literal, the '/' should not + // close the regex literal, and it will later be escaped to + // "\x3C/script" in escapeText. + if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 { + i++ + } else if !inCharset { + c.state, c.jsCtx = stateJS, jsCtxDivOp + return c, i + 1 + } default: // end delimiter if !inCharset { |