aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoland Shoemaker <bracewell@google.com>2023-08-03 12:28:28 -0700
committerCherry Mui <cherryyz@google.com>2023-09-06 14:22:36 +0000
commit2070531d2f53df88e312edace6c8dfc9686ab2f5 (patch)
tree9d5303f210078861019c89edb496607ee537a233
parent023b542edf38e2a1f87fcefb9f75ff2f99401b4c (diff)
downloadgo-2070531d2f53df88e312edace6c8dfc9686ab2f5.tar.gz
go-2070531d2f53df88e312edace6c8dfc9686ab2f5.zip
[release-branch.go1.20] html/template: properly handle special tags within the script context
The HTML specification has incredibly complex rules for how to handle "<!--", "<script", and "</script" when they appear within literals in the script context. Rather than attempting to apply these restrictions (which require a significantly more complex state machine) we apply the workaround suggested in section 4.12.1.3 of the HTML specification [1]. More precisely, when "<!--", "<script", and "</script" appear within literals (strings and regular expressions, ignoring comments since we already elide their content) we replace the "<" with "\x3C". This avoids the unintuitive behavior that using these tags within literals can cause, by simply preventing the rendered content from triggering it. This may break some correct usages of these tags, but on balance is more likely to prevent XSS attacks where users are unknowingly either closing or not closing the script blocks where they think they are. Thanks to Takeshi Kaneko (GMO Cybersecurity by Ierae, Inc.) for reporting this issue. Fixes #62197 Fixes #62397 Fixes CVE-2023-39319 [1] https://html.spec.whatwg.org/#restrictions-for-contents-of-script-elements Change-Id: Iab57b0532694827e3eddf57a7497ba1fab1746dc Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1976594 Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Tatiana Bradley <tatianabradley@google.com> Reviewed-by: Damien Neil <dneil@google.com> Run-TryBot: Roland Shoemaker <bracewell@google.com> Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/2014621 TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com> Reviewed-on: https://go-review.googlesource.com/c/go/+/526099 TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Cherry Mui <cherryyz@google.com>
-rw-r--r--src/go/build/deps_test.go6
-rw-r--r--src/html/template/context.go14
-rw-r--r--src/html/template/escape.go26
-rw-r--r--src/html/template/escape_test.go47
-rw-r--r--src/html/template/transition.go15
5 files changed, 104 insertions, 4 deletions
diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go
index 08452c7b1d..3e8fb9ea11 100644
--- a/src/go/build/deps_test.go
+++ b/src/go/build/deps_test.go
@@ -236,15 +236,15 @@ var depsRules = `
< text/template
< internal/lazytemplate;
- encoding/json, html, text/template
- < html/template;
-
# regexp
FMT
< regexp/syntax
< regexp
< internal/lazyregexp;
+ encoding/json, html, text/template, regexp
+ < html/template;
+
# suffix array
encoding/binary, regexp
< index/suffixarray;
diff --git a/src/html/template/context.go b/src/html/template/context.go
index e07a0c4a02..7987713c65 100644
--- a/src/html/template/context.go
+++ b/src/html/template/context.go
@@ -174,6 +174,20 @@ func isInTag(s state) bool {
return false
}
+// isInScriptLiteral returns true if s is one of the literal states within a
+// <script> tag, and as such occurances of "<!--", "<script", and "</script"
+// need to be treated specially.
+func isInScriptLiteral(s state) bool {
+ // Ignore the comment states (stateJSBlockCmt, stateJSLineCmt,
+ // stateJSHTMLOpenCmt, stateJSHTMLCloseCmt) because their content is already
+ // omitted from the output.
+ switch s {
+ case stateJSDqStr, stateJSSqStr, stateJSBqStr, stateJSRegexp:
+ return true
+ }
+ return false
+}
+
// delim is the delimiter that will end the current HTML attribute.
type delim uint8
diff --git a/src/html/template/escape.go b/src/html/template/escape.go
index 36021c0a8d..ba898b8677 100644
--- a/src/html/template/escape.go
+++ b/src/html/template/escape.go
@@ -10,6 +10,7 @@ import (
"html"
"internal/godebug"
"io"
+ "regexp"
"text/template"
"text/template/parse"
)
@@ -728,6 +729,26 @@ var delimEnds = [...]string{
delimSpaceOrTagEnd: " \t\n\f\r>",
}
+var (
+ // Per WHATWG HTML specification, section 4.12.1.3, there are extremely
+ // complicated rules for how to handle the set of opening tags <!--,
+ // <script, and </script when they appear in JS literals (i.e. strings,
+ // regexs, and comments). The specification suggests a simple solution,
+ // rather than implementing the arcane ABNF, which involves simply escaping
+ // the opening bracket with \x3C. We use the below regex for this, since it
+ // makes doing the case-insensitive find-replace much simpler.
+ specialScriptTagRE = regexp.MustCompile("(?i)<(script|/script|!--)")
+ specialScriptTagReplacement = []byte("\\x3C$1")
+)
+
+func containsSpecialScriptTag(s []byte) bool {
+ return specialScriptTagRE.Match(s)
+}
+
+func escapeSpecialScriptTags(s []byte) []byte {
+ return specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement)
+}
+
var doctypeBytes = []byte("<!DOCTYPE")
// escapeText escapes a text template node.
@@ -786,6 +807,11 @@ func (e *escaper) escapeText(c context, n *parse.TextNode) context {
b.Write(s[written:cs])
written = i1
}
+ if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) {
+ b.Write(s[written:i])
+ b.Write(escapeSpecialScriptTags(s[i:i1]))
+ written = i1
+ }
if i == i1 && c.state == c1.state {
panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
}
diff --git a/src/html/template/escape_test.go b/src/html/template/escape_test.go
index f60c875927..8a4f62e92f 100644
--- a/src/html/template/escape_test.go
+++ b/src/html/template/escape_test.go
@@ -514,6 +514,21 @@ func TestEscape(t *testing.T) {
"<script>\n</script>",
},
{
+ "Special tags in <script> string literals",
+ `<script>var a = "asd < 123 <!-- 456 < fgh <script jkl < 789 </script"</script>`,
+ `<script>var a = "asd < 123 \x3C!-- 456 < fgh \x3Cscript jkl < 789 \x3C/script"</script>`,
+ },
+ {
+ "Special tags in <script> string literals (mixed case)",
+ `<script>var a = "<!-- <ScripT </ScripT"</script>`,
+ `<script>var a = "\x3C!-- \x3CScripT \x3C/ScripT"</script>`,
+ },
+ {
+ "Special tags in <script> regex literals (mixed case)",
+ `<script>var a = /<!-- <ScripT </ScripT/</script>`,
+ `<script>var a = /\x3C!-- \x3CScripT \x3C/ScripT/</script>`,
+ },
+ {
"CSS comments",
"<style>p// paragraph\n" +
`{border: 1px/* color */{{"#00f"}}}</style>`,
@@ -1533,8 +1548,38 @@ func TestEscapeText(t *testing.T) {
context{state: stateJS, element: elementScript},
},
{
+ // <script and </script tags are escaped, so </script> should not
+ // cause us to exit the JS state.
`<script>document.write("<script>alert(1)</script>");`,
- context{state: stateText},
+ context{state: stateJS, element: elementScript},
+ },
+ {
+ `<script>document.write("<script>`,
+ context{state: stateJSDqStr, element: elementScript},
+ },
+ {
+ `<script>document.write("<script>alert(1)</script>`,
+ context{state: stateJSDqStr, element: elementScript},
+ },
+ {
+ `<script>document.write("<script>alert(1)<!--`,
+ context{state: stateJSDqStr, element: elementScript},
+ },
+ {
+ `<script>document.write("<script>alert(1)</Script>");`,
+ context{state: stateJS, element: elementScript},
+ },
+ {
+ `<script>document.write("<!--");`,
+ context{state: stateJS, element: elementScript},
+ },
+ {
+ `<script>let a = /</script`,
+ context{state: stateJSRegexp, element: elementScript},
+ },
+ {
+ `<script>let a = /</script/`,
+ context{state: stateJS, element: elementScript, jsCtx: jsCtxDivOp},
},
{
`<script type="text/template">`,
diff --git a/src/html/template/transition.go b/src/html/template/transition.go
index 12aa4c41fe..3d2a37cdd9 100644
--- a/src/html/template/transition.go
+++ b/src/html/template/transition.go
@@ -214,6 +214,11 @@ var (
// element states.
func tSpecialTagEnd(c context, s []byte) (context, int) {
if c.element != elementNone {
+ // script end tags ("</script") within script literals are ignored, so that
+ // we can properly escape them.
+ if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) {
+ return c, len(s)
+ }
if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
return context{}, i
}
@@ -353,6 +358,16 @@ func tJSDelimited(c context, s []byte) (context, int) {
inCharset = true
case ']':
inCharset = false
+ case '/':
+ // If "</script" appears in a regex literal, the '/' should not
+ // close the regex literal, and it will later be escaped to
+ // "\x3C/script" in escapeText.
+ if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 {
+ i++
+ } else if !inCharset {
+ c.state, c.jsCtx = stateJS, jsCtxDivOp
+ return c, i + 1
+ }
default:
// end delimiter
if !inCharset {