diff options
author | Mike Samuel <mikesamuel@gmail.com> | 2011-09-19 20:52:14 -0700 |
---|---|---|
committer | Mike Samuel <mikesamuel@gmail.com> | 2011-09-19 20:52:14 -0700 |
commit | 3a013f117566fa58300a6f1c8f6e5d0b481315ec (patch) | |
tree | d43b5c23eed7cbf6493918cdbee5d0682d406b95 | |
parent | 3c3a86ccc72853a3e7bb1783922d5303e54adcd7 (diff) | |
download | go-3a013f117566fa58300a6f1c8f6e5d0b481315ec.tar.gz go-3a013f117566fa58300a6f1c8f6e5d0b481315ec.zip |
exp/template/html: change transition functions to return indices
Formulaic changes to transition functions in preparation for CL 5074041.
This should be completely semantics preserving.
R=nigeltao
CC=golang-dev
https://golang.org/cl/5091041
-rw-r--r-- | src/pkg/exp/template/html/escape.go | 29 | ||||
-rw-r--r-- | src/pkg/exp/template/html/html.go | 29 | ||||
-rw-r--r-- | src/pkg/exp/template/html/transition.go | 232 |
3 files changed, 147 insertions, 143 deletions
diff --git a/src/pkg/exp/template/html/escape.go b/src/pkg/exp/template/html/escape.go index b859751140..6490c5f9e0 100644 --- a/src/pkg/exp/template/html/escape.go +++ b/src/pkg/exp/template/html/escape.go @@ -547,22 +547,22 @@ var delimEnds = [...]string{ // escapeText escapes a text template node. func (e *escaper) escapeText(c context, n *parse.TextNode) context { - s, written := n.Text, 0 - var b bytes.Buffer - for len(s) > 0 { - c1, s1 := contextAfterText(c, s) + s, written, i, b := n.Text, 0, 0, new(bytes.Buffer) + for i != len(s) { + c1, nread := contextAfterText(c, s[i:]) + i1 := i + nread if c.state == c1.state && (c.state == stateText || c.state == stateRCDATA) { - i0, i1 := len(n.Text)-len(s), len(n.Text)-len(s1) - for i := i0; i < i1; i++ { - if n.Text[i] == '<' { - b.Write(n.Text[written:i]) + for j := i; j < i1; j++ { + if s[j] == '<' { + b.Write(s[written:j]) b.WriteString("<") - written = i + 1 + written = j + 1 } } } - c, s = c1, s1 + c, i = c1, i1 } + if written != 0 && c.state != stateError { b.Write(n.Text[written:]) e.editTextNode(n, b.Bytes()) @@ -572,7 +572,7 @@ func (e *escaper) escapeText(c context, n *parse.TextNode) context { // contextAfterText starts in context c, consumes some tokens from the front of // s, then returns the context after those tokens and the unprocessed suffix. -func contextAfterText(c context, s []byte) (context, []byte) { +func contextAfterText(c context, s []byte) (context, int) { if c.delim == delimNone { return transitionFunc[c.state](c, s) } @@ -584,9 +584,10 @@ func contextAfterText(c context, s []byte) (context, []byte) { // <button onclick="alert("Hi!")"> // without having to entity decode token boundaries. for u := []byte(html.UnescapeString(string(s))); len(u) != 0; { - c, u = transitionFunc[c.state](c, u) + c1, i1 := transitionFunc[c.state](c, u) + c, u = c1, u[i1:] } - return c, nil + return c, len(s) } if c.delim != delimSpaceOrTagEnd { // Consume any quote. @@ -594,7 +595,7 @@ func contextAfterText(c context, s []byte) (context, []byte) { } // On exiting an attribute, we discard all state information // except the state and element. - return context{state: stateTag, element: c.element}, s[i:] + return context{state: stateTag, element: c.element}, i } // editActionNode records a change to an action pipeline for later commit. diff --git a/src/pkg/exp/template/html/html.go b/src/pkg/exp/template/html/html.go index 7b5fab0d93..27ef65229a 100644 --- a/src/pkg/exp/template/html/html.go +++ b/src/pkg/exp/template/html/html.go @@ -165,43 +165,44 @@ func htmlReplacer(s string, replacementTable []string, badRunes bool) string { // For example, `<b>¡Hi!</b> <script>...</script>` -> `¡Hi! `. func stripTags(html string) string { var b bytes.Buffer - s, c := []byte(html), context{} + s, c, i := []byte(html), context{}, 0 // Using the transition funcs helps us avoid mangling // `<div title="1>2">` or `I <3 Ponies!`. - for len(s) > 0 { + for i != len(s) { if c.delim == delimNone { - d, t := transitionFunc[c.state](c, s) + d, nread := transitionFunc[c.state](c, s[i:]) + i1 := i + nread if c.state == stateText || c.state == stateRCDATA { - i := len(s) - len(t) // Emit text up to the start of the tag or comment. + j := i1 if d.state != c.state { - for j := i - 1; j >= 0; j-- { - if s[j] == '<' { - i = j + for j1 := j - 1; j1 >= i; j1-- { + if s[j1] == '<' { + j = j1 break } } } - b.Write(s[:i]) + b.Write(s[i:j]) } - c, s = d, t + c, i = d, i1 continue } - i := bytes.IndexAny(s, delimEnds[c.delim]) - if i == -1 { + i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim]) + if i1 < i { break } if c.delim != delimSpaceOrTagEnd { // Consume any quote. - i++ + i1++ } - c, s = context{state: stateTag, element: c.element}, s[i:] + c, i = context{state: stateTag, element: c.element}, i1 } if c.state == stateText { if b.Len() == 0 { return html } - b.Write(s) + b.Write(s[i:]) } return b.String() } diff --git a/src/pkg/exp/template/html/transition.go b/src/pkg/exp/template/html/transition.go index 7c7f845b17..0274d9e710 100644 --- a/src/pkg/exp/template/html/transition.go +++ b/src/pkg/exp/template/html/transition.go @@ -14,8 +14,9 @@ import ( // transitionFunc is the array of context transition functions for text nodes. // A transition function takes a context and template text input, and returns -// the updated context and any unconsumed text. -var transitionFunc = [...]func(context, []byte) (context, []byte){ +// the updated context and the number of bytes consumed from the front of the +// input. +var transitionFunc = [...]func(context, []byte) (context, int){ stateText: tText, stateTag: tTag, stateAttrName: tAttrName, @@ -46,27 +47,28 @@ var commentStart = []byte("<!--") var commentEnd = []byte("-->") // tText is the context transition function for the text state. -func tText(c context, s []byte) (context, []byte) { +func tText(c context, s []byte) (context, int) { + k := 0 for { - i := bytes.IndexByte(s, '<') - if i == -1 || i+1 == len(s) { - return c, nil + i := k + bytes.IndexByte(s[k:], '<') + if i < k || i+1 == len(s) { + return c, len(s) } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { - return context{state: stateHTMLCmt}, s[i+4:] + return context{state: stateHTMLCmt}, i + 4 } i++ if s[i] == '/' { if i+1 == len(s) { - return c, nil + return c, len(s) } i++ } j, e := eatTagName(s, i) if j != i { // We've found an HTML tag. - return context{state: stateTag, element: e}, s[j:] + return context{state: stateTag, element: e}, j } - s = s[j:] + k = j } panic("unreachable") } @@ -80,21 +82,21 @@ var elementContentType = [...]state{ } // tTag is the context transition function for the tag state. -func tTag(c context, s []byte) (context, []byte) { +func tTag(c context, s []byte) (context, int) { // Find the attribute name. i := eatWhiteSpace(s, 0) if i == len(s) { - return c, nil + return c, len(s) } if s[i] == '>' { return context{ state: elementContentType[c.element], element: c.element, - }, s[i+1:] + }, i + 1 } j, err := eatAttrName(s, i) if err != nil { - return context{state: stateError, err: err}, nil + return context{state: stateError, err: err}, len(s) } state, attr := stateTag, attrNone if i != j { @@ -112,35 +114,35 @@ func tTag(c context, s []byte) (context, []byte) { state = stateAfterName } } - return context{state: state, element: c.element, attr: attr}, s[j:] + return context{state: state, element: c.element, attr: attr}, j } // tAttrName is the context transition function for stateAttrName. -func tAttrName(c context, s []byte) (context, []byte) { +func tAttrName(c context, s []byte) (context, int) { i, err := eatAttrName(s, 0) if err != nil { - return context{state: stateError, err: err}, nil + return context{state: stateError, err: err}, len(s) } else if i == len(s) { - return c, nil + return c, len(s) } c.state = stateAfterName - return c, s[i:] + return c, i } // tAfterName is the context transition function for stateAfterName. -func tAfterName(c context, s []byte) (context, []byte) { +func tAfterName(c context, s []byte) (context, int) { // Look for the start of the value. i := eatWhiteSpace(s, 0) if i == len(s) { - return c, nil + return c, len(s) } else if s[i] != '=' { // Occurs due to tag ending '>', and valueless attribute. c.state = stateTag - return c, s[i:] + return c, i } c.state = stateBeforeValue // Consume the "=". - return c, s[i+1:] + return c, i + 1 } var attrStartStates = [...]state{ @@ -151,10 +153,10 @@ var attrStartStates = [...]state{ } // tBeforeValue is the context transition function for stateBeforeValue. -func tBeforeValue(c context, s []byte) (context, []byte) { +func tBeforeValue(c context, s []byte) (context, int) { i := eatWhiteSpace(s, 0) if i == len(s) { - return c, nil + return c, len(s) } // Find the attribute delimiter. delim := delimSpaceOrTagEnd @@ -165,16 +167,16 @@ func tBeforeValue(c context, s []byte) (context, []byte) { delim, i = delimDoubleQuote, i+1 } c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone - return c, s[i:] + return c, i } // tHTMLCmt is the context transition function for stateHTMLCmt. -func tHTMLCmt(c context, s []byte) (context, []byte) { +func tHTMLCmt(c context, s []byte) (context, int) { i := bytes.Index(s, commentEnd) if i != -1 { - return context{}, s[i+3:] + return context{}, i + 3 } - return c, nil + return c, len(s) } // specialTagEndMarkers maps element types to the character sequence that @@ -188,24 +190,24 @@ var specialTagEndMarkers = [...]string{ // tSpecialTagEnd is the context transition function for raw text and RCDATA // element states. -func tSpecialTagEnd(c context, s []byte) (context, []byte) { +func tSpecialTagEnd(c context, s []byte) (context, int) { if c.element != elementNone { end := specialTagEndMarkers[c.element] i := strings.Index(strings.ToLower(string(s)), end) if i != -1 { - return context{state: stateTag}, s[i+len(end):] + return context{state: stateTag}, i + len(end) } } - return c, nil + return c, len(s) } // tAttr is the context transition function for the attribute state. -func tAttr(c context, s []byte) (context, []byte) { - return c, nil +func tAttr(c context, s []byte) (context, int) { + return c, len(s) } // tURL is the context transition function for the URL state. -func tURL(c context, s []byte) (context, []byte) { +func tURL(c context, s []byte) (context, int) { if bytes.IndexAny(s, "#?") >= 0 { c.urlPart = urlPartQueryOrFrag } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone { @@ -213,20 +215,20 @@ func tURL(c context, s []byte) (context, []byte) { // attrs: http://www.w3.org/TR/html5/index.html#attributes-1 c.urlPart = urlPartPreQuery } - return c, nil + return c, len(s) } // tJS is the context transition function for the JS state. -func tJS(c context, s []byte) (context, []byte) { - if d, t := tSpecialTagEnd(c, s); t != nil { - return d, t +func tJS(c context, s []byte) (context, int) { + if d, i := tSpecialTagEnd(c, s); i != len(s) { + return d, i } i := bytes.IndexAny(s, `"'/`) if i == -1 { // Entire input is non string, comment, regexp tokens. c.jsCtx = nextJSCtx(s, c.jsCtx) - return c, nil + return c, len(s) } c.jsCtx = nextJSCtx(s[:i], c.jsCtx) switch s[i] { @@ -248,18 +250,18 @@ func tJS(c context, s []byte) (context, []byte) { return context{ state: stateError, err: errorf(ErrSlashAmbig, 0, "'/' could start div or regexp: %.32q", s[i:]), - }, nil + }, len(s) } default: panic("unreachable") } - return c, s[i+1:] + return c, i + 1 } // tJSStr is the context transition function for the JS string states. -func tJSStr(c context, s []byte) (context, []byte) { - if d, t := tSpecialTagEnd(c, s); t != nil { - return d, t +func tJSStr(c context, s []byte) (context, int) { + if d, i := tSpecialTagEnd(c, s); i != len(s) { + return d, i } quoteAndEsc := `\"` @@ -267,55 +269,54 @@ func tJSStr(c context, s []byte) (context, []byte) { quoteAndEsc = `\'` } - b := s + k := 0 for { - i := bytes.IndexAny(b, quoteAndEsc) - if i == -1 { - return c, nil + i := k + bytes.IndexAny(s[k:], quoteAndEsc) + if i < k { + return c, len(s) } - if b[i] == '\\' { + if s[i] == '\\' { i++ - if i == len(b) { + if i == len(s) { return context{ state: stateError, err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s), - }, nil + }, len(s) } } else { c.state, c.jsCtx = stateJS, jsCtxDivOp - return c, b[i+1:] + return c, i + 1 } - b = b[i+1:] + k = i + 1 } panic("unreachable") } // tJSRegexp is the context transition function for the /RegExp/ literal state. -func tJSRegexp(c context, s []byte) (context, []byte) { - if d, t := tSpecialTagEnd(c, s); t != nil { - return d, t +func tJSRegexp(c context, s []byte) (context, int) { + if d, i := tSpecialTagEnd(c, s); i != len(s) { + return d, i } - b := s - inCharset := false + k, inCharset := 0, false for { - i := bytes.IndexAny(b, `/[\]`) - if i == -1 { + i := k + bytes.IndexAny(s[k:], `\/[]`) + if i < k { break } - switch b[i] { + switch s[i] { case '/': if !inCharset { c.state, c.jsCtx = stateJS, jsCtxDivOp - return c, b[i+1:] + return c, i + 1 } case '\\': i++ - if i == len(b) { + if i == len(s) { return context{ state: stateError, err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS regexp: %q", s), - }, nil + }, len(s) } case '[': inCharset = true @@ -324,7 +325,7 @@ func tJSRegexp(c context, s []byte) (context, []byte) { default: panic("unreachable") } - b = b[i+1:] + k = i + 1 } if inCharset { @@ -333,22 +334,22 @@ func tJSRegexp(c context, s []byte) (context, []byte) { return context{ state: stateError, err: errorf(ErrPartialCharset, 0, "unfinished JS regexp charset: %q", s), - }, nil + }, len(s) } - return c, nil + return c, len(s) } var blockCommentEnd = []byte("*/") // tBlockCmt is the context transition function for /*comment*/ states. -func tBlockCmt(c context, s []byte) (context, []byte) { - if d, t := tSpecialTagEnd(c, s); t != nil { - return d, t +func tBlockCmt(c context, s []byte) (context, int) { + if d, i := tSpecialTagEnd(c, s); i != len(s) { + return d, i } i := bytes.Index(s, blockCommentEnd) if i == -1 { - return c, nil + return c, len(s) } switch c.state { case stateJSBlockCmt: @@ -358,13 +359,13 @@ func tBlockCmt(c context, s []byte) (context, []byte) { default: panic(c.state.String()) } - return c, s[i+2:] + return c, i + 2 } // tLineCmt is the context transition function for //comment states. -func tLineCmt(c context, s []byte) (context, []byte) { - if d, t := tSpecialTagEnd(c, s); t != nil { - return d, t +func tLineCmt(c context, s []byte) (context, int) { + if d, i := tSpecialTagEnd(c, s); i != len(s) { + return d, i } var lineTerminators string var endState state @@ -386,21 +387,21 @@ func tLineCmt(c context, s []byte) (context, []byte) { i := bytes.IndexAny(s, lineTerminators) if i == -1 { - return c, nil + return c, len(s) } c.state = endState // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4 // "However, the LineTerminator at the end of the line is not - // considered to be part of the single-line comment; it is recognised - // separately by the lexical grammar and becomes part of the stream of - // input elements for the syntactic grammar." - return c, s[i:] + // considered to be part of the single-line comment; it is + // recognized separately by the lexical grammar and becomes part + // of the stream of input elements for the syntactic grammar." + return c, i } // tCSS is the context transition function for the CSS state. -func tCSS(c context, s []byte) (context, []byte) { - if d, t := tSpecialTagEnd(c, s); t != nil { - return d, t +func tCSS(c context, s []byte) (context, int) { + if d, i := tSpecialTagEnd(c, s); i != len(s) { + return d, i } // CSS quoted strings are almost never used except for: @@ -430,55 +431,55 @@ func tCSS(c context, s []byte) (context, []byte) { // have the attribute name available if our conservative assumption // proves problematic for real code. + k := 0 for { - i := bytes.IndexAny(s, `("'/`) - if i == -1 { - return c, nil + i := k + bytes.IndexAny(s[k:], `("'/`) + if i < k { + return c, len(s) } switch s[i] { case '(': // Look for url to the left. p := bytes.TrimRight(s[:i], "\t\n\f\r ") if endsWithCSSKeyword(p, "url") { - q := bytes.TrimLeft(s[i+1:], "\t\n\f\r ") + j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r ")) switch { - case len(q) != 0 && q[0] == '"': - c.state, s = stateCSSDqURL, q[1:] - case len(q) != 0 && q[0] == '\'': - c.state, s = stateCSSSqURL, q[1:] - + case j != len(s) && s[j] == '"': + c.state, j = stateCSSDqURL, j+1 + case j != len(s) && s[j] == '\'': + c.state, j = stateCSSSqURL, j+1 default: - c.state, s = stateCSSURL, q + c.state = stateCSSURL } - return c, s + return c, j } case '/': if i+1 < len(s) { switch s[i+1] { case '/': c.state = stateCSSLineCmt - return c, s[i+2:] + return c, i + 2 case '*': c.state = stateCSSBlockCmt - return c, s[i+2:] + return c, i + 2 } } case '"': c.state = stateCSSDqStr - return c, s[i+1:] + return c, i + 1 case '\'': c.state = stateCSSSqStr - return c, s[i+1:] + return c, i + 1 } - s = s[i+1:] + k = i + 1 } panic("unreachable") } // tCSSStr is the context transition function for the CSS string and URL states. -func tCSSStr(c context, s []byte) (context, []byte) { - if d, t := tSpecialTagEnd(c, s); t != nil { - return d, t +func tCSSStr(c context, s []byte) (context, int) { + if d, i := tSpecialTagEnd(c, s); i != len(s) { + return d, i } var endAndEsc string @@ -495,33 +496,34 @@ func tCSSStr(c context, s []byte) (context, []byte) { panic(c.state.String()) } - b := s + k := 0 for { - i := bytes.IndexAny(b, endAndEsc) - if i == -1 { - return tURL(c, decodeCSS(b)) + i := k + bytes.IndexAny(s[k:], endAndEsc) + if i < k { + c, nread := tURL(c, decodeCSS(s[k:])) + return c, k + nread } - if b[i] == '\\' { + if s[i] == '\\' { i++ - if i == len(b) { + if i == len(s) { return context{ state: stateError, err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in CSS string: %q", s), - }, nil + }, len(s) } } else { c.state = stateCSS - return c, b[i+1:] + return c, i + 1 } - c, _ = tURL(c, decodeCSS(b[:i+1])) - b = b[i+1:] + c, _ = tURL(c, decodeCSS(s[:i+1])) + k = i + 1 } panic("unreachable") } // tError is the context transition function for the error state. -func tError(c context, s []byte) (context, []byte) { - return c, nil +func tError(c context, s []byte) (context, int) { + return c, len(s) } // eatAttrName returns the largest j such that s[i:j] is an attribute name. |