diff options
author | renovate <renovate-bot@autistici.org> | 2021-07-11 14:26:26 +0000 |
---|---|---|
committer | renovate <renovate-bot@autistici.org> | 2021-07-11 14:26:26 +0000 |
commit | 557f9d889812976293b4a668c190e0e1e0332857 (patch) | |
tree | 9d2e82374a063f3b568110e83ccb1b285f3247f1 /vendor/golang.org/x/net/html | |
parent | 877afafd950b84242204499b3ed8c1b2c8c75f31 (diff) | |
download | crawl-557f9d889812976293b4a668c190e0e1e0332857.tar.gz crawl-557f9d889812976293b4a668c190e0e1e0332857.zip |
Update module github.com/PuerkitoBio/goquery to v1.7.1
Diffstat (limited to 'vendor/golang.org/x/net/html')
-rw-r--r-- | vendor/golang.org/x/net/html/const.go | 3 | ||||
-rw-r--r-- | vendor/golang.org/x/net/html/foreign.go | 120 | ||||
-rw-r--r-- | vendor/golang.org/x/net/html/node.go | 5 | ||||
-rw-r--r-- | vendor/golang.org/x/net/html/parse.go | 337 | ||||
-rw-r--r-- | vendor/golang.org/x/net/html/render.go | 34 | ||||
-rw-r--r-- | vendor/golang.org/x/net/html/token.go | 9 |
6 files changed, 279 insertions, 229 deletions
diff --git a/vendor/golang.org/x/net/html/const.go b/vendor/golang.org/x/net/html/const.go index a3a918f..ff7acf2 100644 --- a/vendor/golang.org/x/net/html/const.go +++ b/vendor/golang.org/x/net/html/const.go @@ -52,8 +52,7 @@ var isSpecialElementMap = map[string]bool{ "iframe": true, "img": true, "input": true, - "isindex": true, // The 'isindex' element has been removed, but keep it for backwards compatibility. - "keygen": true, + "keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility. "li": true, "link": true, "listing": true, diff --git a/vendor/golang.org/x/net/html/foreign.go b/vendor/golang.org/x/net/html/foreign.go index 01477a9..9da9e9d 100644 --- a/vendor/golang.org/x/net/html/foreign.go +++ b/vendor/golang.org/x/net/html/foreign.go @@ -161,66 +161,62 @@ var mathMLAttributeAdjustments = map[string]string{ } var svgAttributeAdjustments = map[string]string{ - "attributename": "attributeName", - "attributetype": "attributeType", - "basefrequency": "baseFrequency", - "baseprofile": "baseProfile", - "calcmode": "calcMode", - "clippathunits": "clipPathUnits", - "contentscripttype": "contentScriptType", - "contentstyletype": "contentStyleType", - "diffuseconstant": "diffuseConstant", - "edgemode": "edgeMode", - "externalresourcesrequired": "externalResourcesRequired", - "filterres": "filterRes", - "filterunits": "filterUnits", - "glyphref": "glyphRef", - "gradienttransform": "gradientTransform", - "gradientunits": "gradientUnits", - "kernelmatrix": "kernelMatrix", - "kernelunitlength": "kernelUnitLength", - "keypoints": "keyPoints", - "keysplines": "keySplines", - "keytimes": "keyTimes", - "lengthadjust": "lengthAdjust", - "limitingconeangle": "limitingConeAngle", - "markerheight": "markerHeight", - "markerunits": "markerUnits", - "markerwidth": "markerWidth", - "maskcontentunits": "maskContentUnits", - "maskunits": "maskUnits", - "numoctaves": "numOctaves", - "pathlength": "pathLength", - "patterncontentunits": "patternContentUnits", - "patterntransform": "patternTransform", - "patternunits": "patternUnits", - "pointsatx": "pointsAtX", - "pointsaty": "pointsAtY", - "pointsatz": "pointsAtZ", - "preservealpha": "preserveAlpha", - "preserveaspectratio": "preserveAspectRatio", - "primitiveunits": "primitiveUnits", - "refx": "refX", - "refy": "refY", - "repeatcount": "repeatCount", - "repeatdur": "repeatDur", - "requiredextensions": "requiredExtensions", - "requiredfeatures": "requiredFeatures", - "specularconstant": "specularConstant", - "specularexponent": "specularExponent", - "spreadmethod": "spreadMethod", - "startoffset": "startOffset", - "stddeviation": "stdDeviation", - "stitchtiles": "stitchTiles", - "surfacescale": "surfaceScale", - "systemlanguage": "systemLanguage", - "tablevalues": "tableValues", - "targetx": "targetX", - "targety": "targetY", - "textlength": "textLength", - "viewbox": "viewBox", - "viewtarget": "viewTarget", - "xchannelselector": "xChannelSelector", - "ychannelselector": "yChannelSelector", - "zoomandpan": "zoomAndPan", + "attributename": "attributeName", + "attributetype": "attributeType", + "basefrequency": "baseFrequency", + "baseprofile": "baseProfile", + "calcmode": "calcMode", + "clippathunits": "clipPathUnits", + "diffuseconstant": "diffuseConstant", + "edgemode": "edgeMode", + "filterunits": "filterUnits", + "glyphref": "glyphRef", + "gradienttransform": "gradientTransform", + "gradientunits": "gradientUnits", + "kernelmatrix": "kernelMatrix", + "kernelunitlength": "kernelUnitLength", + "keypoints": "keyPoints", + "keysplines": "keySplines", + "keytimes": "keyTimes", + "lengthadjust": "lengthAdjust", + "limitingconeangle": "limitingConeAngle", + "markerheight": "markerHeight", + "markerunits": "markerUnits", + "markerwidth": "markerWidth", + "maskcontentunits": "maskContentUnits", + "maskunits": "maskUnits", + "numoctaves": "numOctaves", + "pathlength": "pathLength", + "patterncontentunits": "patternContentUnits", + "patterntransform": "patternTransform", + "patternunits": "patternUnits", + "pointsatx": "pointsAtX", + "pointsaty": "pointsAtY", + "pointsatz": "pointsAtZ", + "preservealpha": "preserveAlpha", + "preserveaspectratio": "preserveAspectRatio", + "primitiveunits": "primitiveUnits", + "refx": "refX", + "refy": "refY", + "repeatcount": "repeatCount", + "repeatdur": "repeatDur", + "requiredextensions": "requiredExtensions", + "requiredfeatures": "requiredFeatures", + "specularconstant": "specularConstant", + "specularexponent": "specularExponent", + "spreadmethod": "spreadMethod", + "startoffset": "startOffset", + "stddeviation": "stdDeviation", + "stitchtiles": "stitchTiles", + "surfacescale": "surfaceScale", + "systemlanguage": "systemLanguage", + "tablevalues": "tableValues", + "targetx": "targetX", + "targety": "targetY", + "textlength": "textLength", + "viewbox": "viewBox", + "viewtarget": "viewTarget", + "xchannelselector": "xChannelSelector", + "ychannelselector": "yChannelSelector", + "zoomandpan": "zoomAndPan", } diff --git a/vendor/golang.org/x/net/html/node.go b/vendor/golang.org/x/net/html/node.go index 633ee15..1350eef 100644 --- a/vendor/golang.org/x/net/html/node.go +++ b/vendor/golang.org/x/net/html/node.go @@ -18,6 +18,11 @@ const ( ElementNode CommentNode DoctypeNode + // RawNode nodes are not returned by the parser, but can be part of the + // Node tree passed to func Render to insert raw HTML (without escaping). + // If so, this package makes no guarantee that the rendered HTML is secure + // (from e.g. Cross Site Scripting attacks) or well-formed. + RawNode scopeMarkerNode ) diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go index 992cff2..038941d 100644 --- a/vendor/golang.org/x/net/html/parse.go +++ b/vendor/golang.org/x/net/html/parse.go @@ -184,6 +184,17 @@ func (p *parser) clearStackToContext(s scope) { } } +// parseGenericRawTextElements implements the generic raw text element parsing +// algorithm defined in 12.2.6.2. +// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text +// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part +// officially, need to make tokenizer consider both states. +func (p *parser) parseGenericRawTextElement() { + p.addElement() + p.originalIM = p.im + p.im = textIM +} + // generateImpliedEndTags pops nodes off the stack of open elements as long as // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. // If exceptions are specified, nodes with that name will not be popped off. @@ -192,16 +203,17 @@ func (p *parser) generateImpliedEndTags(exceptions ...string) { loop: for i = len(p.oe) - 1; i >= 0; i-- { n := p.oe[i] - if n.Type == ElementNode { - switch n.DataAtom { - case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: - for _, except := range exceptions { - if n.Data == except { - break loop - } + if n.Type != ElementNode { + break + } + switch n.DataAtom { + case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: + for _, except := range exceptions { + if n.Data == except { + break loop } - continue } + continue } break } @@ -369,8 +381,7 @@ findIdenticalElements: // Section 12.2.4.3. func (p *parser) clearActiveFormattingElements() { for { - n := p.afe.pop() - if len(p.afe) == 0 || n.Type == scopeMarkerNode { + if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode { return } } @@ -625,29 +636,51 @@ func inHeadIM(p *parser) bool { switch p.tok.DataAtom { case a.Html: return inBodyIM(p) - case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta: + case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta: p.addElement() p.oe.pop() p.acknowledgeSelfClosingTag() return true case a.Noscript: - p.addElement() if p.scripting { - p.setOriginalIM() - p.im = textIM - } else { - p.im = inHeadNoscriptIM + p.parseGenericRawTextElement() + return true } + p.addElement() + p.im = inHeadNoscriptIM + // Don't let the tokenizer go into raw text mode when scripting is disabled. + p.tokenizer.NextIsNotRawText() return true - case a.Script, a.Title, a.Noframes, a.Style: + case a.Script, a.Title: p.addElement() p.setOriginalIM() p.im = textIM return true + case a.Noframes, a.Style: + p.parseGenericRawTextElement() + return true case a.Head: // Ignore the token. return true case a.Template: + // TODO: remove this divergence from the HTML5 spec. + // + // We don't handle all of the corner cases when mixing foreign + // content (i.e. <math> or <svg>) with <template>. Without this + // early return, we can get into an infinite loop, possibly because + // of the "TODO... further divergence" a little below. + // + // As a workaround, if we are mixing foreign content and templates, + // just ignore the rest of the HTML. Foreign content is rare and a + // relatively old HTML feature. Templates are also rare and a + // relatively new HTML feature. Their combination is very rare. + for _, e := range p.oe { + if e.Namespace != "" { + p.im = ignoreTheRemainingTokens + return true + } + } + p.addElement() p.afe = append(p.afe, &scopeMarker) p.framesetOK = false @@ -668,7 +701,7 @@ func inHeadIM(p *parser) bool { if !p.oe.contains(a.Template) { return true } - // TODO: remove this divergence from the HTML5 spec. + // TODO: remove this further divergence from the HTML5 spec. // // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 p.generateImpliedEndTags() @@ -713,7 +746,13 @@ func inHeadNoscriptIM(p *parser) bool { return inBodyIM(p) case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style: return inHeadIM(p) - case a.Head, a.Noscript: + case a.Head: + // Ignore the token. + return true + case a.Noscript: + // Don't let the tokenizer go into raw text mode even when a <noscript> + // tag is in "in head noscript" insertion mode. + p.tokenizer.NextIsNotRawText() // Ignore the token. return true } @@ -855,7 +894,7 @@ func inBodyIM(p *parser) bool { return true } copyAttributes(p.oe[0], p.tok) - case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: + case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: return inHeadIM(p) case a.Body: if p.oe.contains(a.Template) { @@ -881,7 +920,7 @@ func inBodyIM(p *parser) bool { p.addElement() p.im = inFramesetIM return true - case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul: + case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul: p.popUntil(buttonScope, a.P) p.addElement() case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: @@ -1014,53 +1053,6 @@ func inBodyIM(p *parser) bool { p.tok.DataAtom = a.Img p.tok.Data = a.Img.String() return false - case a.Isindex: - if p.form != nil { - // Ignore the token. - return true - } - action := "" - prompt := "This is a searchable index. Enter search keywords: " - attr := []Attribute{{Key: "name", Val: "isindex"}} - for _, t := range p.tok.Attr { - switch t.Key { - case "action": - action = t.Val - case "name": - // Ignore the attribute. - case "prompt": - prompt = t.Val - default: - attr = append(attr, t) - } - } - p.acknowledgeSelfClosingTag() - p.popUntil(buttonScope, a.P) - p.parseImpliedToken(StartTagToken, a.Form, a.Form.String()) - if p.form == nil { - // NOTE: The 'isindex' element has been removed, - // and the 'template' element has not been designed to be - // collaborative with the index element. - // - // Ignore the token. - return true - } - if action != "" { - p.form.Attr = []Attribute{{Key: "action", Val: action}} - } - p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String()) - p.parseImpliedToken(StartTagToken, a.Label, a.Label.String()) - p.addText(prompt) - p.addChild(&Node{ - Type: ElementNode, - DataAtom: a.Input, - Data: a.Input.String(), - Attr: attr, - }) - p.oe.pop() - p.parseImpliedToken(EndTagToken, a.Label, a.Label.String()) - p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String()) - p.parseImpliedToken(EndTagToken, a.Form, a.Form.String()) case a.Textarea: p.addElement() p.setOriginalIM() @@ -1070,18 +1062,21 @@ func inBodyIM(p *parser) bool { p.popUntil(buttonScope, a.P) p.reconstructActiveFormattingElements() p.framesetOK = false - p.addElement() - p.setOriginalIM() - p.im = textIM + p.parseGenericRawTextElement() case a.Iframe: p.framesetOK = false + p.parseGenericRawTextElement() + case a.Noembed: + p.parseGenericRawTextElement() + case a.Noscript: + if p.scripting { + p.parseGenericRawTextElement() + return true + } + p.reconstructActiveFormattingElements() p.addElement() - p.setOriginalIM() - p.im = textIM - case a.Noembed, a.Noscript: - p.addElement() - p.setOriginalIM() - p.im = textIM + // Don't let the tokenizer go into raw text mode when scripting is disabled. + p.tokenizer.NextIsNotRawText() case a.Select: p.reconstructActiveFormattingElements() p.addElement() @@ -1137,7 +1132,7 @@ func inBodyIM(p *parser) bool { return false } return true - case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul: + case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul: p.popUntil(defaultScope, p.tok.DataAtom) case a.Form: if p.oe.contains(a.Template) { @@ -1198,14 +1193,13 @@ func inBodyIM(p *parser) bool { if len(p.templateStack) > 0 { p.im = inTemplateIM return false - } else { - for _, e := range p.oe { - switch e.DataAtom { - case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th, - a.Thead, a.Tr, a.Body, a.Html: - default: - return true - } + } + for _, e := range p.oe { + switch e.DataAtom { + case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th, + a.Thead, a.Tr, a.Body, a.Html: + default: + return true } } } @@ -1221,9 +1215,15 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) { // Once the code successfully parses the comprehensive test suite, we should // refactor this code to be more idiomatic. - // Steps 1-4. The outer loop. + // Steps 1-2 + if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 { + p.oe.pop() + return + } + + // Steps 3-5. The outer loop. for i := 0; i < 8; i++ { - // Step 5. Find the formatting element. + // Step 6. Find the formatting element. var formattingElement *Node for j := len(p.afe) - 1; j >= 0; j-- { if p.afe[j].Type == scopeMarkerNode { @@ -1238,17 +1238,22 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) { p.inBodyEndTagOther(tagAtom, tagName) return } + + // Step 7. Ignore the tag if formatting element is not in the stack of open elements. feIndex := p.oe.index(formattingElement) if feIndex == -1 { p.afe.remove(formattingElement) return } + // Step 8. Ignore the tag if formatting element is not in the scope. if !p.elementInScope(defaultScope, tagAtom) { // Ignore the tag. return } - // Steps 9-10. Find the furthest block. + // Step 9. This step is omitted because it's just a parse error but no need to return. + + // Steps 10-11. Find the furthest block. var furthestBlock *Node for _, e := range p.oe[feIndex:] { if isSpecialElement(e) { @@ -1265,47 +1270,65 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) { return } - // Steps 11-12. Find the common ancestor and bookmark node. + // Steps 12-13. Find the common ancestor and bookmark node. commonAncestor := p.oe[feIndex-1] bookmark := p.afe.index(formattingElement) - // Step 13. The inner loop. Find the lastNode to reparent. + // Step 14. The inner loop. Find the lastNode to reparent. lastNode := furthestBlock node := furthestBlock x := p.oe.index(node) - // Steps 13.1-13.2 - for j := 0; j < 3; j++ { - // Step 13.3. + // Step 14.1. + j := 0 + for { + // Step 14.2. + j++ + // Step. 14.3. x-- node = p.oe[x] - // Step 13.4 - 13.5. + // Step 14.4. Go to the next step if node is formatting element. + if node == formattingElement { + break + } + // Step 14.5. Remove node from the list of active formatting elements if + // inner loop counter is greater than three and node is in the list of + // active formatting elements. + if ni := p.afe.index(node); j > 3 && ni > -1 { + p.afe.remove(node) + // If any element of the list of active formatting elements is removed, + // we need to take care whether bookmark should be decremented or not. + // This is because the value of bookmark may exceed the size of the + // list by removing elements from the list. + if ni <= bookmark { + bookmark-- + } + continue + } + // Step 14.6. Continue the next inner loop if node is not in the list of + // active formatting elements. if p.afe.index(node) == -1 { p.oe.remove(node) continue } - // Step 13.6. - if node == formattingElement { - break - } - // Step 13.7. + // Step 14.7. clone := node.clone() p.afe[p.afe.index(node)] = clone p.oe[p.oe.index(node)] = clone node = clone - // Step 13.8. + // Step 14.8. if lastNode == furthestBlock { bookmark = p.afe.index(node) + 1 } - // Step 13.9. + // Step 14.9. if lastNode.Parent != nil { lastNode.Parent.RemoveChild(lastNode) } node.AppendChild(lastNode) - // Step 13.10. + // Step 14.10. lastNode = node } - // Step 14. Reparent lastNode to the common ancestor, + // Step 15. Reparent lastNode to the common ancestor, // or for misnested table nodes, to the foster parent. if lastNode.Parent != nil { lastNode.Parent.RemoveChild(lastNode) @@ -1317,13 +1340,13 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) { commonAncestor.AppendChild(lastNode) } - // Steps 15-17. Reparent nodes from the furthest block's children + // Steps 16-18. Reparent nodes from the furthest block's children // to a clone of the formatting element. clone := formattingElement.clone() reparentChildren(clone, furthestBlock) furthestBlock.AppendChild(clone) - // Step 18. Fix up the list of active formatting elements. + // Step 19. Fix up the list of active formatting elements. if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark { // Move the bookmark with the rest of the list. bookmark-- @@ -1331,7 +1354,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) { p.afe.remove(formattingElement) p.afe.insert(bookmark, clone) - // Step 19. Fix up the stack of open elements. + // Step 20. Fix up the stack of open elements. p.oe.remove(formattingElement) p.oe.insert(p.oe.index(furthestBlock)+1, clone) } @@ -1502,14 +1525,13 @@ func inCaptionIM(p *parser) bool { case StartTagToken: switch p.tok.DataAtom { case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr: - if p.popUntil(tableScope, a.Caption) { - p.clearActiveFormattingElements() - p.im = inTableIM - return false - } else { + if !p.popUntil(tableScope, a.Caption) { // Ignore the token. return true } + p.clearActiveFormattingElements() + p.im = inTableIM + return false case a.Select: p.reconstructActiveFormattingElements() p.addElement() @@ -1526,14 +1548,13 @@ func inCaptionIM(p *parser) bool { } return true case a.Table: - if p.popUntil(tableScope, a.Caption) { - p.clearActiveFormattingElements() - p.im = inTableIM - return false - } else { + if !p.popUntil(tableScope, a.Caption) { // Ignore the token. return true } + p.clearActiveFormattingElements() + p.im = inTableIM + return false case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: // Ignore the token. return true @@ -1777,12 +1798,11 @@ func inSelectIM(p *parser) bool { } p.addElement() case a.Select: - if p.popUntil(selectScope, a.Select) { - p.resetInsertionMode() - } else { + if !p.popUntil(selectScope, a.Select) { // Ignore the token. return true } + p.resetInsertionMode() case a.Input, a.Keygen, a.Textarea: if p.elementInScope(selectScope, a.Select) { p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) @@ -1794,6 +1814,13 @@ func inSelectIM(p *parser) bool { return true case a.Script, a.Template: return inHeadIM(p) + case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp: + // Don't let the tokenizer go into raw text mode when there are raw tags + // to be ignored. These tags should be ignored from the tokenizer + // properly. + p.tokenizer.NextIsNotRawText() + // Ignore the token. + return true } case EndTagToken: switch p.tok.DataAtom { @@ -1810,12 +1837,11 @@ func inSelectIM(p *parser) bool { p.oe = p.oe[:i] } case a.Select: - if p.popUntil(selectScope, a.Select) { - p.resetInsertionMode() - } else { + if !p.popUntil(selectScope, a.Select) { // Ignore the token. return true } + p.resetInsertionMode() case a.Template: return inHeadIM(p) } @@ -2119,6 +2145,10 @@ func afterAfterFramesetIM(p *parser) bool { return true } +func ignoreTheRemainingTokens(p *parser) bool { + return true +} + const whitespaceOrNUL = whitespace + "\x00" // Section 12.2.6.5 @@ -2136,28 +2166,31 @@ func parseForeignContent(p *parser) bool { Data: p.tok.Data, }) case StartTagToken: - b := breakout[p.tok.Data] - if p.tok.DataAtom == a.Font { - loop: - for _, attr := range p.tok.Attr { - switch attr.Key { - case "color", "face", "size": - b = true - break loop + if !p.fragment { + b := breakout[p.tok.Data] + if p.tok.DataAtom == a.Font { + loop: + for _, attr := range p.tok.Attr { + switch attr.Key { + case "color", "face", "size": + b = true + break loop + } } } - } - if b { - for i := len(p.oe) - 1; i >= 0; i-- { - n := p.oe[i] - if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) { - p.oe = p.oe[:i+1] - break + if b { + for i := len(p.oe) - 1; i >= 0; i-- { + n := p.oe[i] + if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) { + p.oe = p.oe[:i+1] + break + } } + return false } - return false } - switch p.top().Namespace { + current := p.adjustedCurrentNode() + switch current.Namespace { case "math": adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments) case "svg": @@ -2172,7 +2205,7 @@ func parseForeignContent(p *parser) bool { panic("html: bad parser state: unexpected namespace") } adjustForeignAttributes(p.tok.Attr) - namespace := p.top().Namespace + namespace := current.Namespace p.addElement() p.top().Namespace = namespace if namespace != "" { @@ -2201,12 +2234,20 @@ func parseForeignContent(p *parser) bool { return true } +// Section 12.2.4.2. +func (p *parser) adjustedCurrentNode() *Node { + if len(p.oe) == 1 && p.fragment && p.context != nil { + return p.context + } + return p.oe.top() +} + // Section 12.2.6. func (p *parser) inForeignContent() bool { if len(p.oe) == 0 { return false } - n := p.oe[len(p.oe)-1] + n := p.adjustedCurrentNode() if n.Namespace == "" { return false } @@ -2341,8 +2382,7 @@ func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) { f(p) } - err := p.parse() - if err != nil { + if err := p.parse(); err != nil { return nil, err } return p.doc, nil @@ -2364,7 +2404,6 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ( contextTag = context.DataAtom.String() } p := &parser{ - tokenizer: NewTokenizerFragment(r, contextTag), doc: &Node{ Type: DocumentNode, }, @@ -2372,6 +2411,11 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ( fragment: true, context: context, } + if context != nil && context.Namespace != "" { + p.tokenizer = NewTokenizer(r) + } else { + p.tokenizer = NewTokenizerFragment(r, contextTag) + } for _, f := range opts { f(p) @@ -2396,8 +2440,7 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ( } } - err := p.parse() - if err != nil { + if err := p.parse(); err != nil { return nil, err } diff --git a/vendor/golang.org/x/net/html/render.go b/vendor/golang.org/x/net/html/render.go index d34564f..b46d81c 100644 --- a/vendor/golang.org/x/net/html/render.go +++ b/vendor/golang.org/x/net/html/render.go @@ -134,6 +134,9 @@ func render1(w writer, n *Node) error { } } return w.WriteByte('>') + case RawNode: + _, err := w.WriteString(n.Data) + return err default: return errors.New("html: unknown node type") } @@ -252,20 +255,19 @@ func writeQuoted(w writer, s string) error { // Section 12.1.2, "Elements", gives this list of void elements. Void elements // are those that can't have any contents. var voidElements = map[string]bool{ - "area": true, - "base": true, - "br": true, - "col": true, - "command": true, - "embed": true, - "hr": true, - "img": true, - "input": true, - "keygen": true, - "link": true, - "meta": true, - "param": true, - "source": true, - "track": true, - "wbr": true, + "area": true, + "base": true, + "br": true, + "col": true, + "embed": true, + "hr": true, + "img": true, + "input": true, + "keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility. + "link": true, + "meta": true, + "param": true, + "source": true, + "track": true, + "wbr": true, } diff --git a/vendor/golang.org/x/net/html/token.go b/vendor/golang.org/x/net/html/token.go index e3c01d7..877709f 100644 --- a/vendor/golang.org/x/net/html/token.go +++ b/vendor/golang.org/x/net/html/token.go @@ -296,8 +296,7 @@ func (z *Tokenizer) Buffered() []byte { // too many times in succession. func readAtLeastOneByte(r io.Reader, b []byte) (int, error) { for i := 0; i < 100; i++ { - n, err := r.Read(b) - if n != 0 || err != nil { + if n, err := r.Read(b); n != 0 || err != nil { return n, err } } @@ -347,6 +346,7 @@ loop: break loop } if c != '/' { + z.raw.end-- continue loop } if z.readRawEndTag() || z.err != nil { @@ -1067,6 +1067,11 @@ loop: // Raw returns the unmodified text of the current token. Calling Next, Token, // Text, TagName or TagAttr may change the contents of the returned slice. +// +// The token stream's raw bytes partition the byte stream (up until an +// ErrorToken). There are no overlaps or gaps between two consecutive token's +// raw bytes. One implication is that the byte offset of the current token is +// the sum of the lengths of all previous tokens' raw bytes. func (z *Tokenizer) Raw() []byte { return z.buf[z.raw.start:z.raw.end] } |