From 557f9d889812976293b4a668c190e0e1e0332857 Mon Sep 17 00:00:00 2001 From: renovate Date: Sun, 11 Jul 2021 14:26:26 +0000 Subject: Update module github.com/PuerkitoBio/goquery to v1.7.1 --- vendor/golang.org/x/net/html/parse.go | 337 +++++++++++++++++++--------------- 1 file changed, 190 insertions(+), 147 deletions(-) (limited to 'vendor/golang.org/x/net/html/parse.go') diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go index 992cff2..038941d 100644 --- a/vendor/golang.org/x/net/html/parse.go +++ b/vendor/golang.org/x/net/html/parse.go @@ -184,6 +184,17 @@ func (p *parser) clearStackToContext(s scope) { } } +// parseGenericRawTextElements implements the generic raw text element parsing +// algorithm defined in 12.2.6.2. +// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text +// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part +// officially, need to make tokenizer consider both states. +func (p *parser) parseGenericRawTextElement() { + p.addElement() + p.originalIM = p.im + p.im = textIM +} + // generateImpliedEndTags pops nodes off the stack of open elements as long as // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. // If exceptions are specified, nodes with that name will not be popped off. @@ -192,16 +203,17 @@ func (p *parser) generateImpliedEndTags(exceptions ...string) { loop: for i = len(p.oe) - 1; i >= 0; i-- { n := p.oe[i] - if n.Type == ElementNode { - switch n.DataAtom { - case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: - for _, except := range exceptions { - if n.Data == except { - break loop - } + if n.Type != ElementNode { + break + } + switch n.DataAtom { + case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: + for _, except := range exceptions { + if n.Data == except { + break loop } - continue } + continue } break } @@ -369,8 +381,7 @@ findIdenticalElements: // Section 12.2.4.3. func (p *parser) clearActiveFormattingElements() { for { - n := p.afe.pop() - if len(p.afe) == 0 || n.Type == scopeMarkerNode { + if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode { return } } @@ -625,29 +636,51 @@ func inHeadIM(p *parser) bool { switch p.tok.DataAtom { case a.Html: return inBodyIM(p) - case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta: + case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta: p.addElement() p.oe.pop() p.acknowledgeSelfClosingTag() return true case a.Noscript: - p.addElement() if p.scripting { - p.setOriginalIM() - p.im = textIM - } else { - p.im = inHeadNoscriptIM + p.parseGenericRawTextElement() + return true } + p.addElement() + p.im = inHeadNoscriptIM + // Don't let the tokenizer go into raw text mode when scripting is disabled. + p.tokenizer.NextIsNotRawText() return true - case a.Script, a.Title, a.Noframes, a.Style: + case a.Script, a.Title: p.addElement() p.setOriginalIM() p.im = textIM return true + case a.Noframes, a.Style: + p.parseGenericRawTextElement() + return true case a.Head: // Ignore the token. return true case a.Template: + // TODO: remove this divergence from the HTML5 spec. + // + // We don't handle all of the corner cases when mixing foreign + // content (i.e. or ) with