Update module github.com/PuerkitoBio/goquery to v1.7.1

author: renovate <renovate-bot@autistici.org> 2021-07-11 14:26:26 +0000
committer: renovate <renovate-bot@autistici.org> 2021-07-11 14:26:26 +0000
commit: 557f9d889812976293b4a668c190e0e1e0332857 (patch)
tree: 9d2e82374a063f3b568110e83ccb1b285f3247f1 /vendor/golang.org/x/net/html
parent: 877afafd950b84242204499b3ed8c1b2c8c75f31 (diff)
download: crawl-557f9d889812976293b4a668c190e0e1e0332857.tar.gz
crawl-557f9d889812976293b4a668c190e0e1e0332857.zip
6 files changed, 279 insertions, 229 deletions
diff --git a/vendor/golang.org/x/net/html/const.go b/vendor/golang.org/x/net/html/const.go
index a3a918f..ff7acf2 100644
--- a/vendor/golang.org/x/net/html/const.go
+++ b/vendor/golang.org/x/net/html/const.go
@@ -52,8 +52,7 @@ var isSpecialElementMap = map[string]bool{
 	"iframe":     true,
 	"img":        true,
 	"input":      true,
-	"isindex":    true, // The 'isindex' element has been removed, but keep it for backwards compatibility.
-	"keygen":     true,
+	"keygen":     true, // "keygen" has been removed from the spec, but is kept here for backwards compatibility.
 	"li":         true,
 	"link":       true,
 	"listing":    true,
diff --git a/vendor/golang.org/x/net/html/foreign.go b/vendor/golang.org/x/net/html/foreign.go
index 01477a9..9da9e9d 100644
--- a/vendor/golang.org/x/net/html/foreign.go
+++ b/vendor/golang.org/x/net/html/foreign.go
@@ -161,66 +161,62 @@ var mathMLAttributeAdjustments = map[string]string{
 }
 
 var svgAttributeAdjustments = map[string]string{
-	"attributename":             "attributeName",
-	"attributetype":             "attributeType",
-	"basefrequency":             "baseFrequency",
-	"baseprofile":               "baseProfile",
-	"calcmode":                  "calcMode",
-	"clippathunits":             "clipPathUnits",
-	"contentscripttype":         "contentScriptType",
-	"contentstyletype":          "contentStyleType",
-	"diffuseconstant":           "diffuseConstant",
-	"edgemode":                  "edgeMode",
-	"externalresourcesrequired": "externalResourcesRequired",
-	"filterres":                 "filterRes",
-	"filterunits":               "filterUnits",
-	"glyphref":                  "glyphRef",
-	"gradienttransform":         "gradientTransform",
-	"gradientunits":             "gradientUnits",
-	"kernelmatrix":              "kernelMatrix",
-	"kernelunitlength":          "kernelUnitLength",
-	"keypoints":                 "keyPoints",
-	"keysplines":                "keySplines",
-	"keytimes":                  "keyTimes",
-	"lengthadjust":              "lengthAdjust",
-	"limitingconeangle":         "limitingConeAngle",
-	"markerheight":              "markerHeight",
-	"markerunits":               "markerUnits",
-	"markerwidth":               "markerWidth",
-	"maskcontentunits":          "maskContentUnits",
-	"maskunits":                 "maskUnits",
-	"numoctaves":                "numOctaves",
-	"pathlength":                "pathLength",
-	"patterncontentunits":       "patternContentUnits",
-	"patterntransform":          "patternTransform",
-	"patternunits":              "patternUnits",
-	"pointsatx":                 "pointsAtX",
-	"pointsaty":                 "pointsAtY",
-	"pointsatz":                 "pointsAtZ",
-	"preservealpha":             "preserveAlpha",
-	"preserveaspectratio":       "preserveAspectRatio",
-	"primitiveunits":            "primitiveUnits",
-	"refx":                      "refX",
-	"refy":                      "refY",
-	"repeatcount":               "repeatCount",
-	"repeatdur":                 "repeatDur",
-	"requiredextensions":        "requiredExtensions",
-	"requiredfeatures":          "requiredFeatures",
-	"specularconstant":          "specularConstant",
-	"specularexponent":          "specularExponent",
-	"spreadmethod":              "spreadMethod",
-	"startoffset":               "startOffset",
-	"stddeviation":              "stdDeviation",
-	"stitchtiles":               "stitchTiles",
-	"surfacescale":              "surfaceScale",
-	"systemlanguage":            "systemLanguage",
-	"tablevalues":               "tableValues",
-	"targetx":                   "targetX",
-	"targety":                   "targetY",
-	"textlength":                "textLength",
-	"viewbox":                   "viewBox",
-	"viewtarget":                "viewTarget",
-	"xchannelselector":          "xChannelSelector",
-	"ychannelselector":          "yChannelSelector",
-	"zoomandpan":                "zoomAndPan",
+	"attributename":       "attributeName",
+	"attributetype":       "attributeType",
+	"basefrequency":       "baseFrequency",
+	"baseprofile":         "baseProfile",
+	"calcmode":            "calcMode",
+	"clippathunits":       "clipPathUnits",
+	"diffuseconstant":     "diffuseConstant",
+	"edgemode":            "edgeMode",
+	"filterunits":         "filterUnits",
+	"glyphref":            "glyphRef",
+	"gradienttransform":   "gradientTransform",
+	"gradientunits":       "gradientUnits",
+	"kernelmatrix":        "kernelMatrix",
+	"kernelunitlength":    "kernelUnitLength",
+	"keypoints":           "keyPoints",
+	"keysplines":          "keySplines",
+	"keytimes":            "keyTimes",
+	"lengthadjust":        "lengthAdjust",
+	"limitingconeangle":   "limitingConeAngle",
+	"markerheight":        "markerHeight",
+	"markerunits":         "markerUnits",
+	"markerwidth":         "markerWidth",
+	"maskcontentunits":    "maskContentUnits",
+	"maskunits":           "maskUnits",
+	"numoctaves":          "numOctaves",
+	"pathlength":          "pathLength",
+	"patterncontentunits": "patternContentUnits",
+	"patterntransform":    "patternTransform",
+	"patternunits":        "patternUnits",
+	"pointsatx":           "pointsAtX",
+	"pointsaty":           "pointsAtY",
+	"pointsatz":           "pointsAtZ",
+	"preservealpha":       "preserveAlpha",
+	"preserveaspectratio": "preserveAspectRatio",
+	"primitiveunits":      "primitiveUnits",
+	"refx":                "refX",
+	"refy":                "refY",
+	"repeatcount":         "repeatCount",
+	"repeatdur":           "repeatDur",
+	"requiredextensions":  "requiredExtensions",
+	"requiredfeatures":    "requiredFeatures",
+	"specularconstant":    "specularConstant",
+	"specularexponent":    "specularExponent",
+	"spreadmethod":        "spreadMethod",
+	"startoffset":         "startOffset",
+	"stddeviation":        "stdDeviation",
+	"stitchtiles":         "stitchTiles",
+	"surfacescale":        "surfaceScale",
+	"systemlanguage":      "systemLanguage",
+	"tablevalues":         "tableValues",
+	"targetx":             "targetX",
+	"targety":             "targetY",
+	"textlength":          "textLength",
+	"viewbox":             "viewBox",
+	"viewtarget":          "viewTarget",
+	"xchannelselector":    "xChannelSelector",
+	"ychannelselector":    "yChannelSelector",
+	"zoomandpan":          "zoomAndPan",
 }
diff --git a/vendor/golang.org/x/net/html/node.go b/vendor/golang.org/x/net/html/node.go
index 633ee15..1350eef 100644
--- a/vendor/golang.org/x/net/html/node.go
+++ b/vendor/golang.org/x/net/html/node.go
@@ -18,6 +18,11 @@ const (
 	ElementNode
 	CommentNode
 	DoctypeNode
+	// RawNode nodes are not returned by the parser, but can be part of the
+	// Node tree passed to func Render to insert raw HTML (without escaping).
+	// If so, this package makes no guarantee that the rendered HTML is secure
+	// (from e.g. Cross Site Scripting attacks) or well-formed.
+	RawNode
 	scopeMarkerNode
 )
 
diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go
index 992cff2..038941d 100644
--- a/vendor/golang.org/x/net/html/parse.go
+++ b/vendor/golang.org/x/net/html/parse.go
@@ -184,6 +184,17 @@ func (p *parser) clearStackToContext(s scope) {
 	}
 }
 
+// parseGenericRawTextElement implements the generic raw text element parsing
+// algorithm defined in 12.2.6.2.
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
+// TODO: The spec treats both the RAWTEXT and RCDATA states as part of the
+// tokenizer, so the tokenizer needs to be made aware of both states.
+func (p *parser) parseGenericRawTextElement() {
+	p.addElement()
+	p.originalIM = p.im
+	p.im = textIM
+}
+
 // generateImpliedEndTags pops nodes off the stack of open elements as long as
 // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
 // If exceptions are specified, nodes with that name will not be popped off.
@@ -192,16 +203,17 @@ func (p *parser) generateImpliedEndTags(exceptions ...string) {
 loop:
 	for i = len(p.oe) - 1; i >= 0; i-- {
 		n := p.oe[i]
-		if n.Type == ElementNode {
-			switch n.DataAtom {
-			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
-				for _, except := range exceptions {
-					if n.Data == except {
-						break loop
-					}
+		if n.Type != ElementNode {
+			break
+		}
+		switch n.DataAtom {
+		case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
+			for _, except := range exceptions {
+				if n.Data == except {
+					break loop
 				}
-				continue
 			}
+			continue
 		}
 		break
 	}
@@ -369,8 +381,7 @@ findIdenticalElements:
 // Section 12.2.4.3.
 func (p *parser) clearActiveFormattingElements() {
 	for {
-		n := p.afe.pop()
-		if len(p.afe) == 0 || n.Type == scopeMarkerNode {
+		if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
 			return
 		}
 	}
@@ -625,29 +636,51 @@ func inHeadIM(p *parser) bool {
 		switch p.tok.DataAtom {
 		case a.Html:
 			return inBodyIM(p)
-		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
+		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
 			p.addElement()
 			p.oe.pop()
 			p.acknowledgeSelfClosingTag()
 			return true
 		case a.Noscript:
-			p.addElement()
 			if p.scripting {
-				p.setOriginalIM()
-				p.im = textIM
-			} else {
-				p.im = inHeadNoscriptIM
+				p.parseGenericRawTextElement()
+				return true
 			}
+			p.addElement()
+			p.im = inHeadNoscriptIM
+			// Don't let the tokenizer go into raw text mode when scripting is disabled.
+			p.tokenizer.NextIsNotRawText()
 			return true
-		case a.Script, a.Title, a.Noframes, a.Style:
+		case a.Script, a.Title:
 			p.addElement()
 			p.setOriginalIM()
 			p.im = textIM
 			return true
+		case a.Noframes, a.Style:
+			p.parseGenericRawTextElement()
+			return true
 		case a.Head:
 			// Ignore the token.
 			return true
 		case a.Template:
+			// TODO: remove this divergence from the HTML5 spec.
+			//
+			// We don't handle all of the corner cases when mixing foreign
+			// content (i.e. <math> or <svg>) with <template>. Without this
+			// early return, we can get into an infinite loop, possibly because
+			// of the "TODO... further divergence" a little below.
+			//
+			// As a workaround, if we are mixing foreign content and templates,
+			// just ignore the rest of the HTML. Foreign content is rare and a
+			// relatively old HTML feature. Templates are also rare and a
+			// relatively new HTML feature. Their combination is very rare.
+			for _, e := range p.oe {
+				if e.Namespace != "" {
+					p.im = ignoreTheRemainingTokens
+					return true
+				}
+			}
+
 			p.addElement()
 			p.afe = append(p.afe, &scopeMarker)
 			p.framesetOK = false
@@ -668,7 +701,7 @@ func inHeadIM(p *parser) bool {
 			if !p.oe.contains(a.Template) {
 				return true
 			}
-			// TODO: remove this divergence from the HTML5 spec.
+			// TODO: remove this further divergence from the HTML5 spec.
 			//
 			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
 			p.generateImpliedEndTags()
@@ -713,7 +746,13 @@ func inHeadNoscriptIM(p *parser) bool {
 			return inBodyIM(p)
 		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
 			return inHeadIM(p)
-		case a.Head, a.Noscript:
+		case a.Head:
+			// Ignore the token.
+			return true
+		case a.Noscript:
+			// Don't let the tokenizer go into raw text mode even when a <noscript>
+			// tag is in "in head noscript" insertion mode.
+			p.tokenizer.NextIsNotRawText()
 			// Ignore the token.
 			return true
 		}
@@ -855,7 +894,7 @@ func inBodyIM(p *parser) bool {
 				return true
 			}
 			copyAttributes(p.oe[0], p.tok)
-		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
+		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
 			return inHeadIM(p)
 		case a.Body:
 			if p.oe.contains(a.Template) {
@@ -881,7 +920,7 @@ func inBodyIM(p *parser) bool {
 			p.addElement()
 			p.im = inFramesetIM
 			return true
-		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
+		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
 			p.popUntil(buttonScope, a.P)
 			p.addElement()
 		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
@@ -1014,53 +1053,6 @@ func inBodyIM(p *parser) bool {
 			p.tok.DataAtom = a.Img
 			p.tok.Data = a.Img.String()
 			return false
-		case a.Isindex:
-			if p.form != nil {
-				// Ignore the token.
-				return true
-			}
-			action := ""
-			prompt := "This is a searchable index. Enter search keywords: "
-			attr := []Attribute{{Key: "name", Val: "isindex"}}
-			for _, t := range p.tok.Attr {
-				switch t.Key {
-				case "action":
-					action = t.Val
-				case "name":
-					// Ignore the attribute.
-				case "prompt":
-					prompt = t.Val
-				default:
-					attr = append(attr, t)
-				}
-			}
-			p.acknowledgeSelfClosingTag()
-			p.popUntil(buttonScope, a.P)
-			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
-			if p.form == nil {
-				// NOTE: The 'isindex' element has been removed,
-				// and the 'template' element has not been designed to be
-				// collaborative with the index element.
-				//
-				// Ignore the token.
-				return true
-			}
-			if action != "" {
-				p.form.Attr = []Attribute{{Key: "action", Val: action}}
-			}
-			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
-			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
-			p.addText(prompt)
-			p.addChild(&Node{
-				Type:     ElementNode,
-				DataAtom: a.Input,
-				Data:     a.Input.String(),
-				Attr:     attr,
-			})
-			p.oe.pop()
-			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
-			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
-			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
 		case a.Textarea:
 			p.addElement()
 			p.setOriginalIM()
@@ -1070,18 +1062,21 @@ func inBodyIM(p *parser) bool {
 			p.popUntil(buttonScope, a.P)
 			p.reconstructActiveFormattingElements()
 			p.framesetOK = false
-			p.addElement()
-			p.setOriginalIM()
-			p.im = textIM
+			p.parseGenericRawTextElement()
 		case a.Iframe:
 			p.framesetOK = false
+			p.parseGenericRawTextElement()
+		case a.Noembed:
+			p.parseGenericRawTextElement()
+		case a.Noscript:
+			if p.scripting {
+				p.parseGenericRawTextElement()
+				return true
+			}
+			p.reconstructActiveFormattingElements()
 			p.addElement()
-			p.setOriginalIM()
-			p.im = textIM
-		case a.Noembed, a.Noscript:
-			p.addElement()
-			p.setOriginalIM()
-			p.im = textIM
+			// Don't let the tokenizer go into raw text mode when scripting is disabled.
+			p.tokenizer.NextIsNotRawText()
 		case a.Select:
 			p.reconstructActiveFormattingElements()
 			p.addElement()
@@ -1137,7 +1132,7 @@ func inBodyIM(p *parser) bool {
 				return false
 			}
 			return true
-		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
+		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
 			p.popUntil(defaultScope, p.tok.DataAtom)
 		case a.Form:
 			if p.oe.contains(a.Template) {
@@ -1198,14 +1193,13 @@ func inBodyIM(p *parser) bool {
 		if len(p.templateStack) > 0 {
 			p.im = inTemplateIM
 			return false
-		} else {
-			for _, e := range p.oe {
-				switch e.DataAtom {
-				case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
-					a.Thead, a.Tr, a.Body, a.Html:
-				default:
-					return true
-				}
+		}
+		for _, e := range p.oe {
+			switch e.DataAtom {
+			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
+				a.Thead, a.Tr, a.Body, a.Html:
+			default:
+				return true
 			}
 		}
 	}
@@ -1221,9 +1215,15 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
 	// Once the code successfully parses the comprehensive test suite, we should
 	// refactor this code to be more idiomatic.
 
-	// Steps 1-4. The outer loop.
+	// Steps 1-2
+	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
+		p.oe.pop()
+		return
+	}
+
+	// Steps 3-5. The outer loop.
 	for i := 0; i < 8; i++ {
-		// Step 5. Find the formatting element.
+		// Step 6. Find the formatting element.
 		var formattingElement *Node
 		for j := len(p.afe) - 1; j >= 0; j-- {
 			if p.afe[j].Type == scopeMarkerNode {
@@ -1238,17 +1238,22 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
 			p.inBodyEndTagOther(tagAtom, tagName)
 			return
 		}
+
+		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
 		feIndex := p.oe.index(formattingElement)
 		if feIndex == -1 {
 			p.afe.remove(formattingElement)
 			return
 		}
+		// Step 8. Ignore the tag if formatting element is not in the scope.
 		if !p.elementInScope(defaultScope, tagAtom) {
 			// Ignore the tag.
 			return
 		}
 
-		// Steps 9-10. Find the furthest block.
+		// Step 9. This step is omitted: it only reports a parse error and does not require returning.
+
+		// Steps 10-11. Find the furthest block.
 		var furthestBlock *Node
 		for _, e := range p.oe[feIndex:] {
 			if isSpecialElement(e) {
@@ -1265,47 +1270,65 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
 			return
 		}
 
-		// Steps 11-12. Find the common ancestor and bookmark node.
+		// Steps 12-13. Find the common ancestor and bookmark node.
 		commonAncestor := p.oe[feIndex-1]
 		bookmark := p.afe.index(formattingElement)
 
-		// Step 13. The inner loop. Find the lastNode to reparent.
+		// Step 14. The inner loop. Find the lastNode to reparent.
 		lastNode := furthestBlock
 		node := furthestBlock
 		x := p.oe.index(node)
-		// Steps 13.1-13.2
-		for j := 0; j < 3; j++ {
-			// Step 13.3.
+		// Step 14.1.
+		j := 0
+		for {
+			// Step 14.2.
+			j++
+			// Step 14.3.
 			x--
 			node = p.oe[x]
-			// Step 13.4 - 13.5.
+			// Step 14.4. Go to the next step if node is formatting element.
+			if node == formattingElement {
+				break
+			}
+			// Step 14.5. Remove node from the list of active formatting elements if
+			// inner loop counter is greater than three and node is in the list of
+			// active formatting elements.
+			if ni := p.afe.index(node); j > 3 && ni > -1 {
+				p.afe.remove(node)
+				// If any element of the list of active formatting elements is removed,
+				// we need to take care whether bookmark should be decremented or not.
+				// This is because the value of bookmark may exceed the size of the
+				// list by removing elements from the list.
+				if ni <= bookmark {
+					bookmark--
+				}
+				continue
+			}
+			// Step 14.6. Continue the next inner loop if node is not in the list of
+			// active formatting elements.
 			if p.afe.index(node) == -1 {
 				p.oe.remove(node)
 				continue
 			}
-			// Step 13.6.
-			if node == formattingElement {
-				break
-			}
-			// Step 13.7.
+			// Step 14.7.
 			clone := node.clone()
 			p.afe[p.afe.index(node)] = clone
 			p.oe[p.oe.index(node)] = clone
 			node = clone
-			// Step 13.8.
+			// Step 14.8.
 			if lastNode == furthestBlock {
 				bookmark = p.afe.index(node) + 1
 			}
-			// Step 13.9.
+			// Step 14.9.
 			if lastNode.Parent != nil {
 				lastNode.Parent.RemoveChild(lastNode)
 			}
 			node.AppendChild(lastNode)
-			// Step 13.10.
+			// Step 14.10.
 			lastNode = node
 		}
 
-		// Step 14. Reparent lastNode to the common ancestor,
+		// Step 15. Reparent lastNode to the common ancestor,
 		// or for misnested table nodes, to the foster parent.
 		if lastNode.Parent != nil {
 			lastNode.Parent.RemoveChild(lastNode)
@@ -1317,13 +1340,13 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
 			commonAncestor.AppendChild(lastNode)
 		}
 
-		// Steps 15-17. Reparent nodes from the furthest block's children
+		// Steps 16-18. Reparent nodes from the furthest block's children
 		// to a clone of the formatting element.
 		clone := formattingElement.clone()
 		reparentChildren(clone, furthestBlock)
 		furthestBlock.AppendChild(clone)
 
-		// Step 18. Fix up the list of active formatting elements.
+		// Step 19. Fix up the list of active formatting elements.
 		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
 			// Move the bookmark with the rest of the list.
 			bookmark--
@@ -1331,7 +1354,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
 		p.afe.remove(formattingElement)
 		p.afe.insert(bookmark, clone)
 
-		// Step 19. Fix up the stack of open elements.
+		// Step 20. Fix up the stack of open elements.
 		p.oe.remove(formattingElement)
 		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
 	}
@@ -1502,14 +1525,13 @@ func inCaptionIM(p *parser) bool {
 	case StartTagToken:
 		switch p.tok.DataAtom {
 		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
-			if p.popUntil(tableScope, a.Caption) {
-				p.clearActiveFormattingElements()
-				p.im = inTableIM
-				return false
-			} else {
+			if !p.popUntil(tableScope, a.Caption) {
 				// Ignore the token.
 				return true
 			}
+			p.clearActiveFormattingElements()
+			p.im = inTableIM
+			return false
 		case a.Select:
 			p.reconstructActiveFormattingElements()
 			p.addElement()
@@ -1526,14 +1548,13 @@ func inCaptionIM(p *parser) bool {
 			}
 			return true
 		case a.Table:
-			if p.popUntil(tableScope, a.Caption) {
-				p.clearActiveFormattingElements()
-				p.im = inTableIM
-				return false
-			} else {
+			if !p.popUntil(tableScope, a.Caption) {
 				// Ignore the token.
 				return true
 			}
+			p.clearActiveFormattingElements()
+			p.im = inTableIM
+			return false
 		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
 			// Ignore the token.
 			return true
@@ -1777,12 +1798,11 @@ func inSelectIM(p *parser) bool {
 			}
 			p.addElement()
 		case a.Select:
-			if p.popUntil(selectScope, a.Select) {
-				p.resetInsertionMode()
-			} else {
+			if !p.popUntil(selectScope, a.Select) {
 				// Ignore the token.
 				return true
 			}
+			p.resetInsertionMode()
 		case a.Input, a.Keygen, a.Textarea:
 			if p.elementInScope(selectScope, a.Select) {
 				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
@@ -1794,6 +1814,13 @@ func inSelectIM(p *parser) bool {
 			return true
 		case a.Script, a.Template:
 			return inHeadIM(p)
+		case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
+			// Don't let the tokenizer go into raw text mode when there are raw tags
+			// to be ignored. These tags should be ignored from the tokenizer
+			// properly.
+			p.tokenizer.NextIsNotRawText()
+			// Ignore the token.
+			return true
 		}
 	case EndTagToken:
 		switch p.tok.DataAtom {
@@ -1810,12 +1837,11 @@ func inSelectIM(p *parser) bool {
 				p.oe = p.oe[:i]
 			}
 		case a.Select:
-			if p.popUntil(selectScope, a.Select) {
-				p.resetInsertionMode()
-			} else {
+			if !p.popUntil(selectScope, a.Select) {
 				// Ignore the token.
 				return true
 			}
+			p.resetInsertionMode()
 		case a.Template:
 			return inHeadIM(p)
 		}
@@ -2119,6 +2145,10 @@ func afterAfterFramesetIM(p *parser) bool {
 	return true
 }
 
+func ignoreTheRemainingTokens(p *parser) bool {
+	return true
+}
+
 const whitespaceOrNUL = whitespace + "\x00"
 
 // Section 12.2.6.5
@@ -2136,28 +2166,31 @@ func parseForeignContent(p *parser) bool {
 			Data: p.tok.Data,
 		})
 	case StartTagToken:
-		b := breakout[p.tok.Data]
-		if p.tok.DataAtom == a.Font {
-		loop:
-			for _, attr := range p.tok.Attr {
-				switch attr.Key {
-				case "color", "face", "size":
-					b = true
-					break loop
+		if !p.fragment {
+			b := breakout[p.tok.Data]
+			if p.tok.DataAtom == a.Font {
+			loop:
+				for _, attr := range p.tok.Attr {
+					switch attr.Key {
+					case "color", "face", "size":
+						b = true
+						break loop
+					}
 				}
 			}
-		}
-		if b {
-			for i := len(p.oe) - 1; i >= 0; i-- {
-				n := p.oe[i]
-				if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
-					p.oe = p.oe[:i+1]
-					break
+			if b {
+				for i := len(p.oe) - 1; i >= 0; i-- {
+					n := p.oe[i]
+					if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
+						p.oe = p.oe[:i+1]
+						break
+					}
 				}
+				return false
 			}
-			return false
 		}
-		switch p.top().Namespace {
+		current := p.adjustedCurrentNode()
+		switch current.Namespace {
 		case "math":
 			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
 		case "svg":
@@ -2172,7 +2205,7 @@ func parseForeignContent(p *parser) bool {
 			panic("html: bad parser state: unexpected namespace")
 		}
 		adjustForeignAttributes(p.tok.Attr)
-		namespace := p.top().Namespace
+		namespace := current.Namespace
 		p.addElement()
 		p.top().Namespace = namespace
 		if namespace != "" {
@@ -2201,12 +2234,20 @@ func parseForeignContent(p *parser) bool {
 	return true
 }
 
+// Section 12.2.4.2.
+func (p *parser) adjustedCurrentNode() *Node {
+	if len(p.oe) == 1 && p.fragment && p.context != nil {
+		return p.context
+	}
+	return p.oe.top()
+}
+
 // Section 12.2.6.
 func (p *parser) inForeignContent() bool {
 	if len(p.oe) == 0 {
 		return false
 	}
-	n := p.oe[len(p.oe)-1]
+	n := p.adjustedCurrentNode()
 	if n.Namespace == "" {
 		return false
 	}
@@ -2341,8 +2382,7 @@ func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
 		f(p)
 	}
 
-	err := p.parse()
-	if err != nil {
+	if err := p.parse(); err != nil {
 		return nil, err
 	}
 	return p.doc, nil
@@ -2364,7 +2404,6 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
 		contextTag = context.DataAtom.String()
 	}
 	p := &parser{
-		tokenizer: NewTokenizerFragment(r, contextTag),
 		doc: &Node{
 			Type: DocumentNode,
 		},
@@ -2372,6 +2411,11 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
 		fragment:  true,
 		context:   context,
 	}
+	if context != nil && context.Namespace != "" {
+		p.tokenizer = NewTokenizer(r)
+	} else {
+		p.tokenizer = NewTokenizerFragment(r, contextTag)
+	}
 
 	for _, f := range opts {
 		f(p)
@@ -2396,8 +2440,7 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
 		}
 	}
 
-	err := p.parse()
-	if err != nil {
+	if err := p.parse(); err != nil {
 		return nil, err
 	}
 
diff --git a/vendor/golang.org/x/net/html/render.go b/vendor/golang.org/x/net/html/render.go
index d34564f..b46d81c 100644
--- a/vendor/golang.org/x/net/html/render.go
+++ b/vendor/golang.org/x/net/html/render.go
@@ -134,6 +134,9 @@ func render1(w writer, n *Node) error {
 			}
 		}
 		return w.WriteByte('>')
+	case RawNode:
+		_, err := w.WriteString(n.Data)
+		return err
 	default:
 		return errors.New("html: unknown node type")
 	}
@@ -252,20 +255,19 @@ func writeQuoted(w writer, s string) error {
 // Section 12.1.2, "Elements", gives this list of void elements. Void elements
 // are those that can't have any contents.
 var voidElements = map[string]bool{
-	"area":    true,
-	"base":    true,
-	"br":      true,
-	"col":     true,
-	"command": true,
-	"embed":   true,
-	"hr":      true,
-	"img":     true,
-	"input":   true,
-	"keygen":  true,
-	"link":    true,
-	"meta":    true,
-	"param":   true,
-	"source":  true,
-	"track":   true,
-	"wbr":     true,
+	"area":   true,
+	"base":   true,
+	"br":     true,
+	"col":    true,
+	"embed":  true,
+	"hr":     true,
+	"img":    true,
+	"input":  true,
+	"keygen": true, // "keygen" has been removed from the spec, but is kept here for backwards compatibility.
+	"link":   true,
+	"meta":   true,
+	"param":  true,
+	"source": true,
+	"track":  true,
+	"wbr":    true,
 }
diff --git a/vendor/golang.org/x/net/html/token.go b/vendor/golang.org/x/net/html/token.go
index e3c01d7..877709f 100644
--- a/vendor/golang.org/x/net/html/token.go
+++ b/vendor/golang.org/x/net/html/token.go
@@ -296,8 +296,7 @@ func (z *Tokenizer) Buffered() []byte {
 // too many times in succession.
 func readAtLeastOneByte(r io.Reader, b []byte) (int, error) {
 	for i := 0; i < 100; i++ {
-		n, err := r.Read(b)
-		if n != 0 || err != nil {
+		if n, err := r.Read(b); n != 0 || err != nil {
 			return n, err
 		}
 	}
@@ -347,6 +346,7 @@ loop:
 			break loop
 		}
 		if c != '/' {
+			z.raw.end--
 			continue loop
 		}
 		if z.readRawEndTag() || z.err != nil {
@@ -1067,6 +1067,11 @@ loop:
 
 // Raw returns the unmodified text of the current token. Calling Next, Token,
 // Text, TagName or TagAttr may change the contents of the returned slice.
+//
+// The token stream's raw bytes partition the byte stream (up until an
+// ErrorToken). There are no overlaps or gaps between two consecutive tokens'
+// raw bytes. One implication is that the byte offset of the current token is
+// the sum of the lengths of all previous tokens' raw bytes.
 func (z *Tokenizer) Raw() []byte {
 	return z.buf[z.raw.start:z.raw.end]
 }
author	renovate <renovate-bot@autistici.org>	2021-07-11 14:26:26 +0000
committer	renovate <renovate-bot@autistici.org>	2021-07-11 14:26:26 +0000
commit	557f9d889812976293b4a668c190e0e1e0332857 (patch)
tree	9d2e82374a063f3b568110e83ccb1b285f3247f1 /vendor/golang.org/x/net/html
parent	877afafd950b84242204499b3ed8c1b2c8c75f31 (diff)
download	crawl-557f9d889812976293b4a668c190e0e1e0332857.tar.gz crawl-557f9d889812976293b4a668c190e0e1e0332857.zip