about summary refs log tree commit diff
diff options
context:
space:
mode:
author Andrew Balholm <andybalholm@gmail.com> 2011-10-23 18:36:01 +1100
committer Nigel Tao <nigeltao@golang.org> 2011-10-23 18:36:01 +1100
commit 2aa589c8438debaef249e7fbcd9dd3fa0546c9c8 (patch)
tree 75cd9555b2200bd70abf1158982842dd1dc9ef7f
parent 2f352ae48abf1a714f7b3bfb097fab6451067599 (diff)
download go-2aa589c8438debaef249e7fbcd9dd3fa0546c9c8.tar.gz
go-2aa589c8438debaef249e7fbcd9dd3fa0546c9c8.zip
html: implement foster parenting
Implement the foster-parenting algorithm for content that is inside a table
but not in a cell.

Also fix a bug in reconstructing the active formatting elements.

Pass test 30 in tests1.dat:
<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y

R=nigeltao
CC=golang-dev
https://golang.org/cl/5309052
-rw-r--r-- src/pkg/html/parse.go 123
-rw-r--r-- src/pkg/html/parse_test.go 9
2 files changed, 103 insertions, 29 deletions
diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go
index 5b25e2620d..2c7294b4f3 100644
--- a/src/pkg/html/parse.go
+++ b/src/pkg/html/parse.go
@@ -32,6 +32,9 @@ type parser struct {
// originalIM is the insertion mode to go back to after completing a text
// or inTableText insertion mode.
originalIM insertionMode
+ // fosterParenting is whether new elements should be inserted according to
+ // the foster parenting rules (section 11.2.5.3).
+ fosterParenting bool
}
func (p *parser) top() *Node {
@@ -103,12 +106,56 @@ func (p *parser) elementInScope(stopTags []string, matchTags ...string) bool {
// addChild adds a child node n to the top element, and pushes n onto the stack
// of open elements if it is an element node.
func (p *parser) addChild(n *Node) {
- p.top().Add(n)
+ if p.fosterParenting {
+ p.fosterParent(n)
+ } else {
+ p.top().Add(n)
+ }
+
if n.Type == ElementNode {
p.oe = append(p.oe, n)
}
}
+// fosterParent adds a child node according to the foster parenting rules.
+// Section 11.2.5.3, "foster parenting".
+//
+// The foster parent is the parent of the most recently opened table element,
+// and n is inserted immediately before that table. If the table has no
+// parent, n is appended to the element just below the table on the stack of
+// open elements. If there is no open table at all, n is appended to the
+// bottom element of the stack (the html element).
+func (p *parser) fosterParent(n *Node) {
+	var table, parent *Node
+	tableIndex := -1
+	for i := len(p.oe) - 1; i >= 0; i-- {
+		if p.oe[i].Data == "table" {
+			table, tableIndex = p.oe[i], i
+			break
+		}
+	}
+
+	if table == nil {
+		// The foster parent is the html element.
+		parent = p.oe[0]
+	} else {
+		parent = table.Parent
+	}
+	if parent == nil {
+		// The table is not attached to a parent; fall back to the element
+		// that precedes it on the stack of open elements.
+		parent = p.oe[tableIndex-1]
+	}
+
+	// Find the table among parent's children. The position is tracked
+	// separately from the stack index and defaults to "append at the end",
+	// so a table that is absent from parent.Child (or an empty child list)
+	// can never leave a stale or negative index behind.
+	pos := len(parent.Child)
+	if table != nil {
+		for j, c := range parent.Child {
+			if c == table {
+				pos = j
+				break
+			}
+		}
+	}
+
+	if pos == len(parent.Child) {
+		parent.Add(n)
+		return
+	}
+
+	// Insert n into parent.Child at index pos, shifting the table and its
+	// following siblings one slot to the right.
+	parent.Child = append(parent.Child, nil)
+	copy(parent.Child[pos+1:], parent.Child[pos:])
+	parent.Child[pos] = n
+	n.Parent = parent
+}
+
// addText adds text to the preceding node if it is a text node, or else it
// calls addChild with a new text node.
func (p *parser) addText(text string) {
@@ -170,9 +217,9 @@ func (p *parser) reconstructActiveFormattingElements() {
}
for {
i++
- n = p.afe[i]
- p.addChild(n.clone())
- p.afe[i] = n
+ clone := p.afe[i].clone()
+ p.addChild(clone)
+ p.afe[i] = clone
if i == len(p.afe)-1 {
break
}
@@ -536,10 +583,7 @@ func inBodyIM(p *parser) (insertionMode, bool) {
case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u":
p.inBodyEndTagFormatting(p.tok.Data)
default:
- // TODO: any other end tag
- if p.tok.Data == p.top().Data {
- p.oe.pop()
- }
+ p.inBodyEndTagOther(p.tok.Data)
}
case CommentToken:
p.addChild(&Node{
@@ -573,6 +617,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
}
}
if formattingElement == nil {
+ p.inBodyEndTagOther(tag)
return
}
feIndex := p.oe.index(formattingElement)
@@ -645,8 +690,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
}
switch commonAncestor.Data {
case "table", "tbody", "tfoot", "thead", "tr":
- // TODO: fix up misnested table nodes; find the foster parent.
- fallthrough
+ p.fosterParent(lastNode)
default:
commonAncestor.Add(lastNode)
}
@@ -667,6 +711,19 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
}
}
+// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
+// It walks the stack of open elements from the top down: the first element
+// whose Data equals tag is popped together with everything above it, while
+// reaching a special element first leaves the stack untouched.
+func (p *parser) inBodyEndTagOther(tag string) {
+	for depth := len(p.oe); depth > 0; depth-- {
+		name := p.oe[depth-1].Data
+		if name == tag {
+			// Drop the matching element and every element above it.
+			p.oe = p.oe[:depth-1]
+			return
+		}
+		if isSpecialElement[name] {
+			// A special element stops the search without popping anything.
+			return
+		}
+	}
+}
+
// Section 11.2.5.4.8.
func textIM(p *parser) (insertionMode, bool) {
switch p.tok.Type {
@@ -683,12 +740,6 @@ func textIM(p *parser) (insertionMode, bool) {
// Section 11.2.5.4.9.
func inTableIM(p *parser) (insertionMode, bool) {
- var (
- add bool
- data string
- attr []Attribute
- consumed bool
- )
switch p.tok.Type {
case ErrorToken:
// Stop parsing.
@@ -698,13 +749,19 @@ func inTableIM(p *parser) (insertionMode, bool) {
case StartTagToken:
switch p.tok.Data {
case "tbody", "tfoot", "thead":
- add = true
- data = p.tok.Data
- attr = p.tok.Attr
- consumed = true
+ p.clearStackToTableContext()
+ p.addElement(p.tok.Data, p.tok.Attr)
+ return inTableBodyIM, true
case "td", "th", "tr":
- add = true
- data = "tbody"
+ p.clearStackToTableContext()
+ p.addElement("tbody", nil)
+ return inTableBodyIM, false
+ case "table":
+ if p.popUntil(tableScopeStopTags, "table") {
+ return p.resetInsertionMode(), false
+ }
+ // Ignore the token.
+ return inTableIM, true
default:
// TODO.
}
@@ -727,13 +784,23 @@ func inTableIM(p *parser) (insertionMode, bool) {
})
return inTableIM, true
}
- if add {
- // TODO: clear the stack back to a table context.
- p.addElement(data, attr)
- return inTableBodyIM, consumed
+
+ switch p.top().Data {
+ case "table", "tbody", "tfoot", "thead", "tr":
+ p.fosterParenting = true
+ defer func() { p.fosterParenting = false }()
+ }
+
+ return useTheRulesFor(p, inTableIM, inBodyIM)
+}
+
+func (p *parser) clearStackToTableContext() {
+ for i := len(p.oe) - 1; i >= 0; i-- {
+ if x := p.oe[i].Data; x == "table" || x == "html" {
+ p.oe = p.oe[:i+1]
+ return
+ }
}
- // TODO: return useTheRulesFor(inTableIM, inBodyIM, p) unless etc. etc. foster parenting.
- return inTableIM, true
}
// Section 11.2.5.4.13.
diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go
index 2c56ffd6ad..652bf805de 100644
--- a/src/pkg/html/parse_test.go
+++ b/src/pkg/html/parse_test.go
@@ -123,7 +123,7 @@ func TestParser(t *testing.T) {
rc := make(chan io.Reader)
go readDat(filename, rc)
// TODO(nigeltao): Process all test cases, not just a subset.
- for i := 0; i < 30; i++ {
+ for i := 0; i < 31; i++ {
// Parse the #data section.
b, err := ioutil.ReadAll(<-rc)
if err != nil {
@@ -152,6 +152,13 @@ func TestParser(t *testing.T) {
continue
}
// Check that rendering and re-parsing results in an identical tree.
+ if filename == "tests1.dat" && i == 30 {
+ // Test 30 in tests1.dat is such messed-up markup that a correct parse
+ // results in a non-conforming tree (one <a> element nested inside another).
+ // Therefore when it is rendered and re-parsed, it isn't the same.
+ // So we skip rendering on that test.
+ continue
+ }
pr, pw := io.Pipe()
go func() {
pw.CloseWithError(Render(pw, doc))