aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Balholm <andybalholm@gmail.com>2011-10-26 11:36:46 +1100
committerNigel Tao <nigeltao@golang.org>2011-10-26 11:36:46 +1100
commit6e318bda6c4236caf5a7f02d5ce545f5365094e0 (patch)
tree720326c113118f3edb9215bb91a7663093cd3687
parent7959aeb0f9d143986f1230d907280b3b9de7f1f7 (diff)
downloadgo-6e318bda6c4236caf5a7f02d5ce545f5365094e0.tar.gz
go-6e318bda6c4236caf5a7f02d5ce545f5365094e0.zip
html: improve parsing of tables
When foster parenting, merge adjacent text nodes. Properly close table row at </tr> tag. Pass tests1.dat, test 32: <!-----><font><div>hello<table>excite!<b>me!<th><i>please!</tr><!--X--> | <!-- - --> | <html> | <head> | <body> | <font> | <div> | "helloexcite!" | <b> | "me!" | <table> | <tbody> | <tr> | <th> | <i> | "please!" | <!-- X --> R=nigeltao CC=golang-dev https://golang.org/cl/5323048
-rw-r--r--src/pkg/html/parse.go33
-rw-r--r--src/pkg/html/parse_test.go2
2 files changed, 27 insertions, 8 deletions
diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go
index d1d4e483c5..292fbaf6be 100644
--- a/src/pkg/html/parse.go
+++ b/src/pkg/html/parse.go
@@ -52,6 +52,11 @@ var (
tableScopeStopTags = []string{"html", "table"}
)
+// stopTags for use in clearStackToContext.
+var (
+ tableRowContextStopTags = []string{"tr", "html"}
+)
+
// popUntil pops the stack of open elements at the highest element whose tag
// is in matchTags, provided there is no higher element in stopTags. It returns
// whether or not there was such an element. If there was not, popUntil leaves
@@ -146,6 +151,11 @@ func (p *parser) fosterParent(n *Node) {
}
}
+ if i > 0 && parent.Child[i-1].Type == TextNode && n.Type == TextNode {
+ parent.Child[i-1].Data += n.Data
+ return
+ }
+
if i == len(parent.Child) {
parent.Add(n)
} else {
@@ -749,11 +759,11 @@ func inTableIM(p *parser) (insertionMode, bool) {
case StartTagToken:
switch p.tok.Data {
case "tbody", "tfoot", "thead":
- p.clearStackToTableContext()
+ p.clearStackToContext(tableScopeStopTags)
p.addElement(p.tok.Data, p.tok.Attr)
return inTableBodyIM, true
case "td", "th", "tr":
- p.clearStackToTableContext()
+ p.clearStackToContext(tableScopeStopTags)
p.addElement("tbody", nil)
return inTableBodyIM, false
case "table":
@@ -794,11 +804,15 @@ func inTableIM(p *parser) (insertionMode, bool) {
return useTheRulesFor(p, inTableIM, inBodyIM)
}
-func (p *parser) clearStackToTableContext() {
+// clearStackToContext pops elements off the stack of open elements
+// until an element listed in stopTags is found.
+func (p *parser) clearStackToContext(stopTags []string) {
for i := len(p.oe) - 1; i >= 0; i-- {
- if x := p.oe[i].Data; x == "table" || x == "html" {
- p.oe = p.oe[:i+1]
- return
+ for _, tag := range stopTags {
+ if p.oe[i].Data == tag {
+ p.oe = p.oe[:i+1]
+ return
+ }
}
}
}
@@ -877,7 +891,12 @@ func inRowIM(p *parser) (insertionMode, bool) {
case EndTagToken:
switch p.tok.Data {
case "tr":
- // TODO.
+ if !p.elementInScope(tableScopeStopTags, "tr") {
+ return inRowIM, true
+ }
+ p.clearStackToContext(tableRowContextStopTags)
+ p.oe.pop()
+ return inTableBodyIM, true
case "table":
if p.popUntil(tableScopeStopTags, "tr") {
return inTableBodyIM, false
diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go
index beba98d3ad..865a47dea1 100644
--- a/src/pkg/html/parse_test.go
+++ b/src/pkg/html/parse_test.go
@@ -132,7 +132,7 @@ func TestParser(t *testing.T) {
rc := make(chan io.Reader)
go readDat(filename, rc)
// TODO(nigeltao): Process all test cases, not just a subset.
- for i := 0; i < 32; i++ {
+ for i := 0; i < 33; i++ {
// Parse the #data section.
b, err := ioutil.ReadAll(<-rc)
if err != nil {