diff options
author | Andrew Balholm <andybalholm@gmail.com> | 2011-10-26 11:36:46 +1100 |
---|---|---|
committer | Nigel Tao <nigeltao@golang.org> | 2011-10-26 11:36:46 +1100 |
commit | 6e318bda6c4236caf5a7f02d5ce545f5365094e0 (patch) | |
tree | 720326c113118f3edb9215bb91a7663093cd3687 | |
parent | 7959aeb0f9d143986f1230d907280b3b9de7f1f7 (diff) | |
download | go-6e318bda6c4236caf5a7f02d5ce545f5365094e0.tar.gz go-6e318bda6c4236caf5a7f02d5ce545f5365094e0.zip |
html: improve parsing of tables
When foster parenting, merge adjacent text nodes.
Properly close table row at </tr> tag.
Pass tests1.dat, test 32:
<!-----><font><div>hello<table>excite!<b>me!<th><i>please!</tr><!--X-->
| <!-- - -->
| <html>
| <head>
| <body>
| <font>
| <div>
| "helloexcite!"
| <b>
| "me!"
| <table>
| <tbody>
| <tr>
| <th>
| <i>
| "please!"
| <!-- X -->
R=nigeltao
CC=golang-dev
https://golang.org/cl/5323048
-rw-r--r-- | src/pkg/html/parse.go | 33 | ||||
-rw-r--r-- | src/pkg/html/parse_test.go | 2 |
2 files changed, 27 insertions, 8 deletions
diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go index d1d4e483c5..292fbaf6be 100644 --- a/src/pkg/html/parse.go +++ b/src/pkg/html/parse.go @@ -52,6 +52,11 @@ var ( tableScopeStopTags = []string{"html", "table"} ) +// stopTags for use in clearStackToContext. +var ( + tableRowContextStopTags = []string{"tr", "html"} +) + // popUntil pops the stack of open elements at the highest element whose tag // is in matchTags, provided there is no higher element in stopTags. It returns // whether or not there was such an element. If there was not, popUntil leaves @@ -146,6 +151,11 @@ func (p *parser) fosterParent(n *Node) { } } + if i > 0 && parent.Child[i-1].Type == TextNode && n.Type == TextNode { + parent.Child[i-1].Data += n.Data + return + } + if i == len(parent.Child) { parent.Add(n) } else { @@ -749,11 +759,11 @@ func inTableIM(p *parser) (insertionMode, bool) { case StartTagToken: switch p.tok.Data { case "tbody", "tfoot", "thead": - p.clearStackToTableContext() + p.clearStackToContext(tableScopeStopTags) p.addElement(p.tok.Data, p.tok.Attr) return inTableBodyIM, true case "td", "th", "tr": - p.clearStackToTableContext() + p.clearStackToContext(tableScopeStopTags) p.addElement("tbody", nil) return inTableBodyIM, false case "table": @@ -794,11 +804,15 @@ func inTableIM(p *parser) (insertionMode, bool) { return useTheRulesFor(p, inTableIM, inBodyIM) } -func (p *parser) clearStackToTableContext() { +// clearStackToContext pops elements off the stack of open elements +// until an element listed in stopTags is found. +func (p *parser) clearStackToContext(stopTags []string) { for i := len(p.oe) - 1; i >= 0; i-- { - if x := p.oe[i].Data; x == "table" || x == "html" { - p.oe = p.oe[:i+1] - return + for _, tag := range stopTags { + if p.oe[i].Data == tag { + p.oe = p.oe[:i+1] + return + } } } } @@ -877,7 +891,12 @@ func inRowIM(p *parser) (insertionMode, bool) { case EndTagToken: switch p.tok.Data { case "tr": - // TODO. + if !p.elementInScope(tableScopeStopTags, "tr") { + return inRowIM, true + } + p.clearStackToContext(tableRowContextStopTags) + p.oe.pop() + return inTableBodyIM, true case "table": if p.popUntil(tableScopeStopTags, "tr") { return inTableBodyIM, false diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go index beba98d3ad..865a47dea1 100644 --- a/src/pkg/html/parse_test.go +++ b/src/pkg/html/parse_test.go @@ -132,7 +132,7 @@ func TestParser(t *testing.T) { rc := make(chan io.Reader) go readDat(filename, rc) // TODO(nigeltao): Process all test cases, not just a subset. - for i := 0; i < 32; i++ { + for i := 0; i < 33; i++ { // Parse the #data section. b, err := ioutil.ReadAll(<-rc) if err != nil { |