aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Balholm <andybalholm@gmail.com>2011-10-26 14:02:30 +1100
committerNigel Tao <nigeltao@golang.org>2011-10-26 14:02:30 +1100
commit05ed18f4f6c661bfe01db0d8c25e5d7b65658a54 (patch)
tree6e07f8042705d70da8f636c53be5ddb5f8cdbd65
parent6e318bda6c4236caf5a7f02d5ce545f5365094e0 (diff)
downloadgo-05ed18f4f6c661bfe01db0d8c25e5d7b65658a54.tar.gz
go-05ed18f4f6c661bfe01db0d8c25e5d7b65658a54.zip
html: improve parsing of lists
Make a <li> tag close the previous <li> element. Make a </ul> tag close <li> elements. Pass tests1.dat, test 33: <!DOCTYPE html><li>hello<li>world<ul>how<li>do</ul>you</body><!--do--> | <!DOCTYPE html> | <html> | <head> | <body> | <li> | "hello" | <li> | "world" | <ul> | "how" | <li> | "do" | "you" | <!-- do --> R=nigeltao CC=golang-dev https://golang.org/cl/5321051
-rw-r--r--src/pkg/html/parse.go20
-rw-r--r--src/pkg/html/parse_test.go2
2 files changed, 21 insertions, 1 deletions
diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go
index 292fbaf6be..530942aa8f 100644
--- a/src/pkg/html/parse.go
+++ b/src/pkg/html/parse.go
@@ -576,6 +576,24 @@ func inBodyIM(p *parser) (insertionMode, bool) {
p.framesetOK = false
// TODO: detect <select> inside a table.
return inSelectIM, true
+ case "li":
+ p.framesetOK = false
+ for i := len(p.oe) - 1; i >= 0; i-- {
+ node := p.oe[i]
+ switch node.Data {
+ case "li":
+ p.popUntil(listItemScopeStopTags, "li")
+ case "address", "div", "p":
+ continue
+ default:
+ if !isSpecialElement[node.Data] {
+ continue
+ }
+ }
+ break
+ }
+ p.popUntil(buttonScopeStopTags, "p")
+ p.addElement("li", p.tok.Attr)
default:
// TODO.
p.addElement(p.tok.Data, p.tok.Attr)
@@ -592,6 +610,8 @@ func inBodyIM(p *parser) (insertionMode, bool) {
p.popUntil(buttonScopeStopTags, "p")
case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u":
p.inBodyEndTagFormatting(p.tok.Data)
+ case "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary", "ul":
+ p.popUntil(defaultScopeStopTags, p.tok.Data)
default:
p.inBodyEndTagOther(p.tok.Data)
}
diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go
index 865a47dea1..b0ddd92476 100644
--- a/src/pkg/html/parse_test.go
+++ b/src/pkg/html/parse_test.go
@@ -132,7 +132,7 @@ func TestParser(t *testing.T) {
rc := make(chan io.Reader)
go readDat(filename, rc)
// TODO(nigeltao): Process all test cases, not just a subset.
- for i := 0; i < 33; i++ {
+ for i := 0; i < 34; i++ {
// Parse the #data section.
b, err := ioutil.ReadAll(<-rc)
if err != nil {