diff options
author | Andrew Balholm <andybalholm@gmail.com> | 2011-10-26 14:02:30 +1100 |
---|---|---|
committer | Nigel Tao <nigeltao@golang.org> | 2011-10-26 14:02:30 +1100 |
commit | 05ed18f4f6c661bfe01db0d8c25e5d7b65658a54 (patch) | |
tree | 6e07f8042705d70da8f636c53be5ddb5f8cdbd65 | |
parent | 6e318bda6c4236caf5a7f02d5ce545f5365094e0 (diff) | |
download | go-05ed18f4f6c661bfe01db0d8c25e5d7b65658a54.tar.gz go-05ed18f4f6c661bfe01db0d8c25e5d7b65658a54.zip |
html: improve parsing of lists
Make a <li> tag close the previous <li> element.
Make a </ul> tag close <li> elements.
Pass tests1.dat, test 33:
<!DOCTYPE html><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <li>
| "hello"
| <li>
| "world"
| <ul>
| "how"
| <li>
| "do"
| "you"
| <!-- do -->
R=nigeltao
CC=golang-dev
https://golang.org/cl/5321051
-rw-r--r-- | src/pkg/html/parse.go | 20 | ||||
-rw-r--r-- | src/pkg/html/parse_test.go | 2 |
2 files changed, 21 insertions, 1 deletions
diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go index 292fbaf6be..530942aa8f 100644 --- a/src/pkg/html/parse.go +++ b/src/pkg/html/parse.go @@ -576,6 +576,24 @@ func inBodyIM(p *parser) (insertionMode, bool) { p.framesetOK = false // TODO: detect <select> inside a table. return inSelectIM, true + case "li": + p.framesetOK = false + for i := len(p.oe) - 1; i >= 0; i-- { + node := p.oe[i] + switch node.Data { + case "li": + p.popUntil(listItemScopeStopTags, "li") + case "address", "div", "p": + continue + default: + if !isSpecialElement[node.Data] { + continue + } + } + break + } + p.popUntil(buttonScopeStopTags, "p") + p.addElement("li", p.tok.Attr) default: // TODO. p.addElement(p.tok.Data, p.tok.Attr) @@ -592,6 +610,8 @@ func inBodyIM(p *parser) (insertionMode, bool) { p.popUntil(buttonScopeStopTags, "p") case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u": p.inBodyEndTagFormatting(p.tok.Data) + case "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary", "ul": + p.popUntil(defaultScopeStopTags, p.tok.Data) default: p.inBodyEndTagOther(p.tok.Data) } diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go index 865a47dea1..b0ddd92476 100644 --- a/src/pkg/html/parse_test.go +++ b/src/pkg/html/parse_test.go @@ -132,7 +132,7 @@ func TestParser(t *testing.T) { rc := make(chan io.Reader) go readDat(filename, rc) // TODO(nigeltao): Process all test cases, not just a subset. - for i := 0; i < 33; i++ { + for i := 0; i < 34; i++ { // Parse the #data section. b, err := ioutil.ReadAll(<-rc) if err != nil { |