From 557f9d889812976293b4a668c190e0e1e0332857 Mon Sep 17 00:00:00 2001 From: renovate Date: Sun, 11 Jul 2021 14:26:26 +0000 Subject: Update module github.com/PuerkitoBio/goquery to v1.7.1 --- vendor/github.com/PuerkitoBio/goquery/.travis.yml | 29 +- vendor/github.com/PuerkitoBio/goquery/LICENSE | 2 +- vendor/github.com/PuerkitoBio/goquery/README.md | 46 +- vendor/github.com/PuerkitoBio/goquery/go.mod | 6 +- vendor/github.com/PuerkitoBio/goquery/go.sum | 13 +- .../github.com/PuerkitoBio/goquery/manipulation.go | 167 +++- vendor/github.com/PuerkitoBio/goquery/type.go | 64 +- vendor/github.com/PuerkitoBio/goquery/utilities.go | 18 +- vendor/github.com/andybalholm/cascadia/README.md | 2 + vendor/github.com/andybalholm/cascadia/go.mod | 6 +- vendor/github.com/andybalholm/cascadia/parser.go | 321 +++---- vendor/github.com/andybalholm/cascadia/selector.go | 988 ++++++++++++++------- .../github.com/andybalholm/cascadia/serialize.go | 120 +++ .../github.com/andybalholm/cascadia/specificity.go | 26 + vendor/golang.org/x/net/html/const.go | 3 +- vendor/golang.org/x/net/html/foreign.go | 120 ++- vendor/golang.org/x/net/html/node.go | 5 + vendor/golang.org/x/net/html/parse.go | 337 ++++--- vendor/golang.org/x/net/html/render.go | 34 +- vendor/golang.org/x/net/html/token.go | 9 +- vendor/modules.txt | 7 +- 21 files changed, 1529 insertions(+), 794 deletions(-) create mode 100644 vendor/github.com/andybalholm/cascadia/serialize.go create mode 100644 vendor/github.com/andybalholm/cascadia/specificity.go (limited to 'vendor') diff --git a/vendor/github.com/PuerkitoBio/goquery/.travis.yml b/vendor/github.com/PuerkitoBio/goquery/.travis.yml index cc1402d..8430c86 100644 --- a/vendor/github.com/PuerkitoBio/goquery/.travis.yml +++ b/vendor/github.com/PuerkitoBio/goquery/.travis.yml @@ -1,16 +1,31 @@ +arch: + - amd64 + - ppc64le language: go go: - - 1.1 - - 1.2.x - - 1.3.x - - 1.4.x - - 1.5.x - - 1.6.x - 1.7.x - 1.8.x - 1.9.x - - "1.10.x" + - 1.10.x - 1.11.x + - 1.12.x + - 1.13.x + - 1.14.x + - 1.15.x - tip +jobs: + exclude: + - arch: ppc64le + go: 1.7.x + - arch: ppc64le + go: 1.8.x + - arch: ppc64le + go: 1.9.x + - arch: ppc64le + go: 1.10.x + - arch: ppc64le + go: 1.11.x + - arch: ppc64le + go: 1.12.x diff --git a/vendor/github.com/PuerkitoBio/goquery/LICENSE b/vendor/github.com/PuerkitoBio/goquery/LICENSE index f743d37..25372c2 100644 --- a/vendor/github.com/PuerkitoBio/goquery/LICENSE +++ b/vendor/github.com/PuerkitoBio/goquery/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2012-2016, Martin Angers & Contributors +Copyright (c) 2012-2021, Martin Angers & Contributors All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/vendor/github.com/PuerkitoBio/goquery/README.md b/vendor/github.com/PuerkitoBio/goquery/README.md index 84f9af3..6bb185c 100644 --- a/vendor/github.com/PuerkitoBio/goquery/README.md +++ b/vendor/github.com/PuerkitoBio/goquery/README.md @@ -1,5 +1,9 @@ # goquery - a little like that j-thing, only in Go -[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge) + +[![builds.sr.ht status](https://builds.sr.ht/~mna/goquery/commits/fedora.yml.svg)](https://builds.sr.ht/~mna/goquery/commits/fedora.yml?) +[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) +[![Go Reference](https://pkg.go.dev/badge/github.com/PuerkitoBio/goquery.svg)](https://pkg.go.dev/github.com/PuerkitoBio/goquery) +[![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge) goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off. @@ -19,7 +23,7 @@ Syntax-wise, it is as close as possible to jQuery, with the same function names ## Installation -Please note that because of the net/html dependency, goquery requires Go1.1+. +Please note that because of the net/html dependency, goquery requires Go1.1+ and is tested on Go1.7+. $ go get github.com/PuerkitoBio/goquery @@ -37,6 +41,11 @@ Please note that because of the net/html dependency, goquery requires Go1.1+. **Note that goquery's API is now stable, and will not break.** +* **2021-07-11 (v1.7.1)** : Update go.mod dependencies and add dependabot config (thanks [@jauderho](https://github.com/jauderho)). +* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)). +* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes. +* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this. +* **2020-02-04 (v1.5.1)** : Update module dependencies. * **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505). * **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples. * **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`. @@ -47,7 +56,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+. * **2016-08-28 (v1.0.1)** : Optimize performance for large documents. * **2016-07-27 (v1.0.0)** : Tag version 1.0.0. * **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object. -* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see godoc for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`). +* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see [doc][] for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`). * **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr]. * **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone]. * **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone]. @@ -76,7 +85,7 @@ jQuery often has many variants for the same function (no argument, a selector st Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour). -The complete [godoc reference documentation can be found here][doc]. +The complete [package reference documentation can be found here][doc]. Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string): @@ -120,12 +129,11 @@ func ExampleScrape() { } // Find the review items - doc.Find(".sidebar-reviews article .content-block").Each(func(i int, s *goquery.Selection) { - // For each item found, get the band and title - band := s.Find("a").Text() - title := s.Find("i").Text() - fmt.Printf("Review %d: %s - %s\n", i, band, title) - }) + doc.Find(".left-content article .post-title").Each(func(i int, s *goquery.Selection) { + // For each item found, get the title + title := s.Find("a").Text() + fmt.Printf("Review %d: %s\n", i, title) + }) } func main() { @@ -138,9 +146,14 @@ func main() { - [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags. - [andybalholm/cascadia][cascadia], the CSS selector library used by goquery. - [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors. -- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework +- [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework - [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets. - [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping. +- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins +- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers. +- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering. +- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags. +- [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates. ## Support @@ -153,8 +166,9 @@ There are a number of ways you can support the project: * Pull requests: please discuss new code in an issue first, unless the fix is really trivial. - Make sure new code is tested. - Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue. - -If you desperately want to send money my way, I have a BuyMeACoffee.com page: +* Sponsor the developer + - See the Github Sponsor button at the top of the repo on github + - or via BuyMeACoffee.com, below Buy Me A Coffee @@ -169,11 +183,13 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia' [bsd]: http://opensource.org/licenses/BSD-3-Clause [golic]: http://golang.org/LICENSE [caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE -[doc]: http://godoc.org/github.com/PuerkitoBio/goquery +[doc]: https://pkg.go.dev/github.com/PuerkitoBio/goquery [index]: http://api.jquery.com/index/ [gonet]: https://github.com/golang/net/ -[html]: http://godoc.org/golang.org/x/net/html +[html]: https://pkg.go.dev/golang.org/x/net/html [wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks [thatguystone]: https://github.com/thatguystone [piotr]: https://github.com/piotrkowalczuk [goq]: https://github.com/andrewstuart/goq +[thiemok]: https://github.com/thiemok +[djw]: https://github.com/davidjwilkins diff --git a/vendor/github.com/PuerkitoBio/goquery/go.mod b/vendor/github.com/PuerkitoBio/goquery/go.mod index 2fa1332..3af3b15 100644 --- a/vendor/github.com/PuerkitoBio/goquery/go.mod +++ b/vendor/github.com/PuerkitoBio/goquery/go.mod @@ -1,6 +1,8 @@ module github.com/PuerkitoBio/goquery require ( - github.com/andybalholm/cascadia v1.0.0 - golang.org/x/net v0.0.0-20181114220301-adae6a3d119a + github.com/andybalholm/cascadia v1.2.0 + golang.org/x/net v0.0.0-20210614182718-04defd469f4e ) + +go 1.13 diff --git a/vendor/github.com/PuerkitoBio/goquery/go.sum b/vendor/github.com/PuerkitoBio/goquery/go.sum index 11c5757..13b128d 100644 --- a/vendor/github.com/PuerkitoBio/goquery/go.sum +++ b/vendor/github.com/PuerkitoBio/goquery/go.sum @@ -1,5 +1,10 @@ -github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= -github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE= +github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/vendor/github.com/PuerkitoBio/goquery/manipulation.go b/vendor/github.com/PuerkitoBio/goquery/manipulation.go index 34eb757..35febf1 100644 --- a/vendor/github.com/PuerkitoBio/goquery/manipulation.go +++ b/vendor/github.com/PuerkitoBio/goquery/manipulation.go @@ -39,8 +39,15 @@ func (s *Selection) AfterSelection(sel *Selection) *Selection { // AfterHtml parses the html and inserts it after the set of matched elements. // // This follows the same rules as Selection.Append. -func (s *Selection) AfterHtml(html string) *Selection { - return s.AfterNodes(parseHtml(html)...) +func (s *Selection) AfterHtml(htmlStr string) *Selection { + return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { + nextSibling := node.NextSibling + for _, n := range nodes { + if node.Parent != nil { + node.Parent.InsertBefore(n, nextSibling) + } + } + }) } // AfterNodes inserts the nodes after each element in the set of matched elements. @@ -85,8 +92,12 @@ func (s *Selection) AppendSelection(sel *Selection) *Selection { } // AppendHtml parses the html and appends it to the set of matched elements. -func (s *Selection) AppendHtml(html string) *Selection { - return s.AppendNodes(parseHtml(html)...) +func (s *Selection) AppendHtml(htmlStr string) *Selection { + return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { + for _, n := range nodes { + node.AppendChild(n) + } + }) } // AppendNodes appends the specified nodes to each node in the set of matched elements. @@ -123,8 +134,14 @@ func (s *Selection) BeforeSelection(sel *Selection) *Selection { // BeforeHtml parses the html and inserts it before the set of matched elements. // // This follows the same rules as Selection.Append. -func (s *Selection) BeforeHtml(html string) *Selection { - return s.BeforeNodes(parseHtml(html)...) +func (s *Selection) BeforeHtml(htmlStr string) *Selection { + return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { + for _, n := range nodes { + if node.Parent != nil { + node.Parent.InsertBefore(n, node) + } + } + }) } // BeforeNodes inserts the nodes before each element in the set of matched elements. @@ -184,8 +201,13 @@ func (s *Selection) PrependSelection(sel *Selection) *Selection { } // PrependHtml parses the html and prepends it to the set of matched elements. -func (s *Selection) PrependHtml(html string) *Selection { - return s.PrependNodes(parseHtml(html)...) +func (s *Selection) PrependHtml(htmlStr string) *Selection { + return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { + firstChild := node.FirstChild + for _, n := range nodes { + node.InsertBefore(n, firstChild) + } + }) } // PrependNodes prepends the specified nodes to each node in the set of @@ -212,14 +234,19 @@ func (s *Selection) Remove() *Selection { return s } -// RemoveFiltered removes the set of matched elements by selector. -// It returns the Selection of removed nodes. +// RemoveFiltered removes from the current set of matched elements those that +// match the selector filter. It returns the Selection of removed nodes. +// +// For example if the selection s contains "

", "

" and "

" +// and s.RemoveFiltered("h2") is called, only the "

" node is removed +// (and returned), while "

" and "

" are kept in the document. func (s *Selection) RemoveFiltered(selector string) *Selection { return s.RemoveMatcher(compileMatcher(selector)) } -// RemoveMatcher removes the set of matched elements. -// It returns the Selection of removed nodes. +// RemoveMatcher removes from the current set of matched elements those that +// match the Matcher filter. It returns the Selection of removed nodes. +// See RemoveFiltered for additional information. func (s *Selection) RemoveMatcher(m Matcher) *Selection { return s.FilterMatcher(m).Remove() } @@ -256,8 +283,16 @@ func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection { // It returns the removed elements. // // This follows the same rules as Selection.Append. -func (s *Selection) ReplaceWithHtml(html string) *Selection { - return s.ReplaceWithNodes(parseHtml(html)...) +func (s *Selection) ReplaceWithHtml(htmlStr string) *Selection { + s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { + nextSibling := node.NextSibling + for _, n := range nodes { + if node.Parent != nil { + node.Parent.InsertBefore(n, nextSibling) + } + } + }) + return s.Remove() } // ReplaceWithNodes replaces each element in the set of matched elements with @@ -272,8 +307,17 @@ func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection { // SetHtml sets the html content of each element in the selection to // specified html string. -func (s *Selection) SetHtml(html string) *Selection { - return setHtmlNodes(s, parseHtml(html)...) +func (s *Selection) SetHtml(htmlStr string) *Selection { + for _, context := range s.Nodes { + for c := context.FirstChild; c != nil; c = context.FirstChild { + context.RemoveChild(c) + } + } + return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { + for _, n := range nodes { + node.AppendChild(n) + } + }) } // SetText sets the content of each element in the selection to specified content. @@ -329,8 +373,23 @@ func (s *Selection) WrapSelection(sel *Selection) *Selection { // most child of the given HTML. // // It returns the original set of elements. -func (s *Selection) WrapHtml(html string) *Selection { - return s.wrapNodes(parseHtml(html)...) +func (s *Selection) WrapHtml(htmlStr string) *Selection { + nodesMap := make(map[string][]*html.Node) + for _, context := range s.Nodes { + var parent *html.Node + if context.Parent != nil { + parent = context.Parent + } else { + parent = &html.Node{Type: html.ElementNode} + } + nodes, found := nodesMap[nodeName(parent)] + if !found { + nodes = parseHtmlWithContext(htmlStr, parent) + nodesMap[nodeName(parent)] = nodes + } + newSingleSelection(context, s.document).wrapAllNodes(cloneNodes(nodes)...) + } + return s } // WrapNode wraps each element in the set of matched elements inside the inner- @@ -382,8 +441,18 @@ func (s *Selection) WrapAllSelection(sel *Selection) *Selection { // document. // // It returns the original set of elements. -func (s *Selection) WrapAllHtml(html string) *Selection { - return s.wrapAllNodes(parseHtml(html)...) +func (s *Selection) WrapAllHtml(htmlStr string) *Selection { + var context *html.Node + var nodes []*html.Node + if len(s.Nodes) > 0 { + context = s.Nodes[0] + if context.Parent != nil { + nodes = parseHtmlWithContext(htmlStr, context) + } else { + nodes = parseHtml(htmlStr) + } + } + return s.wrapAllNodes(nodes...) } func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection { @@ -452,8 +521,17 @@ func (s *Selection) WrapInnerSelection(sel *Selection) *Selection { // cloned before being inserted into the document. // // It returns the original set of elements. -func (s *Selection) WrapInnerHtml(html string) *Selection { - return s.wrapInnerNodes(parseHtml(html)...) +func (s *Selection) WrapInnerHtml(htmlStr string) *Selection { + nodesMap := make(map[string][]*html.Node) + for _, context := range s.Nodes { + nodes, found := nodesMap[nodeName(context)] + if !found { + nodes = parseHtmlWithContext(htmlStr, context) + nodesMap[nodeName(context)] = nodes + } + newSingleSelection(context, s.document).wrapInnerNodes(cloneNodes(nodes)...) + } + return s } // WrapInnerNode wraps an HTML structure, matched by the given selector, around @@ -493,16 +571,14 @@ func parseHtml(h string) []*html.Node { return nodes } -func setHtmlNodes(s *Selection, ns ...*html.Node) *Selection { - for _, n := range s.Nodes { - for c := n.FirstChild; c != nil; c = n.FirstChild { - n.RemoveChild(c) - } - for _, c := range ns { - n.AppendChild(cloneNode(c)) - } +func parseHtmlWithContext(h string, context *html.Node) []*html.Node { + // Errors are only returned when the io.Reader returns any error besides + // EOF, but strings.Reader never will + nodes, err := html.ParseFragment(strings.NewReader(h), context) + if err != nil { + panic("goquery: failed to parse HTML: " + err.Error()) } - return s + return nodes } // Get the first child that is an ElementNode @@ -572,3 +648,32 @@ func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool, return s } + +// eachNodeHtml parses the given html string and inserts the resulting nodes in the dom with the mergeFn. +// The parsed nodes are inserted for each element of the selection. +// isParent can be used to indicate that the elements of the selection should be treated as the parent for the parsed html. +// A cache is used to avoid parsing the html multiple times should the elements of the selection result in the same context. +func (s *Selection) eachNodeHtml(htmlStr string, isParent bool, mergeFn func(n *html.Node, nodes []*html.Node)) *Selection { + // cache to avoid parsing the html for the same context multiple times + nodeCache := make(map[string][]*html.Node) + var context *html.Node + for _, n := range s.Nodes { + if isParent { + context = n.Parent + } else { + if n.Type != html.ElementNode { + continue + } + context = n + } + if context != nil { + nodes, found := nodeCache[nodeName(context)] + if !found { + nodes = parseHtmlWithContext(htmlStr, context) + nodeCache[nodeName(context)] = nodes + } + mergeFn(n, cloneNodes(nodes)) + } + } + return s +} diff --git a/vendor/github.com/PuerkitoBio/goquery/type.go b/vendor/github.com/PuerkitoBio/goquery/type.go index 6ad51db..6646c14 100644 --- a/vendor/github.com/PuerkitoBio/goquery/type.go +++ b/vendor/github.com/PuerkitoBio/goquery/type.go @@ -7,7 +7,6 @@ import ( "net/url" "github.com/andybalholm/cascadia" - "golang.org/x/net/html" ) @@ -122,6 +121,45 @@ type Matcher interface { Filter([]*html.Node) []*html.Node } +// Single compiles a selector string to a Matcher that stops after the first +// match is found. +// +// By default, Selection.Find and other functions that accept a selector string +// to select nodes will use all matches corresponding to that selector. By +// using the Matcher returned by Single, at most the first match will be +// selected. +// +// For example, those two statements are semantically equivalent: +// +// sel1 := doc.Find("a").First() +// sel2 := doc.FindMatcher(goquery.Single("a")) +// +// The one using Single is optimized to be potentially much faster on large +// documents. +// +// Only the behaviour of the MatchAll method of the Matcher interface is +// altered compared to standard Matchers. This means that the single-selection +// property of the Matcher only applies for Selection methods where the Matcher +// is used to select nodes, not to filter or check if a node matches the +// Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g. +// FilterMatcher(Single("div")) will still result in a Selection with multiple +// "div"s if there were many "div"s in the Selection to begin with). +func Single(selector string) Matcher { + return singleMatcher{compileMatcher(selector)} +} + +// SingleMatcher returns a Matcher matches the same nodes as m, but that stops +// after the first match is found. +// +// See the documentation of function Single for more details. +func SingleMatcher(m Matcher) Matcher { + if _, ok := m.(singleMatcher); ok { + // m is already a singleMatcher + return m + } + return singleMatcher{m} +} + // compileMatcher compiles the selector string s and returns // the corresponding Matcher. If s is an invalid selector string, // it returns a Matcher that fails all matches. @@ -133,6 +171,30 @@ func compileMatcher(s string) Matcher { return cs } +type singleMatcher struct { + Matcher +} + +func (m singleMatcher) MatchAll(n *html.Node) []*html.Node { + // Optimized version - stops finding at the first match (cascadia-compiled + // matchers all use this code path). + if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok { + node := mm.MatchFirst(n) + if node == nil { + return nil + } + return []*html.Node{node} + } + + // Fallback version, for e.g. test mocks that don't provide the MatchFirst + // method. + nodes := m.Matcher.MatchAll(n) + if len(nodes) > 0 { + return nodes[:1:1] + } + return nil +} + // invalidMatcher is a Matcher that always fails to match. type invalidMatcher struct{} diff --git a/vendor/github.com/PuerkitoBio/goquery/utilities.go b/vendor/github.com/PuerkitoBio/goquery/utilities.go index b4c061a..3e11b1d 100644 --- a/vendor/github.com/PuerkitoBio/goquery/utilities.go +++ b/vendor/github.com/PuerkitoBio/goquery/utilities.go @@ -36,12 +36,22 @@ func NodeName(s *Selection) string { if s.Length() == 0 { return "" } - switch n := s.Get(0); n.Type { + return nodeName(s.Get(0)) +} + +// nodeName returns the node name of the given html node. +// See NodeName for additional details on behaviour. +func nodeName(node *html.Node) string { + if node == nil { + return "" + } + + switch node.Type { case html.ElementNode, html.DoctypeNode: - return n.Data + return node.Data default: - if n.Type >= 0 && int(n.Type) < len(nodeNames) { - return nodeNames[n.Type] + if node.Type >= 0 && int(node.Type) < len(nodeNames) { + return nodeNames[node.Type] } return "" } diff --git a/vendor/github.com/andybalholm/cascadia/README.md b/vendor/github.com/andybalholm/cascadia/README.md index 9021cb9..26f4c37 100644 --- a/vendor/github.com/andybalholm/cascadia/README.md +++ b/vendor/github.com/andybalholm/cascadia/README.md @@ -5,3 +5,5 @@ The Cascadia package implements CSS selectors for use with the parse trees produced by the html package. To test CSS selectors without writing Go code, check out [cascadia](https://github.com/suntong/cascadia) the command line tool, a thin wrapper around this package. + +[Refer to godoc here](https://godoc.org/github.com/andybalholm/cascadia). diff --git a/vendor/github.com/andybalholm/cascadia/go.mod b/vendor/github.com/andybalholm/cascadia/go.mod index e6febbb..51a330b 100644 --- a/vendor/github.com/andybalholm/cascadia/go.mod +++ b/vendor/github.com/andybalholm/cascadia/go.mod @@ -1,3 +1,5 @@ -module "github.com/andybalholm/cascadia" +module github.com/andybalholm/cascadia -require "golang.org/x/net" v0.0.0-20180218175443-cbe0f9307d01 +require golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01 + +go 1.13 diff --git a/vendor/github.com/andybalholm/cascadia/parser.go b/vendor/github.com/andybalholm/cascadia/parser.go index 495db9c..c40a39f 100644 --- a/vendor/github.com/andybalholm/cascadia/parser.go +++ b/vendor/github.com/andybalholm/cascadia/parser.go @@ -7,14 +7,16 @@ import ( "regexp" "strconv" "strings" - - "golang.org/x/net/html" ) // a parser for CSS selectors type parser struct { s string // the source text i int // the current position + + // if `false`, parsing a pseudo-element + // returns an error. + acceptPseudoElements bool } // parseEscape parses a backslash escape. @@ -31,7 +33,7 @@ func (p *parser) parseEscape() (result string, err error) { case hexDigit(c): // unicode escape (hex) var i int - for i = start; i < p.i+6 && i < len(p.s) && hexDigit(p.s[i]); i++ { + for i = start; i < start+6 && i < len(p.s) && hexDigit(p.s[i]); i++ { // empty } v, _ := strconv.ParseUint(p.s[start:i], 16, 21) @@ -56,6 +58,26 @@ func (p *parser) parseEscape() (result string, err error) { return result, nil } +// toLowerASCII returns s with all ASCII capital letters lowercased. +func toLowerASCII(s string) string { + var b []byte + for i := 0; i < len(s); i++ { + if c := s[i]; 'A' <= c && c <= 'Z' { + if b == nil { + b = make([]byte, len(s)) + copy(b, s) + } + b[i] = s[i] + ('a' - 'A') + } + } + + if b == nil { + return s + } + + return string(b) +} + func hexDigit(c byte) bool { return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' } @@ -280,92 +302,92 @@ func (p *parser) consumeClosingParenthesis() bool { } // parseTypeSelector parses a type selector (one that matches by tag name). -func (p *parser) parseTypeSelector() (result Selector, err error) { +func (p *parser) parseTypeSelector() (result tagSelector, err error) { tag, err := p.parseIdentifier() if err != nil { - return nil, err + return } - - return typeSelector(tag), nil + return tagSelector{tag: toLowerASCII(tag)}, nil } // parseIDSelector parses a selector that matches by id attribute. -func (p *parser) parseIDSelector() (Selector, error) { +func (p *parser) parseIDSelector() (idSelector, error) { if p.i >= len(p.s) { - return nil, fmt.Errorf("expected id selector (#id), found EOF instead") + return idSelector{}, fmt.Errorf("expected id selector (#id), found EOF instead") } if p.s[p.i] != '#' { - return nil, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i]) + return idSelector{}, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i]) } p.i++ id, err := p.parseName() if err != nil { - return nil, err + return idSelector{}, err } - return attributeEqualsSelector("id", id), nil + return idSelector{id: id}, nil } // parseClassSelector parses a selector that matches by class attribute. -func (p *parser) parseClassSelector() (Selector, error) { +func (p *parser) parseClassSelector() (classSelector, error) { if p.i >= len(p.s) { - return nil, fmt.Errorf("expected class selector (.class), found EOF instead") + return classSelector{}, fmt.Errorf("expected class selector (.class), found EOF instead") } if p.s[p.i] != '.' { - return nil, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i]) + return classSelector{}, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i]) } p.i++ class, err := p.parseIdentifier() if err != nil { - return nil, err + return classSelector{}, err } - return attributeIncludesSelector("class", class), nil + return classSelector{class: class}, nil } // parseAttributeSelector parses a selector that matches by attribute value. -func (p *parser) parseAttributeSelector() (Selector, error) { +func (p *parser) parseAttributeSelector() (attrSelector, error) { if p.i >= len(p.s) { - return nil, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead") + return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead") } if p.s[p.i] != '[' { - return nil, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i]) + return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i]) } p.i++ p.skipWhitespace() key, err := p.parseIdentifier() if err != nil { - return nil, err + return attrSelector{}, err } + key = toLowerASCII(key) p.skipWhitespace() if p.i >= len(p.s) { - return nil, errors.New("unexpected EOF in attribute selector") + return attrSelector{}, errors.New("unexpected EOF in attribute selector") } if p.s[p.i] == ']' { p.i++ - return attributeExistsSelector(key), nil + return attrSelector{key: key, operation: ""}, nil } if p.i+2 >= len(p.s) { - return nil, errors.New("unexpected EOF in attribute selector") + return attrSelector{}, errors.New("unexpected EOF in attribute selector") } op := p.s[p.i : p.i+2] if op[0] == '=' { op = "=" } else if op[1] != '=' { - return nil, fmt.Errorf(`expected equality operator, found "%s" instead`, op) + return attrSelector{}, fmt.Errorf(`expected equality operator, found "%s" instead`, op) } p.i += len(op) p.skipWhitespace() if p.i >= len(p.s) { - return nil, errors.New("unexpected EOF in attribute selector") + return attrSelector{}, errors.New("unexpected EOF in attribute selector") } var val string var rx *regexp.Regexp @@ -380,88 +402,84 @@ func (p *parser) parseAttributeSelector() (Selector, error) { } } if err != nil { - return nil, err + return attrSelector{}, err } p.skipWhitespace() if p.i >= len(p.s) { - return nil, errors.New("unexpected EOF in attribute selector") + return attrSelector{}, errors.New("unexpected EOF in attribute selector") } if p.s[p.i] != ']' { - return nil, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i]) + return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i]) } p.i++ switch op { - case "=": - return attributeEqualsSelector(key, val), nil - case "!=": - return attributeNotEqualSelector(key, val), nil - case "~=": - return attributeIncludesSelector(key, val), nil - case "|=": - return attributeDashmatchSelector(key, val), nil - case "^=": - return attributePrefixSelector(key, val), nil - case "$=": - return attributeSuffixSelector(key, val), nil - case "*=": - return attributeSubstringSelector(key, val), nil - case "#=": - return attributeRegexSelector(key, rx), nil - } - - return nil, fmt.Errorf("attribute operator %q is not supported", op) + case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=": + return attrSelector{key: key, val: val, operation: op, regexp: rx}, nil + default: + return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op) + } } var errExpectedParenthesis = errors.New("expected '(' but didn't find it") var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it") var errUnmatchedParenthesis = errors.New("unmatched '('") -// parsePseudoclassSelector parses a pseudoclass selector like :not(p). -func (p *parser) parsePseudoclassSelector() (Selector, error) { +// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element +// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements. +// https://drafts.csswg.org/selectors-3/#pseudo-elements +// Returning a nil `Sel` (and a nil `error`) means we found a pseudo-element. +func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err error) { if p.i >= len(p.s) { - return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead") + return nil, "", fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead") } if p.s[p.i] != ':' { - return nil, fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i]) + return nil, "", fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i]) } p.i++ + var mustBePseudoElement bool + if p.i >= len(p.s) { + return nil, "", fmt.Errorf("got empty pseudoclass (or pseudoelement)") + } + if p.s[p.i] == ':' { // we found a pseudo-element + mustBePseudoElement = true + p.i++ + } + name, err := p.parseIdentifier() if err != nil { - return nil, err + return } name = toLowerASCII(name) + if mustBePseudoElement && (name != "after" && name != "backdrop" && name != "before" && + name != "cue" && name != "first-letter" && name != "first-line" && name != "grammar-error" && + name != "marker" && name != "placeholder" && name != "selection" && name != "spelling-error") { + return out, "", fmt.Errorf("unknown pseudoelement :%s", name) + } switch name { case "not", "has", "haschild": if !p.consumeParenthesis() { - return nil, errExpectedParenthesis + return out, "", errExpectedParenthesis } sel, parseErr := p.parseSelectorGroup() if parseErr != nil { - return nil, parseErr + return out, "", parseErr } if !p.consumeClosingParenthesis() { - return nil, errExpectedClosingParenthesis + return out, "", errExpectedClosingParenthesis } - switch name { - case "not": - return negatedSelector(sel), nil - case "has": - return hasDescendantSelector(sel), nil - case "haschild": - return hasChildSelector(sel), nil - } + out = relativePseudoClassSelector{name: name, match: sel} case "contains", "containsown": if !p.consumeParenthesis() { - return nil, errExpectedParenthesis + return out, "", errExpectedParenthesis } if p.i == len(p.s) { - return nil, errUnmatchedParenthesis + return out, "", errUnmatchedParenthesis } var val string switch p.s[p.i] { @@ -471,95 +489,75 @@ func (p *parser) parsePseudoclassSelector() (Selector, error) { val, err = p.parseIdentifier() } if err != nil { - return nil, err + return out, "", err } val = strings.ToLower(val) p.skipWhitespace() if p.i >= len(p.s) { - return nil, errors.New("unexpected EOF in pseudo selector") + return out, "", errors.New("unexpected EOF in pseudo selector") } if !p.consumeClosingParenthesis() { - return nil, errExpectedClosingParenthesis + return out, "", errExpectedClosingParenthesis } - switch name { - case "contains": - return textSubstrSelector(val), nil - case "containsown": - return ownTextSubstrSelector(val), nil - } + out = containsPseudoClassSelector{own: name == "containsown", value: val} case "matches", "matchesown": if !p.consumeParenthesis() { - return nil, errExpectedParenthesis + return out, "", errExpectedParenthesis } rx, err := p.parseRegex() if err != nil { - return nil, err + return out, "", err } if p.i >= len(p.s) { - return nil, errors.New("unexpected EOF in pseudo selector") + return out, "", errors.New("unexpected EOF in pseudo selector") } if !p.consumeClosingParenthesis() { - return nil, errExpectedClosingParenthesis + return out, "", errExpectedClosingParenthesis } - switch name { - case "matches": - return textRegexSelector(rx), nil - case "matchesown": - return ownTextRegexSelector(rx), nil - } + out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx} case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type": if !p.consumeParenthesis() { - return nil, errExpectedParenthesis + return out, "", errExpectedParenthesis } a, b, err := p.parseNth() if err != nil { - return nil, err + return out, "", err } if !p.consumeClosingParenthesis() { - return nil, errExpectedClosingParenthesis - } - if a == 0 { - switch name { - case "nth-child": - return simpleNthChildSelector(b, false), nil - case "nth-of-type": - return simpleNthChildSelector(b, true), nil - case "nth-last-child": - return simpleNthLastChildSelector(b, false), nil - case "nth-last-of-type": - return simpleNthLastChildSelector(b, true), nil - } + return out, "", errExpectedClosingParenthesis } - return nthChildSelector(a, b, - name == "nth-last-child" || name == "nth-last-of-type", - name == "nth-of-type" || name == "nth-last-of-type"), - nil + last := name == "nth-last-child" || name == "nth-last-of-type" + ofType := name == "nth-of-type" || name == "nth-last-of-type" + out = nthPseudoClassSelector{a: a, b: b, last: last, ofType: ofType} case "first-child": - return simpleNthChildSelector(1, false), nil + out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: false} case "last-child": - return simpleNthLastChildSelector(1, false), nil + out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: true} case "first-of-type": - return simpleNthChildSelector(1, true), nil + out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: false} case "last-of-type": - return simpleNthLastChildSelector(1, true), nil + out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: true} case "only-child": - return onlyChildSelector(false), nil + out = onlyChildPseudoClassSelector{ofType: false} case "only-of-type": - return onlyChildSelector(true), nil + out = onlyChildPseudoClassSelector{ofType: true} case "input": - return inputSelector, nil + out = inputPseudoClassSelector{} case "empty": - return emptyElementSelector, nil + out = emptyElementPseudoClassSelector{} case "root": - return rootSelector, nil + out = rootPseudoClassSelector{} + case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error": + return nil, name, nil + default: + return out, "", fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name) } - - return nil, fmt.Errorf("unknown pseudoclass :%s", name) + return } // parseInteger parses a decimal integer. @@ -705,8 +703,8 @@ invalid: // parseSimpleSelectorSequence parses a selector sequence that applies to // a single element. -func (p *parser) parseSimpleSelectorSequence() (Selector, error) { - var result Selector +func (p *parser) parseSimpleSelectorSequence() (Sel, error) { + var selectors []Sel if p.i >= len(p.s) { return nil, errors.New("expected selector, found EOF instead") @@ -723,13 +721,17 @@ func (p *parser) parseSimpleSelectorSequence() (Selector, error) { if err != nil { return nil, err } - result = r + selectors = append(selectors, r) } + var pseudoElement string loop: for p.i < len(p.s) { - var ns Selector - var err error + var ( + ns Sel + newPseudoElement string + err error + ) switch p.s[p.i] { case '#': ns, err = p.parseIDSelector() @@ -738,44 +740,57 @@ loop: case '[': ns, err = p.parseAttributeSelector() case ':': - ns, err = p.parsePseudoclassSelector() + ns, newPseudoElement, err = p.parsePseudoclassSelector() default: break loop } if err != nil { return nil, err } - if result == nil { - result = ns + // From https://drafts.csswg.org/selectors-3/#pseudo-elements : + // "Only one pseudo-element may appear per selector, and if present + // it must appear after the sequence of simple selectors that + // represents the subjects of the selector."" + if ns == nil { // we found a pseudo-element + if pseudoElement != "" { + return nil, fmt.Errorf("only one pseudo-element is accepted per selector, got %s and %s", pseudoElement, newPseudoElement) + } + if !p.acceptPseudoElements { + return nil, fmt.Errorf("pseudo-element %s found, but pseudo-elements support is disabled", newPseudoElement) + } + pseudoElement = newPseudoElement } else { - result = intersectionSelector(result, ns) + if pseudoElement != "" { + return nil, fmt.Errorf("pseudo-element %s must be at the end of selector", pseudoElement) + } + selectors = append(selectors, ns) } - } - if result == nil { - result = func(n *html.Node) bool { - return n.Type == html.ElementNode - } } - - return result, nil + if len(selectors) == 1 && pseudoElement == "" { // no need wrap the selectors in compoundSelector + return selectors[0], nil + } + return compoundSelector{selectors: selectors, pseudoElement: pseudoElement}, nil } // parseSelector parses a selector that may include combinators. -func (p *parser) parseSelector() (result Selector, err error) { +func (p *parser) parseSelector() (Sel, error) { p.skipWhitespace() - result, err = p.parseSimpleSelectorSequence() + result, err := p.parseSimpleSelectorSequence() if err != nil { - return + return nil, err } for { - var combinator byte + var ( + combinator byte + c Sel + ) if p.skipWhitespace() { combinator = ' ' } if p.i >= len(p.s) { - return + return result, nil } switch p.s[p.i] { @@ -785,51 +800,39 @@ func (p *parser) parseSelector() (result Selector, err error) { p.skipWhitespace() case ',', ')': // These characters can't begin a selector, but they can legally occur after one. - return + return result, nil } if combinator == 0 { - return + return result, nil } - c, err := p.parseSimpleSelectorSequence() + c, err = p.parseSimpleSelectorSequence() if err != nil { return nil, err } - - switch combinator { - case ' ': - result = descendantSelector(result, c) - case '>': - result = childSelector(result, c) - case '+': - result = siblingSelector(result, c, true) - case '~': - result = siblingSelector(result, c, false) - } + result = combinedSelector{first: result, combinator: combinator, second: c} } - - panic("unreachable") } // parseSelectorGroup parses a group of selectors, separated by commas. -func (p *parser) parseSelectorGroup() (result Selector, err error) { - result, err = p.parseSelector() +func (p *parser) parseSelectorGroup() (SelectorGroup, error) { + current, err := p.parseSelector() if err != nil { - return + return nil, err } + result := SelectorGroup{current} for p.i < len(p.s) { if p.s[p.i] != ',' { - return result, nil + break } p.i++ c, err := p.parseSelector() if err != nil { return nil, err } - result = unionSelector(result, c) + result = append(result, c) } - - return + return result, nil } diff --git a/vendor/github.com/andybalholm/cascadia/selector.go b/vendor/github.com/andybalholm/cascadia/selector.go index 9fb05cc..e2a6dc4 100644 --- a/vendor/github.com/andybalholm/cascadia/selector.go +++ b/vendor/github.com/andybalholm/cascadia/selector.go @@ -9,36 +9,60 @@ import ( "golang.org/x/net/html" ) -// the Selector type, and functions for creating them +// Matcher is the interface for basic selector functionality. +// Match returns whether a selector matches n. +type Matcher interface { + Match(n *html.Node) bool +} -// A Selector is a function which tells whether a node matches or not. -type Selector func(*html.Node) bool +// Sel is the interface for all the functionality provided by selectors. +type Sel interface { + Matcher + Specificity() Specificity -// hasChildMatch returns whether n has any child that matches a. -func hasChildMatch(n *html.Node, a Selector) bool { - for c := n.FirstChild; c != nil; c = c.NextSibling { - if a(c) { - return true - } + // Returns a CSS input compiling to this selector. + String() string + + // Returns a pseudo-element, or an empty string. + PseudoElement() string +} + +// Parse parses a selector. Use `ParseWithPseudoElement` +// if you need support for pseudo-elements. +func Parse(sel string) (Sel, error) { + p := &parser{s: sel} + compiled, err := p.parseSelector() + if err != nil { + return nil, err } - return false + + if p.i < len(sel) { + return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) + } + + return compiled, nil } -// hasDescendantMatch performs a depth-first search of n's descendants, -// testing whether any of them match a. It returns true as soon as a match is -// found, or false if no match is found. -func hasDescendantMatch(n *html.Node, a Selector) bool { - for c := n.FirstChild; c != nil; c = c.NextSibling { - if a(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) { - return true - } +// ParseWithPseudoElement parses a single selector, +// with support for pseudo-element. +func ParseWithPseudoElement(sel string) (Sel, error) { + p := &parser{s: sel, acceptPseudoElements: true} + compiled, err := p.parseSelector() + if err != nil { + return nil, err } - return false + + if p.i < len(sel) { + return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) + } + + return compiled, nil } -// Compile parses a selector and returns, if successful, a Selector object -// that can be used to match against html.Node objects. -func Compile(sel string) (Selector, error) { +// ParseGroup parses a selector, or a group of selectors separated by commas. +// Use `ParseGroupWithPseudoElements` +// if you need support for pseudo-elements. +func ParseGroup(sel string) (SelectorGroup, error) { p := &parser{s: sel} compiled, err := p.parseSelectorGroup() if err != nil { @@ -52,6 +76,39 @@ func Compile(sel string) (Selector, error) { return compiled, nil } +// ParseGroupWithPseudoElements parses a selector, or a group of selectors separated by commas. +// It supports pseudo-elements. +func ParseGroupWithPseudoElements(sel string) (SelectorGroup, error) { + p := &parser{s: sel, acceptPseudoElements: true} + compiled, err := p.parseSelectorGroup() + if err != nil { + return nil, err + } + + if p.i < len(sel) { + return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) + } + + return compiled, nil +} + +// A Selector is a function which tells whether a node matches or not. +// +// This type is maintained for compatibility; I recommend using the newer and +// more idiomatic interfaces Sel and Matcher. +type Selector func(*html.Node) bool + +// Compile parses a selector and returns, if successful, a Selector object +// that can be used to match against html.Node objects. +func Compile(sel string) (Selector, error) { + compiled, err := ParseGroup(sel) + if err != nil { + return nil, err + } + + return Selector(compiled.Match), nil +} + // MustCompile is like Compile, but panics instead of returning an error. func MustCompile(sel string) Selector { compiled, err := Compile(sel) @@ -79,6 +136,23 @@ func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node return storage } +func queryInto(n *html.Node, m Matcher, storage []*html.Node) []*html.Node { + for child := n.FirstChild; child != nil; child = child.NextSibling { + if m.Match(child) { + storage = append(storage, child) + } + storage = queryInto(child, m, storage) + } + + return storage +} + +// QueryAll returns a slice of all the nodes that match m, from the descendants +// of n. +func QueryAll(n *html.Node, m Matcher) []*html.Node { + return queryInto(n, m, nil) +} + // Match returns true if the node matches the selector. func (s Selector) Match(n *html.Node) bool { return s(n) @@ -99,6 +173,21 @@ func (s Selector) MatchFirst(n *html.Node) *html.Node { return nil } +// Query returns the first node that matches m, from the descendants of n. +// If none matches, it returns nil. +func Query(n *html.Node, m Matcher) *html.Node { + for c := n.FirstChild; c != nil; c = c.NextSibling { + if m.Match(c) { + return c + } + if matched := Query(c, m); matched != nil { + return matched + } + } + + return nil +} + // Filter returns the nodes in nodes that match the selector. func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) { for _, n := range nodes { @@ -109,106 +198,148 @@ func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) { return result } -// typeSelector returns a Selector that matches elements with a given tag name. -func typeSelector(tag string) Selector { - tag = toLowerASCII(tag) - return func(n *html.Node) bool { - return n.Type == html.ElementNode && n.Data == tag +// Filter returns the nodes that match m. +func Filter(nodes []*html.Node, m Matcher) (result []*html.Node) { + for _, n := range nodes { + if m.Match(n) { + result = append(result, n) + } } + return result } -// toLowerASCII returns s with all ASCII capital letters lowercased. -func toLowerASCII(s string) string { - var b []byte - for i := 0; i < len(s); i++ { - if c := s[i]; 'A' <= c && c <= 'Z' { - if b == nil { - b = make([]byte, len(s)) - copy(b, s) - } - b[i] = s[i] + ('a' - 'A') - } - } +type tagSelector struct { + tag string +} - if b == nil { - return s - } +// Matches elements with a given tag name. +func (t tagSelector) Match(n *html.Node) bool { + return n.Type == html.ElementNode && n.Data == t.tag +} - return string(b) +func (c tagSelector) Specificity() Specificity { + return Specificity{0, 0, 1} } -// attributeSelector returns a Selector that matches elements -// where the attribute named key satisifes the function f. -func attributeSelector(key string, f func(string) bool) Selector { - key = toLowerASCII(key) - return func(n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } - for _, a := range n.Attr { - if a.Key == key && f(a.Val) { - return true - } - } - return false - } +func (c tagSelector) PseudoElement() string { + return "" } -// attributeExistsSelector returns a Selector that matches elements that have -// an attribute named key. -func attributeExistsSelector(key string) Selector { - return attributeSelector(key, func(string) bool { return true }) +type classSelector struct { + class string } -// attributeEqualsSelector returns a Selector that matches elements where -// the attribute named key has the value val. -func attributeEqualsSelector(key, val string) Selector { - return attributeSelector(key, - func(s string) bool { - return s == val - }) +// Matches elements by class attribute. +func (t classSelector) Match(n *html.Node) bool { + return matchAttribute(n, "class", func(s string) bool { + return matchInclude(t.class, s) + }) +} + +func (c classSelector) Specificity() Specificity { + return Specificity{0, 1, 0} +} + +func (c classSelector) PseudoElement() string { + return "" +} + +type idSelector struct { + id string +} + +// Matches elements by id attribute. +func (t idSelector) Match(n *html.Node) bool { + return matchAttribute(n, "id", func(s string) bool { + return s == t.id + }) +} + +func (c idSelector) Specificity() Specificity { + return Specificity{1, 0, 0} +} + +func (c idSelector) PseudoElement() string { + return "" +} + +type attrSelector struct { + key, val, operation string + regexp *regexp.Regexp +} + +// Matches elements by attribute value. +func (t attrSelector) Match(n *html.Node) bool { + switch t.operation { + case "": + return matchAttribute(n, t.key, func(string) bool { return true }) + case "=": + return matchAttribute(n, t.key, func(s string) bool { return s == t.val }) + case "!=": + return attributeNotEqualMatch(t.key, t.val, n) + case "~=": + // matches elements where the attribute named key is a whitespace-separated list that includes val. + return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s) }) + case "|=": + return attributeDashMatch(t.key, t.val, n) + case "^=": + return attributePrefixMatch(t.key, t.val, n) + case "$=": + return attributeSuffixMatch(t.key, t.val, n) + case "*=": + return attributeSubstringMatch(t.key, t.val, n) + case "#=": + return attributeRegexMatch(t.key, t.regexp, n) + default: + panic(fmt.Sprintf("unsuported operation : %s", t.operation)) + } } -// attributeNotEqualSelector returns a Selector that matches elements where +// matches elements where the attribute named key satisifes the function f. +func matchAttribute(n *html.Node, key string, f func(string) bool) bool { + if n.Type != html.ElementNode { + return false + } + for _, a := range n.Attr { + if a.Key == key && f(a.Val) { + return true + } + } + return false +} + +// attributeNotEqualMatch matches elements where // the attribute named key does not have the value val. -func attributeNotEqualSelector(key, val string) Selector { - key = toLowerASCII(key) - return func(n *html.Node) bool { - if n.Type != html.ElementNode { +func attributeNotEqualMatch(key, val string, n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + for _, a := range n.Attr { + if a.Key == key && a.Val == val { return false } - for _, a := range n.Attr { - if a.Key == key && a.Val == val { - return false - } - } - return true } + return true } -// attributeIncludesSelector returns a Selector that matches elements where -// the attribute named key is a whitespace-separated list that includes val. -func attributeIncludesSelector(key, val string) Selector { - return attributeSelector(key, - func(s string) bool { - for s != "" { - i := strings.IndexAny(s, " \t\r\n\f") - if i == -1 { - return s == val - } - if s[:i] == val { - return true - } - s = s[i+1:] - } - return false - }) +// returns true if s is a whitespace-separated list that includes val. +func matchInclude(val, s string) bool { + for s != "" { + i := strings.IndexAny(s, " \t\r\n\f") + if i == -1 { + return s == val + } + if s[:i] == val { + return true + } + s = s[i+1:] + } + return false } -// attributeDashmatchSelector returns a Selector that matches elements where -// the attribute named key equals val or starts with val plus a hyphen. -func attributeDashmatchSelector(key, val string) Selector { - return attributeSelector(key, +// matches elements where the attribute named key equals val or starts with val plus a hyphen. +func attributeDashMatch(key, val string, n *html.Node) bool { + return matchAttribute(n, key, func(s string) bool { if s == val { return true @@ -223,10 +354,10 @@ func attributeDashmatchSelector(key, val string) Selector { }) } -// attributePrefixSelector returns a Selector that matches elements where +// attributePrefixMatch returns a Selector that matches elements where // the attribute named key starts with val. -func attributePrefixSelector(key, val string) Selector { - return attributeSelector(key, +func attributePrefixMatch(key, val string, n *html.Node) bool { + return matchAttribute(n, key, func(s string) bool { if strings.TrimSpace(s) == "" { return false @@ -235,10 +366,10 @@ func attributePrefixSelector(key, val string) Selector { }) } -// attributeSuffixSelector returns a Selector that matches elements where +// attributeSuffixMatch matches elements where // the attribute named key ends with val. -func attributeSuffixSelector(key, val string) Selector { - return attributeSelector(key, +func attributeSuffixMatch(key, val string, n *html.Node) bool { + return matchAttribute(n, key, func(s string) bool { if strings.TrimSpace(s) == "" { return false @@ -247,10 +378,10 @@ func attributeSuffixSelector(key, val string) Selector { }) } -// attributeSubstringSelector returns a Selector that matches nodes where +// attributeSubstringMatch matches nodes where // the attribute named key contains val. -func attributeSubstringSelector(key, val string) Selector { - return attributeSelector(key, +func attributeSubstringMatch(key, val string, n *html.Node) bool { + return matchAttribute(n, key, func(s string) bool { if strings.TrimSpace(s) == "" { return false @@ -259,39 +390,130 @@ func attributeSubstringSelector(key, val string) Selector { }) } -// attributeRegexSelector returns a Selector that matches nodes where +// attributeRegexMatch matches nodes where // the attribute named key matches the regular expression rx -func attributeRegexSelector(key string, rx *regexp.Regexp) Selector { - return attributeSelector(key, +func attributeRegexMatch(key string, rx *regexp.Regexp, n *html.Node) bool { + return matchAttribute(n, key, func(s string) bool { return rx.MatchString(s) }) } -// intersectionSelector returns a selector that matches nodes that match -// both a and b. -func intersectionSelector(a, b Selector) Selector { - return func(n *html.Node) bool { - return a(n) && b(n) +func (c attrSelector) Specificity() Specificity { + return Specificity{0, 1, 0} +} + +func (c attrSelector) PseudoElement() string { + return "" +} + +// ---------------- Pseudo class selectors ---------------- +// we use severals concrete types of pseudo-class selectors + +type relativePseudoClassSelector struct { + name string // one of "not", "has", "haschild" + match SelectorGroup +} + +func (s relativePseudoClassSelector) Match(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + switch s.name { + case "not": + // matches elements that do not match a. + return !s.match.Match(n) + case "has": + // matches elements with any descendant that matches a. + return hasDescendantMatch(n, s.match) + case "haschild": + // matches elements with a child that matches a. + return hasChildMatch(n, s.match) + default: + panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name)) } } -// unionSelector returns a selector that matches elements that match -// either a or b. -func unionSelector(a, b Selector) Selector { - return func(n *html.Node) bool { - return a(n) || b(n) +// hasChildMatch returns whether n has any child that matches a. +func hasChildMatch(n *html.Node, a Matcher) bool { + for c := n.FirstChild; c != nil; c = c.NextSibling { + if a.Match(c) { + return true + } } + return false } -// negatedSelector returns a selector that matches elements that do not match a. -func negatedSelector(a Selector) Selector { - return func(n *html.Node) bool { - if n.Type != html.ElementNode { - return false +// hasDescendantMatch performs a depth-first search of n's descendants, +// testing whether any of them match a. It returns true as soon as a match is +// found, or false if no match is found. +func hasDescendantMatch(n *html.Node, a Matcher) bool { + for c := n.FirstChild; c != nil; c = c.NextSibling { + if a.Match(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) { + return true } - return !a(n) } + return false +} + +// Specificity returns the specificity of the most specific selectors +// in the pseudo-class arguments. +// See https://www.w3.org/TR/selectors/#specificity-rules +func (s relativePseudoClassSelector) Specificity() Specificity { + var max Specificity + for _, sel := range s.match { + newSpe := sel.Specificity() + if max.Less(newSpe) { + max = newSpe + } + } + return max +} + +func (c relativePseudoClassSelector) PseudoElement() string { + return "" +} + +type containsPseudoClassSelector struct { + own bool + value string +} + +func (s containsPseudoClassSelector) Match(n *html.Node) bool { + var text string + if s.own { + // matches nodes that directly contain the given text + text = strings.ToLower(nodeOwnText(n)) + } else { + // matches nodes that contain the given text. + text = strings.ToLower(nodeText(n)) + } + return strings.Contains(text, s.value) +} + +func (s containsPseudoClassSelector) Specificity() Specificity { + return Specificity{0, 1, 0} +} + +func (c containsPseudoClassSelector) PseudoElement() string { + return "" +} + +type regexpPseudoClassSelector struct { + own bool + regexp *regexp.Regexp +} + +func (s regexpPseudoClassSelector) Match(n *html.Node) bool { + var text string + if s.own { + // matches nodes whose text directly matches the specified regular expression + text = nodeOwnText(n) + } else { + // matches nodes whose text matches the specified regular expression + text = nodeText(n) + } + return s.regexp.MatchString(text) } // writeNodeText writes the text contained in n and its descendants to b. @@ -325,221 +547,214 @@ func nodeOwnText(n *html.Node) string { return b.String() } -// textSubstrSelector returns a selector that matches nodes that -// contain the given text. -func textSubstrSelector(val string) Selector { - return func(n *html.Node) bool { - text := strings.ToLower(nodeText(n)) - return strings.Contains(text, val) - } -} - -// ownTextSubstrSelector returns a selector that matches nodes that -// directly contain the given text -func ownTextSubstrSelector(val string) Selector { - return func(n *html.Node) bool { - text := strings.ToLower(nodeOwnText(n)) - return strings.Contains(text, val) - } -} - -// textRegexSelector returns a selector that matches nodes whose text matches -// the specified regular expression -func textRegexSelector(rx *regexp.Regexp) Selector { - return func(n *html.Node) bool { - return rx.MatchString(nodeText(n)) - } +func (s regexpPseudoClassSelector) Specificity() Specificity { + return Specificity{0, 1, 0} } -// ownTextRegexSelector returns a selector that matches nodes whose text -// directly matches the specified regular expression -func ownTextRegexSelector(rx *regexp.Regexp) Selector { - return func(n *html.Node) bool { - return rx.MatchString(nodeOwnText(n)) - } +func (c regexpPseudoClassSelector) PseudoElement() string { + return "" } -// hasChildSelector returns a selector that matches elements -// with a child that matches a. -func hasChildSelector(a Selector) Selector { - return func(n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } - return hasChildMatch(n, a) - } +type nthPseudoClassSelector struct { + a, b int + last, ofType bool } -// hasDescendantSelector returns a selector that matches elements -// with any descendant that matches a. -func hasDescendantSelector(a Selector) Selector { - return func(n *html.Node) bool { - if n.Type != html.ElementNode { - return false +func (s nthPseudoClassSelector) Match(n *html.Node) bool { + if s.a == 0 { + if s.last { + return simpleNthLastChildMatch(s.b, s.ofType, n) + } else { + return simpleNthChildMatch(s.b, s.ofType, n) } - return hasDescendantMatch(n, a) } + return nthChildMatch(s.a, s.b, s.last, s.ofType, n) } -// nthChildSelector returns a selector that implements :nth-child(an+b). +// nthChildMatch implements :nth-child(an+b). // If last is true, implements :nth-last-child instead. // If ofType is true, implements :nth-of-type instead. -func nthChildSelector(a, b int, last, ofType bool) Selector { - return func(n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } +func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } - parent := n.Parent - if parent == nil { - return false - } + parent := n.Parent + if parent == nil { + return false + } - if parent.Type == html.DocumentNode { - return false - } + if parent.Type == html.DocumentNode { + return false + } - i := -1 - count := 0 - for c := parent.FirstChild; c != nil; c = c.NextSibling { - if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { - continue - } - count++ - if c == n { - i = count - if !last { - break - } - } + i := -1 + count := 0 + for c := parent.FirstChild; c != nil; c = c.NextSibling { + if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { + continue } - - if i == -1 { - // This shouldn't happen, since n should always be one of its parent's children. - return false + count++ + if c == n { + i = count + if !last { + break + } } + } - if last { - i = count - i + 1 - } + if i == -1 { + // This shouldn't happen, since n should always be one of its parent's children. + return false + } - i -= b - if a == 0 { - return i == 0 - } + if last { + i = count - i + 1 + } - return i%a == 0 && i/a >= 0 + i -= b + if a == 0 { + return i == 0 } + + return i%a == 0 && i/a >= 0 } -// simpleNthChildSelector returns a selector that implements :nth-child(b). +// simpleNthChildMatch implements :nth-child(b). // If ofType is true, implements :nth-of-type instead. -func simpleNthChildSelector(b int, ofType bool) Selector { - return func(n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } +func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } - parent := n.Parent - if parent == nil { - return false - } + parent := n.Parent + if parent == nil { + return false + } + + if parent.Type == html.DocumentNode { + return false + } - if parent.Type == html.DocumentNode { + count := 0 + for c := parent.FirstChild; c != nil; c = c.NextSibling { + if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { + continue + } + count++ + if c == n { + return count == b + } + if count >= b { return false } + } + return false +} - count := 0 - for c := parent.FirstChild; c != nil; c = c.NextSibling { - if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { - continue - } - count++ - if c == n { - return count == b - } - if count >= b { - return false - } - } +// simpleNthLastChildMatch implements :nth-last-child(b). +// If ofType is true, implements :nth-last-of-type instead. +func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool { + if n.Type != html.ElementNode { return false } -} -// simpleNthLastChildSelector returns a selector that implements -// :nth-last-child(b). If ofType is true, implements :nth-last-of-type -// instead. -func simpleNthLastChildSelector(b int, ofType bool) Selector { - return func(n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } + parent := n.Parent + if parent == nil { + return false + } - parent := n.Parent - if parent == nil { - return false - } + if parent.Type == html.DocumentNode { + return false + } - if parent.Type == html.DocumentNode { + count := 0 + for c := parent.LastChild; c != nil; c = c.PrevSibling { + if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { + continue + } + count++ + if c == n { + return count == b + } + if count >= b { return false } + } + return false +} - count := 0 - for c := parent.LastChild; c != nil; c = c.PrevSibling { - if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { - continue - } - count++ - if c == n { - return count == b - } - if count >= b { - return false - } - } +// Specificity for nth-child pseudo-class. +// Does not support a list of selectors +func (s nthPseudoClassSelector) Specificity() Specificity { + return Specificity{0, 1, 0} +} + +func (c nthPseudoClassSelector) PseudoElement() string { + return "" +} + +type onlyChildPseudoClassSelector struct { + ofType bool +} + +// Match implements :only-child. +// If `ofType` is true, it implements :only-of-type instead. +func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool { + if n.Type != html.ElementNode { return false } -} -// onlyChildSelector returns a selector that implements :only-child. -// If ofType is true, it implements :only-of-type instead. -func onlyChildSelector(ofType bool) Selector { - return func(n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } + parent := n.Parent + if parent == nil { + return false + } - parent := n.Parent - if parent == nil { - return false - } + if parent.Type == html.DocumentNode { + return false + } - if parent.Type == html.DocumentNode { + count := 0 + for c := parent.FirstChild; c != nil; c = c.NextSibling { + if (c.Type != html.ElementNode) || (s.ofType && c.Data != n.Data) { + continue + } + count++ + if count > 1 { return false } + } - count := 0 - for c := parent.FirstChild; c != nil; c = c.NextSibling { - if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { - continue - } - count++ - if count > 1 { - return false - } - } + return count == 1 +} - return count == 1 - } +func (s onlyChildPseudoClassSelector) Specificity() Specificity { + return Specificity{0, 1, 0} } -// inputSelector is a Selector that matches input, select, textarea and button elements. -func inputSelector(n *html.Node) bool { +func (c onlyChildPseudoClassSelector) PseudoElement() string { + return "" +} + +type inputPseudoClassSelector struct{} + +// Matches input, select, textarea and button elements. +func (s inputPseudoClassSelector) Match(n *html.Node) bool { return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button") } -// emptyElementSelector is a Selector that matches empty elements. -func emptyElementSelector(n *html.Node) bool { +func (s inputPseudoClassSelector) Specificity() Specificity { + return Specificity{0, 1, 0} +} + +func (c inputPseudoClassSelector) PseudoElement() string { + return "" +} + +type emptyElementPseudoClassSelector struct{} + +// Matches empty elements. +func (s emptyElementPseudoClassSelector) Match(n *html.Node) bool { if n.Type != html.ElementNode { return false } @@ -554,69 +769,170 @@ func emptyElementSelector(n *html.Node) bool { return true } -// descendantSelector returns a Selector that matches an element if -// it matches d and has an ancestor that matches a. -func descendantSelector(a, d Selector) Selector { - return func(n *html.Node) bool { - if !d(n) { +func (s emptyElementPseudoClassSelector) Specificity() Specificity { + return Specificity{0, 1, 0} +} + +func (c emptyElementPseudoClassSelector) PseudoElement() string { + return "" +} + +type rootPseudoClassSelector struct{} + +// Match implements :root +func (s rootPseudoClassSelector) Match(n *html.Node) bool { + if n.Type != html.ElementNode { + return false + } + if n.Parent == nil { + return false + } + return n.Parent.Type == html.DocumentNode +} + +func (s rootPseudoClassSelector) Specificity() Specificity { + return Specificity{0, 1, 0} +} + +func (c rootPseudoClassSelector) PseudoElement() string { + return "" +} + +type compoundSelector struct { + selectors []Sel + pseudoElement string +} + +// Matches elements if each sub-selectors matches. +func (t compoundSelector) Match(n *html.Node) bool { + if len(t.selectors) == 0 { + return n.Type == html.ElementNode + } + + for _, sel := range t.selectors { + if !sel.Match(n) { return false } + } + return true +} - for p := n.Parent; p != nil; p = p.Parent { - if a(p) { - return true - } - } +func (s compoundSelector) Specificity() Specificity { + var out Specificity + for _, sel := range s.selectors { + out = out.Add(sel.Specificity()) + } + if s.pseudoElement != "" { + // https://drafts.csswg.org/selectors-3/#specificity + out = out.Add(Specificity{0, 0, 1}) + } + return out +} - return false +func (c compoundSelector) PseudoElement() string { + return c.pseudoElement +} + +type combinedSelector struct { + first Sel + combinator byte + second Sel +} + +func (t combinedSelector) Match(n *html.Node) bool { + if t.first == nil { + return false // maybe we should panic + } + switch t.combinator { + case 0: + return t.first.Match(n) + case ' ': + return descendantMatch(t.first, t.second, n) + case '>': + return childMatch(t.first, t.second, n) + case '+': + return siblingMatch(t.first, t.second, true, n) + case '~': + return siblingMatch(t.first, t.second, false, n) + default: + panic("unknown combinator") } } -// childSelector returns a Selector that matches an element if -// it matches d and its parent matches a. -func childSelector(a, d Selector) Selector { - return func(n *html.Node) bool { - return d(n) && n.Parent != nil && a(n.Parent) +// matches an element if it matches d and has an ancestor that matches a. +func descendantMatch(a, d Matcher, n *html.Node) bool { + if !d.Match(n) { + return false } + + for p := n.Parent; p != nil; p = p.Parent { + if a.Match(p) { + return true + } + } + + return false } -// siblingSelector returns a Selector that matches an element -// if it matches s2 and in is preceded by an element that matches s1. +// matches an element if it matches d and its parent matches a. +func childMatch(a, d Matcher, n *html.Node) bool { + return d.Match(n) && n.Parent != nil && a.Match(n.Parent) +} + +// matches an element if it matches s2 and is preceded by an element that matches s1. // If adjacent is true, the sibling must be immediately before the element. -func siblingSelector(s1, s2 Selector, adjacent bool) Selector { - return func(n *html.Node) bool { - if !s2(n) { - return false - } +func siblingMatch(s1, s2 Matcher, adjacent bool, n *html.Node) bool { + if !s2.Match(n) { + return false + } - if adjacent { - for n = n.PrevSibling; n != nil; n = n.PrevSibling { - if n.Type == html.TextNode || n.Type == html.CommentNode { - continue - } - return s1(n) + if adjacent { + for n = n.PrevSibling; n != nil; n = n.PrevSibling { + if n.Type == html.TextNode || n.Type == html.CommentNode { + continue } - return false + return s1.Match(n) } + return false + } - // Walk backwards looking for element that matches s1 - for c := n.PrevSibling; c != nil; c = c.PrevSibling { - if s1(c) { - return true - } + // Walk backwards looking for element that matches s1 + for c := n.PrevSibling; c != nil; c = c.PrevSibling { + if s1.Match(c) { + return true } + } - return false + return false +} + +func (s combinedSelector) Specificity() Specificity { + spec := s.first.Specificity() + if s.second != nil { + spec = spec.Add(s.second.Specificity()) } + return spec } -// rootSelector implements :root -func rootSelector(n *html.Node) bool { - if n.Type != html.ElementNode { - return false +// on combinedSelector, a pseudo-element only makes sens on the last +// selector, although others increase specificity. +func (c combinedSelector) PseudoElement() string { + if c.second == nil { + return "" } - if n.Parent == nil { - return false + return c.second.PseudoElement() +} + +// A SelectorGroup is a list of selectors, which matches if any of the +// individual selectors matches. +type SelectorGroup []Sel + +// Match returns true if the node matches one of the single selectors. +func (s SelectorGroup) Match(n *html.Node) bool { + for _, sel := range s { + if sel.Match(n) { + return true + } } - return n.Parent.Type == html.DocumentNode + return false } diff --git a/vendor/github.com/andybalholm/cascadia/serialize.go b/vendor/github.com/andybalholm/cascadia/serialize.go new file mode 100644 index 0000000..f15b079 --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/serialize.go @@ -0,0 +1,120 @@ +package cascadia + +import ( + "fmt" + "strings" +) + +// implements the reverse operation Sel -> string + +func (c tagSelector) String() string { + return c.tag +} + +func (c idSelector) String() string { + return "#" + c.id +} + +func (c classSelector) String() string { + return "." + c.class +} + +func (c attrSelector) String() string { + val := c.val + if c.operation == "#=" { + val = c.regexp.String() + } else if c.operation != "" { + val = fmt.Sprintf(`"%s"`, val) + } + return fmt.Sprintf(`[%s%s%s]`, c.key, c.operation, val) +} + +func (c relativePseudoClassSelector) String() string { + return fmt.Sprintf(":%s(%s)", c.name, c.match.String()) +} +func (c containsPseudoClassSelector) String() string { + s := "contains" + if c.own { + s += "Own" + } + return fmt.Sprintf(`:%s("%s")`, s, c.value) +} +func (c regexpPseudoClassSelector) String() string { + s := "matches" + if c.own { + s += "Own" + } + return fmt.Sprintf(":%s(%s)", s, c.regexp.String()) +} +func (c nthPseudoClassSelector) String() string { + if c.a == 0 && c.b == 1 { // special cases + s := ":first-" + if c.last { + s = ":last-" + } + if c.ofType { + s += "of-type" + } else { + s += "child" + } + return s + } + var name string + switch [2]bool{c.last, c.ofType} { + case [2]bool{true, true}: + name = "nth-last-of-type" + case [2]bool{true, false}: + name = "nth-last-child" + case [2]bool{false, true}: + name = "nth-of-type" + case [2]bool{false, false}: + name = "nth-child" + } + return fmt.Sprintf(":%s(%dn+%d)", name, c.a, c.b) +} +func (c onlyChildPseudoClassSelector) String() string { + if c.ofType { + return ":only-of-type" + } + return ":only-child" +} +func (c inputPseudoClassSelector) String() string { + return ":input" +} +func (c emptyElementPseudoClassSelector) String() string { + return ":empty" +} +func (c rootPseudoClassSelector) String() string { + return ":root" +} + +func (c compoundSelector) String() string { + if len(c.selectors) == 0 && c.pseudoElement == "" { + return "*" + } + chunks := make([]string, len(c.selectors)) + for i, sel := range c.selectors { + chunks[i] = sel.String() + } + s := strings.Join(chunks, "") + if c.pseudoElement != "" { + s += "::" + c.pseudoElement + } + return s +} + +func (c combinedSelector) String() string { + start := c.first.String() + if c.second != nil { + start += fmt.Sprintf(" %s %s", string(c.combinator), c.second.String()) + } + return start +} + +func (c SelectorGroup) String() string { + ck := make([]string, len(c)) + for i, s := range c { + ck[i] = s.String() + } + return strings.Join(ck, ", ") +} diff --git a/vendor/github.com/andybalholm/cascadia/specificity.go b/vendor/github.com/andybalholm/cascadia/specificity.go new file mode 100644 index 0000000..8db864f --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/specificity.go @@ -0,0 +1,26 @@ +package cascadia + +// Specificity is the CSS specificity as defined in +// https://www.w3.org/TR/selectors/#specificity-rules +// with the convention Specificity = [A,B,C]. +type Specificity [3]int + +// returns `true` if s < other (strictly), false otherwise +func (s Specificity) Less(other Specificity) bool { + for i := range s { + if s[i] < other[i] { + return true + } + if s[i] > other[i] { + return false + } + } + return false +} + +func (s Specificity) Add(other Specificity) Specificity { + for i, sp := range other { + s[i] += sp + } + return s +} diff --git a/vendor/golang.org/x/net/html/const.go b/vendor/golang.org/x/net/html/const.go index a3a918f..ff7acf2 100644 --- a/vendor/golang.org/x/net/html/const.go +++ b/vendor/golang.org/x/net/html/const.go @@ -52,8 +52,7 @@ var isSpecialElementMap = map[string]bool{ "iframe": true, "img": true, "input": true, - "isindex": true, // The 'isindex' element has been removed, but keep it for backwards compatibility. - "keygen": true, + "keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility. "li": true, "link": true, "listing": true, diff --git a/vendor/golang.org/x/net/html/foreign.go b/vendor/golang.org/x/net/html/foreign.go index 01477a9..9da9e9d 100644 --- a/vendor/golang.org/x/net/html/foreign.go +++ b/vendor/golang.org/x/net/html/foreign.go @@ -161,66 +161,62 @@ var mathMLAttributeAdjustments = map[string]string{ } var svgAttributeAdjustments = map[string]string{ - "attributename": "attributeName", - "attributetype": "attributeType", - "basefrequency": "baseFrequency", - "baseprofile": "baseProfile", - "calcmode": "calcMode", - "clippathunits": "clipPathUnits", - "contentscripttype": "contentScriptType", - "contentstyletype": "contentStyleType", - "diffuseconstant": "diffuseConstant", - "edgemode": "edgeMode", - "externalresourcesrequired": "externalResourcesRequired", - "filterres": "filterRes", - "filterunits": "filterUnits", - "glyphref": "glyphRef", - "gradienttransform": "gradientTransform", - "gradientunits": "gradientUnits", - "kernelmatrix": "kernelMatrix", - "kernelunitlength": "kernelUnitLength", - "keypoints": "keyPoints", - "keysplines": "keySplines", - "keytimes": "keyTimes", - "lengthadjust": "lengthAdjust", - "limitingconeangle": "limitingConeAngle", - "markerheight": "markerHeight", - "markerunits": "markerUnits", - "markerwidth": "markerWidth", - "maskcontentunits": "maskContentUnits", - "maskunits": "maskUnits", - "numoctaves": "numOctaves", - "pathlength": "pathLength", - "patterncontentunits": "patternContentUnits", - "patterntransform": "patternTransform", - "patternunits": "patternUnits", - "pointsatx": "pointsAtX", - "pointsaty": "pointsAtY", - "pointsatz": "pointsAtZ", - "preservealpha": "preserveAlpha", - "preserveaspectratio": "preserveAspectRatio", - "primitiveunits": "primitiveUnits", - "refx": "refX", - "refy": "refY", - "repeatcount": "repeatCount", - "repeatdur": "repeatDur", - "requiredextensions": "requiredExtensions", - "requiredfeatures": "requiredFeatures", - "specularconstant": "specularConstant", - "specularexponent": "specularExponent", - "spreadmethod": "spreadMethod", - "startoffset": "startOffset", - "stddeviation": "stdDeviation", - "stitchtiles": "stitchTiles", - "surfacescale": "surfaceScale", - "systemlanguage": "systemLanguage", - "tablevalues": "tableValues", - "targetx": "targetX", - "targety": "targetY", - "textlength": "textLength", - "viewbox": "viewBox", - "viewtarget": "viewTarget", - "xchannelselector": "xChannelSelector", - "ychannelselector": "yChannelSelector", - "zoomandpan": "zoomAndPan", + "attributename": "attributeName", + "attributetype": "attributeType", + "basefrequency": "baseFrequency", + "baseprofile": "baseProfile", + "calcmode": "calcMode", + "clippathunits": "clipPathUnits", + "diffuseconstant": "diffuseConstant", + "edgemode": "edgeMode", + "filterunits": "filterUnits", + "glyphref": "glyphRef", + "gradienttransform": "gradientTransform", + "gradientunits": "gradientUnits", + "kernelmatrix": "kernelMatrix", + "kernelunitlength": "kernelUnitLength", + "keypoints": "keyPoints", + "keysplines": "keySplines", + "keytimes": "keyTimes", + "lengthadjust": "lengthAdjust", + "limitingconeangle": "limitingConeAngle", + "markerheight": "markerHeight", + "markerunits": "markerUnits", + "markerwidth": "markerWidth", + "maskcontentunits": "maskContentUnits", + "maskunits": "maskUnits", + "numoctaves": "numOctaves", + "pathlength": "pathLength", + "patterncontentunits": "patternContentUnits", + "patterntransform": "patternTransform", + "patternunits": "patternUnits", + "pointsatx": "pointsAtX", + "pointsaty": "pointsAtY", + "pointsatz": "pointsAtZ", + "preservealpha": "preserveAlpha", + "preserveaspectratio": "preserveAspectRatio", + "primitiveunits": "primitiveUnits", + "refx": "refX", + "refy": "refY", + "repeatcount": "repeatCount", + "repeatdur": "repeatDur", + "requiredextensions": "requiredExtensions", + "requiredfeatures": "requiredFeatures", + "specularconstant": "specularConstant", + "specularexponent": "specularExponent", + "spreadmethod": "spreadMethod", + "startoffset": "startOffset", + "stddeviation": "stdDeviation", + "stitchtiles": "stitchTiles", + "surfacescale": "surfaceScale", + "systemlanguage": "systemLanguage", + "tablevalues": "tableValues", + "targetx": "targetX", + "targety": "targetY", + "textlength": "textLength", + "viewbox": "viewBox", + "viewtarget": "viewTarget", + "xchannelselector": "xChannelSelector", + "ychannelselector": "yChannelSelector", + "zoomandpan": "zoomAndPan", } diff --git a/vendor/golang.org/x/net/html/node.go b/vendor/golang.org/x/net/html/node.go index 633ee15..1350eef 100644 --- a/vendor/golang.org/x/net/html/node.go +++ b/vendor/golang.org/x/net/html/node.go @@ -18,6 +18,11 @@ const ( ElementNode CommentNode DoctypeNode + // RawNode nodes are not returned by the parser, but can be part of the + // Node tree passed to func Render to insert raw HTML (without escaping). + // If so, this package makes no guarantee that the rendered HTML is secure + // (from e.g. Cross Site Scripting attacks) or well-formed. + RawNode scopeMarkerNode ) diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go index 992cff2..038941d 100644 --- a/vendor/golang.org/x/net/html/parse.go +++ b/vendor/golang.org/x/net/html/parse.go @@ -184,6 +184,17 @@ func (p *parser) clearStackToContext(s scope) { } } +// parseGenericRawTextElements implements the generic raw text element parsing +// algorithm defined in 12.2.6.2. +// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text +// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part +// officially, need to make tokenizer consider both states. +func (p *parser) parseGenericRawTextElement() { + p.addElement() + p.originalIM = p.im + p.im = textIM +} + // generateImpliedEndTags pops nodes off the stack of open elements as long as // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. // If exceptions are specified, nodes with that name will not be popped off. @@ -192,16 +203,17 @@ func (p *parser) generateImpliedEndTags(exceptions ...string) { loop: for i = len(p.oe) - 1; i >= 0; i-- { n := p.oe[i] - if n.Type == ElementNode { - switch n.DataAtom { - case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: - for _, except := range exceptions { - if n.Data == except { - break loop - } + if n.Type != ElementNode { + break + } + switch n.DataAtom { + case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: + for _, except := range exceptions { + if n.Data == except { + break loop } - continue } + continue } break } @@ -369,8 +381,7 @@ findIdenticalElements: // Section 12.2.4.3. func (p *parser) clearActiveFormattingElements() { for { - n := p.afe.pop() - if len(p.afe) == 0 || n.Type == scopeMarkerNode { + if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode { return } } @@ -625,29 +636,51 @@ func inHeadIM(p *parser) bool { switch p.tok.DataAtom { case a.Html: return inBodyIM(p) - case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta: + case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta: p.addElement() p.oe.pop() p.acknowledgeSelfClosingTag() return true case a.Noscript: - p.addElement() if p.scripting { - p.setOriginalIM() - p.im = textIM - } else { - p.im = inHeadNoscriptIM + p.parseGenericRawTextElement() + return true } + p.addElement() + p.im = inHeadNoscriptIM + // Don't let the tokenizer go into raw text mode when scripting is disabled. + p.tokenizer.NextIsNotRawText() return true - case a.Script, a.Title, a.Noframes, a.Style: + case a.Script, a.Title: p.addElement() p.setOriginalIM() p.im = textIM return true + case a.Noframes, a.Style: + p.parseGenericRawTextElement() + return true case a.Head: // Ignore the token. return true case a.Template: + // TODO: remove this divergence from the HTML5 spec. + // + // We don't handle all of the corner cases when mixing foreign + // content (i.e. or ) with