diff options
Diffstat (limited to 'vendor/github.com/PuerkitoBio')
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/.travis.yml | 29 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/LICENSE | 2 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/README.md | 46 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/go.mod | 6 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/go.sum | 13 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/manipulation.go | 167 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/type.go | 64 | ||||
-rw-r--r-- | vendor/github.com/PuerkitoBio/goquery/utilities.go | 18 |
8 files changed, 280 insertions, 65 deletions
diff --git a/vendor/github.com/PuerkitoBio/goquery/.travis.yml b/vendor/github.com/PuerkitoBio/goquery/.travis.yml index cc1402d..8430c86 100644 --- a/vendor/github.com/PuerkitoBio/goquery/.travis.yml +++ b/vendor/github.com/PuerkitoBio/goquery/.travis.yml @@ -1,16 +1,31 @@ +arch: + - amd64 + - ppc64le language: go go: - - 1.1 - - 1.2.x - - 1.3.x - - 1.4.x - - 1.5.x - - 1.6.x - 1.7.x - 1.8.x - 1.9.x - - "1.10.x" + - 1.10.x - 1.11.x + - 1.12.x + - 1.13.x + - 1.14.x + - 1.15.x - tip +jobs: + exclude: + - arch: ppc64le + go: 1.7.x + - arch: ppc64le + go: 1.8.x + - arch: ppc64le + go: 1.9.x + - arch: ppc64le + go: 1.10.x + - arch: ppc64le + go: 1.11.x + - arch: ppc64le + go: 1.12.x diff --git a/vendor/github.com/PuerkitoBio/goquery/LICENSE b/vendor/github.com/PuerkitoBio/goquery/LICENSE index f743d37..25372c2 100644 --- a/vendor/github.com/PuerkitoBio/goquery/LICENSE +++ b/vendor/github.com/PuerkitoBio/goquery/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2012-2016, Martin Angers & Contributors +Copyright (c) 2012-2021, Martin Angers & Contributors All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/vendor/github.com/PuerkitoBio/goquery/README.md b/vendor/github.com/PuerkitoBio/goquery/README.md index 84f9af3..6bb185c 100644 --- a/vendor/github.com/PuerkitoBio/goquery/README.md +++ b/vendor/github.com/PuerkitoBio/goquery/README.md @@ -1,5 +1,9 @@ # goquery - a little like that j-thing, only in Go -[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge) + +[![builds.sr.ht status](https://builds.sr.ht/~mna/goquery/commits/fedora.yml.svg)](https://builds.sr.ht/~mna/goquery/commits/fedora.yml?) +[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) +[![Go Reference](https://pkg.go.dev/badge/github.com/PuerkitoBio/goquery.svg)](https://pkg.go.dev/github.com/PuerkitoBio/goquery) +[![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge) goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off. @@ -19,7 +23,7 @@ Syntax-wise, it is as close as possible to jQuery, with the same function names ## Installation -Please note that because of the net/html dependency, goquery requires Go1.1+. +Please note that because of the net/html dependency, goquery requires Go1.1+ and is tested on Go1.7+. $ go get github.com/PuerkitoBio/goquery @@ -37,6 +41,11 @@ Please note that because of the net/html dependency, goquery requires Go1.1+. **Note that goquery's API is now stable, and will not break.** +* **2021-07-11 (v1.7.1)** : Update go.mod dependencies and add dependabot config (thanks [@jauderho](https://github.com/jauderho)). +* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)). +* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes. +* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this. +* **2020-02-04 (v1.5.1)** : Update module dependencies. * **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505). * **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples. * **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`. @@ -47,7 +56,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+. * **2016-08-28 (v1.0.1)** : Optimize performance for large documents. * **2016-07-27 (v1.0.0)** : Tag version 1.0.0. * **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object. -* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see godoc for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`). +* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see [doc][] for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`). * **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr]. * **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone]. * **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone]. @@ -76,7 +85,7 @@ jQuery often has many variants for the same function (no argument, a selector st Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour). -The complete [godoc reference documentation can be found here][doc]. +The complete [package reference documentation can be found here][doc]. Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string): @@ -120,12 +129,11 @@ func ExampleScrape() { } // Find the review items - doc.Find(".sidebar-reviews article .content-block").Each(func(i int, s *goquery.Selection) { - // For each item found, get the band and title - band := s.Find("a").Text() - title := s.Find("i").Text() - fmt.Printf("Review %d: %s - %s\n", i, band, title) - }) + doc.Find(".left-content article .post-title").Each(func(i int, s *goquery.Selection) { + // For each item found, get the title + title := s.Find("a").Text() + fmt.Printf("Review %d: %s\n", i, title) + }) } func main() { @@ -138,9 +146,14 @@ func main() { - [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags. - [andybalholm/cascadia][cascadia], the CSS selector library used by goquery. - [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors. -- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework +- [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework - [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets. - [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping. +- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins +- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers. +- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering. +- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags. +- [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates. ## Support @@ -153,8 +166,9 @@ There are a number of ways you can support the project: * Pull requests: please discuss new code in an issue first, unless the fix is really trivial. - Make sure new code is tested. - Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue. - -If you desperately want to send money my way, I have a BuyMeACoffee.com page: +* Sponsor the developer + - See the Github Sponsor button at the top of the repo on github + - or via BuyMeACoffee.com, below <a href="https://www.buymeacoffee.com/mna" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a> @@ -169,11 +183,13 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia' [bsd]: http://opensource.org/licenses/BSD-3-Clause [golic]: http://golang.org/LICENSE [caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE -[doc]: http://godoc.org/github.com/PuerkitoBio/goquery +[doc]: https://pkg.go.dev/github.com/PuerkitoBio/goquery [index]: http://api.jquery.com/index/ [gonet]: https://github.com/golang/net/ -[html]: http://godoc.org/golang.org/x/net/html +[html]: https://pkg.go.dev/golang.org/x/net/html [wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks [thatguystone]: https://github.com/thatguystone [piotr]: https://github.com/piotrkowalczuk [goq]: https://github.com/andrewstuart/goq +[thiemok]: https://github.com/thiemok +[djw]: https://github.com/davidjwilkins diff --git a/vendor/github.com/PuerkitoBio/goquery/go.mod b/vendor/github.com/PuerkitoBio/goquery/go.mod index 2fa1332..3af3b15 100644 --- a/vendor/github.com/PuerkitoBio/goquery/go.mod +++ b/vendor/github.com/PuerkitoBio/goquery/go.mod @@ -1,6 +1,8 @@ module github.com/PuerkitoBio/goquery require ( - github.com/andybalholm/cascadia v1.0.0 - golang.org/x/net v0.0.0-20181114220301-adae6a3d119a + github.com/andybalholm/cascadia v1.2.0 + golang.org/x/net v0.0.0-20210614182718-04defd469f4e ) + +go 1.13 diff --git a/vendor/github.com/PuerkitoBio/goquery/go.sum b/vendor/github.com/PuerkitoBio/goquery/go.sum index 11c5757..13b128d 100644 --- a/vendor/github.com/PuerkitoBio/goquery/go.sum +++ b/vendor/github.com/PuerkitoBio/goquery/go.sum @@ -1,5 +1,10 @@ -github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= -github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE= +github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/vendor/github.com/PuerkitoBio/goquery/manipulation.go b/vendor/github.com/PuerkitoBio/goquery/manipulation.go index 34eb757..35febf1 100644 --- a/vendor/github.com/PuerkitoBio/goquery/manipulation.go +++ b/vendor/github.com/PuerkitoBio/goquery/manipulation.go @@ -39,8 +39,15 @@ func (s *Selection) AfterSelection(sel *Selection) *Selection { // AfterHtml parses the html and inserts it after the set of matched elements. // // This follows the same rules as Selection.Append. -func (s *Selection) AfterHtml(html string) *Selection { - return s.AfterNodes(parseHtml(html)...) +func (s *Selection) AfterHtml(htmlStr string) *Selection { + return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { + nextSibling := node.NextSibling + for _, n := range nodes { + if node.Parent != nil { + node.Parent.InsertBefore(n, nextSibling) + } + } + }) } // AfterNodes inserts the nodes after each element in the set of matched elements. @@ -85,8 +92,12 @@ func (s *Selection) AppendSelection(sel *Selection) *Selection { } // AppendHtml parses the html and appends it to the set of matched elements. -func (s *Selection) AppendHtml(html string) *Selection { - return s.AppendNodes(parseHtml(html)...) +func (s *Selection) AppendHtml(htmlStr string) *Selection { + return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { + for _, n := range nodes { + node.AppendChild(n) + } + }) } // AppendNodes appends the specified nodes to each node in the set of matched elements. @@ -123,8 +134,14 @@ func (s *Selection) BeforeSelection(sel *Selection) *Selection { // BeforeHtml parses the html and inserts it before the set of matched elements. // // This follows the same rules as Selection.Append. -func (s *Selection) BeforeHtml(html string) *Selection { - return s.BeforeNodes(parseHtml(html)...) +func (s *Selection) BeforeHtml(htmlStr string) *Selection { + return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { + for _, n := range nodes { + if node.Parent != nil { + node.Parent.InsertBefore(n, node) + } + } + }) } // BeforeNodes inserts the nodes before each element in the set of matched elements. @@ -184,8 +201,13 @@ func (s *Selection) PrependSelection(sel *Selection) *Selection { } // PrependHtml parses the html and prepends it to the set of matched elements. -func (s *Selection) PrependHtml(html string) *Selection { - return s.PrependNodes(parseHtml(html)...) +func (s *Selection) PrependHtml(htmlStr string) *Selection { + return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { + firstChild := node.FirstChild + for _, n := range nodes { + node.InsertBefore(n, firstChild) + } + }) } // PrependNodes prepends the specified nodes to each node in the set of @@ -212,14 +234,19 @@ func (s *Selection) Remove() *Selection { return s } -// RemoveFiltered removes the set of matched elements by selector. -// It returns the Selection of removed nodes. +// RemoveFiltered removes from the current set of matched elements those that +// match the selector filter. It returns the Selection of removed nodes. +// +// For example if the selection s contains "<h1>", "<h2>" and "<h3>" +// and s.RemoveFiltered("h2") is called, only the "<h2>" node is removed +// (and returned), while "<h1>" and "<h3>" are kept in the document. func (s *Selection) RemoveFiltered(selector string) *Selection { return s.RemoveMatcher(compileMatcher(selector)) } -// RemoveMatcher removes the set of matched elements. -// It returns the Selection of removed nodes. +// RemoveMatcher removes from the current set of matched elements those that +// match the Matcher filter. It returns the Selection of removed nodes. +// See RemoveFiltered for additional information. func (s *Selection) RemoveMatcher(m Matcher) *Selection { return s.FilterMatcher(m).Remove() } @@ -256,8 +283,16 @@ func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection { // It returns the removed elements. // // This follows the same rules as Selection.Append. -func (s *Selection) ReplaceWithHtml(html string) *Selection { - return s.ReplaceWithNodes(parseHtml(html)...) +func (s *Selection) ReplaceWithHtml(htmlStr string) *Selection { + s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { + nextSibling := node.NextSibling + for _, n := range nodes { + if node.Parent != nil { + node.Parent.InsertBefore(n, nextSibling) + } + } + }) + return s.Remove() } // ReplaceWithNodes replaces each element in the set of matched elements with @@ -272,8 +307,17 @@ func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection { // SetHtml sets the html content of each element in the selection to // specified html string. -func (s *Selection) SetHtml(html string) *Selection { - return setHtmlNodes(s, parseHtml(html)...) +func (s *Selection) SetHtml(htmlStr string) *Selection { + for _, context := range s.Nodes { + for c := context.FirstChild; c != nil; c = context.FirstChild { + context.RemoveChild(c) + } + } + return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { + for _, n := range nodes { + node.AppendChild(n) + } + }) } // SetText sets the content of each element in the selection to specified content. @@ -329,8 +373,23 @@ func (s *Selection) WrapSelection(sel *Selection) *Selection { // most child of the given HTML. // // It returns the original set of elements. -func (s *Selection) WrapHtml(html string) *Selection { - return s.wrapNodes(parseHtml(html)...) +func (s *Selection) WrapHtml(htmlStr string) *Selection { + nodesMap := make(map[string][]*html.Node) + for _, context := range s.Nodes { + var parent *html.Node + if context.Parent != nil { + parent = context.Parent + } else { + parent = &html.Node{Type: html.ElementNode} + } + nodes, found := nodesMap[nodeName(parent)] + if !found { + nodes = parseHtmlWithContext(htmlStr, parent) + nodesMap[nodeName(parent)] = nodes + } + newSingleSelection(context, s.document).wrapAllNodes(cloneNodes(nodes)...) + } + return s } // WrapNode wraps each element in the set of matched elements inside the inner- @@ -382,8 +441,18 @@ func (s *Selection) WrapAllSelection(sel *Selection) *Selection { // document. // // It returns the original set of elements. -func (s *Selection) WrapAllHtml(html string) *Selection { - return s.wrapAllNodes(parseHtml(html)...) +func (s *Selection) WrapAllHtml(htmlStr string) *Selection { + var context *html.Node + var nodes []*html.Node + if len(s.Nodes) > 0 { + context = s.Nodes[0] + if context.Parent != nil { + nodes = parseHtmlWithContext(htmlStr, context) + } else { + nodes = parseHtml(htmlStr) + } + } + return s.wrapAllNodes(nodes...) } func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection { @@ -452,8 +521,17 @@ func (s *Selection) WrapInnerSelection(sel *Selection) *Selection { // cloned before being inserted into the document. // // It returns the original set of elements. -func (s *Selection) WrapInnerHtml(html string) *Selection { - return s.wrapInnerNodes(parseHtml(html)...) +func (s *Selection) WrapInnerHtml(htmlStr string) *Selection { + nodesMap := make(map[string][]*html.Node) + for _, context := range s.Nodes { + nodes, found := nodesMap[nodeName(context)] + if !found { + nodes = parseHtmlWithContext(htmlStr, context) + nodesMap[nodeName(context)] = nodes + } + newSingleSelection(context, s.document).wrapInnerNodes(cloneNodes(nodes)...) + } + return s } // WrapInnerNode wraps an HTML structure, matched by the given selector, around @@ -493,16 +571,14 @@ func parseHtml(h string) []*html.Node { return nodes } -func setHtmlNodes(s *Selection, ns ...*html.Node) *Selection { - for _, n := range s.Nodes { - for c := n.FirstChild; c != nil; c = n.FirstChild { - n.RemoveChild(c) - } - for _, c := range ns { - n.AppendChild(cloneNode(c)) - } +func parseHtmlWithContext(h string, context *html.Node) []*html.Node { + // Errors are only returned when the io.Reader returns any error besides + // EOF, but strings.Reader never will + nodes, err := html.ParseFragment(strings.NewReader(h), context) + if err != nil { + panic("goquery: failed to parse HTML: " + err.Error()) } - return s + return nodes } // Get the first child that is an ElementNode @@ -572,3 +648,32 @@ func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool, return s } + +// eachNodeHtml parses the given html string and inserts the resulting nodes in the dom with the mergeFn. +// The parsed nodes are inserted for each element of the selection. +// isParent can be used to indicate that the elements of the selection should be treated as the parent for the parsed html. +// A cache is used to avoid parsing the html multiple times should the elements of the selection result in the same context. +func (s *Selection) eachNodeHtml(htmlStr string, isParent bool, mergeFn func(n *html.Node, nodes []*html.Node)) *Selection { + // cache to avoid parsing the html for the same context multiple times + nodeCache := make(map[string][]*html.Node) + var context *html.Node + for _, n := range s.Nodes { + if isParent { + context = n.Parent + } else { + if n.Type != html.ElementNode { + continue + } + context = n + } + if context != nil { + nodes, found := nodeCache[nodeName(context)] + if !found { + nodes = parseHtmlWithContext(htmlStr, context) + nodeCache[nodeName(context)] = nodes + } + mergeFn(n, cloneNodes(nodes)) + } + } + return s +} diff --git a/vendor/github.com/PuerkitoBio/goquery/type.go b/vendor/github.com/PuerkitoBio/goquery/type.go index 6ad51db..6646c14 100644 --- a/vendor/github.com/PuerkitoBio/goquery/type.go +++ b/vendor/github.com/PuerkitoBio/goquery/type.go @@ -7,7 +7,6 @@ import ( "net/url" "github.com/andybalholm/cascadia" - "golang.org/x/net/html" ) @@ -122,6 +121,45 @@ type Matcher interface { Filter([]*html.Node) []*html.Node } +// Single compiles a selector string to a Matcher that stops after the first +// match is found. +// +// By default, Selection.Find and other functions that accept a selector string +// to select nodes will use all matches corresponding to that selector. By +// using the Matcher returned by Single, at most the first match will be +// selected. +// +// For example, those two statements are semantically equivalent: +// +// sel1 := doc.Find("a").First() +// sel2 := doc.FindMatcher(goquery.Single("a")) +// +// The one using Single is optimized to be potentially much faster on large +// documents. +// +// Only the behaviour of the MatchAll method of the Matcher interface is +// altered compared to standard Matchers. This means that the single-selection +// property of the Matcher only applies for Selection methods where the Matcher +// is used to select nodes, not to filter or check if a node matches the +// Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g. +// FilterMatcher(Single("div")) will still result in a Selection with multiple +// "div"s if there were many "div"s in the Selection to begin with). +func Single(selector string) Matcher { + return singleMatcher{compileMatcher(selector)} +} + +// SingleMatcher returns a Matcher matches the same nodes as m, but that stops +// after the first match is found. +// +// See the documentation of function Single for more details. +func SingleMatcher(m Matcher) Matcher { + if _, ok := m.(singleMatcher); ok { + // m is already a singleMatcher + return m + } + return singleMatcher{m} +} + // compileMatcher compiles the selector string s and returns // the corresponding Matcher. If s is an invalid selector string, // it returns a Matcher that fails all matches. @@ -133,6 +171,30 @@ func compileMatcher(s string) Matcher { return cs } +type singleMatcher struct { + Matcher +} + +func (m singleMatcher) MatchAll(n *html.Node) []*html.Node { + // Optimized version - stops finding at the first match (cascadia-compiled + // matchers all use this code path). + if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok { + node := mm.MatchFirst(n) + if node == nil { + return nil + } + return []*html.Node{node} + } + + // Fallback version, for e.g. test mocks that don't provide the MatchFirst + // method. + nodes := m.Matcher.MatchAll(n) + if len(nodes) > 0 { + return nodes[:1:1] + } + return nil +} + // invalidMatcher is a Matcher that always fails to match. type invalidMatcher struct{} diff --git a/vendor/github.com/PuerkitoBio/goquery/utilities.go b/vendor/github.com/PuerkitoBio/goquery/utilities.go index b4c061a..3e11b1d 100644 --- a/vendor/github.com/PuerkitoBio/goquery/utilities.go +++ b/vendor/github.com/PuerkitoBio/goquery/utilities.go @@ -36,12 +36,22 @@ func NodeName(s *Selection) string { if s.Length() == 0 { return "" } - switch n := s.Get(0); n.Type { + return nodeName(s.Get(0)) +} + +// nodeName returns the node name of the given html node. +// See NodeName for additional details on behaviour. +func nodeName(node *html.Node) string { + if node == nil { + return "" + } + + switch node.Type { case html.ElementNode, html.DoctypeNode: - return n.Data + return node.Data default: - if n.Type >= 0 && int(n.Type) < len(nodeNames) { - return nodeNames[n.Type] + if node.Type >= 0 && int(node.Type) < len(nodeNames) { + return nodeNames[node.Type] } return "" } |