aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/PuerkitoBio/goquery
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/PuerkitoBio/goquery')
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/.travis.yml29
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/LICENSE2
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/README.md46
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/go.mod6
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/go.sum13
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/manipulation.go167
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/type.go64
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/utilities.go18
8 files changed, 280 insertions, 65 deletions
diff --git a/vendor/github.com/PuerkitoBio/goquery/.travis.yml b/vendor/github.com/PuerkitoBio/goquery/.travis.yml
index cc1402d..8430c86 100644
--- a/vendor/github.com/PuerkitoBio/goquery/.travis.yml
+++ b/vendor/github.com/PuerkitoBio/goquery/.travis.yml
@@ -1,16 +1,31 @@
+arch:
+ - amd64
+ - ppc64le
language: go
go:
- - 1.1
- - 1.2.x
- - 1.3.x
- - 1.4.x
- - 1.5.x
- - 1.6.x
- 1.7.x
- 1.8.x
- 1.9.x
- - "1.10.x"
+ - 1.10.x
- 1.11.x
+ - 1.12.x
+ - 1.13.x
+ - 1.14.x
+ - 1.15.x
- tip
+jobs:
+ exclude:
+ - arch: ppc64le
+ go: 1.7.x
+ - arch: ppc64le
+ go: 1.8.x
+ - arch: ppc64le
+ go: 1.9.x
+ - arch: ppc64le
+ go: 1.10.x
+ - arch: ppc64le
+ go: 1.11.x
+ - arch: ppc64le
+ go: 1.12.x
diff --git a/vendor/github.com/PuerkitoBio/goquery/LICENSE b/vendor/github.com/PuerkitoBio/goquery/LICENSE
index f743d37..25372c2 100644
--- a/vendor/github.com/PuerkitoBio/goquery/LICENSE
+++ b/vendor/github.com/PuerkitoBio/goquery/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2012-2016, Martin Angers & Contributors
+Copyright (c) 2012-2021, Martin Angers & Contributors
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
diff --git a/vendor/github.com/PuerkitoBio/goquery/README.md b/vendor/github.com/PuerkitoBio/goquery/README.md
index 84f9af3..6bb185c 100644
--- a/vendor/github.com/PuerkitoBio/goquery/README.md
+++ b/vendor/github.com/PuerkitoBio/goquery/README.md
@@ -1,5 +1,9 @@
# goquery - a little like that j-thing, only in Go
-[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
+
+[![builds.sr.ht status](https://builds.sr.ht/~mna/goquery/commits/fedora.yml.svg)](https://builds.sr.ht/~mna/goquery/commits/fedora.yml?)
+[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery)
+[![Go Reference](https://pkg.go.dev/badge/github.com/PuerkitoBio/goquery.svg)](https://pkg.go.dev/github.com/PuerkitoBio/goquery)
+[![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off.
@@ -19,7 +23,7 @@ Syntax-wise, it is as close as possible to jQuery, with the same function names
## Installation
-Please note that because of the net/html dependency, goquery requires Go1.1+.
+Please note that because of the net/html dependency, goquery requires Go1.1+ and is tested on Go1.7+.
$ go get github.com/PuerkitoBio/goquery
@@ -37,6 +41,11 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
**Note that goquery's API is now stable, and will not break.**
+* **2021-07-11 (v1.7.1)** : Update go.mod dependencies and add dependabot config (thanks [@jauderho](https://github.com/jauderho)).
+* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)).
+* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes.
+* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this.
+* **2020-02-04 (v1.5.1)** : Update module dependencies.
* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505).
* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples.
* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`.
@@ -47,7 +56,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
* **2016-08-28 (v1.0.1)** : Optimize performance for large documents.
* **2016-07-27 (v1.0.0)** : Tag version 1.0.0.
* **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object.
-* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see godoc for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
+* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see [doc][] for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr].
* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone].
* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone].
@@ -76,7 +85,7 @@ jQuery often has many variants for the same function (no argument, a selector st
Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour).
-The complete [godoc reference documentation can be found here][doc].
+The complete [package reference documentation can be found here][doc].
Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string):
@@ -120,12 +129,11 @@ func ExampleScrape() {
}
// Find the review items
- doc.Find(".sidebar-reviews article .content-block").Each(func(i int, s *goquery.Selection) {
- // For each item found, get the band and title
- band := s.Find("a").Text()
- title := s.Find("i").Text()
- fmt.Printf("Review %d: %s - %s\n", i, band, title)
- })
+ doc.Find(".left-content article .post-title").Each(func(i int, s *goquery.Selection) {
+ // For each item found, get the title
+ title := s.Find("a").Text()
+ fmt.Printf("Review %d: %s\n", i, title)
+ })
}
func main() {
@@ -138,9 +146,14 @@ func main() {
- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags.
- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery.
- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors.
-- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework
+- [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework
- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets.
- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping.
+- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins
+- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers.
+- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering.
+- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags.
+- [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates.
## Support
@@ -153,8 +166,9 @@ There are a number of ways you can support the project:
* Pull requests: please discuss new code in an issue first, unless the fix is really trivial.
- Make sure new code is tested.
- Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue.
-
-If you desperately want to send money my way, I have a BuyMeACoffee.com page:
+* Sponsor the developer
+ - See the Github Sponsor button at the top of the repo on github
+ - or via BuyMeACoffee.com, below
<a href="https://www.buymeacoffee.com/mna" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a>
@@ -169,11 +183,13 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia'
[bsd]: http://opensource.org/licenses/BSD-3-Clause
[golic]: http://golang.org/LICENSE
[caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE
-[doc]: http://godoc.org/github.com/PuerkitoBio/goquery
+[doc]: https://pkg.go.dev/github.com/PuerkitoBio/goquery
[index]: http://api.jquery.com/index/
[gonet]: https://github.com/golang/net/
-[html]: http://godoc.org/golang.org/x/net/html
+[html]: https://pkg.go.dev/golang.org/x/net/html
[wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks
[thatguystone]: https://github.com/thatguystone
[piotr]: https://github.com/piotrkowalczuk
[goq]: https://github.com/andrewstuart/goq
+[thiemok]: https://github.com/thiemok
+[djw]: https://github.com/davidjwilkins
diff --git a/vendor/github.com/PuerkitoBio/goquery/go.mod b/vendor/github.com/PuerkitoBio/goquery/go.mod
index 2fa1332..3af3b15 100644
--- a/vendor/github.com/PuerkitoBio/goquery/go.mod
+++ b/vendor/github.com/PuerkitoBio/goquery/go.mod
@@ -1,6 +1,8 @@
module github.com/PuerkitoBio/goquery
require (
- github.com/andybalholm/cascadia v1.0.0
- golang.org/x/net v0.0.0-20181114220301-adae6a3d119a
+ github.com/andybalholm/cascadia v1.2.0
+ golang.org/x/net v0.0.0-20210614182718-04defd469f4e
)
+
+go 1.13
diff --git a/vendor/github.com/PuerkitoBio/goquery/go.sum b/vendor/github.com/PuerkitoBio/goquery/go.sum
index 11c5757..13b128d 100644
--- a/vendor/github.com/PuerkitoBio/goquery/go.sum
+++ b/vendor/github.com/PuerkitoBio/goquery/go.sum
@@ -1,5 +1,10 @@
-github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
-github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
+github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
+github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U=
-golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
diff --git a/vendor/github.com/PuerkitoBio/goquery/manipulation.go b/vendor/github.com/PuerkitoBio/goquery/manipulation.go
index 34eb757..35febf1 100644
--- a/vendor/github.com/PuerkitoBio/goquery/manipulation.go
+++ b/vendor/github.com/PuerkitoBio/goquery/manipulation.go
@@ -39,8 +39,15 @@ func (s *Selection) AfterSelection(sel *Selection) *Selection {
// AfterHtml parses the html and inserts it after the set of matched elements.
//
// This follows the same rules as Selection.Append.
-func (s *Selection) AfterHtml(html string) *Selection {
- return s.AfterNodes(parseHtml(html)...)
+func (s *Selection) AfterHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
+ nextSibling := node.NextSibling
+ for _, n := range nodes {
+ if node.Parent != nil {
+ node.Parent.InsertBefore(n, nextSibling)
+ }
+ }
+ })
}
// AfterNodes inserts the nodes after each element in the set of matched elements.
@@ -85,8 +92,12 @@ func (s *Selection) AppendSelection(sel *Selection) *Selection {
}
// AppendHtml parses the html and appends it to the set of matched elements.
-func (s *Selection) AppendHtml(html string) *Selection {
- return s.AppendNodes(parseHtml(html)...)
+func (s *Selection) AppendHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
+ for _, n := range nodes {
+ node.AppendChild(n)
+ }
+ })
}
// AppendNodes appends the specified nodes to each node in the set of matched elements.
@@ -123,8 +134,14 @@ func (s *Selection) BeforeSelection(sel *Selection) *Selection {
// BeforeHtml parses the html and inserts it before the set of matched elements.
//
// This follows the same rules as Selection.Append.
-func (s *Selection) BeforeHtml(html string) *Selection {
- return s.BeforeNodes(parseHtml(html)...)
+func (s *Selection) BeforeHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
+ for _, n := range nodes {
+ if node.Parent != nil {
+ node.Parent.InsertBefore(n, node)
+ }
+ }
+ })
}
// BeforeNodes inserts the nodes before each element in the set of matched elements.
@@ -184,8 +201,13 @@ func (s *Selection) PrependSelection(sel *Selection) *Selection {
}
// PrependHtml parses the html and prepends it to the set of matched elements.
-func (s *Selection) PrependHtml(html string) *Selection {
- return s.PrependNodes(parseHtml(html)...)
+func (s *Selection) PrependHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
+ firstChild := node.FirstChild
+ for _, n := range nodes {
+ node.InsertBefore(n, firstChild)
+ }
+ })
}
// PrependNodes prepends the specified nodes to each node in the set of
@@ -212,14 +234,19 @@ func (s *Selection) Remove() *Selection {
return s
}
-// RemoveFiltered removes the set of matched elements by selector.
-// It returns the Selection of removed nodes.
+// RemoveFiltered removes from the current set of matched elements those that
+// match the selector filter. It returns the Selection of removed nodes.
+//
+// For example if the selection s contains "<h1>", "<h2>" and "<h3>"
+// and s.RemoveFiltered("h2") is called, only the "<h2>" node is removed
+// (and returned), while "<h1>" and "<h3>" are kept in the document.
func (s *Selection) RemoveFiltered(selector string) *Selection {
return s.RemoveMatcher(compileMatcher(selector))
}
-// RemoveMatcher removes the set of matched elements.
-// It returns the Selection of removed nodes.
+// RemoveMatcher removes from the current set of matched elements those that
+// match the Matcher filter. It returns the Selection of removed nodes.
+// See RemoveFiltered for additional information.
func (s *Selection) RemoveMatcher(m Matcher) *Selection {
return s.FilterMatcher(m).Remove()
}
@@ -256,8 +283,16 @@ func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection {
// It returns the removed elements.
//
// This follows the same rules as Selection.Append.
-func (s *Selection) ReplaceWithHtml(html string) *Selection {
- return s.ReplaceWithNodes(parseHtml(html)...)
+func (s *Selection) ReplaceWithHtml(htmlStr string) *Selection {
+ s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
+ nextSibling := node.NextSibling
+ for _, n := range nodes {
+ if node.Parent != nil {
+ node.Parent.InsertBefore(n, nextSibling)
+ }
+ }
+ })
+ return s.Remove()
}
// ReplaceWithNodes replaces each element in the set of matched elements with
@@ -272,8 +307,17 @@ func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection {
// SetHtml sets the html content of each element in the selection to
// specified html string.
-func (s *Selection) SetHtml(html string) *Selection {
- return setHtmlNodes(s, parseHtml(html)...)
+func (s *Selection) SetHtml(htmlStr string) *Selection {
+ for _, context := range s.Nodes {
+ for c := context.FirstChild; c != nil; c = context.FirstChild {
+ context.RemoveChild(c)
+ }
+ }
+ return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
+ for _, n := range nodes {
+ node.AppendChild(n)
+ }
+ })
}
// SetText sets the content of each element in the selection to specified content.
@@ -329,8 +373,23 @@ func (s *Selection) WrapSelection(sel *Selection) *Selection {
// most child of the given HTML.
//
// It returns the original set of elements.
-func (s *Selection) WrapHtml(html string) *Selection {
- return s.wrapNodes(parseHtml(html)...)
+func (s *Selection) WrapHtml(htmlStr string) *Selection {
+ nodesMap := make(map[string][]*html.Node)
+ for _, context := range s.Nodes {
+ var parent *html.Node
+ if context.Parent != nil {
+ parent = context.Parent
+ } else {
+ parent = &html.Node{Type: html.ElementNode}
+ }
+ nodes, found := nodesMap[nodeName(parent)]
+ if !found {
+ nodes = parseHtmlWithContext(htmlStr, parent)
+ nodesMap[nodeName(parent)] = nodes
+ }
+ newSingleSelection(context, s.document).wrapAllNodes(cloneNodes(nodes)...)
+ }
+ return s
}
// WrapNode wraps each element in the set of matched elements inside the inner-
@@ -382,8 +441,18 @@ func (s *Selection) WrapAllSelection(sel *Selection) *Selection {
// document.
//
// It returns the original set of elements.
-func (s *Selection) WrapAllHtml(html string) *Selection {
- return s.wrapAllNodes(parseHtml(html)...)
+func (s *Selection) WrapAllHtml(htmlStr string) *Selection {
+ var context *html.Node
+ var nodes []*html.Node
+ if len(s.Nodes) > 0 {
+ context = s.Nodes[0]
+ if context.Parent != nil {
+ nodes = parseHtmlWithContext(htmlStr, context)
+ } else {
+ nodes = parseHtml(htmlStr)
+ }
+ }
+ return s.wrapAllNodes(nodes...)
}
func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection {
@@ -452,8 +521,17 @@ func (s *Selection) WrapInnerSelection(sel *Selection) *Selection {
// cloned before being inserted into the document.
//
// It returns the original set of elements.
-func (s *Selection) WrapInnerHtml(html string) *Selection {
- return s.wrapInnerNodes(parseHtml(html)...)
+func (s *Selection) WrapInnerHtml(htmlStr string) *Selection {
+ nodesMap := make(map[string][]*html.Node)
+ for _, context := range s.Nodes {
+ nodes, found := nodesMap[nodeName(context)]
+ if !found {
+ nodes = parseHtmlWithContext(htmlStr, context)
+ nodesMap[nodeName(context)] = nodes
+ }
+ newSingleSelection(context, s.document).wrapInnerNodes(cloneNodes(nodes)...)
+ }
+ return s
}
// WrapInnerNode wraps an HTML structure, matched by the given selector, around
@@ -493,16 +571,14 @@ func parseHtml(h string) []*html.Node {
return nodes
}
-func setHtmlNodes(s *Selection, ns ...*html.Node) *Selection {
- for _, n := range s.Nodes {
- for c := n.FirstChild; c != nil; c = n.FirstChild {
- n.RemoveChild(c)
- }
- for _, c := range ns {
- n.AppendChild(cloneNode(c))
- }
+func parseHtmlWithContext(h string, context *html.Node) []*html.Node {
+ // Errors are only returned when the io.Reader returns any error besides
+ // EOF, but strings.Reader never will
+ nodes, err := html.ParseFragment(strings.NewReader(h), context)
+ if err != nil {
+ panic("goquery: failed to parse HTML: " + err.Error())
}
- return s
+ return nodes
}
// Get the first child that is an ElementNode
@@ -572,3 +648,32 @@ func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool,
return s
}
+
+// eachNodeHtml parses the given html string and inserts the resulting nodes in the dom with the mergeFn.
+// The parsed nodes are inserted for each element of the selection.
+// isParent can be used to indicate that the elements of the selection should be treated as the parent for the parsed html.
+// A cache is used to avoid parsing the html multiple times should the elements of the selection result in the same context.
+func (s *Selection) eachNodeHtml(htmlStr string, isParent bool, mergeFn func(n *html.Node, nodes []*html.Node)) *Selection {
+ // cache to avoid parsing the html for the same context multiple times
+ nodeCache := make(map[string][]*html.Node)
+ var context *html.Node
+ for _, n := range s.Nodes {
+ if isParent {
+ context = n.Parent
+ } else {
+ if n.Type != html.ElementNode {
+ continue
+ }
+ context = n
+ }
+ if context != nil {
+ nodes, found := nodeCache[nodeName(context)]
+ if !found {
+ nodes = parseHtmlWithContext(htmlStr, context)
+ nodeCache[nodeName(context)] = nodes
+ }
+ mergeFn(n, cloneNodes(nodes))
+ }
+ }
+ return s
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/type.go b/vendor/github.com/PuerkitoBio/goquery/type.go
index 6ad51db..6646c14 100644
--- a/vendor/github.com/PuerkitoBio/goquery/type.go
+++ b/vendor/github.com/PuerkitoBio/goquery/type.go
@@ -7,7 +7,6 @@ import (
"net/url"
"github.com/andybalholm/cascadia"
-
"golang.org/x/net/html"
)
@@ -122,6 +121,45 @@ type Matcher interface {
Filter([]*html.Node) []*html.Node
}
+// Single compiles a selector string to a Matcher that stops after the first
+// match is found.
+//
+// By default, Selection.Find and other functions that accept a selector string
+// to select nodes will use all matches corresponding to that selector. By
+// using the Matcher returned by Single, at most the first match will be
+// selected.
+//
+// For example, those two statements are semantically equivalent:
+//
+// sel1 := doc.Find("a").First()
+// sel2 := doc.FindMatcher(goquery.Single("a"))
+//
+// The one using Single is optimized to be potentially much faster on large
+// documents.
+//
+// Only the behaviour of the MatchAll method of the Matcher interface is
+// altered compared to standard Matchers. This means that the single-selection
+// property of the Matcher only applies for Selection methods where the Matcher
+// is used to select nodes, not to filter or check if a node matches the
+// Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g.
+// FilterMatcher(Single("div")) will still result in a Selection with multiple
+// "div"s if there were many "div"s in the Selection to begin with).
+func Single(selector string) Matcher {
+ return singleMatcher{compileMatcher(selector)}
+}
+
+// SingleMatcher returns a Matcher matches the same nodes as m, but that stops
+// after the first match is found.
+//
+// See the documentation of function Single for more details.
+func SingleMatcher(m Matcher) Matcher {
+ if _, ok := m.(singleMatcher); ok {
+ // m is already a singleMatcher
+ return m
+ }
+ return singleMatcher{m}
+}
+
// compileMatcher compiles the selector string s and returns
// the corresponding Matcher. If s is an invalid selector string,
// it returns a Matcher that fails all matches.
@@ -133,6 +171,30 @@ func compileMatcher(s string) Matcher {
return cs
}
+type singleMatcher struct {
+ Matcher
+}
+
+func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
+ // Optimized version - stops finding at the first match (cascadia-compiled
+ // matchers all use this code path).
+ if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok {
+ node := mm.MatchFirst(n)
+ if node == nil {
+ return nil
+ }
+ return []*html.Node{node}
+ }
+
+ // Fallback version, for e.g. test mocks that don't provide the MatchFirst
+ // method.
+ nodes := m.Matcher.MatchAll(n)
+ if len(nodes) > 0 {
+ return nodes[:1:1]
+ }
+ return nil
+}
+
// invalidMatcher is a Matcher that always fails to match.
type invalidMatcher struct{}
diff --git a/vendor/github.com/PuerkitoBio/goquery/utilities.go b/vendor/github.com/PuerkitoBio/goquery/utilities.go
index b4c061a..3e11b1d 100644
--- a/vendor/github.com/PuerkitoBio/goquery/utilities.go
+++ b/vendor/github.com/PuerkitoBio/goquery/utilities.go
@@ -36,12 +36,22 @@ func NodeName(s *Selection) string {
if s.Length() == 0 {
return ""
}
- switch n := s.Get(0); n.Type {
+ return nodeName(s.Get(0))
+}
+
+// nodeName returns the node name of the given html node.
+// See NodeName for additional details on behaviour.
+func nodeName(node *html.Node) string {
+ if node == nil {
+ return ""
+ }
+
+ switch node.Type {
case html.ElementNode, html.DoctypeNode:
- return n.Data
+ return node.Data
default:
- if n.Type >= 0 && int(n.Type) < len(nodeNames) {
- return nodeNames[n.Type]
+ if node.Type >= 0 && int(node.Type) < len(nodeNames) {
+ return nodeNames[node.Type]
}
return ""
}