aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ale <ale@incal.net> 2021-07-12 17:29:19 +0000
committer ale <ale@incal.net> 2021-07-12 17:29:19 +0000
commit ef2c410063d3be2632ad7449cab6f51511face6b (patch)
tree 9d2e82374a063f3b568110e83ccb1b285f3247f1
parent 877afafd950b84242204499b3ed8c1b2c8c75f31 (diff)
parent 557f9d889812976293b4a668c190e0e1e0332857 (diff)
download crawl-ef2c410063d3be2632ad7449cab6f51511face6b.tar.gz
crawl-ef2c410063d3be2632ad7449cab6f51511face6b.zip
Merge branch 'renovate/github.com-puerkitobio-goquery-1.x' into 'master'
Update module github.com/PuerkitoBio/goquery to v1.7.1. See merge request ale/crawl!3.
-rw-r--r--go.mod4
-rw-r--r--go.sum11
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/.travis.yml29
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/LICENSE2
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/README.md46
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/go.mod6
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/go.sum13
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/manipulation.go167
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/type.go64
-rw-r--r--vendor/github.com/PuerkitoBio/goquery/utilities.go18
-rw-r--r--vendor/github.com/andybalholm/cascadia/README.md2
-rw-r--r--vendor/github.com/andybalholm/cascadia/go.mod6
-rw-r--r--vendor/github.com/andybalholm/cascadia/parser.go321
-rw-r--r--vendor/github.com/andybalholm/cascadia/selector.go988
-rw-r--r--vendor/github.com/andybalholm/cascadia/serialize.go120
-rw-r--r--vendor/github.com/andybalholm/cascadia/specificity.go26
-rw-r--r--vendor/golang.org/x/net/html/const.go3
-rw-r--r--vendor/golang.org/x/net/html/foreign.go120
-rw-r--r--vendor/golang.org/x/net/html/node.go5
-rw-r--r--vendor/golang.org/x/net/html/parse.go337
-rw-r--r--vendor/golang.org/x/net/html/render.go34
-rw-r--r--vendor/golang.org/x/net/html/token.go9
-rw-r--r--vendor/modules.txt7
23 files changed, 1542 insertions, 796 deletions
diff --git a/go.mod b/go.mod
index 33a9c8e..f302ca6 100644
--- a/go.mod
+++ b/go.mod
@@ -3,12 +3,12 @@ module git.autistici.org/ale/crawl
go 1.15
require (
- github.com/PuerkitoBio/goquery v1.5.0
+ github.com/PuerkitoBio/goquery v1.7.1
github.com/PuerkitoBio/purell v0.1.0
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/google/go-cmp v0.5.6
github.com/google/uuid v1.1.1 // indirect
github.com/pborman/uuid v1.2.1
github.com/syndtr/goleveldb v0.0.0-20190923125748-758128399b1d
- golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 // indirect
+ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 // indirect
)
diff --git a/go.sum b/go.sum
index 08853ca..b977e72 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,7 @@
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
+github.com/PuerkitoBio/goquery v1.7.1 h1:oE+T06D+1T7LNrn91B4aERsRIeCLJ/oPSa6xB9FPnz4=
+github.com/PuerkitoBio/goquery v1.7.1/go.mod h1:XY0pP4kfraEmmV1O7Uf6XyjoslwsneBbgeDjLYuN8xY=
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597 h1:1H3FyRw7YsqIty9WHPOVEGJaFJ1sfGVZ3PPDUw3ob2w=
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v0.1.0 h1:N8Bcc53nei5frgNYgAKo93qMUVdU5LUGHCBv8efdVcM=
@@ -8,6 +10,8 @@ github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
+github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
+github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
@@ -42,13 +46,20 @@ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73r
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
diff --git a/vendor/github.com/PuerkitoBio/goquery/.travis.yml b/vendor/github.com/PuerkitoBio/goquery/.travis.yml
index cc1402d..8430c86 100644
--- a/vendor/github.com/PuerkitoBio/goquery/.travis.yml
+++ b/vendor/github.com/PuerkitoBio/goquery/.travis.yml
@@ -1,16 +1,31 @@
+arch:
+ - amd64
+ - ppc64le
language: go
go:
- - 1.1
- - 1.2.x
- - 1.3.x
- - 1.4.x
- - 1.5.x
- - 1.6.x
- 1.7.x
- 1.8.x
- 1.9.x
- - "1.10.x"
+ - 1.10.x
- 1.11.x
+ - 1.12.x
+ - 1.13.x
+ - 1.14.x
+ - 1.15.x
- tip
+jobs:
+ exclude:
+ - arch: ppc64le
+ go: 1.7.x
+ - arch: ppc64le
+ go: 1.8.x
+ - arch: ppc64le
+ go: 1.9.x
+ - arch: ppc64le
+ go: 1.10.x
+ - arch: ppc64le
+ go: 1.11.x
+ - arch: ppc64le
+ go: 1.12.x
diff --git a/vendor/github.com/PuerkitoBio/goquery/LICENSE b/vendor/github.com/PuerkitoBio/goquery/LICENSE
index f743d37..25372c2 100644
--- a/vendor/github.com/PuerkitoBio/goquery/LICENSE
+++ b/vendor/github.com/PuerkitoBio/goquery/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2012-2016, Martin Angers & Contributors
+Copyright (c) 2012-2021, Martin Angers & Contributors
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
diff --git a/vendor/github.com/PuerkitoBio/goquery/README.md b/vendor/github.com/PuerkitoBio/goquery/README.md
index 84f9af3..6bb185c 100644
--- a/vendor/github.com/PuerkitoBio/goquery/README.md
+++ b/vendor/github.com/PuerkitoBio/goquery/README.md
@@ -1,5 +1,9 @@
# goquery - a little like that j-thing, only in Go
-[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
+
+[![builds.sr.ht status](https://builds.sr.ht/~mna/goquery/commits/fedora.yml.svg)](https://builds.sr.ht/~mna/goquery/commits/fedora.yml?)
+[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery)
+[![Go Reference](https://pkg.go.dev/badge/github.com/PuerkitoBio/goquery.svg)](https://pkg.go.dev/github.com/PuerkitoBio/goquery)
+[![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off.
@@ -19,7 +23,7 @@ Syntax-wise, it is as close as possible to jQuery, with the same function names
## Installation
-Please note that because of the net/html dependency, goquery requires Go1.1+.
+Please note that because of the net/html dependency, goquery requires Go1.1+ and is tested on Go1.7+.
$ go get github.com/PuerkitoBio/goquery
@@ -37,6 +41,11 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
**Note that goquery's API is now stable, and will not break.**
+* **2021-07-11 (v1.7.1)** : Update go.mod dependencies and add dependabot config (thanks [@jauderho](https://github.com/jauderho)).
+* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)).
+* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes.
+* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this.
+* **2020-02-04 (v1.5.1)** : Update module dependencies.
* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505).
* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples.
* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`.
@@ -47,7 +56,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
* **2016-08-28 (v1.0.1)** : Optimize performance for large documents.
* **2016-07-27 (v1.0.0)** : Tag version 1.0.0.
* **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object.
-* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see godoc for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
+* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see [doc][] for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr].
* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone].
* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone].
@@ -76,7 +85,7 @@ jQuery often has many variants for the same function (no argument, a selector st
Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour).
-The complete [godoc reference documentation can be found here][doc].
+The complete [package reference documentation can be found here][doc].
Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string):
@@ -120,12 +129,11 @@ func ExampleScrape() {
}
// Find the review items
- doc.Find(".sidebar-reviews article .content-block").Each(func(i int, s *goquery.Selection) {
- // For each item found, get the band and title
- band := s.Find("a").Text()
- title := s.Find("i").Text()
- fmt.Printf("Review %d: %s - %s\n", i, band, title)
- })
+ doc.Find(".left-content article .post-title").Each(func(i int, s *goquery.Selection) {
+ // For each item found, get the title
+ title := s.Find("a").Text()
+ fmt.Printf("Review %d: %s\n", i, title)
+ })
}
func main() {
@@ -138,9 +146,14 @@ func main() {
- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags.
- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery.
- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors.
-- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework
+- [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework
- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets.
- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping.
+- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins
+- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers.
+- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering.
+- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags.
+- [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates.
## Support
@@ -153,8 +166,9 @@ There are a number of ways you can support the project:
* Pull requests: please discuss new code in an issue first, unless the fix is really trivial.
- Make sure new code is tested.
- Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue.
-
-If you desperately want to send money my way, I have a BuyMeACoffee.com page:
+* Sponsor the developer
+ - See the GitHub Sponsor button at the top of the repo on GitHub
+ - or via BuyMeACoffee.com, below
<a href="https://www.buymeacoffee.com/mna" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a>
@@ -169,11 +183,13 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia'
[bsd]: http://opensource.org/licenses/BSD-3-Clause
[golic]: http://golang.org/LICENSE
[caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE
-[doc]: http://godoc.org/github.com/PuerkitoBio/goquery
+[doc]: https://pkg.go.dev/github.com/PuerkitoBio/goquery
[index]: http://api.jquery.com/index/
[gonet]: https://github.com/golang/net/
-[html]: http://godoc.org/golang.org/x/net/html
+[html]: https://pkg.go.dev/golang.org/x/net/html
[wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks
[thatguystone]: https://github.com/thatguystone
[piotr]: https://github.com/piotrkowalczuk
[goq]: https://github.com/andrewstuart/goq
+[thiemok]: https://github.com/thiemok
+[djw]: https://github.com/davidjwilkins
diff --git a/vendor/github.com/PuerkitoBio/goquery/go.mod b/vendor/github.com/PuerkitoBio/goquery/go.mod
index 2fa1332..3af3b15 100644
--- a/vendor/github.com/PuerkitoBio/goquery/go.mod
+++ b/vendor/github.com/PuerkitoBio/goquery/go.mod
@@ -1,6 +1,8 @@
module github.com/PuerkitoBio/goquery
require (
- github.com/andybalholm/cascadia v1.0.0
- golang.org/x/net v0.0.0-20181114220301-adae6a3d119a
+ github.com/andybalholm/cascadia v1.2.0
+ golang.org/x/net v0.0.0-20210614182718-04defd469f4e
)
+
+go 1.13
diff --git a/vendor/github.com/PuerkitoBio/goquery/go.sum b/vendor/github.com/PuerkitoBio/goquery/go.sum
index 11c5757..13b128d 100644
--- a/vendor/github.com/PuerkitoBio/goquery/go.sum
+++ b/vendor/github.com/PuerkitoBio/goquery/go.sum
@@ -1,5 +1,10 @@
-github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
-github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
+github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
+github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U=
-golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
diff --git a/vendor/github.com/PuerkitoBio/goquery/manipulation.go b/vendor/github.com/PuerkitoBio/goquery/manipulation.go
index 34eb757..35febf1 100644
--- a/vendor/github.com/PuerkitoBio/goquery/manipulation.go
+++ b/vendor/github.com/PuerkitoBio/goquery/manipulation.go
@@ -39,8 +39,15 @@ func (s *Selection) AfterSelection(sel *Selection) *Selection {
// AfterHtml parses the html and inserts it after the set of matched elements.
//
// This follows the same rules as Selection.Append.
-func (s *Selection) AfterHtml(html string) *Selection {
- return s.AfterNodes(parseHtml(html)...)
+func (s *Selection) AfterHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
+ nextSibling := node.NextSibling
+ for _, n := range nodes {
+ if node.Parent != nil {
+ node.Parent.InsertBefore(n, nextSibling)
+ }
+ }
+ })
}
// AfterNodes inserts the nodes after each element in the set of matched elements.
@@ -85,8 +92,12 @@ func (s *Selection) AppendSelection(sel *Selection) *Selection {
}
// AppendHtml parses the html and appends it to the set of matched elements.
-func (s *Selection) AppendHtml(html string) *Selection {
- return s.AppendNodes(parseHtml(html)...)
+func (s *Selection) AppendHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
+ for _, n := range nodes {
+ node.AppendChild(n)
+ }
+ })
}
// AppendNodes appends the specified nodes to each node in the set of matched elements.
@@ -123,8 +134,14 @@ func (s *Selection) BeforeSelection(sel *Selection) *Selection {
// BeforeHtml parses the html and inserts it before the set of matched elements.
//
// This follows the same rules as Selection.Append.
-func (s *Selection) BeforeHtml(html string) *Selection {
- return s.BeforeNodes(parseHtml(html)...)
+func (s *Selection) BeforeHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
+ for _, n := range nodes {
+ if node.Parent != nil {
+ node.Parent.InsertBefore(n, node)
+ }
+ }
+ })
}
// BeforeNodes inserts the nodes before each element in the set of matched elements.
@@ -184,8 +201,13 @@ func (s *Selection) PrependSelection(sel *Selection) *Selection {
}
// PrependHtml parses the html and prepends it to the set of matched elements.
-func (s *Selection) PrependHtml(html string) *Selection {
- return s.PrependNodes(parseHtml(html)...)
+func (s *Selection) PrependHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
+ firstChild := node.FirstChild
+ for _, n := range nodes {
+ node.InsertBefore(n, firstChild)
+ }
+ })
}
// PrependNodes prepends the specified nodes to each node in the set of
@@ -212,14 +234,19 @@ func (s *Selection) Remove() *Selection {
return s
}
-// RemoveFiltered removes the set of matched elements by selector.
-// It returns the Selection of removed nodes.
+// RemoveFiltered removes from the current set of matched elements those that
+// match the selector filter. It returns the Selection of removed nodes.
+//
+// For example if the selection s contains "<h1>", "<h2>" and "<h3>"
+// and s.RemoveFiltered("h2") is called, only the "<h2>" node is removed
+// (and returned), while "<h1>" and "<h3>" are kept in the document.
func (s *Selection) RemoveFiltered(selector string) *Selection {
return s.RemoveMatcher(compileMatcher(selector))
}
-// RemoveMatcher removes the set of matched elements.
-// It returns the Selection of removed nodes.
+// RemoveMatcher removes from the current set of matched elements those that
+// match the Matcher filter. It returns the Selection of removed nodes.
+// See RemoveFiltered for additional information.
func (s *Selection) RemoveMatcher(m Matcher) *Selection {
return s.FilterMatcher(m).Remove()
}
@@ -256,8 +283,16 @@ func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection {
// It returns the removed elements.
//
// This follows the same rules as Selection.Append.
-func (s *Selection) ReplaceWithHtml(html string) *Selection {
- return s.ReplaceWithNodes(parseHtml(html)...)
+func (s *Selection) ReplaceWithHtml(htmlStr string) *Selection {
+ s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
+ nextSibling := node.NextSibling
+ for _, n := range nodes {
+ if node.Parent != nil {
+ node.Parent.InsertBefore(n, nextSibling)
+ }
+ }
+ })
+ return s.Remove()
}
// ReplaceWithNodes replaces each element in the set of matched elements with
@@ -272,8 +307,17 @@ func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection {
// SetHtml sets the html content of each element in the selection to
// specified html string.
-func (s *Selection) SetHtml(html string) *Selection {
- return setHtmlNodes(s, parseHtml(html)...)
+func (s *Selection) SetHtml(htmlStr string) *Selection {
+ for _, context := range s.Nodes {
+ for c := context.FirstChild; c != nil; c = context.FirstChild {
+ context.RemoveChild(c)
+ }
+ }
+ return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
+ for _, n := range nodes {
+ node.AppendChild(n)
+ }
+ })
}
// SetText sets the content of each element in the selection to specified content.
@@ -329,8 +373,23 @@ func (s *Selection) WrapSelection(sel *Selection) *Selection {
// most child of the given HTML.
//
// It returns the original set of elements.
-func (s *Selection) WrapHtml(html string) *Selection {
- return s.wrapNodes(parseHtml(html)...)
+func (s *Selection) WrapHtml(htmlStr string) *Selection {
+ nodesMap := make(map[string][]*html.Node)
+ for _, context := range s.Nodes {
+ var parent *html.Node
+ if context.Parent != nil {
+ parent = context.Parent
+ } else {
+ parent = &html.Node{Type: html.ElementNode}
+ }
+ nodes, found := nodesMap[nodeName(parent)]
+ if !found {
+ nodes = parseHtmlWithContext(htmlStr, parent)
+ nodesMap[nodeName(parent)] = nodes
+ }
+ newSingleSelection(context, s.document).wrapAllNodes(cloneNodes(nodes)...)
+ }
+ return s
}
// WrapNode wraps each element in the set of matched elements inside the inner-
@@ -382,8 +441,18 @@ func (s *Selection) WrapAllSelection(sel *Selection) *Selection {
// document.
//
// It returns the original set of elements.
-func (s *Selection) WrapAllHtml(html string) *Selection {
- return s.wrapAllNodes(parseHtml(html)...)
+func (s *Selection) WrapAllHtml(htmlStr string) *Selection {
+ var context *html.Node
+ var nodes []*html.Node
+ if len(s.Nodes) > 0 {
+ context = s.Nodes[0]
+ if context.Parent != nil {
+ nodes = parseHtmlWithContext(htmlStr, context)
+ } else {
+ nodes = parseHtml(htmlStr)
+ }
+ }
+ return s.wrapAllNodes(nodes...)
}
func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection {
@@ -452,8 +521,17 @@ func (s *Selection) WrapInnerSelection(sel *Selection) *Selection {
// cloned before being inserted into the document.
//
// It returns the original set of elements.
-func (s *Selection) WrapInnerHtml(html string) *Selection {
- return s.wrapInnerNodes(parseHtml(html)...)
+func (s *Selection) WrapInnerHtml(htmlStr string) *Selection {
+ nodesMap := make(map[string][]*html.Node)
+ for _, context := range s.Nodes {
+ nodes, found := nodesMap[nodeName(context)]
+ if !found {
+ nodes = parseHtmlWithContext(htmlStr, context)
+ nodesMap[nodeName(context)] = nodes
+ }
+ newSingleSelection(context, s.document).wrapInnerNodes(cloneNodes(nodes)...)
+ }
+ return s
}
// WrapInnerNode wraps an HTML structure, matched by the given selector, around
@@ -493,16 +571,14 @@ func parseHtml(h string) []*html.Node {
return nodes
}
-func setHtmlNodes(s *Selection, ns ...*html.Node) *Selection {
- for _, n := range s.Nodes {
- for c := n.FirstChild; c != nil; c = n.FirstChild {
- n.RemoveChild(c)
- }
- for _, c := range ns {
- n.AppendChild(cloneNode(c))
- }
+func parseHtmlWithContext(h string, context *html.Node) []*html.Node {
+ // Errors are only returned when the io.Reader returns any error besides
+ // EOF, but strings.Reader never will
+ nodes, err := html.ParseFragment(strings.NewReader(h), context)
+ if err != nil {
+ panic("goquery: failed to parse HTML: " + err.Error())
}
- return s
+ return nodes
}
// Get the first child that is an ElementNode
@@ -572,3 +648,32 @@ func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool,
return s
}
+
+// eachNodeHtml parses the given html string and inserts the resulting nodes in the dom with the mergeFn.
+// The parsed nodes are inserted for each element of the selection.
+// isParent indicates that the parent of each selected element (rather than the element itself) is used as the parsing context for the html; when false, the element itself is the context and non-element nodes are skipped.
+// A cache is used to avoid parsing the html multiple times should the elements of the selection result in the same context.
+func (s *Selection) eachNodeHtml(htmlStr string, isParent bool, mergeFn func(n *html.Node, nodes []*html.Node)) *Selection {
+ // cache to avoid parsing the html for the same context multiple times
+ nodeCache := make(map[string][]*html.Node)
+ var context *html.Node
+ for _, n := range s.Nodes {
+ if isParent {
+ context = n.Parent
+ } else {
+ if n.Type != html.ElementNode {
+ continue
+ }
+ context = n
+ }
+ if context != nil {
+ nodes, found := nodeCache[nodeName(context)]
+ if !found {
+ nodes = parseHtmlWithContext(htmlStr, context)
+ nodeCache[nodeName(context)] = nodes
+ }
+ mergeFn(n, cloneNodes(nodes))
+ }
+ }
+ return s
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/type.go b/vendor/github.com/PuerkitoBio/goquery/type.go
index 6ad51db..6646c14 100644
--- a/vendor/github.com/PuerkitoBio/goquery/type.go
+++ b/vendor/github.com/PuerkitoBio/goquery/type.go
@@ -7,7 +7,6 @@ import (
"net/url"
"github.com/andybalholm/cascadia"
-
"golang.org/x/net/html"
)
@@ -122,6 +121,45 @@ type Matcher interface {
Filter([]*html.Node) []*html.Node
}
+// Single compiles a selector string to a Matcher that stops after the first
+// match is found.
+//
+// By default, Selection.Find and other functions that accept a selector string
+// to select nodes will use all matches corresponding to that selector. By
+// using the Matcher returned by Single, at most the first match will be
+// selected.
+//
+// For example, those two statements are semantically equivalent:
+//
+// sel1 := doc.Find("a").First()
+// sel2 := doc.FindMatcher(goquery.Single("a"))
+//
+// The one using Single is optimized to be potentially much faster on large
+// documents.
+//
+// Only the behaviour of the MatchAll method of the Matcher interface is
+// altered compared to standard Matchers. This means that the single-selection
+// property of the Matcher only applies for Selection methods where the Matcher
+// is used to select nodes, not to filter or check if a node matches the
+// Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g.
+// FilterMatcher(Single("div")) will still result in a Selection with multiple
+// "div"s if there were many "div"s in the Selection to begin with).
+func Single(selector string) Matcher {
+ return singleMatcher{compileMatcher(selector)}
+}
+
+// SingleMatcher returns a Matcher that matches the same nodes as m, but stops
+// after the first match is found.
+//
+// See the documentation of function Single for more details.
+func SingleMatcher(m Matcher) Matcher {
+ if _, ok := m.(singleMatcher); ok {
+ // m is already a singleMatcher
+ return m
+ }
+ return singleMatcher{m}
+}
+
// compileMatcher compiles the selector string s and returns
// the corresponding Matcher. If s is an invalid selector string,
// it returns a Matcher that fails all matches.
@@ -133,6 +171,30 @@ func compileMatcher(s string) Matcher {
return cs
}
+type singleMatcher struct {
+ Matcher
+}
+
+func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
+ // Optimized version - stops finding at the first match (cascadia-compiled
+ // matchers all use this code path).
+ if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok {
+ node := mm.MatchFirst(n)
+ if node == nil {
+ return nil
+ }
+ return []*html.Node{node}
+ }
+
+ // Fallback version, for e.g. test mocks that don't provide the MatchFirst
+ // method.
+ nodes := m.Matcher.MatchAll(n)
+ if len(nodes) > 0 {
+ return nodes[:1:1]
+ }
+ return nil
+}
+
// invalidMatcher is a Matcher that always fails to match.
type invalidMatcher struct{}
diff --git a/vendor/github.com/PuerkitoBio/goquery/utilities.go b/vendor/github.com/PuerkitoBio/goquery/utilities.go
index b4c061a..3e11b1d 100644
--- a/vendor/github.com/PuerkitoBio/goquery/utilities.go
+++ b/vendor/github.com/PuerkitoBio/goquery/utilities.go
@@ -36,12 +36,22 @@ func NodeName(s *Selection) string {
if s.Length() == 0 {
return ""
}
- switch n := s.Get(0); n.Type {
+ return nodeName(s.Get(0))
+}
+
+// nodeName returns the node name of the given html node.
+// See NodeName for additional details on behaviour.
+func nodeName(node *html.Node) string {
+ if node == nil {
+ return ""
+ }
+
+ switch node.Type {
case html.ElementNode, html.DoctypeNode:
- return n.Data
+ return node.Data
default:
- if n.Type >= 0 && int(n.Type) < len(nodeNames) {
- return nodeNames[n.Type]
+ if node.Type >= 0 && int(node.Type) < len(nodeNames) {
+ return nodeNames[node.Type]
}
return ""
}
diff --git a/vendor/github.com/andybalholm/cascadia/README.md b/vendor/github.com/andybalholm/cascadia/README.md
index 9021cb9..26f4c37 100644
--- a/vendor/github.com/andybalholm/cascadia/README.md
+++ b/vendor/github.com/andybalholm/cascadia/README.md
@@ -5,3 +5,5 @@
The Cascadia package implements CSS selectors for use with the parse trees produced by the html package.
To test CSS selectors without writing Go code, check out [cascadia](https://github.com/suntong/cascadia) the command line tool, a thin wrapper around this package.
+
+[Refer to godoc here](https://godoc.org/github.com/andybalholm/cascadia).
diff --git a/vendor/github.com/andybalholm/cascadia/go.mod b/vendor/github.com/andybalholm/cascadia/go.mod
index e6febbb..51a330b 100644
--- a/vendor/github.com/andybalholm/cascadia/go.mod
+++ b/vendor/github.com/andybalholm/cascadia/go.mod
@@ -1,3 +1,5 @@
-module "github.com/andybalholm/cascadia"
+module github.com/andybalholm/cascadia
-require "golang.org/x/net" v0.0.0-20180218175443-cbe0f9307d01
+require golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01
+
+go 1.13
diff --git a/vendor/github.com/andybalholm/cascadia/parser.go b/vendor/github.com/andybalholm/cascadia/parser.go
index 495db9c..c40a39f 100644
--- a/vendor/github.com/andybalholm/cascadia/parser.go
+++ b/vendor/github.com/andybalholm/cascadia/parser.go
@@ -7,14 +7,16 @@ import (
"regexp"
"strconv"
"strings"
-
- "golang.org/x/net/html"
)
// a parser for CSS selectors
type parser struct {
s string // the source text
i int // the current position
+
+ // if `false`, parsing a pseudo-element
+ // returns an error.
+ acceptPseudoElements bool
}
// parseEscape parses a backslash escape.
@@ -31,7 +33,7 @@ func (p *parser) parseEscape() (result string, err error) {
case hexDigit(c):
// unicode escape (hex)
var i int
- for i = start; i < p.i+6 && i < len(p.s) && hexDigit(p.s[i]); i++ {
+ for i = start; i < start+6 && i < len(p.s) && hexDigit(p.s[i]); i++ {
// empty
}
v, _ := strconv.ParseUint(p.s[start:i], 16, 21)
@@ -56,6 +58,26 @@ func (p *parser) parseEscape() (result string, err error) {
return result, nil
}
+// toLowerASCII returns s with all ASCII capital letters lowercased.
+func toLowerASCII(s string) string {
+ var b []byte
+ for i := 0; i < len(s); i++ {
+ if c := s[i]; 'A' <= c && c <= 'Z' {
+ if b == nil {
+ b = make([]byte, len(s))
+ copy(b, s)
+ }
+ b[i] = s[i] + ('a' - 'A')
+ }
+ }
+
+ if b == nil {
+ return s
+ }
+
+ return string(b)
+}
+
func hexDigit(c byte) bool {
return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
}
@@ -280,92 +302,92 @@ func (p *parser) consumeClosingParenthesis() bool {
}
// parseTypeSelector parses a type selector (one that matches by tag name).
-func (p *parser) parseTypeSelector() (result Selector, err error) {
+func (p *parser) parseTypeSelector() (result tagSelector, err error) {
tag, err := p.parseIdentifier()
if err != nil {
- return nil, err
+ return
}
-
- return typeSelector(tag), nil
+ return tagSelector{tag: toLowerASCII(tag)}, nil
}
// parseIDSelector parses a selector that matches by id attribute.
-func (p *parser) parseIDSelector() (Selector, error) {
+func (p *parser) parseIDSelector() (idSelector, error) {
if p.i >= len(p.s) {
- return nil, fmt.Errorf("expected id selector (#id), found EOF instead")
+ return idSelector{}, fmt.Errorf("expected id selector (#id), found EOF instead")
}
if p.s[p.i] != '#' {
- return nil, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i])
+ return idSelector{}, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i])
}
p.i++
id, err := p.parseName()
if err != nil {
- return nil, err
+ return idSelector{}, err
}
- return attributeEqualsSelector("id", id), nil
+ return idSelector{id: id}, nil
}
// parseClassSelector parses a selector that matches by class attribute.
-func (p *parser) parseClassSelector() (Selector, error) {
+func (p *parser) parseClassSelector() (classSelector, error) {
if p.i >= len(p.s) {
- return nil, fmt.Errorf("expected class selector (.class), found EOF instead")
+ return classSelector{}, fmt.Errorf("expected class selector (.class), found EOF instead")
}
if p.s[p.i] != '.' {
- return nil, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i])
+ return classSelector{}, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i])
}
p.i++
class, err := p.parseIdentifier()
if err != nil {
- return nil, err
+ return classSelector{}, err
}
- return attributeIncludesSelector("class", class), nil
+ return classSelector{class: class}, nil
}
// parseAttributeSelector parses a selector that matches by attribute value.
-func (p *parser) parseAttributeSelector() (Selector, error) {
+func (p *parser) parseAttributeSelector() (attrSelector, error) {
if p.i >= len(p.s) {
- return nil, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead")
+ return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead")
}
if p.s[p.i] != '[' {
- return nil, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i])
+ return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i])
}
p.i++
p.skipWhitespace()
key, err := p.parseIdentifier()
if err != nil {
- return nil, err
+ return attrSelector{}, err
}
+ key = toLowerASCII(key)
p.skipWhitespace()
if p.i >= len(p.s) {
- return nil, errors.New("unexpected EOF in attribute selector")
+ return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
if p.s[p.i] == ']' {
p.i++
- return attributeExistsSelector(key), nil
+ return attrSelector{key: key, operation: ""}, nil
}
if p.i+2 >= len(p.s) {
- return nil, errors.New("unexpected EOF in attribute selector")
+ return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
op := p.s[p.i : p.i+2]
if op[0] == '=' {
op = "="
} else if op[1] != '=' {
- return nil, fmt.Errorf(`expected equality operator, found "%s" instead`, op)
+ return attrSelector{}, fmt.Errorf(`expected equality operator, found "%s" instead`, op)
}
p.i += len(op)
p.skipWhitespace()
if p.i >= len(p.s) {
- return nil, errors.New("unexpected EOF in attribute selector")
+ return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
var val string
var rx *regexp.Regexp
@@ -380,88 +402,84 @@ func (p *parser) parseAttributeSelector() (Selector, error) {
}
}
if err != nil {
- return nil, err
+ return attrSelector{}, err
}
p.skipWhitespace()
if p.i >= len(p.s) {
- return nil, errors.New("unexpected EOF in attribute selector")
+ return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
if p.s[p.i] != ']' {
- return nil, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
+ return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
}
p.i++
switch op {
- case "=":
- return attributeEqualsSelector(key, val), nil
- case "!=":
- return attributeNotEqualSelector(key, val), nil
- case "~=":
- return attributeIncludesSelector(key, val), nil
- case "|=":
- return attributeDashmatchSelector(key, val), nil
- case "^=":
- return attributePrefixSelector(key, val), nil
- case "$=":
- return attributeSuffixSelector(key, val), nil
- case "*=":
- return attributeSubstringSelector(key, val), nil
- case "#=":
- return attributeRegexSelector(key, rx), nil
- }
-
- return nil, fmt.Errorf("attribute operator %q is not supported", op)
+ case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=":
+ return attrSelector{key: key, val: val, operation: op, regexp: rx}, nil
+ default:
+ return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op)
+ }
}
var errExpectedParenthesis = errors.New("expected '(' but didn't find it")
var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
var errUnmatchedParenthesis = errors.New("unmatched '('")
-// parsePseudoclassSelector parses a pseudoclass selector like :not(p).
-func (p *parser) parsePseudoclassSelector() (Selector, error) {
+// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element.
+// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
+// https://drafts.csswg.org/selectors-3/#pseudo-elements
+// Returning a nil `Sel` (and a nil `error`) means we found a pseudo-element.
+func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err error) {
if p.i >= len(p.s) {
- return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
+ return nil, "", fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
}
if p.s[p.i] != ':' {
- return nil, fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i])
+ return nil, "", fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i])
}
p.i++
+ var mustBePseudoElement bool
+ if p.i >= len(p.s) {
+ return nil, "", fmt.Errorf("got empty pseudoclass (or pseudoelement)")
+ }
+ if p.s[p.i] == ':' { // we found a pseudo-element
+ mustBePseudoElement = true
+ p.i++
+ }
+
name, err := p.parseIdentifier()
if err != nil {
- return nil, err
+ return
}
name = toLowerASCII(name)
+ if mustBePseudoElement && (name != "after" && name != "backdrop" && name != "before" &&
+ name != "cue" && name != "first-letter" && name != "first-line" && name != "grammar-error" &&
+ name != "marker" && name != "placeholder" && name != "selection" && name != "spelling-error") {
+ return out, "", fmt.Errorf("unknown pseudoelement :%s", name)
+ }
switch name {
case "not", "has", "haschild":
if !p.consumeParenthesis() {
- return nil, errExpectedParenthesis
+ return out, "", errExpectedParenthesis
}
sel, parseErr := p.parseSelectorGroup()
if parseErr != nil {
- return nil, parseErr
+ return out, "", parseErr
}
if !p.consumeClosingParenthesis() {
- return nil, errExpectedClosingParenthesis
+ return out, "", errExpectedClosingParenthesis
}
- switch name {
- case "not":
- return negatedSelector(sel), nil
- case "has":
- return hasDescendantSelector(sel), nil
- case "haschild":
- return hasChildSelector(sel), nil
- }
+ out = relativePseudoClassSelector{name: name, match: sel}
case "contains", "containsown":
if !p.consumeParenthesis() {
- return nil, errExpectedParenthesis
+ return out, "", errExpectedParenthesis
}
if p.i == len(p.s) {
- return nil, errUnmatchedParenthesis
+ return out, "", errUnmatchedParenthesis
}
var val string
switch p.s[p.i] {
@@ -471,95 +489,75 @@ func (p *parser) parsePseudoclassSelector() (Selector, error) {
val, err = p.parseIdentifier()
}
if err != nil {
- return nil, err
+ return out, "", err
}
val = strings.ToLower(val)
p.skipWhitespace()
if p.i >= len(p.s) {
- return nil, errors.New("unexpected EOF in pseudo selector")
+ return out, "", errors.New("unexpected EOF in pseudo selector")
}
if !p.consumeClosingParenthesis() {
- return nil, errExpectedClosingParenthesis
+ return out, "", errExpectedClosingParenthesis
}
- switch name {
- case "contains":
- return textSubstrSelector(val), nil
- case "containsown":
- return ownTextSubstrSelector(val), nil
- }
+ out = containsPseudoClassSelector{own: name == "containsown", value: val}
case "matches", "matchesown":
if !p.consumeParenthesis() {
- return nil, errExpectedParenthesis
+ return out, "", errExpectedParenthesis
}
rx, err := p.parseRegex()
if err != nil {
- return nil, err
+ return out, "", err
}
if p.i >= len(p.s) {
- return nil, errors.New("unexpected EOF in pseudo selector")
+ return out, "", errors.New("unexpected EOF in pseudo selector")
}
if !p.consumeClosingParenthesis() {
- return nil, errExpectedClosingParenthesis
+ return out, "", errExpectedClosingParenthesis
}
- switch name {
- case "matches":
- return textRegexSelector(rx), nil
- case "matchesown":
- return ownTextRegexSelector(rx), nil
- }
+ out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx}
case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type":
if !p.consumeParenthesis() {
- return nil, errExpectedParenthesis
+ return out, "", errExpectedParenthesis
}
a, b, err := p.parseNth()
if err != nil {
- return nil, err
+ return out, "", err
}
if !p.consumeClosingParenthesis() {
- return nil, errExpectedClosingParenthesis
- }
- if a == 0 {
- switch name {
- case "nth-child":
- return simpleNthChildSelector(b, false), nil
- case "nth-of-type":
- return simpleNthChildSelector(b, true), nil
- case "nth-last-child":
- return simpleNthLastChildSelector(b, false), nil
- case "nth-last-of-type":
- return simpleNthLastChildSelector(b, true), nil
- }
+ return out, "", errExpectedClosingParenthesis
}
- return nthChildSelector(a, b,
- name == "nth-last-child" || name == "nth-last-of-type",
- name == "nth-of-type" || name == "nth-last-of-type"),
- nil
+ last := name == "nth-last-child" || name == "nth-last-of-type"
+ ofType := name == "nth-of-type" || name == "nth-last-of-type"
+ out = nthPseudoClassSelector{a: a, b: b, last: last, ofType: ofType}
case "first-child":
- return simpleNthChildSelector(1, false), nil
+ out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: false}
case "last-child":
- return simpleNthLastChildSelector(1, false), nil
+ out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: true}
case "first-of-type":
- return simpleNthChildSelector(1, true), nil
+ out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: false}
case "last-of-type":
- return simpleNthLastChildSelector(1, true), nil
+ out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: true}
case "only-child":
- return onlyChildSelector(false), nil
+ out = onlyChildPseudoClassSelector{ofType: false}
case "only-of-type":
- return onlyChildSelector(true), nil
+ out = onlyChildPseudoClassSelector{ofType: true}
case "input":
- return inputSelector, nil
+ out = inputPseudoClassSelector{}
case "empty":
- return emptyElementSelector, nil
+ out = emptyElementPseudoClassSelector{}
case "root":
- return rootSelector, nil
+ out = rootPseudoClassSelector{}
+ case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error":
+ return nil, name, nil
+ default:
+ return out, "", fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name)
}
-
- return nil, fmt.Errorf("unknown pseudoclass :%s", name)
+ return
}
// parseInteger parses a decimal integer.
@@ -705,8 +703,8 @@ invalid:
// parseSimpleSelectorSequence parses a selector sequence that applies to
// a single element.
-func (p *parser) parseSimpleSelectorSequence() (Selector, error) {
- var result Selector
+func (p *parser) parseSimpleSelectorSequence() (Sel, error) {
+ var selectors []Sel
if p.i >= len(p.s) {
return nil, errors.New("expected selector, found EOF instead")
@@ -723,13 +721,17 @@ func (p *parser) parseSimpleSelectorSequence() (Selector, error) {
if err != nil {
return nil, err
}
- result = r
+ selectors = append(selectors, r)
}
+ var pseudoElement string
loop:
for p.i < len(p.s) {
- var ns Selector
- var err error
+ var (
+ ns Sel
+ newPseudoElement string
+ err error
+ )
switch p.s[p.i] {
case '#':
ns, err = p.parseIDSelector()
@@ -738,44 +740,57 @@ loop:
case '[':
ns, err = p.parseAttributeSelector()
case ':':
- ns, err = p.parsePseudoclassSelector()
+ ns, newPseudoElement, err = p.parsePseudoclassSelector()
default:
break loop
}
if err != nil {
return nil, err
}
- if result == nil {
- result = ns
+ // From https://drafts.csswg.org/selectors-3/#pseudo-elements :
+ // "Only one pseudo-element may appear per selector, and if present
+ // it must appear after the sequence of simple selectors that
+ // represents the subjects of the selector."
+ if ns == nil { // we found a pseudo-element
+ if pseudoElement != "" {
+ return nil, fmt.Errorf("only one pseudo-element is accepted per selector, got %s and %s", pseudoElement, newPseudoElement)
+ }
+ if !p.acceptPseudoElements {
+ return nil, fmt.Errorf("pseudo-element %s found, but pseudo-elements support is disabled", newPseudoElement)
+ }
+ pseudoElement = newPseudoElement
} else {
- result = intersectionSelector(result, ns)
+ if pseudoElement != "" {
+ return nil, fmt.Errorf("pseudo-element %s must be at the end of selector", pseudoElement)
+ }
+ selectors = append(selectors, ns)
}
- }
- if result == nil {
- result = func(n *html.Node) bool {
- return n.Type == html.ElementNode
- }
}
-
- return result, nil
+ if len(selectors) == 1 && pseudoElement == "" { // no need wrap the selectors in compoundSelector
+ return selectors[0], nil
+ }
+ return compoundSelector{selectors: selectors, pseudoElement: pseudoElement}, nil
}
// parseSelector parses a selector that may include combinators.
-func (p *parser) parseSelector() (result Selector, err error) {
+func (p *parser) parseSelector() (Sel, error) {
p.skipWhitespace()
- result, err = p.parseSimpleSelectorSequence()
+ result, err := p.parseSimpleSelectorSequence()
if err != nil {
- return
+ return nil, err
}
for {
- var combinator byte
+ var (
+ combinator byte
+ c Sel
+ )
if p.skipWhitespace() {
combinator = ' '
}
if p.i >= len(p.s) {
- return
+ return result, nil
}
switch p.s[p.i] {
@@ -785,51 +800,39 @@ func (p *parser) parseSelector() (result Selector, err error) {
p.skipWhitespace()
case ',', ')':
// These characters can't begin a selector, but they can legally occur after one.
- return
+ return result, nil
}
if combinator == 0 {
- return
+ return result, nil
}
- c, err := p.parseSimpleSelectorSequence()
+ c, err = p.parseSimpleSelectorSequence()
if err != nil {
return nil, err
}
-
- switch combinator {
- case ' ':
- result = descendantSelector(result, c)
- case '>':
- result = childSelector(result, c)
- case '+':
- result = siblingSelector(result, c, true)
- case '~':
- result = siblingSelector(result, c, false)
- }
+ result = combinedSelector{first: result, combinator: combinator, second: c}
}
-
- panic("unreachable")
}
// parseSelectorGroup parses a group of selectors, separated by commas.
-func (p *parser) parseSelectorGroup() (result Selector, err error) {
- result, err = p.parseSelector()
+func (p *parser) parseSelectorGroup() (SelectorGroup, error) {
+ current, err := p.parseSelector()
if err != nil {
- return
+ return nil, err
}
+ result := SelectorGroup{current}
for p.i < len(p.s) {
if p.s[p.i] != ',' {
- return result, nil
+ break
}
p.i++
c, err := p.parseSelector()
if err != nil {
return nil, err
}
- result = unionSelector(result, c)
+ result = append(result, c)
}
-
- return
+ return result, nil
}
diff --git a/vendor/github.com/andybalholm/cascadia/selector.go b/vendor/github.com/andybalholm/cascadia/selector.go
index 9fb05cc..e2a6dc4 100644
--- a/vendor/github.com/andybalholm/cascadia/selector.go
+++ b/vendor/github.com/andybalholm/cascadia/selector.go
@@ -9,36 +9,60 @@ import (
"golang.org/x/net/html"
)
-// the Selector type, and functions for creating them
+// Matcher is the interface for basic selector functionality.
+// Match returns whether a selector matches n.
+type Matcher interface {
+ Match(n *html.Node) bool
+}
-// A Selector is a function which tells whether a node matches or not.
-type Selector func(*html.Node) bool
+// Sel is the interface for all the functionality provided by selectors.
+type Sel interface {
+ Matcher
+ Specificity() Specificity
-// hasChildMatch returns whether n has any child that matches a.
-func hasChildMatch(n *html.Node, a Selector) bool {
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- if a(c) {
- return true
- }
+ // Returns a CSS input compiling to this selector.
+ String() string
+
+ // Returns a pseudo-element, or an empty string.
+ PseudoElement() string
+}
+
+// Parse parses a selector. Use `ParseWithPseudoElement`
+// if you need support for pseudo-elements.
+func Parse(sel string) (Sel, error) {
+ p := &parser{s: sel}
+ compiled, err := p.parseSelector()
+ if err != nil {
+ return nil, err
}
- return false
+
+ if p.i < len(sel) {
+ return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
+ }
+
+ return compiled, nil
}
-// hasDescendantMatch performs a depth-first search of n's descendants,
-// testing whether any of them match a. It returns true as soon as a match is
-// found, or false if no match is found.
-func hasDescendantMatch(n *html.Node, a Selector) bool {
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- if a(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
- return true
- }
+// ParseWithPseudoElement parses a single selector,
+// with support for pseudo-elements.
+func ParseWithPseudoElement(sel string) (Sel, error) {
+ p := &parser{s: sel, acceptPseudoElements: true}
+ compiled, err := p.parseSelector()
+ if err != nil {
+ return nil, err
}
- return false
+
+ if p.i < len(sel) {
+ return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
+ }
+
+ return compiled, nil
}
-// Compile parses a selector and returns, if successful, a Selector object
-// that can be used to match against html.Node objects.
-func Compile(sel string) (Selector, error) {
+// ParseGroup parses a selector, or a group of selectors separated by commas.
+// Use `ParseGroupWithPseudoElements`
+// if you need support for pseudo-elements.
+func ParseGroup(sel string) (SelectorGroup, error) {
p := &parser{s: sel}
compiled, err := p.parseSelectorGroup()
if err != nil {
@@ -52,6 +76,39 @@ func Compile(sel string) (Selector, error) {
return compiled, nil
}
+// ParseGroupWithPseudoElements parses a selector, or a group of selectors separated by commas.
+// It supports pseudo-elements.
+func ParseGroupWithPseudoElements(sel string) (SelectorGroup, error) {
+ p := &parser{s: sel, acceptPseudoElements: true}
+ compiled, err := p.parseSelectorGroup()
+ if err != nil {
+ return nil, err
+ }
+
+ if p.i < len(sel) {
+ return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
+ }
+
+ return compiled, nil
+}
+
+// A Selector is a function which tells whether a node matches or not.
+//
+// This type is maintained for compatibility; I recommend using the newer and
+// more idiomatic interfaces Sel and Matcher.
+type Selector func(*html.Node) bool
+
+// Compile parses a selector and returns, if successful, a Selector object
+// that can be used to match against html.Node objects.
+func Compile(sel string) (Selector, error) {
+ compiled, err := ParseGroup(sel)
+ if err != nil {
+ return nil, err
+ }
+
+ return Selector(compiled.Match), nil
+}
+
// MustCompile is like Compile, but panics instead of returning an error.
func MustCompile(sel string) Selector {
compiled, err := Compile(sel)
@@ -79,6 +136,23 @@ func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node
return storage
}
+func queryInto(n *html.Node, m Matcher, storage []*html.Node) []*html.Node {
+ for child := n.FirstChild; child != nil; child = child.NextSibling {
+ if m.Match(child) {
+ storage = append(storage, child)
+ }
+ storage = queryInto(child, m, storage)
+ }
+
+ return storage
+}
+
+// QueryAll returns a slice of all the nodes that match m, from the descendants
+// of n.
+func QueryAll(n *html.Node, m Matcher) []*html.Node {
+ return queryInto(n, m, nil)
+}
+
// Match returns true if the node matches the selector.
func (s Selector) Match(n *html.Node) bool {
return s(n)
@@ -99,6 +173,21 @@ func (s Selector) MatchFirst(n *html.Node) *html.Node {
return nil
}
+// Query returns the first node that matches m, from the descendants of n.
+// If none matches, it returns nil.
+func Query(n *html.Node, m Matcher) *html.Node {
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ if m.Match(c) {
+ return c
+ }
+ if matched := Query(c, m); matched != nil {
+ return matched
+ }
+ }
+
+ return nil
+}
+
// Filter returns the nodes in nodes that match the selector.
func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
for _, n := range nodes {
@@ -109,106 +198,148 @@ func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
return result
}
-// typeSelector returns a Selector that matches elements with a given tag name.
-func typeSelector(tag string) Selector {
- tag = toLowerASCII(tag)
- return func(n *html.Node) bool {
- return n.Type == html.ElementNode && n.Data == tag
+// Filter returns the nodes that match m.
+func Filter(nodes []*html.Node, m Matcher) (result []*html.Node) {
+ for _, n := range nodes {
+ if m.Match(n) {
+ result = append(result, n)
+ }
}
+ return result
}
-// toLowerASCII returns s with all ASCII capital letters lowercased.
-func toLowerASCII(s string) string {
- var b []byte
- for i := 0; i < len(s); i++ {
- if c := s[i]; 'A' <= c && c <= 'Z' {
- if b == nil {
- b = make([]byte, len(s))
- copy(b, s)
- }
- b[i] = s[i] + ('a' - 'A')
- }
- }
+type tagSelector struct {
+ tag string
+}
- if b == nil {
- return s
- }
+// Matches elements with a given tag name.
+func (t tagSelector) Match(n *html.Node) bool {
+ return n.Type == html.ElementNode && n.Data == t.tag
+}
- return string(b)
+func (c tagSelector) Specificity() Specificity {
+ return Specificity{0, 0, 1}
}
-// attributeSelector returns a Selector that matches elements
-// where the attribute named key satisifes the function f.
-func attributeSelector(key string, f func(string) bool) Selector {
- key = toLowerASCII(key)
- return func(n *html.Node) bool {
- if n.Type != html.ElementNode {
- return false
- }
- for _, a := range n.Attr {
- if a.Key == key && f(a.Val) {
- return true
- }
- }
- return false
- }
+func (c tagSelector) PseudoElement() string {
+ return ""
}
-// attributeExistsSelector returns a Selector that matches elements that have
-// an attribute named key.
-func attributeExistsSelector(key string) Selector {
- return attributeSelector(key, func(string) bool { return true })
+type classSelector struct {
+ class string
}
-// attributeEqualsSelector returns a Selector that matches elements where
-// the attribute named key has the value val.
-func attributeEqualsSelector(key, val string) Selector {
- return attributeSelector(key,
- func(s string) bool {
- return s == val
- })
+// Matches elements by class attribute.
+func (t classSelector) Match(n *html.Node) bool {
+ return matchAttribute(n, "class", func(s string) bool {
+ return matchInclude(t.class, s)
+ })
+}
+
+func (c classSelector) Specificity() Specificity {
+ return Specificity{0, 1, 0}
+}
+
+func (c classSelector) PseudoElement() string {
+ return ""
+}
+
+type idSelector struct {
+ id string
+}
+
+// Matches elements by id attribute.
+func (t idSelector) Match(n *html.Node) bool {
+ return matchAttribute(n, "id", func(s string) bool {
+ return s == t.id
+ })
+}
+
+func (c idSelector) Specificity() Specificity {
+ return Specificity{1, 0, 0}
+}
+
+func (c idSelector) PseudoElement() string {
+ return ""
+}
+
+type attrSelector struct {
+ key, val, operation string
+ regexp *regexp.Regexp
+}
+
+// Matches elements by attribute value.
+func (t attrSelector) Match(n *html.Node) bool {
+ switch t.operation {
+ case "":
+ return matchAttribute(n, t.key, func(string) bool { return true })
+ case "=":
+ return matchAttribute(n, t.key, func(s string) bool { return s == t.val })
+ case "!=":
+ return attributeNotEqualMatch(t.key, t.val, n)
+ case "~=":
+ // matches elements where the attribute named key is a whitespace-separated list that includes val.
+ return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s) })
+ case "|=":
+ return attributeDashMatch(t.key, t.val, n)
+ case "^=":
+ return attributePrefixMatch(t.key, t.val, n)
+ case "$=":
+ return attributeSuffixMatch(t.key, t.val, n)
+ case "*=":
+ return attributeSubstringMatch(t.key, t.val, n)
+ case "#=":
+ return attributeRegexMatch(t.key, t.regexp, n)
+ default:
+ panic(fmt.Sprintf("unsuported operation : %s", t.operation))
+ }
}
-// attributeNotEqualSelector returns a Selector that matches elements where
+// matchAttribute matches elements where the attribute named key satisfies the function f.
+func matchAttribute(n *html.Node, key string, f func(string) bool) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+ for _, a := range n.Attr {
+ if a.Key == key && f(a.Val) {
+ return true
+ }
+ }
+ return false
+}
+
+// attributeNotEqualMatch matches elements where
// the attribute named key does not have the value val.
-func attributeNotEqualSelector(key, val string) Selector {
- key = toLowerASCII(key)
- return func(n *html.Node) bool {
- if n.Type != html.ElementNode {
+func attributeNotEqualMatch(key, val string, n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+ for _, a := range n.Attr {
+ if a.Key == key && a.Val == val {
return false
}
- for _, a := range n.Attr {
- if a.Key == key && a.Val == val {
- return false
- }
- }
- return true
}
+ return true
}
-// attributeIncludesSelector returns a Selector that matches elements where
-// the attribute named key is a whitespace-separated list that includes val.
-func attributeIncludesSelector(key, val string) Selector {
- return attributeSelector(key,
- func(s string) bool {
- for s != "" {
- i := strings.IndexAny(s, " \t\r\n\f")
- if i == -1 {
- return s == val
- }
- if s[:i] == val {
- return true
- }
- s = s[i+1:]
- }
- return false
- })
+// returns true if s is a whitespace-separated list that includes val.
+func matchInclude(val, s string) bool {
+ for s != "" {
+ i := strings.IndexAny(s, " \t\r\n\f")
+ if i == -1 {
+ return s == val
+ }
+ if s[:i] == val {
+ return true
+ }
+ s = s[i+1:]
+ }
+ return false
}
-// attributeDashmatchSelector returns a Selector that matches elements where
-// the attribute named key equals val or starts with val plus a hyphen.
-func attributeDashmatchSelector(key, val string) Selector {
- return attributeSelector(key,
+// attributeDashMatch matches elements where the attribute named key equals val or starts with val plus a hyphen.
+func attributeDashMatch(key, val string, n *html.Node) bool {
+ return matchAttribute(n, key,
func(s string) bool {
if s == val {
return true
@@ -223,10 +354,10 @@ func attributeDashmatchSelector(key, val string) Selector {
})
}
-// attributePrefixSelector returns a Selector that matches elements where
+// attributePrefixMatch matches elements where
// the attribute named key starts with val.
-func attributePrefixSelector(key, val string) Selector {
- return attributeSelector(key,
+func attributePrefixMatch(key, val string, n *html.Node) bool {
+ return matchAttribute(n, key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
@@ -235,10 +366,10 @@ func attributePrefixSelector(key, val string) Selector {
})
}
-// attributeSuffixSelector returns a Selector that matches elements where
+// attributeSuffixMatch matches elements where
// the attribute named key ends with val.
-func attributeSuffixSelector(key, val string) Selector {
- return attributeSelector(key,
+func attributeSuffixMatch(key, val string, n *html.Node) bool {
+ return matchAttribute(n, key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
@@ -247,10 +378,10 @@ func attributeSuffixSelector(key, val string) Selector {
})
}
-// attributeSubstringSelector returns a Selector that matches nodes where
+// attributeSubstringMatch matches nodes where
// the attribute named key contains val.
-func attributeSubstringSelector(key, val string) Selector {
- return attributeSelector(key,
+func attributeSubstringMatch(key, val string, n *html.Node) bool {
+ return matchAttribute(n, key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
@@ -259,39 +390,130 @@ func attributeSubstringSelector(key, val string) Selector {
})
}
-// attributeRegexSelector returns a Selector that matches nodes where
+// attributeRegexMatch matches nodes where
// the attribute named key matches the regular expression rx
-func attributeRegexSelector(key string, rx *regexp.Regexp) Selector {
- return attributeSelector(key,
+func attributeRegexMatch(key string, rx *regexp.Regexp, n *html.Node) bool {
+ return matchAttribute(n, key,
func(s string) bool {
return rx.MatchString(s)
})
}
-// intersectionSelector returns a selector that matches nodes that match
-// both a and b.
-func intersectionSelector(a, b Selector) Selector {
- return func(n *html.Node) bool {
- return a(n) && b(n)
+func (c attrSelector) Specificity() Specificity {
+ return Specificity{0, 1, 0}
+}
+
+func (c attrSelector) PseudoElement() string {
+ return ""
+}
+
+// ---------------- Pseudo class selectors ----------------
+// we use several concrete types of pseudo-class selectors
+
+type relativePseudoClassSelector struct {
+ name string // one of "not", "has", "haschild"
+ match SelectorGroup
+}
+
+func (s relativePseudoClassSelector) Match(n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+ switch s.name {
+ case "not":
+ // matches elements that do not match a.
+ return !s.match.Match(n)
+ case "has":
+ // matches elements with any descendant that matches a.
+ return hasDescendantMatch(n, s.match)
+ case "haschild":
+ // matches elements with a child that matches a.
+ return hasChildMatch(n, s.match)
+ default:
+ panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name))
}
}
-// unionSelector returns a selector that matches elements that match
-// either a or b.
-func unionSelector(a, b Selector) Selector {
- return func(n *html.Node) bool {
- return a(n) || b(n)
+// hasChildMatch returns whether n has any child that matches a.
+func hasChildMatch(n *html.Node, a Matcher) bool {
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ if a.Match(c) {
+ return true
+ }
}
+ return false
}
-// negatedSelector returns a selector that matches elements that do not match a.
-func negatedSelector(a Selector) Selector {
- return func(n *html.Node) bool {
- if n.Type != html.ElementNode {
- return false
+// hasDescendantMatch performs a depth-first search of n's descendants,
+// testing whether any of them match a. It returns true as soon as a match is
+// found, or false if no match is found.
+func hasDescendantMatch(n *html.Node, a Matcher) bool {
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ if a.Match(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
+ return true
}
- return !a(n)
}
+ return false
+}
+
+// Specificity returns the specificity of the most specific selectors
+// in the pseudo-class arguments.
+// See https://www.w3.org/TR/selectors/#specificity-rules
+func (s relativePseudoClassSelector) Specificity() Specificity {
+ var max Specificity
+ for _, sel := range s.match {
+ newSpe := sel.Specificity()
+ if max.Less(newSpe) {
+ max = newSpe
+ }
+ }
+ return max
+}
+
+func (c relativePseudoClassSelector) PseudoElement() string {
+ return ""
+}
+
+type containsPseudoClassSelector struct {
+ own bool
+ value string
+}
+
+func (s containsPseudoClassSelector) Match(n *html.Node) bool {
+ var text string
+ if s.own {
+ // matches nodes that directly contain the given text
+ text = strings.ToLower(nodeOwnText(n))
+ } else {
+ // matches nodes that contain the given text.
+ text = strings.ToLower(nodeText(n))
+ }
+ return strings.Contains(text, s.value)
+}
+
+func (s containsPseudoClassSelector) Specificity() Specificity {
+ return Specificity{0, 1, 0}
+}
+
+func (c containsPseudoClassSelector) PseudoElement() string {
+ return ""
+}
+
+type regexpPseudoClassSelector struct {
+ own bool
+ regexp *regexp.Regexp
+}
+
+func (s regexpPseudoClassSelector) Match(n *html.Node) bool {
+ var text string
+ if s.own {
+ // matches nodes whose text directly matches the specified regular expression
+ text = nodeOwnText(n)
+ } else {
+ // matches nodes whose text matches the specified regular expression
+ text = nodeText(n)
+ }
+ return s.regexp.MatchString(text)
}
// writeNodeText writes the text contained in n and its descendants to b.
@@ -325,221 +547,214 @@ func nodeOwnText(n *html.Node) string {
return b.String()
}
-// textSubstrSelector returns a selector that matches nodes that
-// contain the given text.
-func textSubstrSelector(val string) Selector {
- return func(n *html.Node) bool {
- text := strings.ToLower(nodeText(n))
- return strings.Contains(text, val)
- }
-}
-
-// ownTextSubstrSelector returns a selector that matches nodes that
-// directly contain the given text
-func ownTextSubstrSelector(val string) Selector {
- return func(n *html.Node) bool {
- text := strings.ToLower(nodeOwnText(n))
- return strings.Contains(text, val)
- }
-}
-
-// textRegexSelector returns a selector that matches nodes whose text matches
-// the specified regular expression
-func textRegexSelector(rx *regexp.Regexp) Selector {
- return func(n *html.Node) bool {
- return rx.MatchString(nodeText(n))
- }
+func (s regexpPseudoClassSelector) Specificity() Specificity {
+ return Specificity{0, 1, 0}
}
-// ownTextRegexSelector returns a selector that matches nodes whose text
-// directly matches the specified regular expression
-func ownTextRegexSelector(rx *regexp.Regexp) Selector {
- return func(n *html.Node) bool {
- return rx.MatchString(nodeOwnText(n))
- }
+func (c regexpPseudoClassSelector) PseudoElement() string {
+ return ""
}
-// hasChildSelector returns a selector that matches elements
-// with a child that matches a.
-func hasChildSelector(a Selector) Selector {
- return func(n *html.Node) bool {
- if n.Type != html.ElementNode {
- return false
- }
- return hasChildMatch(n, a)
- }
+type nthPseudoClassSelector struct {
+ a, b int
+ last, ofType bool
}
-// hasDescendantSelector returns a selector that matches elements
-// with any descendant that matches a.
-func hasDescendantSelector(a Selector) Selector {
- return func(n *html.Node) bool {
- if n.Type != html.ElementNode {
- return false
+func (s nthPseudoClassSelector) Match(n *html.Node) bool {
+ if s.a == 0 {
+ if s.last {
+ return simpleNthLastChildMatch(s.b, s.ofType, n)
+ } else {
+ return simpleNthChildMatch(s.b, s.ofType, n)
}
- return hasDescendantMatch(n, a)
}
+ return nthChildMatch(s.a, s.b, s.last, s.ofType, n)
}
-// nthChildSelector returns a selector that implements :nth-child(an+b).
+// nthChildMatch implements :nth-child(an+b).
// If last is true, implements :nth-last-child instead.
// If ofType is true, implements :nth-of-type instead.
-func nthChildSelector(a, b int, last, ofType bool) Selector {
- return func(n *html.Node) bool {
- if n.Type != html.ElementNode {
- return false
- }
+func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
- parent := n.Parent
- if parent == nil {
- return false
- }
+ parent := n.Parent
+ if parent == nil {
+ return false
+ }
- if parent.Type == html.DocumentNode {
- return false
- }
+ if parent.Type == html.DocumentNode {
+ return false
+ }
- i := -1
- count := 0
- for c := parent.FirstChild; c != nil; c = c.NextSibling {
- if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
- continue
- }
- count++
- if c == n {
- i = count
- if !last {
- break
- }
- }
+ i := -1
+ count := 0
+ for c := parent.FirstChild; c != nil; c = c.NextSibling {
+ if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
+ continue
}
-
- if i == -1 {
- // This shouldn't happen, since n should always be one of its parent's children.
- return false
+ count++
+ if c == n {
+ i = count
+ if !last {
+ break
+ }
}
+ }
- if last {
- i = count - i + 1
- }
+ if i == -1 {
+ // This shouldn't happen, since n should always be one of its parent's children.
+ return false
+ }
- i -= b
- if a == 0 {
- return i == 0
- }
+ if last {
+ i = count - i + 1
+ }
- return i%a == 0 && i/a >= 0
+ i -= b
+ if a == 0 {
+ return i == 0
}
+
+ return i%a == 0 && i/a >= 0
}
-// simpleNthChildSelector returns a selector that implements :nth-child(b).
+// simpleNthChildMatch implements :nth-child(b).
// If ofType is true, implements :nth-of-type instead.
-func simpleNthChildSelector(b int, ofType bool) Selector {
- return func(n *html.Node) bool {
- if n.Type != html.ElementNode {
- return false
- }
+func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
- parent := n.Parent
- if parent == nil {
- return false
- }
+ parent := n.Parent
+ if parent == nil {
+ return false
+ }
+
+ if parent.Type == html.DocumentNode {
+ return false
+ }
- if parent.Type == html.DocumentNode {
+ count := 0
+ for c := parent.FirstChild; c != nil; c = c.NextSibling {
+ if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
+ continue
+ }
+ count++
+ if c == n {
+ return count == b
+ }
+ if count >= b {
return false
}
+ }
+ return false
+}
- count := 0
- for c := parent.FirstChild; c != nil; c = c.NextSibling {
- if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
- continue
- }
- count++
- if c == n {
- return count == b
- }
- if count >= b {
- return false
- }
- }
+// simpleNthLastChildMatch implements :nth-last-child(b).
+// If ofType is true, implements :nth-last-of-type instead.
+func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool {
+ if n.Type != html.ElementNode {
return false
}
-}
-// simpleNthLastChildSelector returns a selector that implements
-// :nth-last-child(b). If ofType is true, implements :nth-last-of-type
-// instead.
-func simpleNthLastChildSelector(b int, ofType bool) Selector {
- return func(n *html.Node) bool {
- if n.Type != html.ElementNode {
- return false
- }
+ parent := n.Parent
+ if parent == nil {
+ return false
+ }
- parent := n.Parent
- if parent == nil {
- return false
- }
+ if parent.Type == html.DocumentNode {
+ return false
+ }
- if parent.Type == html.DocumentNode {
+ count := 0
+ for c := parent.LastChild; c != nil; c = c.PrevSibling {
+ if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
+ continue
+ }
+ count++
+ if c == n {
+ return count == b
+ }
+ if count >= b {
return false
}
+ }
+ return false
+}
- count := 0
- for c := parent.LastChild; c != nil; c = c.PrevSibling {
- if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
- continue
- }
- count++
- if c == n {
- return count == b
- }
- if count >= b {
- return false
- }
- }
+// Specificity for nth-child pseudo-class.
+// Does not support a list of selectors
+func (s nthPseudoClassSelector) Specificity() Specificity {
+ return Specificity{0, 1, 0}
+}
+
+func (c nthPseudoClassSelector) PseudoElement() string {
+ return ""
+}
+
+type onlyChildPseudoClassSelector struct {
+ ofType bool
+}
+
+// Match implements :only-child.
+// If `ofType` is true, it implements :only-of-type instead.
+func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool {
+ if n.Type != html.ElementNode {
return false
}
-}
-// onlyChildSelector returns a selector that implements :only-child.
-// If ofType is true, it implements :only-of-type instead.
-func onlyChildSelector(ofType bool) Selector {
- return func(n *html.Node) bool {
- if n.Type != html.ElementNode {
- return false
- }
+ parent := n.Parent
+ if parent == nil {
+ return false
+ }
- parent := n.Parent
- if parent == nil {
- return false
- }
+ if parent.Type == html.DocumentNode {
+ return false
+ }
- if parent.Type == html.DocumentNode {
+ count := 0
+ for c := parent.FirstChild; c != nil; c = c.NextSibling {
+ if (c.Type != html.ElementNode) || (s.ofType && c.Data != n.Data) {
+ continue
+ }
+ count++
+ if count > 1 {
return false
}
+ }
- count := 0
- for c := parent.FirstChild; c != nil; c = c.NextSibling {
- if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
- continue
- }
- count++
- if count > 1 {
- return false
- }
- }
+ return count == 1
+}
- return count == 1
- }
+func (s onlyChildPseudoClassSelector) Specificity() Specificity {
+ return Specificity{0, 1, 0}
}
-// inputSelector is a Selector that matches input, select, textarea and button elements.
-func inputSelector(n *html.Node) bool {
+func (c onlyChildPseudoClassSelector) PseudoElement() string {
+ return ""
+}
+
+type inputPseudoClassSelector struct{}
+
+// Matches input, select, textarea and button elements.
+func (s inputPseudoClassSelector) Match(n *html.Node) bool {
return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button")
}
-// emptyElementSelector is a Selector that matches empty elements.
-func emptyElementSelector(n *html.Node) bool {
+func (s inputPseudoClassSelector) Specificity() Specificity {
+ return Specificity{0, 1, 0}
+}
+
+func (c inputPseudoClassSelector) PseudoElement() string {
+ return ""
+}
+
+type emptyElementPseudoClassSelector struct{}
+
+// Matches empty elements.
+func (s emptyElementPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
@@ -554,69 +769,170 @@ func emptyElementSelector(n *html.Node) bool {
return true
}
-// descendantSelector returns a Selector that matches an element if
-// it matches d and has an ancestor that matches a.
-func descendantSelector(a, d Selector) Selector {
- return func(n *html.Node) bool {
- if !d(n) {
+func (s emptyElementPseudoClassSelector) Specificity() Specificity {
+ return Specificity{0, 1, 0}
+}
+
+func (c emptyElementPseudoClassSelector) PseudoElement() string {
+ return ""
+}
+
+type rootPseudoClassSelector struct{}
+
+// Match implements :root
+func (s rootPseudoClassSelector) Match(n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+ if n.Parent == nil {
+ return false
+ }
+ return n.Parent.Type == html.DocumentNode
+}
+
+func (s rootPseudoClassSelector) Specificity() Specificity {
+ return Specificity{0, 1, 0}
+}
+
+func (c rootPseudoClassSelector) PseudoElement() string {
+ return ""
+}
+
+type compoundSelector struct {
+ selectors []Sel
+ pseudoElement string
+}
+
+// Matches elements if each sub-selector matches.
+func (t compoundSelector) Match(n *html.Node) bool {
+ if len(t.selectors) == 0 {
+ return n.Type == html.ElementNode
+ }
+
+ for _, sel := range t.selectors {
+ if !sel.Match(n) {
return false
}
+ }
+ return true
+}
- for p := n.Parent; p != nil; p = p.Parent {
- if a(p) {
- return true
- }
- }
+func (s compoundSelector) Specificity() Specificity {
+ var out Specificity
+ for _, sel := range s.selectors {
+ out = out.Add(sel.Specificity())
+ }
+ if s.pseudoElement != "" {
+ // https://drafts.csswg.org/selectors-3/#specificity
+ out = out.Add(Specificity{0, 0, 1})
+ }
+ return out
+}
- return false
+func (c compoundSelector) PseudoElement() string {
+ return c.pseudoElement
+}
+
+type combinedSelector struct {
+ first Sel
+ combinator byte
+ second Sel
+}
+
+func (t combinedSelector) Match(n *html.Node) bool {
+ if t.first == nil {
+ return false // maybe we should panic
+ }
+ switch t.combinator {
+ case 0:
+ return t.first.Match(n)
+ case ' ':
+ return descendantMatch(t.first, t.second, n)
+ case '>':
+ return childMatch(t.first, t.second, n)
+ case '+':
+ return siblingMatch(t.first, t.second, true, n)
+ case '~':
+ return siblingMatch(t.first, t.second, false, n)
+ default:
+ panic("unknown combinator")
}
}
-// childSelector returns a Selector that matches an element if
-// it matches d and its parent matches a.
-func childSelector(a, d Selector) Selector {
- return func(n *html.Node) bool {
- return d(n) && n.Parent != nil && a(n.Parent)
+// matches an element if it matches d and has an ancestor that matches a.
+func descendantMatch(a, d Matcher, n *html.Node) bool {
+ if !d.Match(n) {
+ return false
}
+
+ for p := n.Parent; p != nil; p = p.Parent {
+ if a.Match(p) {
+ return true
+ }
+ }
+
+ return false
}
-// siblingSelector returns a Selector that matches an element
-// if it matches s2 and in is preceded by an element that matches s1.
+// matches an element if it matches d and its parent matches a.
+func childMatch(a, d Matcher, n *html.Node) bool {
+ return d.Match(n) && n.Parent != nil && a.Match(n.Parent)
+}
+
+// matches an element if it matches s2 and is preceded by an element that matches s1.
// If adjacent is true, the sibling must be immediately before the element.
-func siblingSelector(s1, s2 Selector, adjacent bool) Selector {
- return func(n *html.Node) bool {
- if !s2(n) {
- return false
- }
+func siblingMatch(s1, s2 Matcher, adjacent bool, n *html.Node) bool {
+ if !s2.Match(n) {
+ return false
+ }
- if adjacent {
- for n = n.PrevSibling; n != nil; n = n.PrevSibling {
- if n.Type == html.TextNode || n.Type == html.CommentNode {
- continue
- }
- return s1(n)
+ if adjacent {
+ for n = n.PrevSibling; n != nil; n = n.PrevSibling {
+ if n.Type == html.TextNode || n.Type == html.CommentNode {
+ continue
}
- return false
+ return s1.Match(n)
}
+ return false
+ }
- // Walk backwards looking for element that matches s1
- for c := n.PrevSibling; c != nil; c = c.PrevSibling {
- if s1(c) {
- return true
- }
+ // Walk backwards looking for element that matches s1
+ for c := n.PrevSibling; c != nil; c = c.PrevSibling {
+ if s1.Match(c) {
+ return true
}
+ }
- return false
+ return false
+}
+
+func (s combinedSelector) Specificity() Specificity {
+ spec := s.first.Specificity()
+ if s.second != nil {
+ spec = spec.Add(s.second.Specificity())
}
+ return spec
}
-// rootSelector implements :root
-func rootSelector(n *html.Node) bool {
- if n.Type != html.ElementNode {
- return false
+// on combinedSelector, a pseudo-element only makes sense on the last
+// selector, although others increase specificity.
+func (c combinedSelector) PseudoElement() string {
+ if c.second == nil {
+ return ""
}
- if n.Parent == nil {
- return false
+ return c.second.PseudoElement()
+}
+
+// A SelectorGroup is a list of selectors, which matches if any of the
+// individual selectors matches.
+type SelectorGroup []Sel
+
+// Match returns true if the node matches one of the single selectors.
+func (s SelectorGroup) Match(n *html.Node) bool {
+ for _, sel := range s {
+ if sel.Match(n) {
+ return true
+ }
}
- return n.Parent.Type == html.DocumentNode
+ return false
}
diff --git a/vendor/github.com/andybalholm/cascadia/serialize.go b/vendor/github.com/andybalholm/cascadia/serialize.go
new file mode 100644
index 0000000..f15b079
--- /dev/null
+++ b/vendor/github.com/andybalholm/cascadia/serialize.go
@@ -0,0 +1,120 @@
+package cascadia
+
+import (
+ "fmt"
+ "strings"
+)
+
+// implements the reverse operation Sel -> string
+
+func (c tagSelector) String() string {
+ return c.tag
+}
+
+func (c idSelector) String() string {
+ return "#" + c.id
+}
+
+func (c classSelector) String() string {
+ return "." + c.class
+}
+
+func (c attrSelector) String() string {
+ val := c.val
+ if c.operation == "#=" {
+ val = c.regexp.String()
+ } else if c.operation != "" {
+ val = fmt.Sprintf(`"%s"`, val)
+ }
+ return fmt.Sprintf(`[%s%s%s]`, c.key, c.operation, val)
+}
+
+func (c relativePseudoClassSelector) String() string {
+ return fmt.Sprintf(":%s(%s)", c.name, c.match.String())
+}
+func (c containsPseudoClassSelector) String() string {
+ s := "contains"
+ if c.own {
+ s += "Own"
+ }
+ return fmt.Sprintf(`:%s("%s")`, s, c.value)
+}
+func (c regexpPseudoClassSelector) String() string {
+ s := "matches"
+ if c.own {
+ s += "Own"
+ }
+ return fmt.Sprintf(":%s(%s)", s, c.regexp.String())
+}
+func (c nthPseudoClassSelector) String() string {
+ if c.a == 0 && c.b == 1 { // special cases
+ s := ":first-"
+ if c.last {
+ s = ":last-"
+ }
+ if c.ofType {
+ s += "of-type"
+ } else {
+ s += "child"
+ }
+ return s
+ }
+ var name string
+ switch [2]bool{c.last, c.ofType} {
+ case [2]bool{true, true}:
+ name = "nth-last-of-type"
+ case [2]bool{true, false}:
+ name = "nth-last-child"
+ case [2]bool{false, true}:
+ name = "nth-of-type"
+ case [2]bool{false, false}:
+ name = "nth-child"
+ }
+ return fmt.Sprintf(":%s(%dn+%d)", name, c.a, c.b)
+}
+func (c onlyChildPseudoClassSelector) String() string {
+ if c.ofType {
+ return ":only-of-type"
+ }
+ return ":only-child"
+}
+func (c inputPseudoClassSelector) String() string {
+ return ":input"
+}
+func (c emptyElementPseudoClassSelector) String() string {
+ return ":empty"
+}
+func (c rootPseudoClassSelector) String() string {
+ return ":root"
+}
+
+func (c compoundSelector) String() string {
+ if len(c.selectors) == 0 && c.pseudoElement == "" {
+ return "*"
+ }
+ chunks := make([]string, len(c.selectors))
+ for i, sel := range c.selectors {
+ chunks[i] = sel.String()
+ }
+ s := strings.Join(chunks, "")
+ if c.pseudoElement != "" {
+ s += "::" + c.pseudoElement
+ }
+ return s
+}
+
+func (c combinedSelector) String() string {
+ start := c.first.String()
+ if c.second != nil {
+ start += fmt.Sprintf(" %s %s", string(c.combinator), c.second.String())
+ }
+ return start
+}
+
+func (c SelectorGroup) String() string {
+ ck := make([]string, len(c))
+ for i, s := range c {
+ ck[i] = s.String()
+ }
+ return strings.Join(ck, ", ")
+}
diff --git a/vendor/github.com/andybalholm/cascadia/specificity.go b/vendor/github.com/andybalholm/cascadia/specificity.go
new file mode 100644
index 0000000..8db864f
--- /dev/null
+++ b/vendor/github.com/andybalholm/cascadia/specificity.go
@@ -0,0 +1,26 @@
+package cascadia
+
+// Specificity is the CSS specificity as defined in
+// https://www.w3.org/TR/selectors/#specificity-rules
+// with the convention Specificity = [A,B,C].
+type Specificity [3]int
+
+// Less returns `true` if s < other (strictly), false otherwise.
+func (s Specificity) Less(other Specificity) bool {
+ for i := range s {
+ if s[i] < other[i] {
+ return true
+ }
+ if s[i] > other[i] {
+ return false
+ }
+ }
+ return false
+}
+
+func (s Specificity) Add(other Specificity) Specificity {
+ for i, sp := range other {
+ s[i] += sp
+ }
+ return s
+}
diff --git a/vendor/golang.org/x/net/html/const.go b/vendor/golang.org/x/net/html/const.go
index a3a918f..ff7acf2 100644
--- a/vendor/golang.org/x/net/html/const.go
+++ b/vendor/golang.org/x/net/html/const.go
@@ -52,8 +52,7 @@ var isSpecialElementMap = map[string]bool{
"iframe": true,
"img": true,
"input": true,
- "isindex": true, // The 'isindex' element has been removed, but keep it for backwards compatibility.
- "keygen": true,
+ "keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
"li": true,
"link": true,
"listing": true,
diff --git a/vendor/golang.org/x/net/html/foreign.go b/vendor/golang.org/x/net/html/foreign.go
index 01477a9..9da9e9d 100644
--- a/vendor/golang.org/x/net/html/foreign.go
+++ b/vendor/golang.org/x/net/html/foreign.go
@@ -161,66 +161,62 @@ var mathMLAttributeAdjustments = map[string]string{
}
var svgAttributeAdjustments = map[string]string{
- "attributename": "attributeName",
- "attributetype": "attributeType",
- "basefrequency": "baseFrequency",
- "baseprofile": "baseProfile",
- "calcmode": "calcMode",
- "clippathunits": "clipPathUnits",
- "contentscripttype": "contentScriptType",
- "contentstyletype": "contentStyleType",
- "diffuseconstant": "diffuseConstant",
- "edgemode": "edgeMode",
- "externalresourcesrequired": "externalResourcesRequired",
- "filterres": "filterRes",
- "filterunits": "filterUnits",
- "glyphref": "glyphRef",
- "gradienttransform": "gradientTransform",
- "gradientunits": "gradientUnits",
- "kernelmatrix": "kernelMatrix",
- "kernelunitlength": "kernelUnitLength",
- "keypoints": "keyPoints",
- "keysplines": "keySplines",
- "keytimes": "keyTimes",
- "lengthadjust": "lengthAdjust",
- "limitingconeangle": "limitingConeAngle",
- "markerheight": "markerHeight",
- "markerunits": "markerUnits",
- "markerwidth": "markerWidth",
- "maskcontentunits": "maskContentUnits",
- "maskunits": "maskUnits",
- "numoctaves": "numOctaves",
- "pathlength": "pathLength",
- "patterncontentunits": "patternContentUnits",
- "patterntransform": "patternTransform",
- "patternunits": "patternUnits",
- "pointsatx": "pointsAtX",
- "pointsaty": "pointsAtY",
- "pointsatz": "pointsAtZ",
- "preservealpha": "preserveAlpha",
- "preserveaspectratio": "preserveAspectRatio",
- "primitiveunits": "primitiveUnits",
- "refx": "refX",
- "refy": "refY",
- "repeatcount": "repeatCount",
- "repeatdur": "repeatDur",
- "requiredextensions": "requiredExtensions",
- "requiredfeatures": "requiredFeatures",
- "specularconstant": "specularConstant",
- "specularexponent": "specularExponent",
- "spreadmethod": "spreadMethod",
- "startoffset": "startOffset",
- "stddeviation": "stdDeviation",
- "stitchtiles": "stitchTiles",
- "surfacescale": "surfaceScale",
- "systemlanguage": "systemLanguage",
- "tablevalues": "tableValues",
- "targetx": "targetX",
- "targety": "targetY",
- "textlength": "textLength",
- "viewbox": "viewBox",
- "viewtarget": "viewTarget",
- "xchannelselector": "xChannelSelector",
- "ychannelselector": "yChannelSelector",
- "zoomandpan": "zoomAndPan",
+ "attributename": "attributeName",
+ "attributetype": "attributeType",
+ "basefrequency": "baseFrequency",
+ "baseprofile": "baseProfile",
+ "calcmode": "calcMode",
+ "clippathunits": "clipPathUnits",
+ "diffuseconstant": "diffuseConstant",
+ "edgemode": "edgeMode",
+ "filterunits": "filterUnits",
+ "glyphref": "glyphRef",
+ "gradienttransform": "gradientTransform",
+ "gradientunits": "gradientUnits",
+ "kernelmatrix": "kernelMatrix",
+ "kernelunitlength": "kernelUnitLength",
+ "keypoints": "keyPoints",
+ "keysplines": "keySplines",
+ "keytimes": "keyTimes",
+ "lengthadjust": "lengthAdjust",
+ "limitingconeangle": "limitingConeAngle",
+ "markerheight": "markerHeight",
+ "markerunits": "markerUnits",
+ "markerwidth": "markerWidth",
+ "maskcontentunits": "maskContentUnits",
+ "maskunits": "maskUnits",
+ "numoctaves": "numOctaves",
+ "pathlength": "pathLength",
+ "patterncontentunits": "patternContentUnits",
+ "patterntransform": "patternTransform",
+ "patternunits": "patternUnits",
+ "pointsatx": "pointsAtX",
+ "pointsaty": "pointsAtY",
+ "pointsatz": "pointsAtZ",
+ "preservealpha": "preserveAlpha",
+ "preserveaspectratio": "preserveAspectRatio",
+ "primitiveunits": "primitiveUnits",
+ "refx": "refX",
+ "refy": "refY",
+ "repeatcount": "repeatCount",
+ "repeatdur": "repeatDur",
+ "requiredextensions": "requiredExtensions",
+ "requiredfeatures": "requiredFeatures",
+ "specularconstant": "specularConstant",
+ "specularexponent": "specularExponent",
+ "spreadmethod": "spreadMethod",
+ "startoffset": "startOffset",
+ "stddeviation": "stdDeviation",
+ "stitchtiles": "stitchTiles",
+ "surfacescale": "surfaceScale",
+ "systemlanguage": "systemLanguage",
+ "tablevalues": "tableValues",
+ "targetx": "targetX",
+ "targety": "targetY",
+ "textlength": "textLength",
+ "viewbox": "viewBox",
+ "viewtarget": "viewTarget",
+ "xchannelselector": "xChannelSelector",
+ "ychannelselector": "yChannelSelector",
+ "zoomandpan": "zoomAndPan",
}
diff --git a/vendor/golang.org/x/net/html/node.go b/vendor/golang.org/x/net/html/node.go
index 633ee15..1350eef 100644
--- a/vendor/golang.org/x/net/html/node.go
+++ b/vendor/golang.org/x/net/html/node.go
@@ -18,6 +18,11 @@ const (
ElementNode
CommentNode
DoctypeNode
+ // RawNode nodes are not returned by the parser, but can be part of the
+ // Node tree passed to func Render to insert raw HTML (without escaping).
+ // If so, this package makes no guarantee that the rendered HTML is secure
+ // (from e.g. Cross Site Scripting attacks) or well-formed.
+ RawNode
scopeMarkerNode
)
diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go
index 992cff2..038941d 100644
--- a/vendor/golang.org/x/net/html/parse.go
+++ b/vendor/golang.org/x/net/html/parse.go
@@ -184,6 +184,17 @@ func (p *parser) clearStackToContext(s scope) {
}
}
+// parseGenericRawTextElement implements the generic raw text element parsing
+// algorithm defined in 12.2.6.2.
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
+// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
+// officially, need to make tokenizer consider both states.
+func (p *parser) parseGenericRawTextElement() {
+ p.addElement()
+ p.originalIM = p.im
+ p.im = textIM
+}
+
// generateImpliedEndTags pops nodes off the stack of open elements as long as
// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
// If exceptions are specified, nodes with that name will not be popped off.
@@ -192,16 +203,17 @@ func (p *parser) generateImpliedEndTags(exceptions ...string) {
loop:
for i = len(p.oe) - 1; i >= 0; i-- {
n := p.oe[i]
- if n.Type == ElementNode {
- switch n.DataAtom {
- case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
- for _, except := range exceptions {
- if n.Data == except {
- break loop
- }
+ if n.Type != ElementNode {
+ break
+ }
+ switch n.DataAtom {
+ case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
+ for _, except := range exceptions {
+ if n.Data == except {
+ break loop
}
- continue
}
+ continue
}
break
}
@@ -369,8 +381,7 @@ findIdenticalElements:
// Section 12.2.4.3.
func (p *parser) clearActiveFormattingElements() {
for {
- n := p.afe.pop()
- if len(p.afe) == 0 || n.Type == scopeMarkerNode {
+ if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
return
}
}
@@ -625,29 +636,51 @@ func inHeadIM(p *parser) bool {
switch p.tok.DataAtom {
case a.Html:
return inBodyIM(p)
- case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
+ case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
p.addElement()
p.oe.pop()
p.acknowledgeSelfClosingTag()
return true
case a.Noscript:
- p.addElement()
if p.scripting {
- p.setOriginalIM()
- p.im = textIM
- } else {
- p.im = inHeadNoscriptIM
+ p.parseGenericRawTextElement()
+ return true
}
+ p.addElement()
+ p.im = inHeadNoscriptIM
+ // Don't let the tokenizer go into raw text mode when scripting is disabled.
+ p.tokenizer.NextIsNotRawText()
return true
- case a.Script, a.Title, a.Noframes, a.Style:
+ case a.Script, a.Title:
p.addElement()
p.setOriginalIM()
p.im = textIM
return true
+ case a.Noframes, a.Style:
+ p.parseGenericRawTextElement()
+ return true
case a.Head:
// Ignore the token.
return true
case a.Template:
+ // TODO: remove this divergence from the HTML5 spec.
+ //
+ // We don't handle all of the corner cases when mixing foreign
+ // content (i.e. <math> or <svg>) with <template>. Without this
+ // early return, we can get into an infinite loop, possibly because
+ // of the "TODO... further divergence" a little below.
+ //
+ // As a workaround, if we are mixing foreign content and templates,
+ // just ignore the rest of the HTML. Foreign content is rare and a
+ // relatively old HTML feature. Templates are also rare and a
+ // relatively new HTML feature. Their combination is very rare.
+ for _, e := range p.oe {
+ if e.Namespace != "" {
+ p.im = ignoreTheRemainingTokens
+ return true
+ }
+ }
+
p.addElement()
p.afe = append(p.afe, &scopeMarker)
p.framesetOK = false
@@ -668,7 +701,7 @@ func inHeadIM(p *parser) bool {
if !p.oe.contains(a.Template) {
return true
}
- // TODO: remove this divergence from the HTML5 spec.
+ // TODO: remove this further divergence from the HTML5 spec.
//
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
p.generateImpliedEndTags()
@@ -713,7 +746,13 @@ func inHeadNoscriptIM(p *parser) bool {
return inBodyIM(p)
case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
return inHeadIM(p)
- case a.Head, a.Noscript:
+ case a.Head:
+ // Ignore the token.
+ return true
+ case a.Noscript:
+ // Don't let the tokenizer go into raw text mode even when a <noscript>
+ // tag is in "in head noscript" insertion mode.
+ p.tokenizer.NextIsNotRawText()
// Ignore the token.
return true
}
@@ -855,7 +894,7 @@ func inBodyIM(p *parser) bool {
return true
}
copyAttributes(p.oe[0], p.tok)
- case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
+ case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
return inHeadIM(p)
case a.Body:
if p.oe.contains(a.Template) {
@@ -881,7 +920,7 @@ func inBodyIM(p *parser) bool {
p.addElement()
p.im = inFramesetIM
return true
- case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
+ case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
p.popUntil(buttonScope, a.P)
p.addElement()
case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
@@ -1014,53 +1053,6 @@ func inBodyIM(p *parser) bool {
p.tok.DataAtom = a.Img
p.tok.Data = a.Img.String()
return false
- case a.Isindex:
- if p.form != nil {
- // Ignore the token.
- return true
- }
- action := ""
- prompt := "This is a searchable index. Enter search keywords: "
- attr := []Attribute{{Key: "name", Val: "isindex"}}
- for _, t := range p.tok.Attr {
- switch t.Key {
- case "action":
- action = t.Val
- case "name":
- // Ignore the attribute.
- case "prompt":
- prompt = t.Val
- default:
- attr = append(attr, t)
- }
- }
- p.acknowledgeSelfClosingTag()
- p.popUntil(buttonScope, a.P)
- p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
- if p.form == nil {
- // NOTE: The 'isindex' element has been removed,
- // and the 'template' element has not been designed to be
- // collaborative with the index element.
- //
- // Ignore the token.
- return true
- }
- if action != "" {
- p.form.Attr = []Attribute{{Key: "action", Val: action}}
- }
- p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
- p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
- p.addText(prompt)
- p.addChild(&Node{
- Type: ElementNode,
- DataAtom: a.Input,
- Data: a.Input.String(),
- Attr: attr,
- })
- p.oe.pop()
- p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
- p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
- p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
case a.Textarea:
p.addElement()
p.setOriginalIM()
@@ -1070,18 +1062,21 @@ func inBodyIM(p *parser) bool {
p.popUntil(buttonScope, a.P)
p.reconstructActiveFormattingElements()
p.framesetOK = false
- p.addElement()
- p.setOriginalIM()
- p.im = textIM
+ p.parseGenericRawTextElement()
case a.Iframe:
p.framesetOK = false
+ p.parseGenericRawTextElement()
+ case a.Noembed:
+ p.parseGenericRawTextElement()
+ case a.Noscript:
+ if p.scripting {
+ p.parseGenericRawTextElement()
+ return true
+ }
+ p.reconstructActiveFormattingElements()
p.addElement()
- p.setOriginalIM()
- p.im = textIM
- case a.Noembed, a.Noscript:
- p.addElement()
- p.setOriginalIM()
- p.im = textIM
+ // Don't let the tokenizer go into raw text mode when scripting is disabled.
+ p.tokenizer.NextIsNotRawText()
case a.Select:
p.reconstructActiveFormattingElements()
p.addElement()
@@ -1137,7 +1132,7 @@ func inBodyIM(p *parser) bool {
return false
}
return true
- case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
+ case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
p.popUntil(defaultScope, p.tok.DataAtom)
case a.Form:
if p.oe.contains(a.Template) {
@@ -1198,14 +1193,13 @@ func inBodyIM(p *parser) bool {
if len(p.templateStack) > 0 {
p.im = inTemplateIM
return false
- } else {
- for _, e := range p.oe {
- switch e.DataAtom {
- case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
- a.Thead, a.Tr, a.Body, a.Html:
- default:
- return true
- }
+ }
+ for _, e := range p.oe {
+ switch e.DataAtom {
+ case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
+ a.Thead, a.Tr, a.Body, a.Html:
+ default:
+ return true
}
}
}
@@ -1221,9 +1215,15 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
// Once the code successfully parses the comprehensive test suite, we should
// refactor this code to be more idiomatic.
- // Steps 1-4. The outer loop.
+ // Steps 1-2
+ if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
+ p.oe.pop()
+ return
+ }
+
+ // Steps 3-5. The outer loop.
for i := 0; i < 8; i++ {
- // Step 5. Find the formatting element.
+ // Step 6. Find the formatting element.
var formattingElement *Node
for j := len(p.afe) - 1; j >= 0; j-- {
if p.afe[j].Type == scopeMarkerNode {
@@ -1238,17 +1238,22 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
p.inBodyEndTagOther(tagAtom, tagName)
return
}
+
+ // Step 7. Ignore the tag if formatting element is not in the stack of open elements.
feIndex := p.oe.index(formattingElement)
if feIndex == -1 {
p.afe.remove(formattingElement)
return
}
+ // Step 8. Ignore the tag if formatting element is not in the scope.
if !p.elementInScope(defaultScope, tagAtom) {
// Ignore the tag.
return
}
- // Steps 9-10. Find the furthest block.
+ // Step 9. This step is omitted because it only reports a parse error; there is no need to return.
+
+ // Steps 10-11. Find the furthest block.
var furthestBlock *Node
for _, e := range p.oe[feIndex:] {
if isSpecialElement(e) {
@@ -1265,47 +1270,65 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
return
}
- // Steps 11-12. Find the common ancestor and bookmark node.
+ // Steps 12-13. Find the common ancestor and bookmark node.
commonAncestor := p.oe[feIndex-1]
bookmark := p.afe.index(formattingElement)
- // Step 13. The inner loop. Find the lastNode to reparent.
+ // Step 14. The inner loop. Find the lastNode to reparent.
lastNode := furthestBlock
node := furthestBlock
x := p.oe.index(node)
- // Steps 13.1-13.2
- for j := 0; j < 3; j++ {
- // Step 13.3.
+ // Step 14.1.
+ j := 0
+ for {
+ // Step 14.2.
+ j++
+ // Step 14.3.
x--
node = p.oe[x]
- // Step 13.4 - 13.5.
+ // Step 14.4. Go to the next step if node is formatting element.
+ if node == formattingElement {
+ break
+ }
+ // Step 14.5. Remove node from the list of active formatting elements if
+ // inner loop counter is greater than three and node is in the list of
+ // active formatting elements.
+ if ni := p.afe.index(node); j > 3 && ni > -1 {
+ p.afe.remove(node)
+ // If any element of the list of active formatting elements is removed,
+ // we need to take care whether bookmark should be decremented or not.
+ // This is because the value of bookmark may exceed the size of the
+ // list by removing elements from the list.
+ if ni <= bookmark {
+ bookmark--
+ }
+ continue
+ }
+ // Step 14.6. Continue the next inner loop if node is not in the list of
+ // active formatting elements.
if p.afe.index(node) == -1 {
p.oe.remove(node)
continue
}
- // Step 13.6.
- if node == formattingElement {
- break
- }
- // Step 13.7.
+ // Step 14.7.
clone := node.clone()
p.afe[p.afe.index(node)] = clone
p.oe[p.oe.index(node)] = clone
node = clone
- // Step 13.8.
+ // Step 14.8.
if lastNode == furthestBlock {
bookmark = p.afe.index(node) + 1
}
- // Step 13.9.
+ // Step 14.9.
if lastNode.Parent != nil {
lastNode.Parent.RemoveChild(lastNode)
}
node.AppendChild(lastNode)
- // Step 13.10.
+ // Step 14.10.
lastNode = node
}
- // Step 14. Reparent lastNode to the common ancestor,
+ // Step 15. Reparent lastNode to the common ancestor,
// or for misnested table nodes, to the foster parent.
if lastNode.Parent != nil {
lastNode.Parent.RemoveChild(lastNode)
@@ -1317,13 +1340,13 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
commonAncestor.AppendChild(lastNode)
}
- // Steps 15-17. Reparent nodes from the furthest block's children
+ // Steps 16-18. Reparent nodes from the furthest block's children
// to a clone of the formatting element.
clone := formattingElement.clone()
reparentChildren(clone, furthestBlock)
furthestBlock.AppendChild(clone)
- // Step 18. Fix up the list of active formatting elements.
+ // Step 19. Fix up the list of active formatting elements.
if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
// Move the bookmark with the rest of the list.
bookmark--
@@ -1331,7 +1354,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
p.afe.remove(formattingElement)
p.afe.insert(bookmark, clone)
- // Step 19. Fix up the stack of open elements.
+ // Step 20. Fix up the stack of open elements.
p.oe.remove(formattingElement)
p.oe.insert(p.oe.index(furthestBlock)+1, clone)
}
@@ -1502,14 +1525,13 @@ func inCaptionIM(p *parser) bool {
case StartTagToken:
switch p.tok.DataAtom {
case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
- if p.popUntil(tableScope, a.Caption) {
- p.clearActiveFormattingElements()
- p.im = inTableIM
- return false
- } else {
+ if !p.popUntil(tableScope, a.Caption) {
// Ignore the token.
return true
}
+ p.clearActiveFormattingElements()
+ p.im = inTableIM
+ return false
case a.Select:
p.reconstructActiveFormattingElements()
p.addElement()
@@ -1526,14 +1548,13 @@ func inCaptionIM(p *parser) bool {
}
return true
case a.Table:
- if p.popUntil(tableScope, a.Caption) {
- p.clearActiveFormattingElements()
- p.im = inTableIM
- return false
- } else {
+ if !p.popUntil(tableScope, a.Caption) {
// Ignore the token.
return true
}
+ p.clearActiveFormattingElements()
+ p.im = inTableIM
+ return false
case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
// Ignore the token.
return true
@@ -1777,12 +1798,11 @@ func inSelectIM(p *parser) bool {
}
p.addElement()
case a.Select:
- if p.popUntil(selectScope, a.Select) {
- p.resetInsertionMode()
- } else {
+ if !p.popUntil(selectScope, a.Select) {
// Ignore the token.
return true
}
+ p.resetInsertionMode()
case a.Input, a.Keygen, a.Textarea:
if p.elementInScope(selectScope, a.Select) {
p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
@@ -1794,6 +1814,13 @@ func inSelectIM(p *parser) bool {
return true
case a.Script, a.Template:
return inHeadIM(p)
+ case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
+ // Don't let the tokenizer go into raw text mode when there are raw tags
+ // to be ignored. These tags should be properly ignored by the
+ // tokenizer.
+ p.tokenizer.NextIsNotRawText()
+ // Ignore the token.
+ return true
}
case EndTagToken:
switch p.tok.DataAtom {
@@ -1810,12 +1837,11 @@ func inSelectIM(p *parser) bool {
p.oe = p.oe[:i]
}
case a.Select:
- if p.popUntil(selectScope, a.Select) {
- p.resetInsertionMode()
- } else {
+ if !p.popUntil(selectScope, a.Select) {
// Ignore the token.
return true
}
+ p.resetInsertionMode()
case a.Template:
return inHeadIM(p)
}
@@ -2119,6 +2145,10 @@ func afterAfterFramesetIM(p *parser) bool {
return true
}
+func ignoreTheRemainingTokens(p *parser) bool {
+ return true
+}
+
const whitespaceOrNUL = whitespace + "\x00"
// Section 12.2.6.5
@@ -2136,28 +2166,31 @@ func parseForeignContent(p *parser) bool {
Data: p.tok.Data,
})
case StartTagToken:
- b := breakout[p.tok.Data]
- if p.tok.DataAtom == a.Font {
- loop:
- for _, attr := range p.tok.Attr {
- switch attr.Key {
- case "color", "face", "size":
- b = true
- break loop
+ if !p.fragment {
+ b := breakout[p.tok.Data]
+ if p.tok.DataAtom == a.Font {
+ loop:
+ for _, attr := range p.tok.Attr {
+ switch attr.Key {
+ case "color", "face", "size":
+ b = true
+ break loop
+ }
}
}
- }
- if b {
- for i := len(p.oe) - 1; i >= 0; i-- {
- n := p.oe[i]
- if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
- p.oe = p.oe[:i+1]
- break
+ if b {
+ for i := len(p.oe) - 1; i >= 0; i-- {
+ n := p.oe[i]
+ if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
+ p.oe = p.oe[:i+1]
+ break
+ }
}
+ return false
}
- return false
}
- switch p.top().Namespace {
+ current := p.adjustedCurrentNode()
+ switch current.Namespace {
case "math":
adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
case "svg":
@@ -2172,7 +2205,7 @@ func parseForeignContent(p *parser) bool {
panic("html: bad parser state: unexpected namespace")
}
adjustForeignAttributes(p.tok.Attr)
- namespace := p.top().Namespace
+ namespace := current.Namespace
p.addElement()
p.top().Namespace = namespace
if namespace != "" {
@@ -2201,12 +2234,20 @@ func parseForeignContent(p *parser) bool {
return true
}
+// Section 12.2.4.2.
+func (p *parser) adjustedCurrentNode() *Node {
+ if len(p.oe) == 1 && p.fragment && p.context != nil {
+ return p.context
+ }
+ return p.oe.top()
+}
+
// Section 12.2.6.
func (p *parser) inForeignContent() bool {
if len(p.oe) == 0 {
return false
}
- n := p.oe[len(p.oe)-1]
+ n := p.adjustedCurrentNode()
if n.Namespace == "" {
return false
}
@@ -2341,8 +2382,7 @@ func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
f(p)
}
- err := p.parse()
- if err != nil {
+ if err := p.parse(); err != nil {
return nil, err
}
return p.doc, nil
@@ -2364,7 +2404,6 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
contextTag = context.DataAtom.String()
}
p := &parser{
- tokenizer: NewTokenizerFragment(r, contextTag),
doc: &Node{
Type: DocumentNode,
},
@@ -2372,6 +2411,11 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
fragment: true,
context: context,
}
+ if context != nil && context.Namespace != "" {
+ p.tokenizer = NewTokenizer(r)
+ } else {
+ p.tokenizer = NewTokenizerFragment(r, contextTag)
+ }
for _, f := range opts {
f(p)
@@ -2396,8 +2440,7 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
}
}
- err := p.parse()
- if err != nil {
+ if err := p.parse(); err != nil {
return nil, err
}
diff --git a/vendor/golang.org/x/net/html/render.go b/vendor/golang.org/x/net/html/render.go
index d34564f..b46d81c 100644
--- a/vendor/golang.org/x/net/html/render.go
+++ b/vendor/golang.org/x/net/html/render.go
@@ -134,6 +134,9 @@ func render1(w writer, n *Node) error {
}
}
return w.WriteByte('>')
+ case RawNode:
+ _, err := w.WriteString(n.Data)
+ return err
default:
return errors.New("html: unknown node type")
}
@@ -252,20 +255,19 @@ func writeQuoted(w writer, s string) error {
// Section 12.1.2, "Elements", gives this list of void elements. Void elements
// are those that can't have any contents.
var voidElements = map[string]bool{
- "area": true,
- "base": true,
- "br": true,
- "col": true,
- "command": true,
- "embed": true,
- "hr": true,
- "img": true,
- "input": true,
- "keygen": true,
- "link": true,
- "meta": true,
- "param": true,
- "source": true,
- "track": true,
- "wbr": true,
+ "area": true,
+ "base": true,
+ "br": true,
+ "col": true,
+ "embed": true,
+ "hr": true,
+ "img": true,
+ "input": true,
+ "keygen": true, // "keygen" has been removed from the spec, but is kept here for backwards compatibility.
+ "link": true,
+ "meta": true,
+ "param": true,
+ "source": true,
+ "track": true,
+ "wbr": true,
}
diff --git a/vendor/golang.org/x/net/html/token.go b/vendor/golang.org/x/net/html/token.go
index e3c01d7..877709f 100644
--- a/vendor/golang.org/x/net/html/token.go
+++ b/vendor/golang.org/x/net/html/token.go
@@ -296,8 +296,7 @@ func (z *Tokenizer) Buffered() []byte {
// too many times in succession.
func readAtLeastOneByte(r io.Reader, b []byte) (int, error) {
for i := 0; i < 100; i++ {
- n, err := r.Read(b)
- if n != 0 || err != nil {
+ if n, err := r.Read(b); n != 0 || err != nil {
return n, err
}
}
@@ -347,6 +346,7 @@ loop:
break loop
}
if c != '/' {
+ z.raw.end--
continue loop
}
if z.readRawEndTag() || z.err != nil {
@@ -1067,6 +1067,11 @@ loop:
// Raw returns the unmodified text of the current token. Calling Next, Token,
// Text, TagName or TagAttr may change the contents of the returned slice.
+//
+// The token stream's raw bytes partition the byte stream (up until an
+// ErrorToken). There are no overlaps or gaps between two consecutive tokens'
+// raw bytes. One implication is that the byte offset of the current token is
+// the sum of the lengths of all previous tokens' raw bytes.
func (z *Tokenizer) Raw() []byte {
return z.buf[z.raw.start:z.raw.end]
}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 244a93f..1d2f65d 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -1,4 +1,4 @@
-# github.com/PuerkitoBio/goquery v1.5.0
+# github.com/PuerkitoBio/goquery v1.7.1
## explicit
github.com/PuerkitoBio/goquery
# github.com/PuerkitoBio/purell v0.1.0
@@ -6,7 +6,7 @@ github.com/PuerkitoBio/goquery
github.com/PuerkitoBio/purell
# github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578
## explicit
-# github.com/andybalholm/cascadia v1.0.0
+# github.com/andybalholm/cascadia v1.2.0
github.com/andybalholm/cascadia
# github.com/golang/snappy v0.0.1
github.com/golang/snappy
@@ -37,7 +37,8 @@ github.com/syndtr/goleveldb/leveldb/opt
github.com/syndtr/goleveldb/leveldb/storage
github.com/syndtr/goleveldb/leveldb/table
github.com/syndtr/goleveldb/leveldb/util
-# golang.org/x/net v0.0.0-20190926025831-c00fd9afed17
+# golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2
## explicit
+# golang.org/x/net v0.0.0-20210614182718-04defd469f4e
golang.org/x/net/html
golang.org/x/net/html/atom