From 833da3f33a5c727a8d7ccd034c43497c66793026 Mon Sep 17 00:00:00 2001 From: ale Date: Sat, 19 Jun 2021 16:43:43 +0100 Subject: go mod vendor --- vendor/github.com/andybalholm/cascadia/parser.go | 319 +++++++++++------------ 1 file changed, 158 insertions(+), 161 deletions(-) (limited to 'vendor/github.com/andybalholm/cascadia/parser.go') diff --git a/vendor/github.com/andybalholm/cascadia/parser.go b/vendor/github.com/andybalholm/cascadia/parser.go index bebf0af..495db9c 100644 --- a/vendor/github.com/andybalholm/cascadia/parser.go +++ b/vendor/github.com/andybalholm/cascadia/parser.go @@ -7,16 +7,14 @@ import ( "regexp" "strconv" "strings" + + "golang.org/x/net/html" ) // a parser for CSS selectors type parser struct { s string // the source text i int // the current position - - // if `false`, parsing a pseudo-element - // returns an error. - acceptPseudoElements bool } // parseEscape parses a backslash escape. @@ -58,26 +56,6 @@ func (p *parser) parseEscape() (result string, err error) { return result, nil } -// toLowerASCII returns s with all ASCII capital letters lowercased. -func toLowerASCII(s string) string { - var b []byte - for i := 0; i < len(s); i++ { - if c := s[i]; 'A' <= c && c <= 'Z' { - if b == nil { - b = make([]byte, len(s)) - copy(b, s) - } - b[i] = s[i] + ('a' - 'A') - } - } - - if b == nil { - return s - } - - return string(b) -} - func hexDigit(c byte) bool { return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' } @@ -302,92 +280,92 @@ func (p *parser) consumeClosingParenthesis() bool { } // parseTypeSelector parses a type selector (one that matches by tag name). -func (p *parser) parseTypeSelector() (result tagSelector, err error) { +func (p *parser) parseTypeSelector() (result Selector, err error) { tag, err := p.parseIdentifier() if err != nil { - return + return nil, err } - return tagSelector{tag: toLowerASCII(tag)}, nil + + return typeSelector(tag), nil } // parseIDSelector parses a selector that matches by id attribute. -func (p *parser) parseIDSelector() (idSelector, error) { +func (p *parser) parseIDSelector() (Selector, error) { if p.i >= len(p.s) { - return idSelector{}, fmt.Errorf("expected id selector (#id), found EOF instead") + return nil, fmt.Errorf("expected id selector (#id), found EOF instead") } if p.s[p.i] != '#' { - return idSelector{}, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i]) + return nil, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i]) } p.i++ id, err := p.parseName() if err != nil { - return idSelector{}, err + return nil, err } - return idSelector{id: id}, nil + return attributeEqualsSelector("id", id), nil } // parseClassSelector parses a selector that matches by class attribute. -func (p *parser) parseClassSelector() (classSelector, error) { +func (p *parser) parseClassSelector() (Selector, error) { if p.i >= len(p.s) { - return classSelector{}, fmt.Errorf("expected class selector (.class), found EOF instead") + return nil, fmt.Errorf("expected class selector (.class), found EOF instead") } if p.s[p.i] != '.' { - return classSelector{}, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i]) + return nil, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i]) } p.i++ class, err := p.parseIdentifier() if err != nil { - return classSelector{}, err + return nil, err } - return classSelector{class: class}, nil + return attributeIncludesSelector("class", class), nil } // parseAttributeSelector parses a selector that matches by attribute value. -func (p *parser) parseAttributeSelector() (attrSelector, error) { +func (p *parser) parseAttributeSelector() (Selector, error) { if p.i >= len(p.s) { - return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead") + return nil, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead") } if p.s[p.i] != '[' { - return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i]) + return nil, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i]) } p.i++ p.skipWhitespace() key, err := p.parseIdentifier() if err != nil { - return attrSelector{}, err + return nil, err } - key = toLowerASCII(key) p.skipWhitespace() if p.i >= len(p.s) { - return attrSelector{}, errors.New("unexpected EOF in attribute selector") + return nil, errors.New("unexpected EOF in attribute selector") } if p.s[p.i] == ']' { p.i++ - return attrSelector{key: key, operation: ""}, nil + return attributeExistsSelector(key), nil } if p.i+2 >= len(p.s) { - return attrSelector{}, errors.New("unexpected EOF in attribute selector") + return nil, errors.New("unexpected EOF in attribute selector") } op := p.s[p.i : p.i+2] if op[0] == '=' { op = "=" } else if op[1] != '=' { - return attrSelector{}, fmt.Errorf(`expected equality operator, found "%s" instead`, op) + return nil, fmt.Errorf(`expected equality operator, found "%s" instead`, op) } p.i += len(op) p.skipWhitespace() if p.i >= len(p.s) { - return attrSelector{}, errors.New("unexpected EOF in attribute selector") + return nil, errors.New("unexpected EOF in attribute selector") } var val string var rx *regexp.Regexp @@ -402,84 +380,88 @@ func (p *parser) parseAttributeSelector() (attrSelector, error) { } } if err != nil { - return attrSelector{}, err + return nil, err } p.skipWhitespace() if p.i >= len(p.s) { - return attrSelector{}, errors.New("unexpected EOF in attribute selector") + return nil, errors.New("unexpected EOF in attribute selector") } if p.s[p.i] != ']' { - return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i]) + return nil, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i]) } p.i++ switch op { - case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=": - return attrSelector{key: key, val: val, operation: op, regexp: rx}, nil - default: - return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op) - } + case "=": + return attributeEqualsSelector(key, val), nil + case "!=": + return attributeNotEqualSelector(key, val), nil + case "~=": + return attributeIncludesSelector(key, val), nil + case "|=": + return attributeDashmatchSelector(key, val), nil + case "^=": + return attributePrefixSelector(key, val), nil + case "$=": + return attributeSuffixSelector(key, val), nil + case "*=": + return attributeSubstringSelector(key, val), nil + case "#=": + return attributeRegexSelector(key, rx), nil + } + + return nil, fmt.Errorf("attribute operator %q is not supported", op) } var errExpectedParenthesis = errors.New("expected '(' but didn't find it") var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it") var errUnmatchedParenthesis = errors.New("unmatched '('") -// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element -// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements. -// https://drafts.csswg.org/selectors-3/#pseudo-elements -// Returning a nil `Sel` (and a nil `error`) means we found a pseudo-element. -func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err error) { +// parsePseudoclassSelector parses a pseudoclass selector like :not(p). +func (p *parser) parsePseudoclassSelector() (Selector, error) { if p.i >= len(p.s) { - return nil, "", fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead") + return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead") } if p.s[p.i] != ':' { - return nil, "", fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i]) + return nil, fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i]) } p.i++ - var mustBePseudoElement bool - if p.i >= len(p.s) { - return nil, "", fmt.Errorf("got empty pseudoclass (or pseudoelement)") - } - if p.s[p.i] == ':' { // we found a pseudo-element - mustBePseudoElement = true - p.i++ - } - name, err := p.parseIdentifier() if err != nil { - return + return nil, err } name = toLowerASCII(name) - if mustBePseudoElement && (name != "after" && name != "backdrop" && name != "before" && - name != "cue" && name != "first-letter" && name != "first-line" && name != "grammar-error" && - name != "marker" && name != "placeholder" && name != "selection" && name != "spelling-error") { - return out, "", fmt.Errorf("unknown pseudoelement :%s", name) - } switch name { case "not", "has", "haschild": if !p.consumeParenthesis() { - return out, "", errExpectedParenthesis + return nil, errExpectedParenthesis } sel, parseErr := p.parseSelectorGroup() if parseErr != nil { - return out, "", parseErr + return nil, parseErr } if !p.consumeClosingParenthesis() { - return out, "", errExpectedClosingParenthesis + return nil, errExpectedClosingParenthesis } - out = relativePseudoClassSelector{name: name, match: sel} + switch name { + case "not": + return negatedSelector(sel), nil + case "has": + return hasDescendantSelector(sel), nil + case "haschild": + return hasChildSelector(sel), nil + } case "contains", "containsown": if !p.consumeParenthesis() { - return out, "", errExpectedParenthesis + return nil, errExpectedParenthesis } if p.i == len(p.s) { - return out, "", errUnmatchedParenthesis + return nil, errUnmatchedParenthesis } var val string switch p.s[p.i] { @@ -489,75 +471,95 @@ func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err val, err = p.parseIdentifier() } if err != nil { - return out, "", err + return nil, err } val = strings.ToLower(val) p.skipWhitespace() if p.i >= len(p.s) { - return out, "", errors.New("unexpected EOF in pseudo selector") + return nil, errors.New("unexpected EOF in pseudo selector") } if !p.consumeClosingParenthesis() { - return out, "", errExpectedClosingParenthesis + return nil, errExpectedClosingParenthesis } - out = containsPseudoClassSelector{own: name == "containsown", value: val} + switch name { + case "contains": + return textSubstrSelector(val), nil + case "containsown": + return ownTextSubstrSelector(val), nil + } case "matches", "matchesown": if !p.consumeParenthesis() { - return out, "", errExpectedParenthesis + return nil, errExpectedParenthesis } rx, err := p.parseRegex() if err != nil { - return out, "", err + return nil, err } if p.i >= len(p.s) { - return out, "", errors.New("unexpected EOF in pseudo selector") + return nil, errors.New("unexpected EOF in pseudo selector") } if !p.consumeClosingParenthesis() { - return out, "", errExpectedClosingParenthesis + return nil, errExpectedClosingParenthesis } - out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx} + switch name { + case "matches": + return textRegexSelector(rx), nil + case "matchesown": + return ownTextRegexSelector(rx), nil + } case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type": if !p.consumeParenthesis() { - return out, "", errExpectedParenthesis + return nil, errExpectedParenthesis } a, b, err := p.parseNth() if err != nil { - return out, "", err + return nil, err } if !p.consumeClosingParenthesis() { - return out, "", errExpectedClosingParenthesis + return nil, errExpectedClosingParenthesis + } + if a == 0 { + switch name { + case "nth-child": + return simpleNthChildSelector(b, false), nil + case "nth-of-type": + return simpleNthChildSelector(b, true), nil + case "nth-last-child": + return simpleNthLastChildSelector(b, false), nil + case "nth-last-of-type": + return simpleNthLastChildSelector(b, true), nil + } } - last := name == "nth-last-child" || name == "nth-last-of-type" - ofType := name == "nth-of-type" || name == "nth-last-of-type" - out = nthPseudoClassSelector{a: a, b: b, last: last, ofType: ofType} + return nthChildSelector(a, b, + name == "nth-last-child" || name == "nth-last-of-type", + name == "nth-of-type" || name == "nth-last-of-type"), + nil case "first-child": - out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: false} + return simpleNthChildSelector(1, false), nil case "last-child": - out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: true} + return simpleNthLastChildSelector(1, false), nil case "first-of-type": - out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: false} + return simpleNthChildSelector(1, true), nil case "last-of-type": - out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: true} + return simpleNthLastChildSelector(1, true), nil case "only-child": - out = onlyChildPseudoClassSelector{ofType: false} + return onlyChildSelector(false), nil case "only-of-type": - out = onlyChildPseudoClassSelector{ofType: true} + return onlyChildSelector(true), nil case "input": - out = inputPseudoClassSelector{} + return inputSelector, nil case "empty": - out = emptyElementPseudoClassSelector{} + return emptyElementSelector, nil case "root": - out = rootPseudoClassSelector{} - case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error": - return nil, name, nil - default: - return out, "", fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name) + return rootSelector, nil } - return + + return nil, fmt.Errorf("unknown pseudoclass :%s", name) } // parseInteger parses a decimal integer. @@ -703,8 +705,8 @@ invalid: // parseSimpleSelectorSequence parses a selector sequence that applies to // a single element. -func (p *parser) parseSimpleSelectorSequence() (Sel, error) { - var selectors []Sel +func (p *parser) parseSimpleSelectorSequence() (Selector, error) { + var result Selector if p.i >= len(p.s) { return nil, errors.New("expected selector, found EOF instead") @@ -721,17 +723,13 @@ func (p *parser) parseSimpleSelectorSequence() (Sel, error) { if err != nil { return nil, err } - selectors = append(selectors, r) + result = r } - var pseudoElement string loop: for p.i < len(p.s) { - var ( - ns Sel - newPseudoElement string - err error - ) + var ns Selector + var err error switch p.s[p.i] { case '#': ns, err = p.parseIDSelector() @@ -740,57 +738,44 @@ loop: case '[': ns, err = p.parseAttributeSelector() case ':': - ns, newPseudoElement, err = p.parsePseudoclassSelector() + ns, err = p.parsePseudoclassSelector() default: break loop } if err != nil { return nil, err } - // From https://drafts.csswg.org/selectors-3/#pseudo-elements : - // "Only one pseudo-element may appear per selector, and if present - // it must appear after the sequence of simple selectors that - // represents the subjects of the selector."" - if ns == nil { // we found a pseudo-element - if pseudoElement != "" { - return nil, fmt.Errorf("only one pseudo-element is accepted per selector, got %s and %s", pseudoElement, newPseudoElement) - } - if !p.acceptPseudoElements { - return nil, fmt.Errorf("pseudo-element %s found, but pseudo-elements support is disabled", newPseudoElement) - } - pseudoElement = newPseudoElement + if result == nil { + result = ns } else { - if pseudoElement != "" { - return nil, fmt.Errorf("pseudo-element %s must be at the end of selector", pseudoElement) - } - selectors = append(selectors, ns) + result = intersectionSelector(result, ns) } - } - if len(selectors) == 1 && pseudoElement == "" { // no need wrap the selectors in compoundSelector - return selectors[0], nil + + if result == nil { + result = func(n *html.Node) bool { + return n.Type == html.ElementNode + } } - return compoundSelector{selectors: selectors, pseudoElement: pseudoElement}, nil + + return result, nil } // parseSelector parses a selector that may include combinators. -func (p *parser) parseSelector() (Sel, error) { +func (p *parser) parseSelector() (result Selector, err error) { p.skipWhitespace() - result, err := p.parseSimpleSelectorSequence() + result, err = p.parseSimpleSelectorSequence() if err != nil { - return nil, err + return } for { - var ( - combinator byte - c Sel - ) + var combinator byte if p.skipWhitespace() { combinator = ' ' } if p.i >= len(p.s) { - return result, nil + return } switch p.s[p.i] { @@ -800,39 +785,51 @@ func (p *parser) parseSelector() (Sel, error) { p.skipWhitespace() case ',', ')': // These characters can't begin a selector, but they can legally occur after one. - return result, nil + return } if combinator == 0 { - return result, nil + return } - c, err = p.parseSimpleSelectorSequence() + c, err := p.parseSimpleSelectorSequence() if err != nil { return nil, err } - result = combinedSelector{first: result, combinator: combinator, second: c} + + switch combinator { + case ' ': + result = descendantSelector(result, c) + case '>': + result = childSelector(result, c) + case '+': + result = siblingSelector(result, c, true) + case '~': + result = siblingSelector(result, c, false) + } } + + panic("unreachable") } // parseSelectorGroup parses a group of selectors, separated by commas. -func (p *parser) parseSelectorGroup() (SelectorGroup, error) { - current, err := p.parseSelector() +func (p *parser) parseSelectorGroup() (result Selector, err error) { + result, err = p.parseSelector() if err != nil { - return nil, err + return } - result := SelectorGroup{current} for p.i < len(p.s) { if p.s[p.i] != ',' { - break + return result, nil } p.i++ c, err := p.parseSelector() if err != nil { return nil, err } - result = append(result, c) + result = unionSelector(result, c) } - return result, nil + + return } -- cgit v1.2.3-54-g00ecf