aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy')
-rw-r--r--src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/input.go168
-rw-r--r--src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/matcher.go398
2 files changed, 566 insertions, 0 deletions
diff --git a/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/input.go b/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/input.go
new file mode 100644
index 0000000000..ac377035ec
--- /dev/null
+++ b/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/input.go
@@ -0,0 +1,168 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fuzzy
+
+import (
+ "unicode"
+)
+
+// RuneRole specifies the role of a rune in the context of an input.
+type RuneRole byte
+
+const (
+ // RNone specifies a rune without any role in the input (i.e., whitespace/non-ASCII).
+ RNone RuneRole = iota
+ // RSep specifies a rune with the role of segment separator.
+ RSep
+ // RTail specifies a rune which is a lower-case tail in a word in the input.
+ RTail
+ // RUCTail specifies a rune which is an upper-case tail in a word in the input.
+ RUCTail
+ // RHead specifies a rune which is the first character in a word in the input.
+ RHead
+)
+
+// RuneRoles detects the roles of each byte rune in an input string and stores it in the output
+// slice. The rune role depends on the input type. Stops when it parsed all the runes in the string
+// or when it filled the output. If output is nil, then it gets created.
+func RuneRoles(str string, reuse []RuneRole) []RuneRole {
+ var output []RuneRole
+ if cap(reuse) < len(str) {
+ output = make([]RuneRole, 0, len(str))
+ } else {
+ output = reuse[:0]
+ }
+
+ prev, prev2 := rtNone, rtNone
+ for i := 0; i < len(str); i++ {
+ r := rune(str[i])
+
+ role := RNone
+
+ curr := rtLower
+ if str[i] <= unicode.MaxASCII {
+ curr = runeType(rt[str[i]] - '0')
+ }
+
+ if curr == rtLower {
+ if prev == rtNone || prev == rtPunct {
+ role = RHead
+ } else {
+ role = RTail
+ }
+ } else if curr == rtUpper {
+ role = RHead
+
+ if prev == rtUpper {
+ // This and previous characters are both upper case.
+
+ if i+1 == len(str) {
+ // This is last character, previous was also uppercase -> this is UCTail
+ // i.e., (current char is C): aBC / BC / ABC
+ role = RUCTail
+ }
+ }
+ } else if curr == rtPunct {
+ switch r {
+ case '.', ':':
+ role = RSep
+ }
+ }
+ if curr != rtLower {
+ if i > 1 && output[i-1] == RHead && prev2 == rtUpper && (output[i-2] == RHead || output[i-2] == RUCTail) {
+ // The previous two characters were uppercase. The current one is not a lower case, so the
+ // previous one can't be a HEAD. Make it a UCTail.
+ // i.e., (last char is current char - B must be a UCTail): ABC / ZABC / AB.
+ output[i-1] = RUCTail
+ }
+ }
+
+ output = append(output, role)
+ prev2 = prev
+ prev = curr
+ }
+ return output
+}
+
+type runeType byte
+
+const (
+ rtNone runeType = iota
+ rtPunct
+ rtLower
+ rtUpper
+)
+
+const rt = "00000000000000000000000000000000000000000000001122222222221000000333333333333333333333333330000002222222222222222222222222200000"
+
+// LastSegment returns the substring representing the last segment from the input, where each
+// byte has an associated RuneRole in the roles slice. This makes sense only for inputs of Symbol
+// or Filename type.
+func LastSegment(input string, roles []RuneRole) string {
+ // Exclude ending separators.
+ end := len(input) - 1
+ for end >= 0 && roles[end] == RSep {
+ end--
+ }
+ if end < 0 {
+ return ""
+ }
+
+ start := end - 1
+ for start >= 0 && roles[start] != RSep {
+ start--
+ }
+
+ return input[start+1 : end+1]
+}
+
+// ToLower transforms the input string to lower case, which is stored in the output byte slice.
+// The lower casing considers only ASCII values - non ASCII values are left unmodified.
+// Stops when parsed all input or when it filled the output slice. If output is nil, then it gets
+// created.
+func ToLower(input string, reuse []byte) []byte {
+ output := reuse
+ if cap(reuse) < len(input) {
+ output = make([]byte, len(input))
+ }
+
+ for i := 0; i < len(input); i++ {
+ r := rune(input[i])
+ if r <= unicode.MaxASCII {
+ if 'A' <= r && r <= 'Z' {
+ r += 'a' - 'A'
+ }
+ }
+ output[i] = byte(r)
+ }
+ return output[:len(input)]
+}
+
+// WordConsumer defines a consumer for a word delimited by the [start,end) byte offsets in an input
+// (start is inclusive, end is exclusive).
+type WordConsumer func(start, end int)
+
+// Words find word delimiters in an input based on its bytes' mappings to rune roles. The offset
+// delimiters for each word are fed to the provided consumer function.
+func Words(roles []RuneRole, consume WordConsumer) {
+ var wordStart int
+ for i, r := range roles {
+ switch r {
+ case RUCTail, RTail:
+ case RHead, RNone, RSep:
+ if i != wordStart {
+ consume(wordStart, i)
+ }
+ wordStart = i
+ if r != RHead {
+ // Skip this character.
+ wordStart = i + 1
+ }
+ }
+ }
+ if wordStart != len(roles) {
+ consume(wordStart, len(roles))
+ }
+}
diff --git a/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/matcher.go b/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/matcher.go
new file mode 100644
index 0000000000..16a643097d
--- /dev/null
+++ b/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/matcher.go
@@ -0,0 +1,398 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package fuzzy implements a fuzzy matching algorithm.
+package fuzzy
+
+import (
+ "bytes"
+ "fmt"
+)
+
+const (
+ // MaxInputSize is the maximum size of the input scored against the fuzzy matcher. Longer inputs
+ // will be truncated to this size.
+ MaxInputSize = 127
+ // MaxPatternSize is the maximum size of the pattern used to construct the fuzzy matcher. Longer
+ // inputs are truncated to this size.
+ MaxPatternSize = 63
+)
+
+type scoreVal int
+
+func (s scoreVal) val() int {
+ return int(s) >> 1
+}
+
+func (s scoreVal) prevK() int {
+ return int(s) & 1
+}
+
+func score(val int, prevK int /*0 or 1*/) scoreVal {
+ return scoreVal(val<<1 + prevK)
+}
+
+// Matcher implements a fuzzy matching algorithm for scoring candidates against a pattern.
+// The matcher does not support parallel usage.
+type Matcher struct {
+ pattern string
+ patternLower []byte // lower-case version of the pattern
+ patternShort []byte // first characters of the pattern
+ caseSensitive bool // set if the pattern is mix-cased
+
+ patternRoles []RuneRole // the role of each character in the pattern
+ roles []RuneRole // the role of each character in the tested string
+
+ scores [MaxInputSize + 1][MaxPatternSize + 1][2]scoreVal
+
+ scoreScale float32
+
+ lastCandidateLen int // in bytes
+ lastCandidateMatched bool
+
+ // Here we save the last candidate in lower-case. This is basically a byte slice we reuse for
+ // performance reasons, so the slice is not reallocated for every candidate.
+ lowerBuf [MaxInputSize]byte
+ rolesBuf [MaxInputSize]RuneRole
+}
+
+func (m *Matcher) bestK(i, j int) int {
+ if m.scores[i][j][0].val() < m.scores[i][j][1].val() {
+ return 1
+ }
+ return 0
+}
+
+// NewMatcher returns a new fuzzy matcher for scoring candidates against the provided pattern.
+func NewMatcher(pattern string) *Matcher {
+ if len(pattern) > MaxPatternSize {
+ pattern = pattern[:MaxPatternSize]
+ }
+
+ m := &Matcher{
+ pattern: pattern,
+ patternLower: ToLower(pattern, nil),
+ }
+
+ for i, c := range m.patternLower {
+ if pattern[i] != c {
+ m.caseSensitive = true
+ break
+ }
+ }
+
+ if len(pattern) > 3 {
+ m.patternShort = m.patternLower[:3]
+ } else {
+ m.patternShort = m.patternLower
+ }
+
+ m.patternRoles = RuneRoles(pattern, nil)
+
+ if len(pattern) > 0 {
+ maxCharScore := 4
+ m.scoreScale = 1 / float32(maxCharScore*len(pattern))
+ }
+
+ return m
+}
+
+// Score returns the score returned by matching the candidate to the pattern.
+// This is not designed for parallel use. Multiple candidates must be scored sequentially.
+// Returns a score between 0 and 1 (0 - no match, 1 - perfect match).
+func (m *Matcher) Score(candidate string) float32 {
+ if len(candidate) > MaxInputSize {
+ candidate = candidate[:MaxInputSize]
+ }
+ lower := ToLower(candidate, m.lowerBuf[:])
+ m.lastCandidateLen = len(candidate)
+
+ if len(m.pattern) == 0 {
+ // Empty patterns perfectly match candidates.
+ return 1
+ }
+
+ if m.match(candidate, lower) {
+ sc := m.computeScore(candidate, lower)
+ if sc > minScore/2 && !m.poorMatch() {
+ m.lastCandidateMatched = true
+ if len(m.pattern) == len(candidate) {
+ // Perfect match.
+ return 1
+ }
+
+ if sc < 0 {
+ sc = 0
+ }
+ normalizedScore := float32(sc) * m.scoreScale
+ if normalizedScore > 1 {
+ normalizedScore = 1
+ }
+
+ return normalizedScore
+ }
+ }
+
+ m.lastCandidateMatched = false
+ return 0
+}
+
+const minScore = -10000
+
+// MatchedRanges returns matches ranges for the last scored string as a flattened array of
+// [begin, end) byte offset pairs.
+func (m *Matcher) MatchedRanges() []int {
+ if len(m.pattern) == 0 || !m.lastCandidateMatched {
+ return nil
+ }
+ i, j := m.lastCandidateLen, len(m.pattern)
+ if m.scores[i][j][0].val() < minScore/2 && m.scores[i][j][1].val() < minScore/2 {
+ return nil
+ }
+
+ var ret []int
+ k := m.bestK(i, j)
+ for i > 0 {
+ take := (k == 1)
+ k = m.scores[i][j][k].prevK()
+ if take {
+ if len(ret) == 0 || ret[len(ret)-1] != i {
+ ret = append(ret, i)
+ ret = append(ret, i-1)
+ } else {
+ ret[len(ret)-1] = i - 1
+ }
+ j--
+ }
+ i--
+ }
+ // Reverse slice.
+ for i := 0; i < len(ret)/2; i++ {
+ ret[i], ret[len(ret)-1-i] = ret[len(ret)-1-i], ret[i]
+ }
+ return ret
+}
+
+func (m *Matcher) match(candidate string, candidateLower []byte) bool {
+ i, j := 0, 0
+ for ; i < len(candidateLower) && j < len(m.patternLower); i++ {
+ if candidateLower[i] == m.patternLower[j] {
+ j++
+ }
+ }
+ if j != len(m.patternLower) {
+ return false
+ }
+
+ // The input passes the simple test against pattern, so it is time to classify its characters.
+ // Character roles are used below to find the last segment.
+ m.roles = RuneRoles(candidate, m.rolesBuf[:])
+
+ return true
+}
+
+func (m *Matcher) computeScore(candidate string, candidateLower []byte) int {
+ pattLen, candLen := len(m.pattern), len(candidate)
+
+ for j := 0; j <= len(m.pattern); j++ {
+ m.scores[0][j][0] = minScore << 1
+ m.scores[0][j][1] = minScore << 1
+ }
+ m.scores[0][0][0] = score(0, 0) // Start with 0.
+
+ segmentsLeft, lastSegStart := 1, 0
+ for i := 0; i < candLen; i++ {
+ if m.roles[i] == RSep {
+ segmentsLeft++
+ lastSegStart = i + 1
+ }
+ }
+
+ // A per-character bonus for a consecutive match.
+ consecutiveBonus := 2
+ wordIdx := 0 // Word count within segment.
+ for i := 1; i <= candLen; i++ {
+
+ role := m.roles[i-1]
+ isHead := role == RHead
+
+ if isHead {
+ wordIdx++
+ } else if role == RSep && segmentsLeft > 1 {
+ wordIdx = 0
+ segmentsLeft--
+ }
+
+ var skipPenalty int
+ if i == 1 || (i-1) == lastSegStart {
+ // Skipping the start of first or last segment.
+ skipPenalty++
+ }
+
+ for j := 0; j <= pattLen; j++ {
+ // By default, we don't have a match. Fill in the skip data.
+ m.scores[i][j][1] = minScore << 1
+
+ // Compute the skip score.
+ k := 0
+ if m.scores[i-1][j][0].val() < m.scores[i-1][j][1].val() {
+ k = 1
+ }
+
+ skipScore := m.scores[i-1][j][k].val()
+ // Do not penalize missing characters after the last matched segment.
+ if j != pattLen {
+ skipScore -= skipPenalty
+ }
+ m.scores[i][j][0] = score(skipScore, k)
+
+ if j == 0 || candidateLower[i-1] != m.patternLower[j-1] {
+ // Not a match.
+ continue
+ }
+ pRole := m.patternRoles[j-1]
+
+ if role == RTail && pRole == RHead {
+ if j > 1 {
+ // Not a match: a head in the pattern matches a tail character in the candidate.
+ continue
+ }
+ // Special treatment for the first character of the pattern. We allow
+ // matches in the middle of a word if they are long enough, at least
+ // min(3, pattern.length) characters.
+ if !bytes.HasPrefix(candidateLower[i-1:], m.patternShort) {
+ continue
+ }
+ }
+
+ // Compute the char score.
+ var charScore int
+ // Bonus 1: the char is in the candidate's last segment.
+ if segmentsLeft <= 1 {
+ charScore++
+ }
+ // Bonus 2: Case match or a Head in the pattern aligns with one in the word.
+ // Single-case patterns lack segmentation signals and we assume any character
+ // can be a head of a segment.
+ if candidate[i-1] == m.pattern[j-1] || role == RHead && (!m.caseSensitive || pRole == RHead) {
+ charScore++
+ }
+
+ // Penalty 1: pattern char is Head, candidate char is Tail.
+ if role == RTail && pRole == RHead {
+ charScore--
+ }
+ // Penalty 2: first pattern character matched in the middle of a word.
+ if j == 1 && role == RTail {
+ charScore -= 4
+ }
+
+ // Third dimension encodes whether there is a gap between the previous match and the current
+ // one.
+ for k := 0; k < 2; k++ {
+ sc := m.scores[i-1][j-1][k].val() + charScore
+
+ isConsecutive := k == 1 || i-1 == 0 || i-1 == lastSegStart
+ if isConsecutive {
+ // Bonus 3: a consecutive match. First character match also gets a bonus to
+ // ensure prefix final match score normalizes to 1.0.
+ // Logically, this is a part of charScore, but we have to compute it here because it
+ // only applies for consecutive matches (k == 1).
+ sc += consecutiveBonus
+ }
+ if k == 0 {
+ // Penalty 3: Matching inside a segment (and previous char wasn't matched). Penalize for the lack
+ // of alignment.
+ if role == RTail || role == RUCTail {
+ sc -= 3
+ }
+ }
+
+ if sc > m.scores[i][j][1].val() {
+ m.scores[i][j][1] = score(sc, k)
+ }
+ }
+ }
+ }
+
+ result := m.scores[len(candidate)][len(m.pattern)][m.bestK(len(candidate), len(m.pattern))].val()
+
+ return result
+}
+
+// ScoreTable returns the score table computed for the provided candidate. Used only for debugging.
+func (m *Matcher) ScoreTable(candidate string) string {
+ var buf bytes.Buffer
+
+ var line1, line2, separator bytes.Buffer
+ line1.WriteString("\t")
+ line2.WriteString("\t")
+ for j := 0; j < len(m.pattern); j++ {
+ line1.WriteString(fmt.Sprintf("%c\t\t", m.pattern[j]))
+ separator.WriteString("----------------")
+ }
+
+ buf.WriteString(line1.String())
+ buf.WriteString("\n")
+ buf.WriteString(separator.String())
+ buf.WriteString("\n")
+
+ for i := 1; i <= len(candidate); i++ {
+ line1.Reset()
+ line2.Reset()
+
+ line1.WriteString(fmt.Sprintf("%c\t", candidate[i-1]))
+ line2.WriteString("\t")
+
+ for j := 1; j <= len(m.pattern); j++ {
+ line1.WriteString(fmt.Sprintf("M%6d(%c)\t", m.scores[i][j][0].val(), dir(m.scores[i][j][0].prevK())))
+ line2.WriteString(fmt.Sprintf("H%6d(%c)\t", m.scores[i][j][1].val(), dir(m.scores[i][j][1].prevK())))
+ }
+ buf.WriteString(line1.String())
+ buf.WriteString("\n")
+ buf.WriteString(line2.String())
+ buf.WriteString("\n")
+ buf.WriteString(separator.String())
+ buf.WriteString("\n")
+ }
+
+ return buf.String()
+}
+
+func dir(prevK int) rune {
+ if prevK == 0 {
+ return 'M'
+ }
+ return 'H'
+}
+
+func (m *Matcher) poorMatch() bool {
+ if len(m.pattern) < 2 {
+ return false
+ }
+
+ i, j := m.lastCandidateLen, len(m.pattern)
+ k := m.bestK(i, j)
+
+ var counter, len int
+ for i > 0 {
+ take := (k == 1)
+ k = m.scores[i][j][k].prevK()
+ if take {
+ len++
+ if k == 0 && len < 3 && m.roles[i-1] == RTail {
+ // Short match in the middle of a word
+ counter++
+ if counter > 1 {
+ return true
+ }
+ }
+ j--
+ } else {
+ len = 0
+ }
+ i--
+ }
+ return false
+}