diff options
author | Rob Findley <rfindley@google.com> | 2020-09-11 14:23:34 -0400 |
---|---|---|
committer | Rob Findley <rfindley@google.com> | 2020-09-11 14:23:34 -0400 |
commit | f8b1c17aced24a1618c6984794be9770c5d260be (patch) | |
tree | 45af8d39b5c3d9f43d439ebec0a2ba42b49efe70 /src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/matcher.go | |
parent | e5d91ab096a9ff9673311f1a7f3f860a7f9c2062 (diff) | |
parent | 07c1788357cfe6a4ee5f6f6a54d4fe9f579fa844 (diff) | |
download | go-f8b1c17aced24a1618c6984794be9770c5d260be.tar.gz go-f8b1c17aced24a1618c6984794be9770c5d260be.zip |
[dev.types] all: merge master into dev.typesdev.types
Change-Id: Ia6964cb4e09153c15cc9c5b441373d1b3cb8f757
Diffstat (limited to 'src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/matcher.go')
-rw-r--r-- | src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/matcher.go | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/matcher.go b/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/matcher.go new file mode 100644 index 0000000000..16a643097d --- /dev/null +++ b/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/matcher.go @@ -0,0 +1,398 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package fuzzy implements a fuzzy matching algorithm. +package fuzzy + +import ( + "bytes" + "fmt" +) + +const ( + // MaxInputSize is the maximum size of the input scored against the fuzzy matcher. Longer inputs + // will be truncated to this size. + MaxInputSize = 127 + // MaxPatternSize is the maximum size of the pattern used to construct the fuzzy matcher. Longer + // inputs are truncated to this size. + MaxPatternSize = 63 +) + +type scoreVal int + +func (s scoreVal) val() int { + return int(s) >> 1 +} + +func (s scoreVal) prevK() int { + return int(s) & 1 +} + +func score(val int, prevK int /*0 or 1*/) scoreVal { + return scoreVal(val<<1 + prevK) +} + +// Matcher implements a fuzzy matching algorithm for scoring candidates against a pattern. +// The matcher does not support parallel usage. +type Matcher struct { + pattern string + patternLower []byte // lower-case version of the pattern + patternShort []byte // first characters of the pattern + caseSensitive bool // set if the pattern is mix-cased + + patternRoles []RuneRole // the role of each character in the pattern + roles []RuneRole // the role of each character in the tested string + + scores [MaxInputSize + 1][MaxPatternSize + 1][2]scoreVal + + scoreScale float32 + + lastCandidateLen int // in bytes + lastCandidateMatched bool + + // Here we save the last candidate in lower-case. This is basically a byte slice we reuse for + // performance reasons, so the slice is not reallocated for every candidate. + lowerBuf [MaxInputSize]byte + rolesBuf [MaxInputSize]RuneRole +} + +func (m *Matcher) bestK(i, j int) int { + if m.scores[i][j][0].val() < m.scores[i][j][1].val() { + return 1 + } + return 0 +} + +// NewMatcher returns a new fuzzy matcher for scoring candidates against the provided pattern. +func NewMatcher(pattern string) *Matcher { + if len(pattern) > MaxPatternSize { + pattern = pattern[:MaxPatternSize] + } + + m := &Matcher{ + pattern: pattern, + patternLower: ToLower(pattern, nil), + } + + for i, c := range m.patternLower { + if pattern[i] != c { + m.caseSensitive = true + break + } + } + + if len(pattern) > 3 { + m.patternShort = m.patternLower[:3] + } else { + m.patternShort = m.patternLower + } + + m.patternRoles = RuneRoles(pattern, nil) + + if len(pattern) > 0 { + maxCharScore := 4 + m.scoreScale = 1 / float32(maxCharScore*len(pattern)) + } + + return m +} + +// Score returns the score returned by matching the candidate to the pattern. +// This is not designed for parallel use. Multiple candidates must be scored sequentially. +// Returns a score between 0 and 1 (0 - no match, 1 - perfect match). +func (m *Matcher) Score(candidate string) float32 { + if len(candidate) > MaxInputSize { + candidate = candidate[:MaxInputSize] + } + lower := ToLower(candidate, m.lowerBuf[:]) + m.lastCandidateLen = len(candidate) + + if len(m.pattern) == 0 { + // Empty patterns perfectly match candidates. + return 1 + } + + if m.match(candidate, lower) { + sc := m.computeScore(candidate, lower) + if sc > minScore/2 && !m.poorMatch() { + m.lastCandidateMatched = true + if len(m.pattern) == len(candidate) { + // Perfect match. + return 1 + } + + if sc < 0 { + sc = 0 + } + normalizedScore := float32(sc) * m.scoreScale + if normalizedScore > 1 { + normalizedScore = 1 + } + + return normalizedScore + } + } + + m.lastCandidateMatched = false + return 0 +} + +const minScore = -10000 + +// MatchedRanges returns matches ranges for the last scored string as a flattened array of +// [begin, end) byte offset pairs. +func (m *Matcher) MatchedRanges() []int { + if len(m.pattern) == 0 || !m.lastCandidateMatched { + return nil + } + i, j := m.lastCandidateLen, len(m.pattern) + if m.scores[i][j][0].val() < minScore/2 && m.scores[i][j][1].val() < minScore/2 { + return nil + } + + var ret []int + k := m.bestK(i, j) + for i > 0 { + take := (k == 1) + k = m.scores[i][j][k].prevK() + if take { + if len(ret) == 0 || ret[len(ret)-1] != i { + ret = append(ret, i) + ret = append(ret, i-1) + } else { + ret[len(ret)-1] = i - 1 + } + j-- + } + i-- + } + // Reverse slice. + for i := 0; i < len(ret)/2; i++ { + ret[i], ret[len(ret)-1-i] = ret[len(ret)-1-i], ret[i] + } + return ret +} + +func (m *Matcher) match(candidate string, candidateLower []byte) bool { + i, j := 0, 0 + for ; i < len(candidateLower) && j < len(m.patternLower); i++ { + if candidateLower[i] == m.patternLower[j] { + j++ + } + } + if j != len(m.patternLower) { + return false + } + + // The input passes the simple test against pattern, so it is time to classify its characters. + // Character roles are used below to find the last segment. + m.roles = RuneRoles(candidate, m.rolesBuf[:]) + + return true +} + +func (m *Matcher) computeScore(candidate string, candidateLower []byte) int { + pattLen, candLen := len(m.pattern), len(candidate) + + for j := 0; j <= len(m.pattern); j++ { + m.scores[0][j][0] = minScore << 1 + m.scores[0][j][1] = minScore << 1 + } + m.scores[0][0][0] = score(0, 0) // Start with 0. + + segmentsLeft, lastSegStart := 1, 0 + for i := 0; i < candLen; i++ { + if m.roles[i] == RSep { + segmentsLeft++ + lastSegStart = i + 1 + } + } + + // A per-character bonus for a consecutive match. + consecutiveBonus := 2 + wordIdx := 0 // Word count within segment. + for i := 1; i <= candLen; i++ { + + role := m.roles[i-1] + isHead := role == RHead + + if isHead { + wordIdx++ + } else if role == RSep && segmentsLeft > 1 { + wordIdx = 0 + segmentsLeft-- + } + + var skipPenalty int + if i == 1 || (i-1) == lastSegStart { + // Skipping the start of first or last segment. + skipPenalty++ + } + + for j := 0; j <= pattLen; j++ { + // By default, we don't have a match. Fill in the skip data. + m.scores[i][j][1] = minScore << 1 + + // Compute the skip score. + k := 0 + if m.scores[i-1][j][0].val() < m.scores[i-1][j][1].val() { + k = 1 + } + + skipScore := m.scores[i-1][j][k].val() + // Do not penalize missing characters after the last matched segment. + if j != pattLen { + skipScore -= skipPenalty + } + m.scores[i][j][0] = score(skipScore, k) + + if j == 0 || candidateLower[i-1] != m.patternLower[j-1] { + // Not a match. + continue + } + pRole := m.patternRoles[j-1] + + if role == RTail && pRole == RHead { + if j > 1 { + // Not a match: a head in the pattern matches a tail character in the candidate. + continue + } + // Special treatment for the first character of the pattern. We allow + // matches in the middle of a word if they are long enough, at least + // min(3, pattern.length) characters. + if !bytes.HasPrefix(candidateLower[i-1:], m.patternShort) { + continue + } + } + + // Compute the char score. + var charScore int + // Bonus 1: the char is in the candidate's last segment. + if segmentsLeft <= 1 { + charScore++ + } + // Bonus 2: Case match or a Head in the pattern aligns with one in the word. + // Single-case patterns lack segmentation signals and we assume any character + // can be a head of a segment. + if candidate[i-1] == m.pattern[j-1] || role == RHead && (!m.caseSensitive || pRole == RHead) { + charScore++ + } + + // Penalty 1: pattern char is Head, candidate char is Tail. + if role == RTail && pRole == RHead { + charScore-- + } + // Penalty 2: first pattern character matched in the middle of a word. + if j == 1 && role == RTail { + charScore -= 4 + } + + // Third dimension encodes whether there is a gap between the previous match and the current + // one. + for k := 0; k < 2; k++ { + sc := m.scores[i-1][j-1][k].val() + charScore + + isConsecutive := k == 1 || i-1 == 0 || i-1 == lastSegStart + if isConsecutive { + // Bonus 3: a consecutive match. First character match also gets a bonus to + // ensure prefix final match score normalizes to 1.0. + // Logically, this is a part of charScore, but we have to compute it here because it + // only applies for consecutive matches (k == 1). + sc += consecutiveBonus + } + if k == 0 { + // Penalty 3: Matching inside a segment (and previous char wasn't matched). Penalize for the lack + // of alignment. + if role == RTail || role == RUCTail { + sc -= 3 + } + } + + if sc > m.scores[i][j][1].val() { + m.scores[i][j][1] = score(sc, k) + } + } + } + } + + result := m.scores[len(candidate)][len(m.pattern)][m.bestK(len(candidate), len(m.pattern))].val() + + return result +} + +// ScoreTable returns the score table computed for the provided candidate. Used only for debugging. +func (m *Matcher) ScoreTable(candidate string) string { + var buf bytes.Buffer + + var line1, line2, separator bytes.Buffer + line1.WriteString("\t") + line2.WriteString("\t") + for j := 0; j < len(m.pattern); j++ { + line1.WriteString(fmt.Sprintf("%c\t\t", m.pattern[j])) + separator.WriteString("----------------") + } + + buf.WriteString(line1.String()) + buf.WriteString("\n") + buf.WriteString(separator.String()) + buf.WriteString("\n") + + for i := 1; i <= len(candidate); i++ { + line1.Reset() + line2.Reset() + + line1.WriteString(fmt.Sprintf("%c\t", candidate[i-1])) + line2.WriteString("\t") + + for j := 1; j <= len(m.pattern); j++ { + line1.WriteString(fmt.Sprintf("M%6d(%c)\t", m.scores[i][j][0].val(), dir(m.scores[i][j][0].prevK()))) + line2.WriteString(fmt.Sprintf("H%6d(%c)\t", m.scores[i][j][1].val(), dir(m.scores[i][j][1].prevK()))) + } + buf.WriteString(line1.String()) + buf.WriteString("\n") + buf.WriteString(line2.String()) + buf.WriteString("\n") + buf.WriteString(separator.String()) + buf.WriteString("\n") + } + + return buf.String() +} + +func dir(prevK int) rune { + if prevK == 0 { + return 'M' + } + return 'H' +} + +func (m *Matcher) poorMatch() bool { + if len(m.pattern) < 2 { + return false + } + + i, j := m.lastCandidateLen, len(m.pattern) + k := m.bestK(i, j) + + var counter, len int + for i > 0 { + take := (k == 1) + k = m.scores[i][j][k].prevK() + if take { + len++ + if k == 0 && len < 3 && m.roles[i-1] == RTail { + // Short match in the middle of a word + counter++ + if counter > 1 { + return true + } + } + j-- + } else { + len = 0 + } + i-- + } + return false +} |