diff options
Diffstat (limited to 'src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/symbol.go')
-rw-r--r-- | src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/symbol.go | 237 |
1 files changed, 0 insertions, 237 deletions
diff --git a/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/symbol.go b/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/symbol.go deleted file mode 100644 index 073a4cd101..0000000000 --- a/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/symbol.go +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package fuzzy - -import ( - "unicode" -) - -// SymbolMatcher implements a fuzzy matching algorithm optimized for Go symbols -// of the form: -// -// example.com/path/to/package.object.field -// -// Knowing that we are matching symbols like this allows us to make the -// following optimizations: -// - We can incorporate right-to-left relevance directly into the score -// calculation. -// - We can match from right to left, discarding leading bytes if the input is -// too long. -// - We just take the right-most match without losing too much precision. This -// allows us to use an O(n) algorithm. -// - We can operate directly on chunked strings; in many cases we will -// be storing the package path and/or package name separately from the -// symbol or identifiers, so doing this avoids allocating strings. -// - We can return the index of the right-most match, allowing us to trim -// irrelevant qualification. -// -// This implementation is experimental, serving as a reference fast algorithm -// to compare to the fuzzy algorithm implemented by Matcher. -type SymbolMatcher struct { - // Using buffers of length 256 is both a reasonable size for most qualified - // symbols, and makes it easy to avoid bounds checks by using uint8 indexes. - pattern [256]rune - patternLen uint8 - inputBuffer [256]rune // avoid allocating when considering chunks - roles [256]uint32 // which roles does a rune play (word start, etc.) - segments [256]uint8 // how many segments from the right is each rune -} - -const ( - segmentStart uint32 = 1 << iota - wordStart - separator -) - -// NewSymbolMatcher creates a SymbolMatcher that may be used to match the given -// search pattern. -// -// Currently this matcher only accepts case-insensitive fuzzy patterns. -// -// An empty pattern matches no input. -func NewSymbolMatcher(pattern string) *SymbolMatcher { - m := &SymbolMatcher{} - for _, p := range pattern { - m.pattern[m.patternLen] = unicode.ToLower(p) - m.patternLen++ - if m.patternLen == 255 || int(m.patternLen) == len(pattern) { - // break at 255 so that we can represent patternLen with a uint8. - break - } - } - return m -} - -// Match looks for the right-most match of the search pattern within the symbol -// represented by concatenating the given chunks, returning its offset and -// score. -// -// If a match is found, the first return value will hold the absolute byte -// offset within all chunks for the start of the symbol. In other words, the -// index of the match within strings.Join(chunks, ""). If no match is found, -// the first return value will be -1. -// -// The second return value will be the score of the match, which is always -// between 0 and 1, inclusive. A score of 0 indicates no match. -func (m *SymbolMatcher) Match(chunks []string) (int, float64) { - // Explicit behavior for an empty pattern. - // - // As a minor optimization, this also avoids nilness checks later on, since - // the compiler can prove that m != nil. - if m.patternLen == 0 { - return -1, 0 - } - - // First phase: populate the input buffer with lower-cased runes. - // - // We could also check for a forward match here, but since we'd have to write - // the entire input anyway this has negligible impact on performance. - - var ( - inputLen = uint8(0) - modifiers = wordStart | segmentStart - ) - -input: - for _, chunk := range chunks { - for _, r := range chunk { - if r == '.' || r == '/' { - modifiers |= separator - } - // optimization: avoid calls to unicode.ToLower, which can't be inlined. - l := r - if r <= unicode.MaxASCII { - if 'A' <= r && r <= 'Z' { - l = r + 'a' - 'A' - } - } else { - l = unicode.ToLower(r) - } - if l != r { - modifiers |= wordStart - } - m.inputBuffer[inputLen] = l - m.roles[inputLen] = modifiers - inputLen++ - if m.roles[inputLen-1]&separator != 0 { - modifiers = wordStart | segmentStart - } else { - modifiers = 0 - } - // TODO: we should prefer the right-most input if it overflows, rather - // than the left-most as we're doing here. - if inputLen == 255 { - break input - } - } - } - - // Second phase: find the right-most match, and count segments from the - // right. - - var ( - pi = uint8(m.patternLen - 1) // pattern index - p = m.pattern[pi] // pattern rune - start = -1 // start offset of match - rseg = uint8(0) - ) - const maxSeg = 3 // maximum number of segments from the right to count, for scoring purposes. - - for ii := inputLen - 1; ; ii-- { - r := m.inputBuffer[ii] - if rseg < maxSeg && m.roles[ii]&separator != 0 { - rseg++ - } - m.segments[ii] = rseg - if p == r { - if pi == 0 { - start = int(ii) - break - } - pi-- - p = m.pattern[pi] - } - // Don't check ii >= 0 in the loop condition: ii is a uint8. - if ii == 0 { - break - } - } - - if start < 0 { - // no match: skip scoring - return -1, 0 - } - - // Third phase: find the shortest match, and compute the score. - - // Score is the average score for each character. - // - // A character score is the multiple of: - // 1. 1.0 if the character starts a segment, .8 if the character start a - // mid-segment word, otherwise 0.6. This carries over to immediately - // following characters. - // 2. For the final character match, the multiplier from (1) is reduced to - // .8 if the next character in the input is a mid-segment word, or 0.6 if - // the next character in the input is not a word or segment start. This - // ensures that we favor whole-word or whole-segment matches over prefix - // matches. - // 3. 1.0 if the character is part of the last segment, otherwise - // 1.0-.2*<segments from the right>, with a max segment count of 3. - // - // This is a very naive algorithm, but it is fast. There's lots of prior art - // here, and we should leverage it. For example, we could explicitly consider - // character distance, and exact matches of words or segments. - // - // Also note that this might not actually find the highest scoring match, as - // doing so could require a non-linear algorithm, depending on how the score - // is calculated. - - pi = 0 - p = m.pattern[pi] - - const ( - segStreak = 1.0 - wordStreak = 0.8 - noStreak = 0.6 - perSegment = 0.2 // we count at most 3 segments above - ) - - streakBonus := noStreak - totScore := 0.0 - for ii := uint8(start); ii < inputLen; ii++ { - r := m.inputBuffer[ii] - if r == p { - pi++ - p = m.pattern[pi] - // Note: this could be optimized with some bit operations. - switch { - case m.roles[ii]&segmentStart != 0 && segStreak > streakBonus: - streakBonus = segStreak - case m.roles[ii]&wordStart != 0 && wordStreak > streakBonus: - streakBonus = wordStreak - } - finalChar := pi >= m.patternLen - // finalCost := 1.0 - if finalChar && streakBonus > noStreak { - switch { - case ii == inputLen-1 || m.roles[ii+1]&segmentStart != 0: - // Full segment: no reduction - case m.roles[ii+1]&wordStart != 0: - streakBonus = wordStreak - default: - streakBonus = noStreak - } - } - totScore += streakBonus * (1.0 - float64(m.segments[ii])*perSegment) - if finalChar { - break - } - } else { - streakBonus = noStreak - } - } - - return start, totScore / float64(m.patternLen) -} |