aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/symbol.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/symbol.go')
-rw-r--r--src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/symbol.go224
1 files changed, 224 insertions, 0 deletions
diff --git a/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/symbol.go b/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/symbol.go
new file mode 100644
index 0000000000..062f491fb5
--- /dev/null
+++ b/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/symbol.go
@@ -0,0 +1,224 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fuzzy
+
+import (
+ "unicode"
+)
+
+// SymbolMatcher implements a fuzzy matching algorithm optimized for Go symbols
+// of the form:
+// example.com/path/to/package.object.field
+//
+// Knowing that we are matching symbols like this allows us to make the
+// following optimizations:
+// - We can incorporate right-to-left relevance directly into the score
+// calculation.
+// - We can match from right to left, discarding leading bytes if the input is
+// too long.
+// - We just take the right-most match without losing too much precision. This
+// allows us to use an O(n) algorithm.
+// - We can operate directly on chunked strings; in many cases we will
+// be storing the package path and/or package name separately from the
+// symbol or identifiers, so doing this avoids allocating strings.
+// - We can return the index of the right-most match, allowing us to trim
+// irrelevant qualification.
+//
+// This implementation is experimental, serving as a reference fast algorithm
+// to compare to the fuzzy algorithm implemented by Matcher.
+type SymbolMatcher struct {
+ // Using buffers of length 256 is both a reasonable size for most qualified
+ // symbols, and makes it easy to avoid bounds checks by using uint8 indexes.
+ pattern [256]rune
+ patternLen uint8
+ inputBuffer [256]rune // avoid allocating when considering chunks
+ roles [256]uint32 // which roles does a rune play (word start, etc.)
+ segments [256]uint8 // how many segments from the right is each rune
+}
+
+const (
+ segmentStart uint32 = 1 << iota
+ wordStart
+ separator
+)
+
+// NewSymbolMatcher creates a SymbolMatcher that may be used to match the given
+// search pattern.
+//
+// Currently this matcher only accepts case-insensitive fuzzy patterns.
+//
+// TODO(rfindley):
+// - implement smart-casing
+// - implement space-separated groups
+// - implement ', ^, and $ modifiers
+//
+// An empty pattern matches no input.
+func NewSymbolMatcher(pattern string) *SymbolMatcher {
+ m := &SymbolMatcher{}
+ for _, p := range pattern {
+ m.pattern[m.patternLen] = unicode.ToLower(p)
+ m.patternLen++
+ if m.patternLen == 255 || int(m.patternLen) == len(pattern) {
+ // break at 255 so that we can represent patternLen with a uint8.
+ break
+ }
+ }
+ return m
+}
+
+// Match looks for the right-most match of the search pattern within the symbol
+// represented by concatenating the given chunks, returning its offset and
+// score.
+//
+// If a match is found, the first return value will hold the absolute byte
+// offset within all chunks for the start of the symbol. In other words, the
+// index of the match within strings.Join(chunks, ""). If no match is found,
+// the first return value will be -1.
+//
+// The second return value will be the score of the match, which is always
+// between 0 and 1, inclusive. A score of 0 indicates no match.
+func (m *SymbolMatcher) Match(chunks []string) (int, float64) {
+ // Explicit behavior for an empty pattern.
+ //
+ // As a minor optimization, this also avoids nilness checks later on, since
+ // the compiler can prove that m != nil.
+ if m.patternLen == 0 {
+ return -1, 0
+ }
+
+ // First phase: populate the input buffer with lower-cased runes.
+ //
+ // We could also check for a forward match here, but since we'd have to write
+ // the entire input anyway this has negligible impact on performance.
+
+ var (
+ inputLen = uint8(0)
+ modifiers = wordStart | segmentStart
+ )
+
+input:
+ for _, chunk := range chunks {
+ for _, r := range chunk {
+ if r == '.' || r == '/' {
+ modifiers |= separator
+ }
+ // optimization: avoid calls to unicode.ToLower, which can't be inlined.
+ l := r
+ if r <= unicode.MaxASCII {
+ if 'A' <= r && r <= 'Z' {
+ l = r + 'a' - 'A'
+ }
+ } else {
+ l = unicode.ToLower(r)
+ }
+ if l != r {
+ modifiers |= wordStart
+ }
+ m.inputBuffer[inputLen] = l
+ m.roles[inputLen] = modifiers
+ inputLen++
+ if m.roles[inputLen-1]&separator != 0 {
+ modifiers = wordStart | segmentStart
+ } else {
+ modifiers = 0
+ }
+ // TODO: we should prefer the right-most input if it overflows, rather
+ // than the left-most as we're doing here.
+ if inputLen == 255 {
+ break input
+ }
+ }
+ }
+
+ // Second phase: find the right-most match, and count segments from the
+ // right.
+
+ var (
+ pi = uint8(m.patternLen - 1) // pattern index
+ p = m.pattern[pi] // pattern rune
+ start = -1 // start offset of match
+ rseg = uint8(0)
+ )
+ const maxSeg = 3 // maximum number of segments from the right to count, for scoring purposes.
+
+ for ii := inputLen - 1; ; ii-- {
+ r := m.inputBuffer[ii]
+ if rseg < maxSeg && m.roles[ii]&separator != 0 {
+ rseg++
+ }
+ m.segments[ii] = rseg
+ if p == r {
+ if pi == 0 {
+ start = int(ii)
+ break
+ }
+ pi--
+ p = m.pattern[pi]
+ }
+ // Don't check ii >= 0 in the loop condition: ii is a uint8.
+ if ii == 0 {
+ break
+ }
+ }
+
+ if start < 0 {
+ // no match: skip scoring
+ return -1, 0
+ }
+
+ // Third phase: find the shortest match, and compute the score.
+
+ // Score is the average score for each character.
+ //
+ // A character score is the multiple of:
+ // 1. 1.0 if the character starts a segment, .8 if the character start a
+ // mid-segment word, otherwise 0.6. This carries over to immediately
+ // following characters.
+ // 2. 1.0 if the character is part of the last segment, otherwise
+ // 1.0-.2*<segments from the right>, with a max segment count of 3.
+ //
+ // This is a very naive algorithm, but it is fast. There's lots of prior art
+ // here, and we should leverage it. For example, we could explicitly consider
+ // character distance, and exact matches of words or segments.
+ //
+ // Also note that this might not actually find the highest scoring match, as
+ // doing so could require a non-linear algorithm, depending on how the score
+ // is calculated.
+
+ pi = 0
+ p = m.pattern[pi]
+
+ const (
+ segStreak = 1.0
+ wordStreak = 0.8
+ noStreak = 0.6
+ perSegment = 0.2 // we count at most 3 segments above
+ )
+
+ streakBonus := noStreak
+ totScore := 0.0
+ for ii := uint8(start); ii < inputLen; ii++ {
+ r := m.inputBuffer[ii]
+ if r == p {
+ pi++
+ p = m.pattern[pi]
+ // Note: this could be optimized with some bit operations.
+ switch {
+ case m.roles[ii]&segmentStart != 0 && segStreak > streakBonus:
+ streakBonus = segStreak
+ case m.roles[ii]&wordStart != 0 && wordStreak > streakBonus:
+ streakBonus = wordStreak
+ }
+ totScore += streakBonus * (1.0 - float64(m.segments[ii])*perSegment)
+ if pi >= m.patternLen {
+ break
+ }
+ } else {
+ streakBonus = noStreak
+ }
+ }
+
+ return start, totScore / float64(m.patternLen)
+}