aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/vendor/golang.org/x/tools/internal/lsp/fuzzy/input.go
blob: ac377035ec63a342841c4e458a5e6b313e2475e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package fuzzy

import (
	"unicode"
)

// RuneRole specifies the role of a rune in the context of an input. RuneRoles produces one
// RuneRole per input byte.
type RuneRole byte

const (
	// RNone specifies a rune without any role in the input (e.g., whitespace, or a symbol that
	// is not a segment separator). Note that RuneRoles classifies non-ASCII bytes like
	// lower-case letters, so they are assigned RHead or RTail rather than RNone.
	RNone RuneRole = iota
	// RSep specifies a rune with the role of segment separator ('.' or ':' in RuneRoles).
	RSep
	// RTail specifies a rune which is a lower-case tail in a word in the input.
	RTail
	// RUCTail specifies a rune which is an upper-case tail in a word in the input, i.e. an
	// upper-case rune that continues a run of upper-case runes (the "B" and "C" in "ABC").
	RUCTail
	// RHead specifies a rune which is the first character in a word in the input.
	RHead
)

// RuneRoles computes the role of every byte of str and returns them as a slice holding exactly
// one RuneRole per byte, in input order.
//
// The backing array of reuse is recycled when its capacity is at least len(str); otherwise
// (including when reuse is nil and str is non-empty) a new slice is allocated. Any previous
// contents of reuse are discarded.
func RuneRoles(str string, reuse []RuneRole) []RuneRole {
	n := len(str)
	output := reuse[:0]
	if cap(reuse) < n {
		output = make([]RuneRole, 0, n)
	}

	// Character classes of the previous byte and of the byte before that one.
	last, beforeLast := rtNone, rtNone
	for i := 0; i < n; i++ {
		b := str[i]

		// Bytes outside the ASCII range are classified like lower-case letters.
		class := rtLower
		if b <= unicode.MaxASCII {
			class = runeType(rt[b] - '0')
		}

		role := RNone
		switch class {
		case rtLower:
			// A lower-case byte starts a word only right after punctuation, a byte
			// without a class, or at the very beginning of the input.
			role = RTail
			if last == rtNone || last == rtPunct {
				role = RHead
			}
		case rtUpper:
			role = RHead
			if last == rtUpper && i == n-1 {
				// Final byte of the input following another upper-case byte: it is an
				// upper-case tail, i.e. (current char is C): aBC / BC / ABC.
				role = RUCTail
			}
		case rtPunct:
			if b == '.' || b == ':' {
				role = RSep
			}
		}

		// The two preceding bytes were upper case and the current one is not lower case, so
		// the previous byte cannot start a new word: demote it from head to upper-case tail,
		// i.e. (last char is current char - B must be a UCTail): ABC / ZABC / AB.
		if class != rtLower && i >= 2 && beforeLast == rtUpper && output[i-1] == RHead {
			if older := output[i-2]; older == RHead || older == RUCTail {
				output[i-1] = RUCTail
			}
		}

		output = append(output, role)
		beforeLast, last = last, class
	}
	return output
}

// runeType is the coarse character class of a single input byte. RuneRoles derives each byte's
// RuneRole from its own class and the classes of the bytes preceding it.
type runeType byte

const (
	// rtNone is the class of bytes that are neither punctuation, letters nor digits. It is
	// also the initial "no previous byte" state used by RuneRoles.
	rtNone runeType = iota
	// rtPunct is the class of punctuation bytes.
	rtPunct
	// rtLower is the class of lower-case bytes; RuneRoles also uses it for non-ASCII bytes.
	rtLower
	// rtUpper is the class of upper-case bytes.
	rtUpper
)

// rt is a lookup table indexed by ASCII byte value. Each character of the string is the decimal
// digit of the runeType for that byte, which is why readers subtract '0' from the looked-up
// value. As laid out here, upper-case letters map to rtUpper, lower-case letters and digits map
// to rtLower, '.', '/' and ':' map to rtPunct, and all remaining ASCII bytes map to rtNone.
const rt = "00000000000000000000000000000000000000000000001122222222221000000333333333333333333333333330000002222222222222222222222222200000"

// LastSegment returns the final segment of input: the run of bytes that follows the last
// separator, ignoring any separators at the very end of the input. roles must hold the
// RuneRole of every byte of input. An input that is empty or made up only of separators
// yields "". This makes sense only for inputs of Symbol or Filename type.
func LastSegment(input string, roles []RuneRole) string {
	// stop is the exclusive end of the segment; step back over trailing separators.
	stop := len(input)
	for stop > 0 && roles[stop-1] == RSep {
		stop--
	}
	if stop == 0 {
		return ""
	}

	// begin is the inclusive start of the segment; extend it leftwards until the byte just
	// before it is a separator or the beginning of the input is reached.
	begin := stop - 1
	for begin > 0 && roles[begin-1] != RSep {
		begin--
	}

	return input[begin:stop]
}

// ToLower lower-cases the ASCII letters of input and returns the result as a byte slice of
// length len(input). Only the bytes 'A' through 'Z' are changed; every other byte, including
// each byte of a multi-byte UTF-8 sequence, is copied through unmodified.
//
// If reuse has enough capacity to hold the input, its backing array is overwritten and
// returned regardless of its current length, so callers may pass buf[:0] to recycle a buffer.
// Otherwise (including when reuse is nil and input is non-empty) a new slice is allocated.
func ToLower(input string, reuse []byte) []byte {
	var output []byte
	if cap(reuse) < len(input) {
		output = make([]byte, len(input))
	} else {
		// Reslice to the input length: reuse may be shorter than its capacity (e.g. buf[:0]),
		// and indexing beyond its length would panic.
		output = reuse[:len(input)]
	}

	for i := 0; i < len(input); i++ {
		c := input[i]
		// The 'A'..'Z' range is ASCII-only, so bytes >= 0x80 are never touched.
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		output[i] = c
	}
	return output
}

// WordConsumer is a callback that receives one word of an input, identified by the half-open
// byte offset range [start,end): start is inclusive, end is exclusive.
type WordConsumer func(start, end int)

// Words splits an input into words using the rune roles of its bytes and reports the
// [start,end) byte offsets of every non-empty word to consume, in input order.
//
// A word begins at an RHead byte and extends over the tail bytes that follow it. RNone and
// RSep bytes terminate the current word and do not belong to any word themselves.
func Words(roles []RuneRole, consume WordConsumer) {
	begin := 0
	for pos, role := range roles {
		if role != RHead && role != RNone && role != RSep {
			// Tail bytes simply extend the word in progress.
			continue
		}

		// A word boundary: flush the word collected so far, if it is non-empty.
		if begin < pos {
			consume(begin, pos)
		}

		if role == RHead {
			// The head byte is the first byte of the next word.
			begin = pos
		} else {
			// Skip this character.
			begin = pos + 1
		}
	}

	// Flush the word that runs up to the end of the input.
	if begin < len(roles) {
		consume(begin, len(roles))
	}
}