diff options
author | Christian Duerr <contact@christianduerr.com> | 2023-10-09 15:32:51 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-09 17:32:51 +0400 |
commit | e1859e80f6196da6f52aacf8829c039378e13275 (patch) | |
tree | 8be0102781e22e38aa1ff93952cb7610edf8d798 /alacritty_terminal | |
parent | e07bf6f24a7fa7438dd757d6e761c100dd920d68 (diff) | |
download | alacritty-e1859e80f6196da6f52aacf8829c039378e13275.tar.gz alacritty-e1859e80f6196da6f52aacf8829c039378e13275.zip |
Fix regex patterns allowing for empty strings
This patch changes the mode we search for patterns which allow an empty
string, by anchoring all searches. As a result we will match the longest
possible match when multiple patterns are present (like `;*|rust`),
instead of using the leftmost pattern only.
This is only possible with empty matches since our parser is reset on
every byte anyway, so anchoring the search makes no difference.
Fixes #7276.
Diffstat (limited to 'alacritty_terminal')
-rw-r--r-- | alacritty_terminal/src/term/search.rs | 78 |
1 files changed, 40 insertions, 38 deletions
diff --git a/alacritty_terminal/src/term/search.rs b/alacritty_terminal/src/term/search.rs index 1a3c7de8..c10ef40d 100644 --- a/alacritty_terminal/src/term/search.rs +++ b/alacritty_terminal/src/term/search.rs @@ -43,35 +43,36 @@ impl RegexSearch { let max_size = config.get_cache_capacity(); let thompson_config = ThompsonConfig::new().nfa_size_limit(Some(max_size)); - // Create DFAs to find start/end in left-to-right search. - let right_fdfa = LazyDfa::new( + // Create DFAs to find start/end in right-to-left search. + let left_rdfa = LazyDfa::new( search, config.clone(), syntax_config, thompson_config.clone(), Direction::Right, - false, + true, )?; - let right_rdfa = LazyDfa::new( + let has_empty = left_rdfa.dfa.get_nfa().has_empty(); + let left_fdfa = LazyDfa::new( search, config.clone(), syntax_config, thompson_config.clone(), Direction::Left, - true, + has_empty, )?; - // Create DFAs to find start/end in right-to-left search. - let left_fdfa = LazyDfa::new( + // Create DFAs to find start/end in left-to-right search. + let right_fdfa = LazyDfa::new( search, config.clone(), syntax_config, thompson_config.clone(), - Direction::Left, - false, + Direction::Right, + has_empty, )?; - let left_rdfa = - LazyDfa::new(search, config, syntax_config, thompson_config, Direction::Right, true)?; + let right_rdfa = + LazyDfa::new(search, config, syntax_config, thompson_config, Direction::Left, true)?; Ok(RegexSearch { left_fdfa, left_rdfa, right_fdfa, right_rdfa }) } @@ -82,6 +83,8 @@ impl RegexSearch { struct LazyDfa { dfa: DFA, cache: Cache, + direction: Direction, + match_all: bool, } impl LazyDfa { @@ -91,13 +94,13 @@ impl LazyDfa { syntax: SyntaxConfig, mut thompson: ThompsonConfig, direction: Direction, - reverse: bool, + match_all: bool, ) -> Result<Self, Box<BuildError>> { thompson = match direction { Direction::Left => thompson.reverse(true), Direction::Right => thompson.reverse(false), }; - config = if reverse { + config = if match_all { config.match_kind(MatchKind::All) } else { config.match_kind(MatchKind::LeftmostFirst) @@ -109,7 +112,7 @@ impl LazyDfa { let cache = dfa.create_cache(); - Ok(Self { dfa, cache }) + Ok(Self { direction, cache, dfa, match_all }) } } @@ -229,10 +232,8 @@ impl<T> Term<T> { end: Point, ) -> Option<Match> { // Find start and end of match. - let match_start = - self.regex_search(start, end, Direction::Left, false, &mut regex.left_fdfa)?; - let match_end = - self.regex_search(match_start, start, Direction::Right, true, &mut regex.left_rdfa)?; + let match_start = self.regex_search(start, end, &mut regex.left_fdfa)?; + let match_end = self.regex_search(match_start, start, &mut regex.left_rdfa)?; Some(match_start..=match_end) } @@ -247,10 +248,8 @@ impl<T> Term<T> { end: Point, ) -> Option<Match> { // Find start and end of match. - let match_end = - self.regex_search(start, end, Direction::Right, false, &mut regex.right_fdfa)?; - let match_start = - self.regex_search(match_end, start, Direction::Left, true, &mut regex.right_rdfa)?; + let match_end = self.regex_search(start, end, &mut regex.right_fdfa)?; + let match_start = self.regex_search(match_end, start, &mut regex.right_rdfa)?; Some(match_start..=match_end) } @@ -258,15 +257,8 @@ impl<T> Term<T> { /// Find the next regex match. /// /// This will always return the side of the first match which is farthest from the start point. - fn regex_search( - &self, - start: Point, - end: Point, - direction: Direction, - anchored: bool, - regex: &mut LazyDfa, - ) -> Option<Point> { - match self.regex_search_internal(start, end, direction, anchored, regex) { + fn regex_search(&self, start: Point, end: Point, regex: &mut LazyDfa) -> Option<Point> { + match self.regex_search_internal(start, end, regex) { Ok(regex_match) => regex_match, Err(err) => { warn!("Regex exceeded complexity limit"); @@ -283,8 +275,6 @@ impl<T> Term<T> { &self, start: Point, end: Point, - direction: Direction, - anchored: bool, regex: &mut LazyDfa, ) -> Result<Option<Point>, Box<dyn Error>> { let topmost_line = self.topmost_line(); @@ -292,13 +282,13 @@ impl<T> Term<T> { let last_column = self.last_column(); // Advance the iterator. - let next = match direction { + let next = match regex.direction { Direction::Right => GridIterator::next, Direction::Left => GridIterator::prev, }; // Get start state for the DFA. - let regex_anchored = if anchored { Anchored::Yes } else { Anchored::No }; + let regex_anchored = if regex.match_all { Anchored::Yes } else { Anchored::No }; let input = Input::new(&[]).anchored(regex_anchored); let mut state = regex.dfa.start_state_forward(&mut regex.cache, &input).unwrap(); @@ -308,7 +298,7 @@ impl<T> Term<T> { let mut done = false; let mut cell = iter.cell(); - self.skip_fullwidth(&mut iter, &mut cell, direction); + self.skip_fullwidth(&mut iter, &mut cell, regex.direction); let mut c = cell.c; let mut point = iter.point(); @@ -332,7 +322,7 @@ impl<T> Term<T> { // Pass char to DFA as individual bytes. for i in 0..utf8_len { // Inverse byte order when going left. - let byte = match direction { + let byte = match regex.direction { Direction::Right => buf[i], Direction::Left => buf[utf8_len - i - 1], }; @@ -409,7 +399,7 @@ impl<T> Term<T> { // Check for completion before potentially skipping over fullwidth characters. done = iter.point() == end; - self.skip_fullwidth(&mut iter, &mut cell, direction); + self.skip_fullwidth(&mut iter, &mut cell, regex.direction); let wrapped = cell.flags.contains(Flags::WRAPLINE); c = cell.c; @@ -1109,4 +1099,16 @@ mod tests { let end = Point::new(Line(0), Column(17)); assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end)); } + + #[test] + fn anchored_empty() { + #[rustfmt::skip] + let term = mock_term("rust"); + + // Bottom to top. + let mut regex = RegexSearch::new(";*|rust").unwrap(); + let start = Point::new(Line(0), Column(0)); + let end = Point::new(Line(0), Column(3)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end)); + } } |