diff options
author | Christian Duerr <contact@christianduerr.com> | 2023-09-17 11:04:05 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-17 13:04:05 +0400 |
commit | e35e5ad14fce8456afdd89f2b392b9924bb27471 (patch) | |
tree | b4ba4f415457eecf618fc25fd6ce8a4a295b2bf3 | |
parent | 77aa9f42bac4377efe26512d71098d21b9b547fd (diff) | |
download | alacritty-e35e5ad14fce8456afdd89f2b392b9924bb27471.tar.gz alacritty-e35e5ad14fce8456afdd89f2b392b9924bb27471.zip |
Fix regex memory usage
This fixes an issue where regexes with a large number of possible states
would consume excessive memory, since the entire DFA was compiled ahead
of time.
To solve this, the DFA is now built at runtime using `regex-automata`'s
hybrid DFA.
Some checks are still performed ahead of time, however, so obscenely large
regexes (`[0-9A-Za-z]{999999999}`) are rejected with an error when the regex
is constructed. Rejecting such patterns shouldn't cause any issues in practice.
A regex which is large, but not large enough to fail the NFA
construction (like `[0-9A-Za-z]{999999}`) will cause a long search of
the entire grid, but will complete and show the match.
Closes #7097.
-rw-r--r-- | CHANGELOG.md | 1 | ||||
-rw-r--r-- | alacritty/src/config/ui_config.rs | 12 | ||||
-rw-r--r-- | alacritty/src/display/content.rs | 4 | ||||
-rw-r--r-- | alacritty/src/display/hint.rs | 19 | ||||
-rw-r--r-- | alacritty/src/display/mod.rs | 2 | ||||
-rw-r--r-- | alacritty/src/event.rs | 8 | ||||
-rw-r--r-- | alacritty/src/window_context.rs | 2 | ||||
-rw-r--r-- | alacritty_terminal/src/term/search.rs | 254 |
8 files changed, 180 insertions, 122 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 894c998e..801b059f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Cut off wide characters in preedit string - Scrolling on touchscreens - Double clicking on CSD titlebar not always maximizing a window on Wayland +- Excessive memory usage when using regexes with a large number of possible states ### Removed diff --git a/alacritty/src/config/ui_config.rs b/alacritty/src/config/ui_config.rs index eb64f5b8..15423da5 100644 --- a/alacritty/src/config/ui_config.rs +++ b/alacritty/src/config/ui_config.rs @@ -485,7 +485,7 @@ impl LazyRegex { /// Execute a function with the compiled regex DFAs as parameter. pub fn with_compiled<T, F>(&self, f: F) -> Option<T> where - F: FnMut(&RegexSearch) -> T, + F: FnMut(&mut RegexSearch) -> T, { self.0.borrow_mut().compiled().map(f) } @@ -514,7 +514,7 @@ impl LazyRegexVariant { /// /// If the regex is not already compiled, this will compile the DFAs and store them for future /// access. - fn compiled(&mut self) -> Option<&RegexSearch> { + fn compiled(&mut self) -> Option<&mut RegexSearch> { // Check if the regex has already been compiled. 
let regex = match self { Self::Compiled(regex_search) => return Some(regex_search), @@ -578,8 +578,8 @@ mod tests { "ftp://ftp.example.org", ] { let term = mock_term(regular_url); - let regex = RegexSearch::new(URL_REGEX).unwrap(); - let matches = visible_regex_match_iter(&term, ®ex).collect::<Vec<_>>(); + let mut regex = RegexSearch::new(URL_REGEX).unwrap(); + let matches = visible_regex_match_iter(&term, &mut regex).collect::<Vec<_>>(); assert_eq!( matches.len(), 1, @@ -599,8 +599,8 @@ mod tests { "mailto:", ] { let term = mock_term(url_like); - let regex = RegexSearch::new(URL_REGEX).unwrap(); - let matches = visible_regex_match_iter(&term, ®ex).collect::<Vec<_>>(); + let mut regex = RegexSearch::new(URL_REGEX).unwrap(); + let matches = visible_regex_match_iter(&term, &mut regex).collect::<Vec<_>>(); assert!( matches.is_empty(), "Should not match url in string {url_like}, but instead got: {matches:?}" diff --git a/alacritty/src/display/content.rs b/alacritty/src/display/content.rs index 039808eb..ec0f4b6a 100644 --- a/alacritty/src/display/content.rs +++ b/alacritty/src/display/content.rs @@ -41,7 +41,7 @@ impl<'a> RenderableContent<'a> { config: &'a UiConfig, display: &'a mut Display, term: &'a Term<T>, - search_state: &'a SearchState, + search_state: &'a mut SearchState, ) -> Self { let search = search_state.dfas().map(|dfas| HintMatches::visible_regex_matches(term, dfas)); let focused_match = search_state.focused_match(); @@ -486,7 +486,7 @@ impl<'a> HintMatches<'a> { } /// Create from regex matches on term visable part. 
- fn visible_regex_matches<T>(term: &Term<T>, dfas: &RegexSearch) -> Self { + fn visible_regex_matches<T>(term: &Term<T>, dfas: &mut RegexSearch) -> Self { let matches = hint::visible_regex_match_iter(term, dfas).collect::<Vec<_>>(); Self::new(matches) } diff --git a/alacritty/src/display/hint.rs b/alacritty/src/display/hint.rs index 774b3cdb..8dffdeb5 100644 --- a/alacritty/src/display/hint.rs +++ b/alacritty/src/display/hint.rs @@ -90,7 +90,8 @@ impl HintState { // Apply post-processing and search for sub-matches if necessary. if hint.post_processing { - self.matches.extend(matches.flat_map(|rm| { + let mut matches = matches.collect::<Vec<_>>(); + self.matches.extend(matches.drain(..).flat_map(|rm| { HintPostProcessor::new(term, regex, rm).collect::<Vec<_>>() })); } else { @@ -289,7 +290,7 @@ impl HintLabels { /// Iterate over all visible regex matches. pub fn visible_regex_match_iter<'a, T>( term: &'a Term<T>, - regex: &'a RegexSearch, + regex: &'a mut RegexSearch, ) -> impl Iterator<Item = Match> + 'a { let viewport_start = Line(-(term.grid().display_offset() as i32)); let viewport_end = viewport_start + term.bottommost_line(); @@ -344,7 +345,7 @@ pub fn visible_unique_hyperlinks_iter<T>(term: &Term<T>) -> impl Iterator<Item = fn regex_match_at<T>( term: &Term<T>, point: Point, - regex: &RegexSearch, + regex: &mut RegexSearch, post_processing: bool, ) -> Option<Match> { let regex_match = visible_regex_match_iter(term, regex).find(|rm| rm.contains(&point))?; @@ -450,7 +451,7 @@ fn hyperlink_at<T>(term: &Term<T>, point: Point) -> Option<(Hyperlink, Match)> { /// Iterator over all post-processed matches inside an existing hint match. struct HintPostProcessor<'a, T> { /// Regex search DFAs. - regex: &'a RegexSearch, + regex: &'a mut RegexSearch, /// Terminal reference. term: &'a Term<T>, @@ -467,7 +468,7 @@ struct HintPostProcessor<'a, T> { impl<'a, T> HintPostProcessor<'a, T> { /// Create a new iterator for an unprocessed match. 
- fn new(term: &'a Term<T>, regex: &'a RegexSearch, regex_match: Match) -> Self { + fn new(term: &'a Term<T>, regex: &'a mut RegexSearch, regex_match: Match) -> Self { let mut post_processor = Self { next_match: None, start: *regex_match.start(), @@ -638,11 +639,11 @@ mod tests { fn closed_bracket_does_not_result_in_infinite_iterator() { let term = mock_term(" ) "); - let search = RegexSearch::new("[^/ ]").unwrap(); + let mut search = RegexSearch::new("[^/ ]").unwrap(); let count = HintPostProcessor::new( &term, - &search, + &mut search, Point::new(Line(0), Column(1))..=Point::new(Line(0), Column(1)), ) .take(1) @@ -694,9 +695,9 @@ mod tests { // The Term returned from this call will have a viewport starting at 0 and ending at 4096. // That's good enough for this test, since it only cares about visible content. let term = mock_term(&content); - let regex = RegexSearch::new("match!").unwrap(); + let mut regex = RegexSearch::new("match!").unwrap(); // The interator should match everything in the viewport. - assert_eq!(visible_regex_match_iter(&term, ®ex).count(), 4096); + assert_eq!(visible_regex_match_iter(&term, &mut regex).count(), 4096); } } diff --git a/alacritty/src/display/mod.rs b/alacritty/src/display/mod.rs index 4037cd2e..255b587a 100644 --- a/alacritty/src/display/mod.rs +++ b/alacritty/src/display/mod.rs @@ -759,7 +759,7 @@ impl Display { scheduler: &mut Scheduler, message_buffer: &MessageBuffer, config: &UiConfig, - search_state: &SearchState, + search_state: &mut SearchState, ) { // Collect renderable content before the terminal is dropped. let mut content = RenderableContent::new(config, self, &terminal, search_state); diff --git a/alacritty/src/event.rs b/alacritty/src/event.rs index 387e768e..1b7e280c 100644 --- a/alacritty/src/event.rs +++ b/alacritty/src/event.rs @@ -154,8 +154,8 @@ impl SearchState { } /// Active search dfas. 
- pub fn dfas(&self) -> Option<&RegexSearch> { - self.dfas.as_ref() + pub fn dfas(&mut self) -> Option<&mut RegexSearch> { + self.dfas.as_mut() } /// Search regex text if a search is active. @@ -637,7 +637,7 @@ impl<'a, N: Notify + 'a, T: EventListener> input::ActionContext<T> for ActionCon fn search_next(&mut self, origin: Point, direction: Direction, side: Side) -> Option<Match> { self.search_state .dfas - .as_ref() + .as_mut() .and_then(|dfas| self.terminal.search_next(dfas, origin, direction, side, None)) } @@ -913,7 +913,7 @@ impl<'a, N: Notify + 'a, T: EventListener> ActionContext<'a, N, T> { /// Jump to the first regex match from the search origin. fn goto_match(&mut self, mut limit: Option<usize>) { - let dfas = match &self.search_state.dfas { + let dfas = match &mut self.search_state.dfas { Some(dfas) => dfas, None => return, }; diff --git a/alacritty/src/window_context.rs b/alacritty/src/window_context.rs index 332bdcd9..06dd68d6 100644 --- a/alacritty/src/window_context.rs +++ b/alacritty/src/window_context.rs @@ -398,7 +398,7 @@ impl WindowContext { scheduler, &self.message_buffer, &self.config, - &self.search_state, + &mut self.search_state, ); } diff --git a/alacritty_terminal/src/term/search.rs b/alacritty_terminal/src/term/search.rs index 8707de68..da994eec 100644 --- a/alacritty_terminal/src/term/search.rs +++ b/alacritty_terminal/src/term/search.rs @@ -1,10 +1,11 @@ use std::cmp::max; +use std::error::Error; use std::mem; use std::ops::RangeInclusive; -pub use regex_automata::dfa::dense::BuildError; -use regex_automata::dfa::dense::{Builder, Config, DFA}; -use regex_automata::dfa::Automaton; +use log::{debug, warn}; +use regex_automata::hybrid::dfa::{Builder, Cache, Config, DFA}; +pub use regex_automata::hybrid::BuildError; use regex_automata::nfa::thompson::Config as ThompsonConfig; use regex_automata::util::syntax::Config as SyntaxConfig; use regex_automata::{Anchored, Input}; @@ -17,38 +18,59 @@ use crate::term::Term; /// Used to match equal 
brackets, when performing a bracket-pair selection. const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')]; -/// Maximum DFA size to prevent pathological regexes taking down the entire system. -const MAX_DFA_SIZE: usize = 100_000_000; - pub type Match = RangeInclusive<Point>; /// Terminal regex search state. #[derive(Clone, Debug)] pub struct RegexSearch { - dfa: DFA<Vec<u32>>, - rdfa: DFA<Vec<u32>>, + fdfa: LazyDfa, + rdfa: LazyDfa, } impl RegexSearch { /// Build the forward and backward search DFAs. pub fn new(search: &str) -> Result<RegexSearch, Box<BuildError>> { // Setup configs for both DFA directions. + // + // Bounds are based on Regex's meta engine: + // https://github.com/rust-lang/regex/blob/061ee815ef2c44101dba7b0b124600fcb03c1912/regex-automata/src/meta/wrappers.rs#L581-L599 let has_uppercase = search.chars().any(|c| c.is_uppercase()); let syntax_config = SyntaxConfig::new().case_insensitive(!has_uppercase); - let config = Config::new().dfa_size_limit(Some(MAX_DFA_SIZE)); + let config = + Config::new().minimum_cache_clear_count(Some(3)).minimum_bytes_per_state(Some(10)); + let max_size = config.get_cache_capacity(); + let mut thompson_config = ThompsonConfig::new().nfa_size_limit(Some(max_size)); // Create Regex DFA for left-to-right search. - let dfa = Builder::new().configure(config.clone()).syntax(syntax_config).build(search)?; + let fdfa = Builder::new() + .configure(config.clone()) + .syntax(syntax_config) + .thompson(thompson_config.clone()) + .build(search)?; // Create Regex DFA for right-to-left search. - let thompson_config = ThompsonConfig::new().reverse(true); + thompson_config = thompson_config.reverse(true); let rdfa = Builder::new() .configure(config) .syntax(syntax_config) .thompson(thompson_config) .build(search)?; - Ok(RegexSearch { dfa, rdfa }) + Ok(RegexSearch { fdfa: fdfa.into(), rdfa: rdfa.into() }) + } +} + +/// Runtime-evaluated DFA. 
+#[derive(Clone, Debug)] +struct LazyDfa { + dfa: DFA, + cache: Cache, +} + +impl From<DFA> for LazyDfa { + fn from(dfa: DFA) -> Self { + let cache = dfa.create_cache(); + Self { dfa, cache } } } @@ -56,7 +78,7 @@ impl<T> Term<T> { /// Get next search match in the specified direction. pub fn search_next( &self, - regex: &RegexSearch, + regex: &mut RegexSearch, mut origin: Point, direction: Direction, side: Side, @@ -75,7 +97,7 @@ impl<T> Term<T> { /// Find the next match to the right of the origin. fn next_match_right( &self, - regex: &RegexSearch, + regex: &mut RegexSearch, origin: Point, side: Side, max_lines: Option<usize>, @@ -114,7 +136,7 @@ impl<T> Term<T> { /// Find the next match to the left of the origin. fn next_match_left( &self, - regex: &RegexSearch, + regex: &mut RegexSearch, origin: Point, side: Side, max_lines: Option<usize>, @@ -163,14 +185,14 @@ impl<T> Term<T> { /// The origin is always included in the regex. pub fn regex_search_left( &self, - regex: &RegexSearch, + regex: &mut RegexSearch, start: Point, end: Point, ) -> Option<Match> { // Find start and end of match. - let match_start = self.regex_search(start, end, Direction::Left, false, &regex.rdfa)?; + let match_start = self.regex_search(start, end, Direction::Left, false, &mut regex.rdfa)?; let match_end = - self.regex_search(match_start, start, Direction::Right, true, &regex.dfa)?; + self.regex_search(match_start, start, Direction::Right, true, &mut regex.fdfa)?; Some(match_start..=match_end) } @@ -180,14 +202,14 @@ impl<T> Term<T> { /// The origin is always included in the regex. pub fn regex_search_right( &self, - regex: &RegexSearch, + regex: &mut RegexSearch, start: Point, end: Point, ) -> Option<Match> { // Find start and end of match. 
- let match_end = self.regex_search(start, end, Direction::Right, false, &regex.dfa)?; + let match_end = self.regex_search(start, end, Direction::Right, false, &mut regex.fdfa)?; let match_start = - self.regex_search(match_end, start, Direction::Left, true, &regex.rdfa)?; + self.regex_search(match_end, start, Direction::Left, true, &mut regex.rdfa)?; Some(match_start..=match_end) } @@ -201,8 +223,29 @@ impl<T> Term<T> { end: Point, direction: Direction, anchored: bool, - regex: &impl Automaton, + regex: &mut LazyDfa, ) -> Option<Point> { + match self.regex_search_internal(start, end, direction, anchored, regex) { + Ok(regex_match) => regex_match, + Err(err) => { + warn!("Regex exceeded complexity limit"); + debug!(" {err}"); + None + }, + } + } + + /// Find the next regex match. + /// + /// To automatically log regex complexity errors, use [`Self::regex_search`] instead. + fn regex_search_internal( + &self, + start: Point, + end: Point, + direction: Direction, + anchored: bool, + regex: &mut LazyDfa, + ) -> Result<Option<Point>, Box<dyn Error>> { let topmost_line = self.topmost_line(); let screen_lines = self.screen_lines() as i32; let last_column = self.last_column(); @@ -216,8 +259,7 @@ impl<T> Term<T> { // Get start state for the DFA. let regex_anchored = if anchored { Anchored::Yes } else { Anchored::No }; let input = Input::new(&[]).anchored(regex_anchored); - let start_state = regex.start_state_forward(&input).unwrap(); - let mut state = start_state; + let mut state = regex.dfa.start_state_forward(&mut regex.cache, &input).unwrap(); let mut iter = self.grid.iter_from(start); let mut last_wrapped = false; @@ -244,19 +286,18 @@ impl<T> Term<T> { Direction::Left => buf[utf8_len - i - 1], }; - // Since we get the state from the DFA, it doesn't need to be checked. 
- state = unsafe { regex.next_state_unchecked(state, byte) }; + state = regex.dfa.next_state(&mut regex.cache, state, byte)?; // Matches require one additional BYTE of lookahead, so we check the match state for // the first byte of every new character to determine if the last character was a // match. - if i == 0 && regex.is_match_state(state) { + if i == 0 && state.is_match() { regex_match = Some(last_point); } } // Abort on dead states. - if regex.is_dead_state(state) { + if state.is_dead() { break; } @@ -264,8 +305,8 @@ impl<T> Term<T> { if point == end || done { // When reaching the end-of-input, we need to notify the parser that no look-ahead // is possible and check if the current state is still a match. - state = regex.next_eoi_state(state); - if regex.is_match_state(state) { + state = regex.dfa.next_eoi_state(&mut regex.cache, state)?; + if state.is_match() { regex_match = Some(point); } @@ -303,12 +344,12 @@ impl<T> Term<T> { None => { // When reaching the end-of-input, we need to notify the parser that no // look-ahead is possible and check if the current state is still a match. - state = regex.next_eoi_state(state); - if regex.is_match_state(state) { + state = regex.dfa.next_eoi_state(&mut regex.cache, state)?; + if state.is_match() { regex_match = Some(last_point); } - state = start_state; + state = regex.dfa.start_state_forward(&mut regex.cache, &input)?; }, } } @@ -316,7 +357,7 @@ impl<T> Term<T> { last_wrapped = wrapped; } - regex_match + Ok(regex_match) } /// Advance a grid iterator over fullwidth characters. 
@@ -478,7 +519,7 @@ pub struct RegexIter<'a, T> { point: Point, end: Point, direction: Direction, - regex: &'a RegexSearch, + regex: &'a mut RegexSearch, term: &'a Term<T>, done: bool, } @@ -489,7 +530,7 @@ impl<'a, T> RegexIter<'a, T> { end: Point, direction: Direction, term: &'a Term<T>, - regex: &'a RegexSearch, + regex: &'a mut RegexSearch, ) -> Self { Self { point: start, done: false, end, direction, term, regex } } @@ -505,7 +546,7 @@ impl<'a, T> RegexIter<'a, T> { } /// Get the next match in the specified direction. - fn next_match(&self) -> Option<Match> { + fn next_match(&mut self) -> Option<Match> { match self.direction { Direction::Right => self.term.regex_search_right(self.regex, self.point, self.end), Direction::Left => self.term.regex_search_left(self.regex, self.point, self.end), @@ -561,12 +602,12 @@ mod tests { "); // Check regex across wrapped and unwrapped lines. - let regex = RegexSearch::new("Ala.*123").unwrap(); + let mut regex = RegexSearch::new("Ala.*123").unwrap(); let start = Point::new(Line(1), Column(0)); let end = Point::new(Line(4), Column(2)); let match_start = Point::new(Line(1), Column(0)); let match_end = Point::new(Line(2), Column(2)); - assert_eq!(term.regex_search_right(®ex, start, end), Some(match_start..=match_end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=match_end)); } #[test] @@ -581,12 +622,12 @@ mod tests { "); // Check regex across wrapped and unwrapped lines. 
- let regex = RegexSearch::new("Ala.*123").unwrap(); + let mut regex = RegexSearch::new("Ala.*123").unwrap(); let start = Point::new(Line(4), Column(2)); let end = Point::new(Line(1), Column(0)); let match_start = Point::new(Line(1), Column(0)); let match_end = Point::new(Line(2), Column(2)); - assert_eq!(term.regex_search_left(®ex, start, end), Some(match_start..=match_end)); + assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end)); } #[test] @@ -598,16 +639,16 @@ mod tests { "); // Greedy stopped at linebreak. - let regex = RegexSearch::new("Ala.*critty").unwrap(); + let mut regex = RegexSearch::new("Ala.*critty").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(25)); - assert_eq!(term.regex_search_right(®ex, start, end), Some(start..=end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end)); // Greedy stopped at dead state. - let regex = RegexSearch::new("Ala[^y]*critty").unwrap(); + let mut regex = RegexSearch::new("Ala[^y]*critty").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(15)); - assert_eq!(term.regex_search_right(®ex, start, end), Some(start..=end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end)); } #[test] @@ -619,10 +660,10 @@ mod tests { third\ "); - let regex = RegexSearch::new("nothing").unwrap(); + let mut regex = RegexSearch::new("nothing").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(2), Column(4)); - assert_eq!(term.regex_search_right(®ex, start, end), None); + assert_eq!(term.regex_search_right(&mut regex, start, end), None); } #[test] @@ -634,10 +675,10 @@ mod tests { third\ "); - let regex = RegexSearch::new("nothing").unwrap(); + let mut regex = RegexSearch::new("nothing").unwrap(); let start = Point::new(Line(2), Column(4)); let end = Point::new(Line(0), Column(0)); - assert_eq!(term.regex_search_left(®ex, start, end), 
None); + assert_eq!(term.regex_search_left(&mut regex, start, end), None); } #[test] @@ -649,12 +690,12 @@ mod tests { "); // Make sure the cell containing the linebreak is not skipped. - let regex = RegexSearch::new("te.*123").unwrap(); + let mut regex = RegexSearch::new("te.*123").unwrap(); let start = Point::new(Line(1), Column(0)); let end = Point::new(Line(0), Column(0)); let match_start = Point::new(Line(0), Column(0)); let match_end = Point::new(Line(0), Column(9)); - assert_eq!(term.regex_search_left(®ex, start, end), Some(match_start..=match_end)); + assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end)); } #[test] @@ -666,11 +707,11 @@ mod tests { "); // Make sure the cell containing the linebreak is not skipped. - let regex = RegexSearch::new("te.*123").unwrap(); + let mut regex = RegexSearch::new("te.*123").unwrap(); let start = Point::new(Line(0), Column(2)); let end = Point::new(Line(1), Column(9)); let match_start = Point::new(Line(1), Column(0)); - assert_eq!(term.regex_search_right(®ex, start, end), Some(match_start..=end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=end)); } #[test] @@ -678,10 +719,10 @@ mod tests { let term = mock_term("alacritty"); // Make sure dead state cell is skipped when reversing. - let regex = RegexSearch::new("alacrit").unwrap(); + let mut regex = RegexSearch::new("alacrit").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(6)); - assert_eq!(term.regex_search_right(®ex, start, end), Some(start..=end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end)); } #[test] @@ -689,68 +730,68 @@ mod tests { let term = mock_term("zooo lense"); // Make sure the reverse DFA operates the same as a forward DFA. 
- let regex = RegexSearch::new("zoo").unwrap(); + let mut regex = RegexSearch::new("zoo").unwrap(); let start = Point::new(Line(0), Column(9)); let end = Point::new(Line(0), Column(0)); let match_start = Point::new(Line(0), Column(0)); let match_end = Point::new(Line(0), Column(2)); - assert_eq!(term.regex_search_left(®ex, start, end), Some(match_start..=match_end)); + assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end)); } #[test] fn multibyte_unicode() { let term = mock_term("testвосибing"); - let regex = RegexSearch::new("te.*ing").unwrap(); + let mut regex = RegexSearch::new("te.*ing").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(11)); - assert_eq!(term.regex_search_right(®ex, start, end), Some(start..=end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end)); - let regex = RegexSearch::new("te.*ing").unwrap(); + let mut regex = RegexSearch::new("te.*ing").unwrap(); let start = Point::new(Line(0), Column(11)); let end = Point::new(Line(0), Column(0)); - assert_eq!(term.regex_search_left(®ex, start, end), Some(end..=start)); + assert_eq!(term.regex_search_left(&mut regex, start, end), Some(end..=start)); } #[test] fn end_on_multibyte_unicode() { let term = mock_term("testвосиб"); - let regex = RegexSearch::new("te.*и").unwrap(); + let mut regex = RegexSearch::new("te.*и").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(8)); let match_end = Point::new(Line(0), Column(7)); - assert_eq!(term.regex_search_right(®ex, start, end), Some(start..=match_end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=match_end)); } #[test] fn fullwidth() { let term = mock_term("a🦇x🦇"); - let regex = RegexSearch::new("[^ ]*").unwrap(); + let mut regex = RegexSearch::new("[^ ]*").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(5)); - 
assert_eq!(term.regex_search_right(&regex, start, end), Some(start..=end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end)); - let regex = RegexSearch::new("[^ ]*").unwrap(); + let mut regex = RegexSearch::new("[^ ]*").unwrap(); let start = Point::new(Line(0), Column(5)); let end = Point::new(Line(0), Column(0)); - assert_eq!(term.regex_search_left(&regex, start, end), Some(end..=start)); + assert_eq!(term.regex_search_left(&mut regex, start, end), Some(end..=start)); } #[test] fn singlecell_fullwidth() { let term = mock_term("🦇"); - let regex = RegexSearch::new("🦇").unwrap(); + let mut regex = RegexSearch::new("🦇").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(1)); - assert_eq!(term.regex_search_right(&regex, start, end), Some(start..=end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end)); - let regex = RegexSearch::new("🦇").unwrap(); + let mut regex = RegexSearch::new("🦇").unwrap(); let start = Point::new(Line(0), Column(1)); let end = Point::new(Line(0), Column(0)); - assert_eq!(term.regex_search_left(&regex, start, end), Some(end..=start)); + assert_eq!(term.regex_search_left(&mut regex, start, end), Some(end..=start)); } #[test] @@ -761,16 +802,16 @@ mod tests { let end = Point::new(Line(0), Column(4)); // Ensure ending without a match doesn't loop indefinitely. - let regex = RegexSearch::new("x").unwrap(); - assert_eq!(term.regex_search_right(&regex, start, end), None); + let mut regex = RegexSearch::new("x").unwrap(); + assert_eq!(term.regex_search_right(&mut regex, start, end), None); - let regex = RegexSearch::new("x").unwrap(); + let mut regex = RegexSearch::new("x").unwrap(); let match_end = Point::new(Line(0), Column(5)); - assert_eq!(term.regex_search_right(&regex, start, match_end), None); + assert_eq!(term.regex_search_right(&mut regex, start, match_end), None); // Ensure match is captured when only partially inside range. 
- let regex = RegexSearch::new("jarr🦇").unwrap(); - assert_eq!(term.regex_search_right(&regex, start, end), Some(start..=match_end)); + let mut regex = RegexSearch::new("jarr🦇").unwrap(); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=match_end)); } #[test] @@ -781,17 +822,17 @@ mod tests { xxx\ "); - let regex = RegexSearch::new("xxx").unwrap(); + let mut regex = RegexSearch::new("xxx").unwrap(); let start = Point::new(Line(0), Column(2)); let end = Point::new(Line(1), Column(2)); let match_start = Point::new(Line(1), Column(0)); - assert_eq!(term.regex_search_right(&regex, start, end), Some(match_start..=end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=end)); - let regex = RegexSearch::new("xxx").unwrap(); + let mut regex = RegexSearch::new("xxx").unwrap(); let start = Point::new(Line(1), Column(0)); let end = Point::new(Line(0), Column(0)); let match_end = Point::new(Line(0), Column(2)); - assert_eq!(term.regex_search_left(&regex, start, end), Some(end..=match_end)); + assert_eq!(term.regex_search_left(&mut regex, start, end), Some(end..=match_end)); } #[test] @@ -802,19 +843,19 @@ mod tests { xx🦇\ "); - let regex = RegexSearch::new("🦇x").unwrap(); + let mut regex = RegexSearch::new("🦇x").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(1), Column(3)); let match_start = Point::new(Line(0), Column(0)); let match_end = Point::new(Line(0), Column(2)); - assert_eq!(term.regex_search_right(&regex, start, end), Some(match_start..=match_end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=match_end)); - let regex = RegexSearch::new("x🦇").unwrap(); + let mut regex = RegexSearch::new("x🦇").unwrap(); let start = Point::new(Line(1), Column(2)); let end = Point::new(Line(0), Column(0)); let match_start = Point::new(Line(1), Column(1)); let match_end = Point::new(Line(1), Column(3)); - assert_eq!(term.regex_search_left(&regex, start, end), 
Some(match_start..=match_end)); + assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end)); } #[test] @@ -826,33 +867,33 @@ mod tests { "); term.grid[Line(0)][Column(3)].flags.insert(Flags::LEADING_WIDE_CHAR_SPACER); - let regex = RegexSearch::new("🦇x").unwrap(); + let mut regex = RegexSearch::new("🦇x").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(1), Column(3)); let match_start = Point::new(Line(0), Column(3)); let match_end = Point::new(Line(1), Column(2)); - assert_eq!(term.regex_search_right(®ex, start, end), Some(match_start..=match_end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=match_end)); - let regex = RegexSearch::new("🦇x").unwrap(); + let mut regex = RegexSearch::new("🦇x").unwrap(); let start = Point::new(Line(1), Column(3)); let end = Point::new(Line(0), Column(0)); let match_start = Point::new(Line(0), Column(3)); let match_end = Point::new(Line(1), Column(2)); - assert_eq!(term.regex_search_left(®ex, start, end), Some(match_start..=match_end)); + assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end)); - let regex = RegexSearch::new("x🦇").unwrap(); + let mut regex = RegexSearch::new("x🦇").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(1), Column(3)); let match_start = Point::new(Line(0), Column(2)); let match_end = Point::new(Line(1), Column(1)); - assert_eq!(term.regex_search_right(®ex, start, end), Some(match_start..=match_end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=match_end)); - let regex = RegexSearch::new("x🦇").unwrap(); + let mut regex = RegexSearch::new("x🦇").unwrap(); let start = Point::new(Line(1), Column(3)); let end = Point::new(Line(0), Column(0)); let match_start = Point::new(Line(0), Column(2)); let match_end = Point::new(Line(1), Column(1)); - assert_eq!(term.regex_search_left(®ex, start, end), 
Some(match_start..=match_end)); + assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end)); } #[test] @@ -863,12 +904,12 @@ mod tests { term.grid[Line(0)][Column(1)].c = '字'; term.grid[Line(0)][Column(1)].flags = Flags::WIDE_CHAR; - let regex = RegexSearch::new("test").unwrap(); + let mut regex = RegexSearch::new("test").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(1)); - let mut iter = RegexIter::new(start, end, Direction::Right, &term, ®ex); + let mut iter = RegexIter::new(start, end, Direction::Right, &term, &mut regex); assert_eq!(iter.next(), None); } @@ -881,19 +922,34 @@ mod tests { "); // Bottom to top. - let regex = RegexSearch::new("abc").unwrap(); + let mut regex = RegexSearch::new("abc").unwrap(); let start = Point::new(Line(1), Column(0)); let end = Point::new(Line(0), Column(2)); let match_start = Point::new(Line(0), Column(0)); let match_end = Point::new(Line(0), Column(2)); - assert_eq!(term.regex_search_right(®ex, start, end), Some(match_start..=match_end)); + assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=match_end)); // Top to bottom. 
- let regex = RegexSearch::new("def").unwrap(); + let mut regex = RegexSearch::new("def").unwrap(); let start = Point::new(Line(0), Column(2)); let end = Point::new(Line(1), Column(0)); let match_start = Point::new(Line(1), Column(0)); let match_end = Point::new(Line(1), Column(2)); - assert_eq!(term.regex_search_left(®ex, start, end), Some(match_start..=match_end)); + assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end)); + } + + #[test] + fn nfa_compile_error() { + assert!(RegexSearch::new("[0-9A-Za-z]{9999999}").is_err()); + } + + #[test] + fn runtime_cache_error() { + let term = mock_term(&str::repeat("i", 9999)); + + let mut regex = RegexSearch::new("[0-9A-Za-z]{9999}").unwrap(); + let start = Point::new(Line(0), Column(0)); + let end = Point::new(Line(0), Column(9999)); + assert_eq!(term.regex_search_right(&mut regex, start, end), None); } } |