aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/lib.rs1
-rw-r--r--src/term/mod.rs194
-rw-r--r--src/url.rs230
3 files changed, 244 insertions, 181 deletions
diff --git a/src/lib.rs b/src/lib.rs
index d6873f96..f99510f2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -46,6 +46,7 @@ pub mod term;
pub mod tty;
pub mod util;
pub mod window;
+mod url;
use std::ops::Mul;
diff --git a/src/term/mod.rs b/src/term/mod.rs
index dcde62c8..dadfaf78 100644
--- a/src/term/mod.rs
+++ b/src/term/mod.rs
@@ -20,7 +20,6 @@ use std::time::{Duration, Instant};
use arraydeque::ArrayDeque;
use unicode_width::UnicodeWidthChar;
-use url::Url;
use font::{self, Size};
use crate::ansi::{self, Color, NamedColor, Attr, Handler, CharsetIndex, StandardCharset, CursorStyle};
@@ -32,17 +31,13 @@ use crate::{MouseCursor, Rgb};
use copypasta::{Clipboard, Load, Store};
use crate::input::FONT_SIZE_STEP;
use crate::logging::LoggerProxy;
+use crate::url::UrlParser;
pub mod cell;
pub mod color;
pub use self::cell::Cell;
use self::cell::LineLength;
-// See https://tools.ietf.org/html/rfc3987#page-13
-const URL_SEPARATOR_CHARS: [char; 10] = ['<', '>', '"', ' ', '{', '}', '|', '\\', '^', '`'];
-const URL_DENY_END_CHARS: [char; 7] = ['.', ',', ';', ':', '?', '!', '/'];
-const URL_SCHEMES: [&str; 8] = ["http", "https", "mailto", "news", "file", "git", "ssh", "ftp"];
-
/// A type that can expand a given point to a region
///
/// Usually this is implemented for some 2-D array type since
@@ -112,52 +107,19 @@ impl Search for Term {
point.col += 1;
let mut iterb = self.grid.iter_from(point);
- // Put all characters until separators into a string
- let mut buf = String::new();
+ // Find URLs
+ let mut url_parser = UrlParser::new();
while let Some(cell) = iterb.prev() {
- if URL_SEPARATOR_CHARS.contains(&cell.c) {
+ if url_parser.advance_left(cell.c) {
break;
}
- buf.insert(0, cell.c);
}
for cell in iterf {
- if URL_SEPARATOR_CHARS.contains(&cell.c) {
+ if url_parser.advance_right(cell.c) {
break;
}
- buf.push(cell.c);
- }
-
- // Remove all leading '('
- while buf.starts_with('(') {
- buf.remove(0);
- }
-
- // Remove all ')' from end of URLs without matching '('
- let open_count = buf.chars().filter(|&c| c == '(').count();
- let closed_count = buf.chars().filter(|&c| c == ')').count();
- let mut parens_diff = closed_count - open_count;
-
- // Remove all characters which aren't allowed at the end of a URL
- while !buf.is_empty()
- && (URL_DENY_END_CHARS.contains(&buf.chars().last().unwrap())
- || (parens_diff > 0 && buf.ends_with(')')))
- {
- if buf.pop().unwrap() == ')' {
- parens_diff -= 1;
- }
- }
-
- // Check if string is valid url
- match Url::parse(&buf) {
- Ok(url) => {
- if URL_SCHEMES.contains(&url.scheme()) {
- Some(buf)
- } else {
- None
- }
- }
- Err(_) => None,
}
+ url_parser.url()
}
}
@@ -1163,6 +1125,12 @@ impl Term {
&self.grid
}
+ /// Returns a mutable reference to the terminal's grid.
+ ///
+ /// Only compiled under `cfg(test)`, so tests can swap a prepared grid in.
+ #[cfg(test)]
+ pub fn grid_mut(&mut self) -> &mut Grid<Cell> {
+ &mut self.grid
+ }
+
/// Iterate over the *renderable* cells in the terminal
///
/// A renderable cell is any cell which has content other than the default
@@ -2147,7 +2115,7 @@ mod tests {
use serde_json;
use super::{Cell, Term, SizeInfo};
- use crate::term::{cell, Search};
+ use crate::term::cell;
use crate::grid::{Grid, Scroll};
use crate::index::{Point, Line, Column, Side};
@@ -2387,142 +2355,6 @@ mod tests {
scrolled_grid.scroll_display(Scroll::Top);
assert_eq!(term.grid, scrolled_grid);
}
-
- // `((ftp://a.de))` -> `Some("ftp://a.de")`
- #[test]
- fn url_trim_unmatched_parens() {
- let size = SizeInfo {
- width: 21.0,
- height: 51.0,
- cell_width: 3.0,
- cell_height: 3.0,
- padding_x: 0.0,
- padding_y: 0.0,
- dpr: 1.0,
- };
- let mut term = Term::new(&Default::default(), size);
- let mut grid: Grid<Cell> = Grid::new(Line(1), Column(15), 0, Cell::default());
- grid[Line(0)][Column(0)].c = '(';
- grid[Line(0)][Column(1)].c = '(';
- grid[Line(0)][Column(2)].c = 'f';
- grid[Line(0)][Column(3)].c = 't';
- grid[Line(0)][Column(4)].c = 'p';
- grid[Line(0)][Column(5)].c = ':';
- grid[Line(0)][Column(6)].c = '/';
- grid[Line(0)][Column(7)].c = '/';
- grid[Line(0)][Column(8)].c = 'a';
- grid[Line(0)][Column(9)].c = '.';
- grid[Line(0)][Column(10)].c = 'd';
- grid[Line(0)][Column(11)].c = 'e';
- grid[Line(0)][Column(12)].c = ')';
- grid[Line(0)][Column(13)].c = ')';
- mem::swap(&mut term.grid, &mut grid);
-
- // Search for URL in grid
- let url = term.url_search(Point::new(0, Column(4)));
-
- assert_eq!(url, Some("ftp://a.de".into()));
- }
-
- // `ftp://a.de/()` -> `Some("ftp://a.de/()")`
- #[test]
- fn url_allow_matching_parens() {
- let size = SizeInfo {
- width: 21.0,
- height: 51.0,
- cell_width: 3.0,
- cell_height: 3.0,
- padding_x: 0.0,
- padding_y: 0.0,
- dpr: 1.0,
- };
- let mut term = Term::new(&Default::default(), size);
- let mut grid: Grid<Cell> = Grid::new(Line(1), Column(15), 0, Cell::default());
- grid[Line(0)][Column(0)].c = 'f';
- grid[Line(0)][Column(1)].c = 't';
- grid[Line(0)][Column(2)].c = 'p';
- grid[Line(0)][Column(3)].c = ':';
- grid[Line(0)][Column(4)].c = '/';
- grid[Line(0)][Column(5)].c = '/';
- grid[Line(0)][Column(6)].c = 'a';
- grid[Line(0)][Column(7)].c = '.';
- grid[Line(0)][Column(8)].c = 'd';
- grid[Line(0)][Column(9)].c = 'e';
- grid[Line(0)][Column(10)].c = '/';
- grid[Line(0)][Column(11)].c = '(';
- grid[Line(0)][Column(12)].c = ')';
- mem::swap(&mut term.grid, &mut grid);
-
- // Search for URL in grid
- let url = term.url_search(Point::new(0, Column(4)));
-
- assert_eq!(url, Some("ftp://a.de/()".into()));
- }
-
- // `aze` -> `None`
- #[test]
- fn url_skip_invalid() {
- let size = SizeInfo {
- width: 21.0,
- height: 51.0,
- cell_width: 3.0,
- cell_height: 3.0,
- padding_x: 0.0,
- padding_y: 0.0,
- dpr: 1.0,
- };
- let mut term = Term::new(&Default::default(), size);
- let mut grid: Grid<Cell> = Grid::new(Line(1), Column(15), 0, Cell::default());
- grid[Line(0)][Column(0)].c = 'a';
- grid[Line(0)][Column(1)].c = 'z';
- grid[Line(0)][Column(2)].c = 'e';
- mem::swap(&mut term.grid, &mut grid);
-
- // Search for URL in grid
- let url = term.url_search(Point::new(0, Column(1)));
-
- assert_eq!(url, None);
- }
-
- // `ftp://a.de.,;:)!/?` -> `Some("ftp://a.de")`
- #[test]
- fn url_remove_end_chars() {
- let size = SizeInfo {
- width: 21.0,
- height: 51.0,
- cell_width: 3.0,
- cell_height: 3.0,
- padding_x: 0.0,
- padding_y: 0.0,
- dpr: 1.0,
- };
- let mut term = Term::new(&Default::default(), size);
- let mut grid: Grid<Cell> = Grid::new(Line(1), Column(18), 0, Cell::default());
- grid[Line(0)][Column(0)].c = 'f';
- grid[Line(0)][Column(1)].c = 't';
- grid[Line(0)][Column(2)].c = 'p';
- grid[Line(0)][Column(3)].c = ':';
- grid[Line(0)][Column(4)].c = '/';
- grid[Line(0)][Column(5)].c = '/';
- grid[Line(0)][Column(6)].c = 'a';
- grid[Line(0)][Column(7)].c = '.';
- grid[Line(0)][Column(8)].c = 'd';
- grid[Line(0)][Column(9)].c = 'e';
- grid[Line(0)][Column(10)].c = '.';
- grid[Line(0)][Column(11)].c = ',';
- grid[Line(0)][Column(12)].c = ';';
- grid[Line(0)][Column(13)].c = ':';
- grid[Line(0)][Column(14)].c = ')';
- grid[Line(0)][Column(15)].c = '!';
- grid[Line(0)][Column(16)].c = '/';
- grid[Line(0)][Column(17)].c = '?';
- mem::swap(&mut term.grid, &mut grid);
-
- // Search for URL in grid
- let url = term.url_search(Point::new(0, Column(4)));
-
- assert_eq!(url, Some("ftp://a.de".into()));
- }
}
#[cfg(all(test, feature = "bench"))]
diff --git a/src/url.rs b/src/url.rs
new file mode 100644
index 00000000..385b484b
--- /dev/null
+++ b/src/url.rs
@@ -0,0 +1,230 @@
+// Copyright 2016 Joe Wilm, The Alacritty Project Contributors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use url::Url;
+
+// Characters that stop URL detection as soon as the parser is fed one of them.
+// See https://tools.ietf.org/html/rfc3987#page-13
+const URL_SEPARATOR_CHARS: [char; 10] = ['<', '>', '"', ' ', '{', '}', '|', '\\', '^', '`'];
+// Characters that are stripped from the end of a candidate before it is validated.
+const URL_DENY_END_CHARS: [char; 8] = ['.', ',', ';', ':', '?', '!', '/', '('];
+// Schemes a parsed candidate must use in order to be reported as a URL.
+const URL_SCHEMES: [&str; 8] = [
+ "http", "https", "mailto", "news", "file", "git", "ssh", "ftp",
+];
+
+/// Parser for streaming inside-out detection of URLs.
+///
+/// Characters are fed outwards from a starting position with `advance_left`
+/// and `advance_right` until each direction reports a terminating character.
+/// `url` then trims the collected text and validates it.
+#[derive(Default)]
+pub struct UrlParser {
+    /// Characters collected so far, in left-to-right order.
+    state: String,
+}
+
+impl UrlParser {
+    /// Creates a parser that has not collected any characters yet.
+    pub fn new() -> Self {
+        UrlParser {
+            state: String::new(),
+        }
+    }
+
+    /// Advance the parser one character to the left.
+    ///
+    /// Returns `true` if `c` terminates the URL, in which case it is not stored.
+    pub fn advance_left(&mut self, c: char) -> bool {
+        self.advance(c, 0)
+    }
+
+    /// Advance the parser one character to the right.
+    ///
+    /// Returns `true` if `c` terminates the URL, in which case it is not stored.
+    pub fn advance_right(&mut self, c: char) -> bool {
+        let end = self.state.len();
+        self.advance(c, end)
+    }
+
+    /// Returns the URL if the parser has found any.
+    ///
+    /// The collected text is cleaned up in three steps before validation:
+    ///
+    /// 1. Everything in front of the run of ASCII letters that directly
+    ///    precedes the first `://` is dropped.
+    /// 2. The text is cut at the first `)` or `]` that has no matching opener.
+    /// 3. Trailing characters that may not end a URL are removed: anything in
+    ///    `URL_DENY_END_CHARS`, an unbalanced `'`, a trailing `''` and a
+    ///    trailing `()`.
+    ///
+    /// The remainder is returned only if it parses as a URL whose scheme is
+    /// listed in `URL_SCHEMES`; otherwise `None` is returned.
+    pub fn url(mut self) -> Option<String> {
+        // Remove non-alphabetical characters before scheme.
+        //
+        // All positions here are byte offsets taken from `char_indices`, so the
+        // cut always lands on a character boundary, and a leading "://" (offset
+        // zero) simply leaves nothing to strip.
+        if let Some(index) = self.state.find("://") {
+            let scheme_start = self.state[..index]
+                .char_indices()
+                .rev()
+                .find(|&(_, c)| !c.is_ascii_alphabetic())
+                .map_or(0, |(offset, c)| offset + c.len_utf8());
+
+            if scheme_start > 0 {
+                self.state = self.state.split_off(scheme_start);
+            }
+        }
+
+        // Remove non-matching parenthesis and brackets
+        let mut open_parens_count: usize = 0;
+        let mut open_bracks_count: usize = 0;
+        let mut unmatched_at = None;
+        for (offset, c) in self.state.char_indices() {
+            match c {
+                '(' => open_parens_count += 1,
+                ')' if open_parens_count > 0 => open_parens_count -= 1,
+                '[' => open_bracks_count += 1,
+                ']' if open_bracks_count > 0 => open_bracks_count -= 1,
+                ')' | ']' => {
+                    // `truncate` expects a byte offset, which is what
+                    // `char_indices` yields.
+                    unmatched_at = Some(offset);
+                    break;
+                }
+                _ => (),
+            }
+        }
+        if let Some(offset) = unmatched_at {
+            self.state.truncate(offset);
+        }
+
+        // Track number of quotes
+        let mut num_quotes = self.state.chars().filter(|&c| c == '\'').count();
+
+        // Remove all characters which aren't allowed at the end of a URL.
+        // Every condition is false for an empty string, so the loop also ends
+        // once nothing is left.
+        while self.state.ends_with(&URL_DENY_END_CHARS[..])
+            || (num_quotes % 2 != 0 && self.state.ends_with('\''))
+            || self.state.ends_with("''")
+            || self.state.ends_with("()")
+        {
+            if self.state.pop() == Some('\'') {
+                num_quotes -= 1;
+            }
+        }
+
+        // Check if string is valid url
+        match Url::parse(&self.state) {
+            Ok(url) => {
+                if URL_SCHEMES.contains(&url.scheme()) {
+                    Some(self.state)
+                } else {
+                    None
+                }
+            }
+            Err(_) => None,
+        }
+    }
+
+    /// Feeds `c` into the parser at byte offset `pos` of the collected text.
+    ///
+    /// Returns `true` without storing `c` when it is a separator from
+    /// `URL_SEPARATOR_CHARS` or a control character (U+0000..=U+001F,
+    /// U+007F..=U+009F); otherwise inserts it and returns `false`.
+    fn advance(&mut self, c: char, pos: usize) -> bool {
+        if URL_SEPARATOR_CHARS.contains(&c) || c.is_control() {
+            true
+        } else {
+            self.state.insert(pos, c);
+            false
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::mem;
+
+    use crate::grid::Grid;
+    use crate::index::{Column, Line, Point};
+    use crate::term::{Cell, Search, SizeInfo, Term};
+
+    /// Builds a terminal whose grid is a single line holding `input`, one
+    /// character per cell starting at column zero.
+    fn url_create_term(input: &str) -> Term {
+        let mut term = Term::new(
+            &Default::default(),
+            SizeInfo {
+                width: 21.0,
+                height: 51.0,
+                cell_width: 3.0,
+                cell_height: 3.0,
+                padding_x: 0.0,
+                padding_y: 0.0,
+                dpr: 1.0,
+            },
+        );
+
+        // The column count is the byte length of `input`.
+        let mut replacement: Grid<Cell> =
+            Grid::new(Line(1), Column(input.len()), 0, Cell::default());
+        for (column, character) in input.chars().enumerate() {
+            replacement[Line(0)][Column(column)].c = character;
+        }
+
+        mem::swap(term.grid_mut(), &mut replacement);
+
+        term
+    }
+
+    /// Asserts that searching `input` at column `click_index` yields `expected`.
+    fn url_test(input: &str, expected: &str, click_index: usize) {
+        let clicked = Point::new(0, Column(click_index));
+        let found = url_create_term(input).url_search(clicked);
+
+        assert_eq!(found, Some(expected.into()));
+    }
+
+    #[test]
+    fn url_skip_invalid() {
+        let found = url_create_term("no url here").url_search(Point::new(0, Column(4)));
+        assert_eq!(found, None);
+    }
+
+    #[test]
+    fn url_matching_chars() {
+        let cases = [
+            // Parentheses
+            ("(https://example.org/test(ing))", "https://example.org/test(ing)"),
+            ("https://example.org/test(ing)", "https://example.org/test(ing)"),
+            ("((https://example.org))", "https://example.org"),
+            (")https://example.org(", "https://example.org"),
+            ("https://example.org)", "https://example.org"),
+            ("https://example.org(", "https://example.org"),
+            ("(https://one.org/)(https://two.org/)", "https://one.org"),
+            // Brackets
+            ("https://[2001:db8:a0b:12f0::1]:80", "https://[2001:db8:a0b:12f0::1]:80"),
+            ("([(https://example.org/test(ing))])", "https://example.org/test(ing)"),
+            ("https://example.org/]()", "https://example.org"),
+            ("[https://example.org]", "https://example.org"),
+            // Single quotes
+            ("'https://example.org/test'ing'''", "https://example.org/test'ing'"),
+            ("https://example.org/test'ing'", "https://example.org/test'ing'"),
+            ("'https://example.org'", "https://example.org"),
+            ("'https://example.org", "https://example.org"),
+            ("https://example.org'", "https://example.org"),
+        ];
+
+        for &(input, expected) in cases.iter() {
+            url_test(input, expected, 5);
+        }
+    }
+
+    #[test]
+    fn url_detect_end() {
+        let inputs = [
+            "https://example.org/test\u{00}ing",
+            "https://example.org/test\u{1F}ing",
+            "https://example.org/test\u{7F}ing",
+            "https://example.org/test\u{9F}ing",
+            "https://example.org/test\ting",
+            "https://example.org/test ing",
+        ];
+
+        for &input in inputs.iter() {
+            url_test(input, "https://example.org/test", 5);
+        }
+    }
+
+    #[test]
+    fn url_remove_end_chars() {
+        let cases = [
+            ("https://example.org/test?ing", "https://example.org/test?ing"),
+            ("https://example.org.,;:)'!/?", "https://example.org"),
+            ("https://example.org'.", "https://example.org"),
+        ];
+
+        for &(input, expected) in cases.iter() {
+            url_test(input, expected, 5);
+        }
+    }
+
+    #[test]
+    fn url_remove_start_chars() {
+        let cases = [
+            ("complicated:https://example.org", 15),
+            ("test.https://example.org", 10),
+            (",https://example.org", 5),
+        ];
+
+        for &(input, click_index) in cases.iter() {
+            url_test(input, "https://example.org", click_index);
+        }
+    }
+
+    #[test]
+    fn url_unicode() {
+        // Each input is expected to be returned unchanged.
+        let inputs = [
+            "https://xn--example-2b07f.org",
+            "https://example.org/\u{2008A}",
+            "https://example.org/\u{f17c}",
+            "https://üñîçøðé.com/ä",
+        ];
+
+        for &input in inputs.iter() {
+            url_test(input, input, 5);
+        }
+    }
+
+    #[test]
+    fn url_schemes() {
+        // Each input is expected to be returned unchanged.
+        let inputs = [
+            "mailto://example.org",
+            "https://example.org",
+            "http://example.org",
+            "news://example.org",
+            "file://example.org",
+            "git://example.org",
+            "ssh://example.org",
+            "ftp://example.org",
+        ];
+
+        for &input in inputs.iter() {
+            url_test(input, input, 5);
+        }
+    }
+}