author    Christian Duerr <chrisduerr@users.noreply.github.com>    2019-03-23 11:56:46 +0000
committer GitHub <noreply@github.com>                              2019-03-23 11:56:46 +0000
commit    d8272662db4a4dc1ef58b8379dc88162066a3241 (patch)
tree      8a2fbbac88d867252fd825e3959f470abb89517f /src/url.rs
parent    d29c30900743f3e7f864f7951edf34c7423accd8 (diff)
Fix URL parsing with double-width characters
Since double-width characters are followed by an empty cell containing only the `WIDE_CHAR_SPACER` flag, the URL parser would stop as soon as it encountered the cell after a double-width character.

By skipping cells that contain the `WIDE_CHAR_SPACER` flag and incrementing the URL length by the unicode width of each character instead of by cell count, this is resolved for both URL launching and URL highlighting.

Fixes #2158.
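To illustrate the distinction the patch relies on, here is a rough standalone sketch (not part of the commit) using the `unicode-width` crate, which the tests below also import: a URL containing a double-width character occupies more terminal cells than it has characters, which is why the parser now tracks a separate cell-based `len`.

    // Rough sketch, not part of the patch: why character count and cell count
    // diverge for URLs with double-width characters. Each character occupies
    // as many cells as its unicode width, and a double-width character is
    // followed by one spacer cell (WIDE_CHAR_SPACER) in the grid.
    use unicode_width::UnicodeWidthChar;

    fn cell_width(s: &str) -> usize {
        s.chars().map(|c| c.width().unwrap_or(1)).sum()
    }

    fn main() {
        let url = "https://全.org";
        assert_eq!(url.chars().count(), 13); // characters in the string
        assert_eq!(cell_width(url), 14);     // cells on screen; matches the `url_len` test below
    }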
Diffstat (limited to 'src/url.rs')
-rw-r--r--   src/url.rs   83
1 file changed, 75 insertions(+), 8 deletions(-)
diff --git a/src/url.rs b/src/url.rs
index a493dd37..59207499 100644
--- a/src/url.rs
+++ b/src/url.rs
@@ -14,6 +14,8 @@
use url;
+use crate::term::cell::{Cell, Flags};
+
// See https://tools.ietf.org/html/rfc3987#page-13
const URL_SEPARATOR_CHARS: [char; 10] = ['<', '>', '"', ' ', '{', '}', '|', '\\', '^', '`'];
const URL_DENY_END_CHARS: [char; 8] = ['.', ',', ';', ':', '?', '!', '/', '('];
@@ -26,12 +28,14 @@ const URL_SCHEMES: [&str; 8] = [
pub struct Url {
pub text: String,
pub origin: usize,
+ pub len: usize,
}
/// Parser for streaming inside-out detection of URLs.
pub struct UrlParser {
state: String,
origin: usize,
+ len: usize,
}
impl UrlParser {
@@ -39,22 +43,40 @@ impl UrlParser {
UrlParser {
state: String::new(),
origin: 0,
+ len: 0,
}
}
/// Advance the parser one character to the left.
- pub fn advance_left(&mut self, c: char) -> bool {
- if self.advance(c, 0) {
+ pub fn advance_left(&mut self, cell: &Cell) -> bool {
+ if cell.flags.contains(Flags::WIDE_CHAR_SPACER) {
+ self.origin += 1;
+ self.len += 1;
+ return false;
+ }
+
+ if self.advance(cell.c, 0) {
true
} else {
self.origin += 1;
+ self.len += 1;
false
}
}
/// Advance the parser one character to the right.
- pub fn advance_right(&mut self, c: char) -> bool {
- self.advance(c, self.state.len())
+ pub fn advance_right(&mut self, cell: &Cell) -> bool {
+ if cell.flags.contains(Flags::WIDE_CHAR_SPACER) {
+ self.len += 1;
+ return false;
+ }
+
+ if self.advance(cell.c, self.state.len()) {
+ true
+ } else {
+ self.len += 1;
+ false
+ }
}
/// Returns the URL if the parser has found any.
@@ -116,8 +138,9 @@ impl UrlParser {
Ok(url) => {
if URL_SCHEMES.contains(&url.scheme()) && self.origin > 0 {
Some(Url {
- text: self.state,
origin: self.origin - 1,
+ text: self.state,
+ len: self.len,
})
} else {
None
@@ -144,10 +167,12 @@ impl UrlParser {
mod tests {
use std::mem;
+ use unicode_width::UnicodeWidthChar;
+
use crate::grid::Grid;
use crate::index::{Column, Line, Point};
use crate::term::{Search, SizeInfo, Term};
- use crate::term::cell::Cell;
+ use crate::term::cell::{Cell, Flags};
use crate::message_bar::MessageBuffer;
fn url_create_term(input: &str) -> Term {
@@ -161,11 +186,22 @@ mod tests {
dpr: 1.0,
};
+ let width = input.chars().map(|c| if c.width() == Some(2) { 2 } else { 1 }).sum();
let mut term = Term::new(&Default::default(), size, MessageBuffer::new());
- let mut grid: Grid<Cell> = Grid::new(Line(1), Column(input.len()), 0, Cell::default());
+ let mut grid: Grid<Cell> = Grid::new(Line(1), Column(width), 0, Cell::default());
- for (i, c) in input.chars().enumerate() {
+ let mut i = 0;
+ for c in input.chars() {
grid[Line(0)][Column(i)].c = c;
+
+ if c.width() == Some(2) {
+ grid[Line(0)][Column(i)].flags.insert(Flags::WIDE_CHAR);
+ grid[Line(0)][Column(i + 1)].flags.insert(Flags::WIDE_CHAR_SPACER);
+ grid[Line(0)][Column(i + 1)].c = ' ';
+ i += 1;
+ }
+
+ i += 1;
}
mem::swap(term.grid_mut(), &mut grid);
@@ -199,6 +235,37 @@ mod tests {
let term = url_create_term("https://example.org");
let url = term.url_search(Point::new(0, Column(0)));
assert_eq!(url.map(|u| u.origin), Some(0));
+
+ let term = url_create_term("https://全.org");
+ let url = term.url_search(Point::new(0, Column(10)));
+ assert_eq!(url.map(|u| u.origin), Some(10));
+
+ let term = url_create_term("https://全.org");
+ let url = term.url_search(Point::new(0, Column(8)));
+ assert_eq!(url.map(|u| u.origin), Some(8));
+
+ let term = url_create_term("https://全.org");
+ let url = term.url_search(Point::new(0, Column(9)));
+ assert_eq!(url.map(|u| u.origin), Some(9));
+ }
+
+ #[test]
+ fn url_len() {
+ let term = url_create_term(" test https://example.org ");
+ let url = term.url_search(Point::new(0, Column(10)));
+ assert_eq!(url.map(|u| u.len), Some(19));
+
+ let term = url_create_term("https://全.org");
+ let url = term.url_search(Point::new(0, Column(0)));
+ assert_eq!(url.map(|u| u.len), Some(14));
+
+ let term = url_create_term("https://全.org");
+ let url = term.url_search(Point::new(0, Column(10)));
+ assert_eq!(url.map(|u| u.len), Some(14));
+
+ let term = url_create_term("https://全.org");
+ let url = term.url_search(Point::new(0, Column(9)));
+ assert_eq!(url.map(|u| u.len), Some(14));
}
#[test]