Fix URL parsing with double-width characters

Since double-width characters are followed by an empty cell containing only the `WIDE_CHAR_SPACER` flag, the URL parser would stop as soon as it encountered the cell after a double-width character. By skipping cells that contain the `WIDE_CHAR_SPACER` flag and tracking the URL length in terminal cells (i.e. the unicode width of each character) instead of using the length of the URL text, this is resolved for both URL launching and URL highlighting. Fixes #2158.
author: Christian Duerr <chrisduerr@users.noreply.github.com> 2019-03-23 11:56:46 +0000
committer: GitHub <noreply@github.com> 2019-03-23 11:56:46 +0000
commit: d8272662db4a4dc1ef58b8379dc88162066a3241 (patch)
tree: 8a2fbbac88d867252fd825e3959f470abb89517f /src
parent: d29c30900743f3e7f864f7951edf34c7423accd8 (diff)
download: alacritty-d8272662db4a4dc1ef58b8379dc88162066a3241.tar.gz
alacritty-d8272662db4a4dc1ef58b8379dc88162066a3241.zip
3 files changed, 80 insertions, 13 deletions
diff --git a/src/input.rs b/src/input.rs
index 8e72c8b7..abb98c44 100644
--- a/src/input.rs
+++ b/src/input.rs
@@ -447,7 +447,7 @@ impl<'a, A: ActionContext + 'a> Processor<'a, A> {
             None
         };
 
-        if let Some(Url { text, origin }) = url {
+        if let Some(Url { origin, len, .. }) = url {
             let mouse_cursor = if self.ctx.terminal().mode().intersects(mouse_mode) {
                 MouseCursor::Default
             } else {
@@ -473,9 +473,9 @@ impl<'a, A: ActionContext + 'a> Processor<'a, A> {
             }
 
             // Underline all cells and store their current underline state
-            let mut underlined = Vec::with_capacity(text.len());
+            let mut underlined = Vec::with_capacity(len);
             let iter = once(start).chain(start.iter(Column(cols - 1), last_line));
-            for point in iter.take(text.len()) {
+            for point in iter.take(len) {
                 let cell = &mut self.ctx.terminal_mut().grid_mut()[point.line][point.col];
                 underlined.push(cell.flags.contains(Flags::UNDERLINE));
                 cell.flags.insert(Flags::UNDERLINE);
diff --git a/src/term/mod.rs b/src/term/mod.rs
index a99098bd..ac5a9a5f 100644
--- a/src/term/mod.rs
+++ b/src/term/mod.rs
@@ -120,14 +120,14 @@ impl Search for Term {
         let mut url_parser = UrlParser::new();
         while let Some(cell) = iterb.prev() {
             if (iterb.cur().col == last_col && !cell.flags.contains(cell::Flags::WRAPLINE))
-                || url_parser.advance_left(cell.c)
+                || url_parser.advance_left(cell)
             {
                 break;
             }
         }
 
         while let Some(cell) = iterf.next() {
-            if url_parser.advance_right(cell.c)
+            if url_parser.advance_right(cell)
                 || (iterf.cur().col == last_col && !cell.flags.contains(cell::Flags::WRAPLINE))
             {
                 break;
diff --git a/src/url.rs b/src/url.rs
index a493dd37..59207499 100644
--- a/src/url.rs
+++ b/src/url.rs
@@ -14,6 +14,8 @@
 
 use url;
 
+use crate::term::cell::{Cell, Flags};
+
 // See https://tools.ietf.org/html/rfc3987#page-13
 const URL_SEPARATOR_CHARS: [char; 10] = ['<', '>', '"', ' ', '{', '}', '|', '\\', '^', '`'];
 const URL_DENY_END_CHARS: [char; 8] = ['.', ',', ';', ':', '?', '!', '/', '('];
@@ -26,12 +28,14 @@ const URL_SCHEMES: [&str; 8] = [
 pub struct Url {
     pub text: String,
     pub origin: usize,
+    pub len: usize,
 }
 
 /// Parser for streaming inside-out detection of URLs.
 pub struct UrlParser {
     state: String,
     origin: usize,
+    len: usize,
 }
 
 impl UrlParser {
@@ -39,22 +43,40 @@ impl UrlParser {
         UrlParser {
             state: String::new(),
             origin: 0,
+            len: 0,
         }
     }
 
     /// Advance the parser one character to the left.
-    pub fn advance_left(&mut self, c: char) -> bool {
-        if self.advance(c, 0) {
+    pub fn advance_left(&mut self, cell: &Cell) -> bool {
+        if cell.flags.contains(Flags::WIDE_CHAR_SPACER) {
+            self.origin += 1;
+            self.len += 1;
+            return false;
+        }
+
+        if self.advance(cell.c, 0) {
             true
         } else {
             self.origin += 1;
+            self.len += 1;
             false
         }
     }
 
     /// Advance the parser one character to the right.
-    pub fn advance_right(&mut self, c: char) -> bool {
-        self.advance(c, self.state.len())
+    pub fn advance_right(&mut self, cell: &Cell) -> bool {
+        if cell.flags.contains(Flags::WIDE_CHAR_SPACER) {
+            self.len += 1;
+            return false;
+        }
+
+        if self.advance(cell.c, self.state.len()) {
+            true
+        } else {
+            self.len += 1;
+            false
+        }
     }
 
     /// Returns the URL if the parser has found any.
@@ -116,8 +138,9 @@ impl UrlParser {
             Ok(url) => {
                 if URL_SCHEMES.contains(&url.scheme()) && self.origin > 0 {
                     Some(Url {
-                        text: self.state,
                         origin: self.origin - 1,
+                        text: self.state,
+                        len: self.len,
                     })
                 } else {
                     None
@@ -144,10 +167,12 @@ impl UrlParser {
 mod tests {
     use std::mem;
 
+    use unicode_width::UnicodeWidthChar;
+
     use crate::grid::Grid;
     use crate::index::{Column, Line, Point};
     use crate::term::{Search, SizeInfo, Term};
-    use crate::term::cell::Cell;
+    use crate::term::cell::{Cell, Flags};
     use crate::message_bar::MessageBuffer;
 
     fn url_create_term(input: &str) -> Term {
@@ -161,11 +186,22 @@ mod tests {
             dpr: 1.0,
         };
 
+        let width = input.chars().map(|c| if c.width() == Some(2) { 2 } else { 1 }).sum();
         let mut term = Term::new(&Default::default(), size, MessageBuffer::new());
-        let mut grid: Grid<Cell> = Grid::new(Line(1), Column(input.len()), 0, Cell::default());
+        let mut grid: Grid<Cell> = Grid::new(Line(1), Column(width), 0, Cell::default());
 
-        for (i, c) in input.chars().enumerate() {
+        let mut i = 0;
+        for c in input.chars() {
             grid[Line(0)][Column(i)].c = c;
+
+            if c.width() == Some(2) {
+                grid[Line(0)][Column(i)].flags.insert(Flags::WIDE_CHAR);
+                grid[Line(0)][Column(i + 1)].flags.insert(Flags::WIDE_CHAR_SPACER);
+                grid[Line(0)][Column(i + 1)].c = ' ';
+                i += 1;
+            }
+
+            i += 1;
         }
 
         mem::swap(term.grid_mut(), &mut grid);
@@ -199,6 +235,37 @@ mod tests {
         let term = url_create_term("https://example.org");
         let url = term.url_search(Point::new(0, Column(0)));
         assert_eq!(url.map(|u| u.origin), Some(0));
+
+        let term = url_create_term("https://全.org");
+        let url = term.url_search(Point::new(0, Column(10)));
+        assert_eq!(url.map(|u| u.origin), Some(10));
+
+        let term = url_create_term("https://全.org");
+        let url = term.url_search(Point::new(0, Column(8)));
+        assert_eq!(url.map(|u| u.origin), Some(8));
+
+        let term = url_create_term("https://全.org");
+        let url = term.url_search(Point::new(0, Column(9)));
+        assert_eq!(url.map(|u| u.origin), Some(9));
+    }
+
+    #[test]
+    fn url_len() {
+        let term = url_create_term(" test https://example.org ");
+        let url = term.url_search(Point::new(0, Column(10)));
+        assert_eq!(url.map(|u| u.len), Some(19));
+
+        let term = url_create_term("https://全.org");
+        let url = term.url_search(Point::new(0, Column(0)));
+        assert_eq!(url.map(|u| u.len), Some(14));
+
+        let term = url_create_term("https://全.org");
+        let url = term.url_search(Point::new(0, Column(10)));
+        assert_eq!(url.map(|u| u.len), Some(14));
+
+        let term = url_create_term("https://全.org");
+        let url = term.url_search(Point::new(0, Column(9)));
+        assert_eq!(url.map(|u| u.len), Some(14));
     }
 
     #[test]
author	Christian Duerr <chrisduerr@users.noreply.github.com>	2019-03-23 11:56:46 +0000
committer	GitHub <noreply@github.com>	2019-03-23 11:56:46 +0000
commit	d8272662db4a4dc1ef58b8379dc88162066a3241 (patch)
tree	8a2fbbac88d867252fd825e3959f470abb89517f /src
parent	d29c30900743f3e7f864f7951edf34c7423accd8 (diff)
download	alacritty-d8272662db4a4dc1ef58b8379dc88162066a3241.tar.gz alacritty-d8272662db4a4dc1ef58b8379dc88162066a3241.zip