diff options
author | Joe Wilm <joe@jwilm.com> | 2018-05-29 21:37:56 -0700 |
---|---|---|
committer | Joe Wilm <joe@jwilm.com> | 2018-06-02 09:56:50 -0700 |
commit | a2f99883773676a9dcc537afff4bce54e04e412b (patch) | |
tree | f513fb62ea5608d2589d98616a734376df4767d4 | |
parent | 9a98d5e0ee9139d5a2988d125352c5d70a39ad20 (diff) | |
download | alacritty-a2f99883773676a9dcc537afff4bce54e04e412b.tar.gz alacritty-a2f99883773676a9dcc537afff4bce54e04e412b.zip |
Optimize Storage::swap
Removes 4 movaps instructions from generated assembly.
-rw-r--r-- | src/grid/storage.rs | 38 |
1 files changed, 32 insertions, 6 deletions
diff --git a/src/grid/storage.rs b/src/grid/storage.rs index 57afde82..0f0f611b 100644 --- a/src/grid/storage.rs +++ b/src/grid/storage.rs @@ -14,7 +14,7 @@ use std::ops::{Index, IndexMut}; use std::slice; -use index::{Column, Line}; +use index::Line; use super::Row; /// Maximum number of invisible lines before buffer is resized @@ -198,17 +198,40 @@ impl<T> Storage<T> { self.inner.swap(a, b); } - /// Swap two lines in raw buffer + /// Swap implementation for Row<T>. /// - /// # Panics + /// Exploits the known size of Row<T> to produce a slightly more efficient + /// swap than going through slice::swap. /// - /// `swap` will panic if either `a` or `b` are out-of-bounds of the - /// underlying storage. + /// The default implementation from swap generates 8 movups and 4 movaps + /// instructions. This implementation achieves the swap in only 8 movups + /// instructions. + /// + // TODO Once specialization is available, Storage<T> can be fully generic + // again instead of enforcing inner: Vec<Row<T>>. pub fn swap(&mut self, a: usize, b: usize) { + debug_assert!(::std::mem::size_of::<Row<T>>() == 32); + let a = self.compute_index(a); let b = self.compute_index(b); - self.inner.swap(a, b); + unsafe { + // Cast to a qword array to opt out of copy restrictions and avoid + // drop hazards. Byte array is no good here since for whatever + // reason LLVM won't optimized it. + let a_ptr = self.inner.as_mut_ptr().offset(a as isize) as *mut u64; + let b_ptr = self.inner.as_mut_ptr().offset(b as isize) as *mut u64; + + // Copy 1 qword at a time + // + // The optimizer unrolls this loop and vectorizes it. + let mut tmp: u64; + for i in 0..4 { + tmp = *a_ptr.offset(i); + *a_ptr.offset(i) = *b_ptr.offset(i); + *b_ptr.offset(i) = tmp; + } + } } /// Iterator over *logical* entries in the storage @@ -299,6 +322,9 @@ impl<'a, T: 'a> Iterator for IterMut<'a, T> { } } +#[cfg(test)] +use index::Column; + /// Grow the buffer one line at the end of the buffer /// /// Before: |