Improve deserialization performance #1834

Merged 2 commits on Oct 10, 2017

12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion webrender_api/Cargo.toml
@@ -11,7 +11,7 @@ ipc = ["ipc-channel"]

[dependencies]
app_units = "0.5.6"
bincode = "0.8"
bincode = "0.8.1"
bitflags = "0.9"
byteorder = "1.0"
euclid = "0.15"
75 changes: 71 additions & 4 deletions webrender_api/src/display_list.rs
@@ -17,9 +17,10 @@ use YuvImageDisplayItem;
use bincode;
use serde::{Deserialize, Serialize, Serializer};
use serde::ser::{SerializeMap, SerializeSeq};
-use std::io::Write;
+use std::io::{Read, Write};
use std::{io, ptr};
use std::marker::PhantomData;
+use std::slice;
use time::precise_time_ns;

// We don't want to push a long text-run. If a text-run is too long, split it into several parts.
@@ -171,6 +172,7 @@ fn skip_slice<T: for<'de> Deserialize<'de>>(
(range, count)
}


impl<'a> BuiltDisplayListIter<'a> {
pub fn new(list: &'a BuiltDisplayList) -> Self {
Self::new_with_list_and_data(list, list.item_slice())
@@ -221,7 +223,7 @@ impl<'a> BuiltDisplayListIter<'a> {
return None;
}

-self.cur_item = bincode::deserialize_from(&mut self.data, bincode::Infinite)
+self.cur_item = bincode::deserialize_from(&mut UnsafeReader::new(&mut self.data), bincode::Infinite)
.expect("MEH: malicious process?");

match self.cur_item.item {
@@ -363,7 +365,7 @@ impl<'de, 'a, T: Deserialize<'de>> AuxIter<'a, T> {
let size: usize = if data.len() == 0 {
0 // Accept empty ItemRanges pointing anywhere
} else {
-bincode::deserialize_from(&mut data, bincode::Infinite).expect("MEH: malicious input?")
+bincode::deserialize_from(&mut UnsafeReader::new(&mut data), bincode::Infinite).expect("MEH: malicious input?")
};

AuxIter {
@@ -383,7 +385,7 @@ impl<'a, T: for<'de> Deserialize<'de>> Iterator for AuxIter<'a, T> {
} else {
self.size -= 1;
Some(
-bincode::deserialize_from(&mut self.data, bincode::Infinite)
+bincode::deserialize_from(&mut UnsafeReader::new(&mut self.data), bincode::Infinite)
.expect("MEH: malicious input?"),
)
}
@@ -525,6 +527,71 @@ fn serialize_fast<T: Serialize>(vec: &mut Vec<u8>, e: &T) {
debug_assert!(((w.0 as usize) - (vec.as_ptr() as usize)) == vec.len());
}

// This uses a (start, end) representation instead of (start, len) so that we
// only need to update a single field as we read through the buffer. This
// makes it easier for LLVM to understand what's going on
// (https://github.com/rust-lang/rust/issues/45068).
// We write the position back to the underlying slice only once we're done reading.
struct UnsafeReader<'a: 'b, 'b> {
start: *const u8,
end: *const u8,
slice: &'b mut &'a [u8],
}
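
For contrast, here is a minimal sketch (not part of this diff) of the (start, len) shape the comment above argues against; LenReader and its field names are hypothetical. Every advance has to maintain two fields, whereas UnsafeReader only ever moves `start`:

struct LenReader {
    start: *const u8,
    len: usize,
}

impl LenReader {
    // Advancing a (start, len) cursor touches both fields on every
    // read; this is the pattern rust-lang/rust#45068 discusses, which
    // LLVM struggles to fold into a single adjustment.
    #[inline(always)]
    unsafe fn advance(&mut self, n: usize) {
        self.start = self.start.offset(n as isize);
        self.len -= n;
    }
}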

impl<'a, 'b> UnsafeReader<'a, 'b> {
fn new(buf: &'b mut &'a [u8]) -> UnsafeReader<'a, 'b> {
unsafe {
let end = buf.as_ptr().offset(buf.len() as isize);
let start = buf.as_ptr();
UnsafeReader { start: start, end, slice: buf }
}
}

// This read implementation is significantly faster than the standard &[u8] one.
//
// First, it only supports reading exactly buf.len() bytes. This ensures that
// the argument to memcpy is always buf.len(), and allows a constant buf.len()
// to be propagated through to memcpy, which LLVM will turn into explicit loads
// and stores. The standard implementation instead computes
// len = min(slice.len(), buf.len()), which defeats that constant propagation.
//
// Second, we only need to adjust 'start' after reading, and it's only adjusted
// by a constant. This allows LLVM to avoid adjusting the length field after
// every read, and lets consecutive adjustments be aggregated into a single one.
#[inline(always)]
fn read_internal(&mut self, buf: &mut [u8]) {
// This is safe because we panic (via the assert) if start + buf.len() > end.
unsafe {
assert!(self.start.offset(buf.len() as isize) <= self.end, "UnsafeReader: read past end of target");
ptr::copy_nonoverlapping(self.start, buf.as_mut_ptr(), buf.len());
self.start = self.start.offset(buf.len() as isize);
}
}
}
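
To see the constant-size point concretely, here is a standalone sketch (not part of the diff) of the kind of fixed-size read a deserializer performs for a primitive; `read_u32_le` is a hypothetical helper, not a bincode API. Because `buf.len()` is the constant 4, the `ptr::copy_nonoverlapping` in `read_internal` can compile down to a single 4-byte load and store:

use std::io::{self, Read};

// Hypothetical helper mirroring what a deserializer does for a u32
// field: read exactly four bytes, then reassemble them.
fn read_u32_le<R: Read>(r: &mut R) -> io::Result<u32> {
    let mut buf = [0u8; 4]; // length is a compile-time constant
    r.read_exact(&mut buf)?; // inlined -> copy_nonoverlapping(.., .., 4)
    Ok(u32::from_le_bytes(buf))
}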

impl<'a, 'b> Drop for UnsafeReader<'a, 'b> {
// This adjusts the input slice so that it properly represents the amount that's left.
fn drop(&mut self) {
// This is safe because we know that start and end are still contained inside the original slice.
unsafe {
*self.slice = slice::from_raw_parts(self.start, (self.end as usize) - (self.start as usize));
}
}
}

impl<'a, 'b> Read for UnsafeReader<'a, 'b> {
// These methods were not being inlined, and we need them to be so that the
// memcpy is done for a constant size.
#[inline(always)]
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.read_internal(buf);
Ok(buf.len())
}
#[inline(always)]
fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
self.read_internal(buf);
Ok(())
}
}
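
Putting it together: in `next()` above, the reader is a temporary wrapping the iterator's slice, and dropping it writes the advanced position back through `slice`. Here is a minimal standalone sketch of that lifecycle, assuming the bincode 0.8 `deserialize_from(reader, Infinite)` API used in this diff; `read_two_u32s` is a hypothetical caller:

fn read_two_u32s(mut data: &[u8]) -> (u32, u32) {
    let a: u32 = bincode::deserialize_from(&mut UnsafeReader::new(&mut data), bincode::Infinite)
        .expect("truncated input");
    // The UnsafeReader temporary was dropped at the end of that
    // statement, so `data` now starts just past the first value and
    // the second read continues from there.
    let b: u32 = bincode::deserialize_from(&mut UnsafeReader::new(&mut data), bincode::Infinite)
        .expect("truncated input");
    (a, b)
}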

#[derive(Clone, Debug)]
pub struct SaveState {
dl_len: usize,