Skip to content

Commit aee99d4

Browse files
author
bors-servo
authored
Auto merge of #1834 - jrmuizel:unsafe-reader, r=Gankro
Improve deserialization performance I haven't yet tested how much this improves deserialization performance but it should be noticeable. <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/webrender/1834) <!-- Reviewable:end -->
2 parents 6440dff + 9818e4d commit aee99d4

File tree

3 files changed

+78
-11
lines changed

3 files changed

+78
-11
lines changed

Cargo.lock

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

webrender_api/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ ipc = ["ipc-channel"]
1111

1212
[dependencies]
1313
app_units = "0.5.6"
14-
bincode = "0.8"
14+
bincode = "0.8.1"
1515
bitflags = "0.9"
1616
byteorder = "1.0"
1717
euclid = "0.15"

webrender_api/src/display_list.rs

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@ use YuvImageDisplayItem;
1717
use bincode;
1818
use serde::{Deserialize, Serialize, Serializer};
1919
use serde::ser::{SerializeMap, SerializeSeq};
20-
use std::io::Write;
20+
use std::io::{Read, Write};
2121
use std::{io, ptr};
2222
use std::marker::PhantomData;
23+
use std::slice;
2324
use time::precise_time_ns;
2425

2526
// We don't want to push a long text-run. If a text-run is too long, split it into several parts.
@@ -171,6 +172,7 @@ fn skip_slice<T: for<'de> Deserialize<'de>>(
171172
(range, count)
172173
}
173174

175+
174176
impl<'a> BuiltDisplayListIter<'a> {
175177
pub fn new(list: &'a BuiltDisplayList) -> Self {
176178
Self::new_with_list_and_data(list, list.item_slice())
@@ -221,7 +223,7 @@ impl<'a> BuiltDisplayListIter<'a> {
221223
return None;
222224
}
223225

224-
self.cur_item = bincode::deserialize_from(&mut self.data, bincode::Infinite)
226+
self.cur_item = bincode::deserialize_from(&mut UnsafeReader::new(&mut self.data), bincode::Infinite)
225227
.expect("MEH: malicious process?");
226228

227229
match self.cur_item.item {
@@ -363,7 +365,7 @@ impl<'de, 'a, T: Deserialize<'de>> AuxIter<'a, T> {
363365
let size: usize = if data.len() == 0 {
364366
0 // Accept empty ItemRanges pointing anywhere
365367
} else {
366-
bincode::deserialize_from(&mut data, bincode::Infinite).expect("MEH: malicious input?")
368+
bincode::deserialize_from(&mut UnsafeReader::new(&mut data), bincode::Infinite).expect("MEH: malicious input?")
367369
};
368370

369371
AuxIter {
@@ -383,7 +385,7 @@ impl<'a, T: for<'de> Deserialize<'de>> Iterator for AuxIter<'a, T> {
383385
} else {
384386
self.size -= 1;
385387
Some(
386-
bincode::deserialize_from(&mut self.data, bincode::Infinite)
388+
bincode::deserialize_from(&mut UnsafeReader::new(&mut self.data), bincode::Infinite)
387389
.expect("MEH: malicious input?"),
388390
)
389391
}
@@ -525,6 +527,71 @@ fn serialize_fast<T: Serialize>(vec: &mut Vec<u8>, e: &T) {
525527
debug_assert!(((w.0 as usize) - (vec.as_ptr() as usize)) == vec.len());
526528
}
527529

530+
// Deserialization helper: a raw-pointer cursor over a borrowed `&[u8]`.
//
// This uses a (start, end) representation instead of (start, len) so that we
// only need to update a single field as we read through it. This makes it
// easier for llvm to understand what's going on
// (https://github.com/rust-lang/rust/issues/45068).
// We write the unread remainder back into the borrowed slice only once we're
// done reading, in `Drop`.
struct UnsafeReader<'a: 'b, 'b> {
    // Current read position inside the original slice.
    start: *const u8,
    // One past the last readable byte; never dereferenced.
    end: *const u8,
    // The caller's slice; rewritten on drop to reflect what was consumed.
    slice: &'b mut &'a [u8],
}

impl<'a, 'b> UnsafeReader<'a, 'b> {
    /// Wraps `buf`. When the reader is dropped, `buf` is updated to the
    /// still-unread tail of the original slice.
    fn new(buf: &'b mut &'a [u8]) -> UnsafeReader<'a, 'b> {
        let start = buf.as_ptr();
        // SAFETY: offsetting by the slice's own length yields exactly the
        // one-past-the-end pointer, which `offset` explicitly permits.
        let end = unsafe { start.offset(buf.len() as isize) };
        UnsafeReader { start, end, slice: buf }
    }

    // This read implementation is significantly faster than the standard &[u8] one.
    //
    // First, it only supports reading exactly buf.len() bytes. This ensures that
    // the argument to memcpy is always buf.len() and will allow a constant buf.len()
    // to be propagated through to memcpy which LLVM will turn into explicit loads and
    // stores. The standard implementation does a len = min(slice.len(), buf.len())
    //
    // Second, we only need to adjust 'start' after reading and it's only adjusted by a
    // constant. This allows LLVM to avoid adjusting the length field after every read
    // and lets it be aggregated into a single adjustment.
    #[inline(always)]
    fn read_internal(&mut self, buf: &mut [u8]) {
        // Check the remaining length with integer arithmetic *before* any
        // pointer arithmetic: computing `self.start.offset(buf.len())` first
        // (as an earlier revision did) is undefined behavior whenever that
        // pointer would land past `end`, so the check must not rely on it.
        let remaining = (self.end as usize) - (self.start as usize);
        assert!(buf.len() <= remaining, "UnsafeReader: read past end of target");
        // SAFETY: the assert above guarantees `buf.len()` readable bytes start
        // at `self.start`, `buf` is a distinct allocation (no overlap), and the
        // post-read `start` is at most the one-past-the-end pointer.
        unsafe {
            ptr::copy_nonoverlapping(self.start, buf.as_mut_ptr(), buf.len());
            self.start = self.start.offset(buf.len() as isize);
        }
    }
}

impl<'a, 'b> Drop for UnsafeReader<'a, 'b> {
    // This adjusts the input slice so that it properly represents the amount
    // that's left after all reads performed through this reader.
    fn drop(&mut self) {
        // SAFETY: `start` and `end` always point within (or one past the end
        // of) the original slice, with `start <= end`, so the reconstructed
        // slice covers valid initialized memory borrowed for 'a.
        unsafe {
            *self.slice = slice::from_raw_parts(self.start, (self.end as usize) - (self.start as usize));
        }
    }
}

impl<'a, 'b> Read for UnsafeReader<'a, 'b> {
    // These methods were not being inlined and we need them to be so that the
    // memcpy is for a constant size.
    //
    // NOTE: unlike a conforming `Read`, this panics (via `read_internal`)
    // rather than returning a short count when fewer than `buf.len()` bytes
    // remain; the bincode callers never request more than is available for
    // well-formed input.
    #[inline(always)]
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.read_internal(buf);
        Ok(buf.len())
    }
    #[inline(always)]
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        self.read_internal(buf);
        Ok(())
    }
}
528595
#[derive(Clone, Debug)]
529596
pub struct SaveState {
530597
dl_len: usize,

0 commit comments

Comments
 (0)