From 4cbbb7fb649c0ac24242dfb7810932ece5befb34 Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Tue, 20 Nov 2018 08:32:38 +0100
Subject: [PATCH 1/4] document technical UB

---
 crossbeam-deque/src/lib.rs | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/crossbeam-deque/src/lib.rs b/crossbeam-deque/src/lib.rs
index c46dcdd01..13e888d42 100644
--- a/crossbeam-deque/src/lib.rs
+++ b/crossbeam-deque/src/lib.rs
@@ -208,11 +208,21 @@ impl<T> Buffer<T> {
     }
 
     /// Writes `value` into the specified `index`.
+    ///
+    /// Using this concurrently with another `read` or `write` is technically
+    /// speaking UB due to data races. We should be using relaxed accesses, but
+    /// that would cost too much performance. Hence, as a HACK, we use volatile
+    /// accesses instead. Experimental evidence shows that this works.
     unsafe fn write(&self, index: isize, value: T) {
         ptr::write_volatile(self.at(index), value)
     }
 
     /// Reads a value from the specified `index`.
+    ///
+    /// Using this concurrently with a `write` is technically speaking UB due to
+    /// data races. We should be using relaxed accesses, but that would cost
+    /// too much performance. Hence, as a HACK, we use volatile accesses
+    /// instead. Experimental evidence shows that this works.
     unsafe fn read(&self, index: isize) -> T {
         ptr::read_volatile(self.at(index))
     }

From fdc168f0994b2450ba9d549a7380b45bf84dba5f Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Tue, 20 Nov 2018 08:34:36 +0100
Subject: [PATCH 2/4] point out that using a CAS for a fence is somewhat dubious

---
 crossbeam-epoch/src/internal.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/crossbeam-epoch/src/internal.rs b/crossbeam-epoch/src/internal.rs
index 81047480b..552cd86ee 100644
--- a/crossbeam-epoch/src/internal.rs
+++ b/crossbeam-epoch/src/internal.rs
@@ -361,7 +361,10 @@ impl Local {
                 //    instruction.
                 //
                 // Both instructions have the effect of a full barrier, but benchmarks have shown
-                // that the second one makes pinning faster in this particular case.
+                // that the second one makes pinning faster in this particular case. It is not
+                // clear that this is permitted by the C++ memory model (SC fences work very
+                // differently from SC accesses), but experimental evidence suggests that this
+                // works fine.
                 let current = Epoch::starting();
                 let previous = self
                     .epoch

From c291a1b4fb2e05a300c4e29fce4f96dbf9099bb2 Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Tue, 20 Nov 2018 12:24:38 +0100
Subject: [PATCH 3/4] mention inline assembly as possible alternative

---
 crossbeam-epoch/src/internal.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crossbeam-epoch/src/internal.rs b/crossbeam-epoch/src/internal.rs
index 552cd86ee..a7623dfe5 100644
--- a/crossbeam-epoch/src/internal.rs
+++ b/crossbeam-epoch/src/internal.rs
@@ -364,7 +364,8 @@ impl Local {
                 // that the second one makes pinning faster in this particular case. It is not
                 // clear that this is permitted by the C++ memory model (SC fences work very
                 // differently from SC accesses), but experimental evidence suggests that this
-                // works fine.
+                // works fine. Using inline assembly would be a viable (and correct) alternative,
+                // but alas, that is not possible on stable Rust.
                 let current = Epoch::starting();
                 let previous = self
                     .epoch

From d94e5eeb5253c05f8b94b0aa07c09fd28bb3b1e1 Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Tue, 20 Nov 2018 17:58:34 +0100
Subject: [PATCH 4/4] add a compiler fence

---
 crossbeam-epoch/src/internal.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/crossbeam-epoch/src/internal.rs b/crossbeam-epoch/src/internal.rs
index a7623dfe5..b21f43634 100644
--- a/crossbeam-epoch/src/internal.rs
+++ b/crossbeam-epoch/src/internal.rs
@@ -371,6 +371,10 @@ impl Local {
                     .epoch
                     .compare_and_swap(current, new_epoch, Ordering::SeqCst);
                 debug_assert_eq!(current, previous, "participant was expected to be unpinned");
+                // We add a compiler fence to make it less likely for LLVM to do something wrong
+                // here. Formally, this is not enough to get rid of data races; practically,
+                // it should go a long way.
+                atomic::compiler_fence(Ordering::SeqCst);
             } else {
                 self.epoch.store(new_epoch, Ordering::Relaxed);
                 atomic::fence(Ordering::SeqCst);
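
For readers of the series, here are minimal standalone sketches of the two hacks
the patches document. These are illustrative only: `Slot`, `pin_sketch`, and the
`0` stand-in for `Epoch::starting()` are made-up names for this note, not
crossbeam's actual types or values.

Patch 1's volatile-access hack, reduced to its essence: accesses that may race
(UB as plain loads/stores, and relaxed atomics would be the correct tool) are
performed as volatile accesses instead:

    use std::ptr;

    struct Slot<T> {
        slot: *mut T, // plays the role of `self.at(index)`
    }

    impl<T> Slot<T> {
        /// May race with `read`/`write` on another thread: technically UB,
        /// but volatile accesses have been observed to work in practice.
        unsafe fn write(&self, value: T) {
            ptr::write_volatile(self.slot, value)
        }

        unsafe fn read(&self) -> T {
            ptr::read_volatile(self.slot)
        }
    }

And the pinning pattern from patches 2-4: a `SeqCst` CAS standing in for a
`SeqCst` fence (on x86-64 both compile to full barriers, but `lock cmpxchg`
benchmarked faster than `mfence`), followed by the compiler fence added in
patch 4:

    use std::sync::atomic::{self, AtomicUsize, Ordering};

    fn pin_sketch(epoch: &AtomicUsize, new_epoch: usize) {
        // CAS instead of `store(Relaxed)` + `fence(SeqCst)`; dubious under the
        // C++ memory model, but experimentally it works.
        let previous = epoch.compare_and_swap(0, new_epoch, Ordering::SeqCst);
        debug_assert_eq!(previous, 0, "participant was expected to be unpinned");
        // Discourage LLVM from reordering memory accesses across this point;
        // not formally sufficient, but it should go a long way in practice.
        atomic::compiler_fence(Ordering::SeqCst);
    }

(`compare_and_swap` matches the code in the diff; on current Rust it is
deprecated in favor of `compare_exchange`.)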