Skip to content

Commit d4d8cd8

Browse files
[libc] Factor specifics of packet type out of process
NFC. Simplifies process slightly and gives more options for testing it. Reviewed By: jhuber6. Differential Revision: https://reviews.llvm.org/D153604
1 parent 7e79934 commit d4d8cd8

File tree

2 files changed

+53
-47
lines changed

2 files changed

+53
-47
lines changed

libc/src/__support/RPC/rpc.h

Lines changed: 47 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ constexpr uint64_t DEFAULT_PORT_COUNT = 64;
7474
/// - The client will always start with a 'send' operation.
7575
/// - The server will always start with a 'recv' operation.
7676
/// - Every 'send' or 'recv' call is mirrored by the other process.
77-
template <bool Invert, uint32_t lane_size> struct Process {
77+
template <bool Invert, typename Packet> struct Process {
7878
LIBC_INLINE Process() = default;
7979
LIBC_INLINE Process(const Process &) = delete;
8080
LIBC_INLINE Process &operator=(const Process &) = delete;
@@ -85,7 +85,7 @@ template <bool Invert, uint32_t lane_size> struct Process {
8585
uint64_t port_count;
8686
cpp::Atomic<uint32_t> *inbox;
8787
cpp::Atomic<uint32_t> *outbox;
88-
Packet<lane_size> *packet;
88+
Packet *packet;
8989

9090
cpp::Atomic<uint32_t> lock[DEFAULT_PORT_COUNT] = {0};
9191

@@ -96,8 +96,8 @@ template <bool Invert, uint32_t lane_size> struct Process {
9696
advance(buffer, inbox_offset(port_count)));
9797
this->outbox = reinterpret_cast<cpp::Atomic<uint32_t> *>(
9898
advance(buffer, outbox_offset(port_count)));
99-
this->packet = reinterpret_cast<Packet<lane_size> *>(
100-
advance(buffer, buffer_offset(port_count)));
99+
this->packet =
100+
reinterpret_cast<Packet *>(advance(buffer, buffer_offset(port_count)));
101101
}
102102

103103
/// Returns the beginning of the unified buffer. Intended for initializing the
@@ -221,38 +221,14 @@ template <bool Invert, uint32_t lane_size> struct Process {
221221
gpu::sync_lane(lane_mask);
222222
}
223223

224-
/// Invokes a function across every active buffer across the total lane size.
225-
LIBC_INLINE void invoke_rpc(cpp::function<void(Buffer *)> fn,
226-
Packet<lane_size> &packet) {
227-
if constexpr (is_process_gpu()) {
228-
fn(&packet.payload.slot[gpu::get_lane_id()]);
229-
} else {
230-
for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size())
231-
if (packet.header.mask & 1ul << i)
232-
fn(&packet.payload.slot[i]);
233-
}
234-
}
235-
236-
/// Alternate version that also provides the index of the current lane.
237-
LIBC_INLINE void invoke_rpc(cpp::function<void(Buffer *, uint32_t)> fn,
238-
Packet<lane_size> &packet) {
239-
if constexpr (is_process_gpu()) {
240-
fn(&packet.payload.slot[gpu::get_lane_id()], gpu::get_lane_id());
241-
} else {
242-
for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size())
243-
if (packet.header.mask & 1ul << i)
244-
fn(&packet.payload.slot[i], i);
245-
}
246-
}
247-
248224
/// Number of bytes to allocate for an inbox or outbox.
249225
LIBC_INLINE static constexpr uint64_t mailbox_bytes(uint64_t port_count) {
250226
return port_count * sizeof(cpp::Atomic<uint32_t>);
251227
}
252228

253229
/// Number of bytes to allocate for the buffer containing the packets.
254230
LIBC_INLINE static constexpr uint64_t buffer_bytes(uint64_t port_count) {
255-
return port_count * sizeof(Packet<lane_size>);
231+
return port_count * sizeof(Packet);
256232
}
257233

258234
/// Offset of the inbox in memory. This is the same as the outbox if inverted.
@@ -267,14 +243,40 @@ template <bool Invert, uint32_t lane_size> struct Process {
267243

268244
/// Offset of the buffer containing the packets after the inbox and outbox.
269245
LIBC_INLINE static constexpr uint64_t buffer_offset(uint64_t port_count) {
270-
return align_up(2 * mailbox_bytes(port_count), alignof(Packet<lane_size>));
246+
return align_up(2 * mailbox_bytes(port_count), alignof(Packet));
271247
}
272248
};
273249

250+
/// Invokes a function across every active buffer across the total lane size.
251+
template <uint32_t lane_size>
252+
static LIBC_INLINE void invoke_rpc(cpp::function<void(Buffer *)> fn,
253+
Packet<lane_size> &packet) {
254+
if constexpr (is_process_gpu()) {
255+
fn(&packet.payload.slot[gpu::get_lane_id()]);
256+
} else {
257+
for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size())
258+
if (packet.header.mask & 1ul << i)
259+
fn(&packet.payload.slot[i]);
260+
}
261+
}
262+
263+
/// Alternate version that also provides the index of the current lane.
264+
template <uint32_t lane_size>
265+
static LIBC_INLINE void invoke_rpc(cpp::function<void(Buffer *, uint32_t)> fn,
266+
Packet<lane_size> &packet) {
267+
if constexpr (is_process_gpu()) {
268+
fn(&packet.payload.slot[gpu::get_lane_id()], gpu::get_lane_id());
269+
} else {
270+
for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size())
271+
if (packet.header.mask & 1ul << i)
272+
fn(&packet.payload.slot[i], i);
273+
}
274+
}
275+
274276
/// The port provides the interface to communicate between the multiple
275277
/// processes. A port is conceptually an index into the memory provided by the
276278
/// underlying process that is guarded by a lock bit.
277-
template <bool T, uint32_t S> struct Port {
279+
template <bool T, typename S> struct Port {
278280
LIBC_INLINE Port(Process<T, S> &process, uint64_t lane_mask, uint64_t index,
279281
uint32_t out)
280282
: process(process), lane_mask(lane_mask), index(index), out(out),
@@ -330,7 +332,7 @@ struct Client {
330332
LIBC_INLINE Client &operator=(const Client &) = delete;
331333
LIBC_INLINE ~Client() = default;
332334

333-
using Port = rpc::Port<false, gpu::LANE_SIZE>;
335+
using Port = rpc::Port<false, Packet<gpu::LANE_SIZE>>;
334336
template <uint16_t opcode> LIBC_INLINE cpp::optional<Port> try_open();
335337
template <uint16_t opcode> LIBC_INLINE Port open();
336338

@@ -339,7 +341,7 @@ struct Client {
339341
}
340342

341343
private:
342-
Process<false, gpu::LANE_SIZE> process;
344+
Process<false, Packet<gpu::LANE_SIZE>> process;
343345
};
344346

345347
/// The RPC server used to respond to the client.
@@ -349,7 +351,7 @@ template <uint32_t lane_size> struct Server {
349351
LIBC_INLINE Server &operator=(const Server &) = delete;
350352
LIBC_INLINE ~Server() = default;
351353

352-
using Port = rpc::Port<true, lane_size>;
354+
using Port = rpc::Port<true, Packet<lane_size>>;
353355
LIBC_INLINE cpp::optional<Port> try_open();
354356
LIBC_INLINE Port open();
355357

@@ -362,15 +364,15 @@ template <uint32_t lane_size> struct Server {
362364
}
363365

364366
LIBC_INLINE static uint64_t allocation_size(uint64_t port_count) {
365-
return Process<true, lane_size>::allocation_size(port_count);
367+
return Process<true, Packet<lane_size>>::allocation_size(port_count);
366368
}
367369

368370
private:
369-
Process<true, lane_size> process;
371+
Process<true, Packet<lane_size>> process;
370372
};
371373

372374
/// Applies \p fill to the shared buffer and initiates a send operation.
373-
template <bool T, uint32_t S>
375+
template <bool T, typename S>
374376
template <typename F>
375377
LIBC_INLINE void Port<T, S>::send(F fill) {
376378
uint32_t in = owns_buffer ? out ^ T : process.load_inbox(index);
@@ -379,14 +381,14 @@ LIBC_INLINE void Port<T, S>::send(F fill) {
379381
process.wait_for_ownership(index, out, in);
380382

381383
// Apply the \p fill function to initialize the buffer and release the memory.
382-
process.invoke_rpc(fill, process.packet[index]);
384+
invoke_rpc(fill, process.packet[index]);
383385
out = process.invert_outbox(index, out);
384386
owns_buffer = false;
385387
receive = false;
386388
}
387389

388390
/// Applies \p use to the shared buffer and acknowledges the send.
389-
template <bool T, uint32_t S>
391+
template <bool T, typename S>
390392
template <typename U>
391393
LIBC_INLINE void Port<T, S>::recv(U use) {
392394
// We only exchange ownership of the buffer during a receive if we are waiting
@@ -402,13 +404,13 @@ LIBC_INLINE void Port<T, S>::recv(U use) {
402404
process.wait_for_ownership(index, out, in);
403405

404406
// Apply the \p use function to read the memory out of the buffer.
405-
process.invoke_rpc(use, process.packet[index]);
407+
invoke_rpc(use, process.packet[index]);
406408
receive = true;
407409
owns_buffer = true;
408410
}
409411

410412
/// Combines a send and receive into a single function.
411-
template <bool T, uint32_t S>
413+
template <bool T, typename S>
412414
template <typename F, typename U>
413415
LIBC_INLINE void Port<T, S>::send_and_recv(F fill, U use) {
414416
send(fill);
@@ -418,7 +420,7 @@ LIBC_INLINE void Port<T, S>::send_and_recv(F fill, U use) {
418420
/// Combines a receive and send operation into a single function. The \p work
419421
/// function modifies the buffer in-place and the send is only used to initiate
420422
/// the copy back.
421-
template <bool T, uint32_t S>
423+
template <bool T, typename S>
422424
template <typename W>
423425
LIBC_INLINE void Port<T, S>::recv_and_send(W work) {
424426
recv(work);
@@ -427,7 +429,7 @@ LIBC_INLINE void Port<T, S>::recv_and_send(W work) {
427429

428430
/// Helper routine to simplify the interface when sending from the GPU using
429431
/// thread private pointers to the underlying value.
430-
template <bool T, uint32_t S>
432+
template <bool T, typename S>
431433
LIBC_INLINE void Port<T, S>::send_n(const void *src, uint64_t size) {
432434
static_assert(is_process_gpu(), "Only valid when running on the GPU");
433435
const void **src_ptr = &src;
@@ -437,7 +439,7 @@ LIBC_INLINE void Port<T, S>::send_n(const void *src, uint64_t size) {
437439

438440
/// Sends an arbitrarily sized data buffer \p src across the shared channel in
439441
/// multiples of the packet length.
440-
template <bool T, uint32_t S>
442+
template <bool T, typename S>
441443
LIBC_INLINE void Port<T, S>::send_n(const void *const *src, uint64_t *size) {
442444
uint64_t num_sends = 0;
443445
send([&](Buffer *buffer, uint32_t id) {
@@ -467,7 +469,7 @@ LIBC_INLINE void Port<T, S>::send_n(const void *const *src, uint64_t *size) {
467469
/// Receives an arbitrarily sized data buffer across the shared channel in
468470
/// multiples of the packet length. The \p alloc function is called with the
469471
/// size of the data so that we can initialize the size of the \p dst buffer.
470-
template <bool T, uint32_t S>
472+
template <bool T, typename S>
471473
template <typename A>
472474
LIBC_INLINE void Port<T, S>::recv_n(void **dst, uint64_t *size, A &&alloc) {
473475
uint64_t num_recvs = 0;

libc/test/src/__support/RPC/rpc_smoke_test.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,12 @@
1313
namespace {
1414
enum { lane_size = 8, port_count = 4 };
1515

16-
using ProcAType = __llvm_libc::rpc::Process<false, lane_size>;
17-
using ProcBType = __llvm_libc::rpc::Process<true, lane_size>;
16+
struct Packet {
17+
uint64_t unused;
18+
};
19+
20+
using ProcAType = __llvm_libc::rpc::Process<false, Packet>;
21+
using ProcBType = __llvm_libc::rpc::Process<true, Packet>;
1822

1923
static_assert(ProcAType::inbox_offset(port_count) ==
2024
ProcBType::outbox_offset(port_count));

0 commit comments

Comments
 (0)