@@ -74,7 +74,7 @@ constexpr uint64_t DEFAULT_PORT_COUNT = 64;
7474// / - The client will always start with a 'send' operation.
7575// / - The server will always start with a 'recv' operation.
7676// / - Every 'send' or 'recv' call is mirrored by the other process.
77- template <bool Invert, uint32_t lane_size > struct Process {
77+ template <bool Invert, typename Packet > struct Process {
7878 LIBC_INLINE Process () = default;
7979 LIBC_INLINE Process (const Process &) = delete;
8080 LIBC_INLINE Process &operator =(const Process &) = delete ;
@@ -85,7 +85,7 @@ template <bool Invert, uint32_t lane_size> struct Process {
8585 uint64_t port_count;
8686 cpp::Atomic<uint32_t > *inbox;
8787 cpp::Atomic<uint32_t > *outbox;
88- Packet<lane_size> *packet;
88+ Packet *packet;
8989
9090 cpp::Atomic<uint32_t > lock[DEFAULT_PORT_COUNT] = {0 };
9191
@@ -96,8 +96,8 @@ template <bool Invert, uint32_t lane_size> struct Process {
9696 advance (buffer, inbox_offset (port_count)));
9797 this ->outbox = reinterpret_cast <cpp::Atomic<uint32_t > *>(
9898 advance (buffer, outbox_offset (port_count)));
99- this ->packet = reinterpret_cast <Packet<lane_size> *>(
100- advance (buffer, buffer_offset (port_count)));
99+ this ->packet =
100+ reinterpret_cast <Packet *>( advance (buffer, buffer_offset (port_count)));
101101 }
102102
103103 // / Returns the beginning of the unified buffer. Intended for initializing the
@@ -221,38 +221,14 @@ template <bool Invert, uint32_t lane_size> struct Process {
221221 gpu::sync_lane (lane_mask);
222222 }
223223
224- // / Invokes a function across every active buffer across the total lane size.
225- LIBC_INLINE void invoke_rpc (cpp::function<void (Buffer *)> fn,
226- Packet<lane_size> &packet) {
227- if constexpr (is_process_gpu ()) {
228- fn (&packet.payload .slot [gpu::get_lane_id ()]);
229- } else {
230- for (uint32_t i = 0 ; i < lane_size; i += gpu::get_lane_size ())
231- if (packet.header .mask & 1ul << i)
232- fn (&packet.payload .slot [i]);
233- }
234- }
235-
236- // / Alternate version that also provides the index of the current lane.
237- LIBC_INLINE void invoke_rpc (cpp::function<void (Buffer *, uint32_t )> fn,
238- Packet<lane_size> &packet) {
239- if constexpr (is_process_gpu ()) {
240- fn (&packet.payload .slot [gpu::get_lane_id ()], gpu::get_lane_id ());
241- } else {
242- for (uint32_t i = 0 ; i < lane_size; i += gpu::get_lane_size ())
243- if (packet.header .mask & 1ul << i)
244- fn (&packet.payload .slot [i], i);
245- }
246- }
247-
248224 // / Number of bytes to allocate for an inbox or outbox.
249225 LIBC_INLINE static constexpr uint64_t mailbox_bytes (uint64_t port_count) {
250226 return port_count * sizeof (cpp::Atomic<uint32_t >);
251227 }
252228
253229 // / Number of bytes to allocate for the buffer containing the packets.
254230 LIBC_INLINE static constexpr uint64_t buffer_bytes (uint64_t port_count) {
255- return port_count * sizeof (Packet<lane_size> );
231+ return port_count * sizeof (Packet);
256232 }
257233
258234 // / Offset of the inbox in memory. This is the same as the outbox if inverted.
@@ -267,14 +243,40 @@ template <bool Invert, uint32_t lane_size> struct Process {
267243
268244 // / Offset of the buffer containing the packets after the inbox and outbox.
269245 LIBC_INLINE static constexpr uint64_t buffer_offset (uint64_t port_count) {
270- return align_up (2 * mailbox_bytes (port_count), alignof (Packet<lane_size> ));
246+ return align_up (2 * mailbox_bytes (port_count), alignof (Packet));
271247 }
272248};
273249
250+ // / Invokes a function across every active buffer across the total lane size.
251+ template <uint32_t lane_size>
252+ static LIBC_INLINE void invoke_rpc (cpp::function<void (Buffer *)> fn,
253+ Packet<lane_size> &packet) {
254+ if constexpr (is_process_gpu ()) {
255+ fn (&packet.payload .slot [gpu::get_lane_id ()]);
256+ } else {
257+ for (uint32_t i = 0 ; i < lane_size; i += gpu::get_lane_size ())
258+ if (packet.header .mask & 1ul << i)
259+ fn (&packet.payload .slot [i]);
260+ }
261+ }
262+
263+ // / Alternate version that also provides the index of the current lane.
264+ template <uint32_t lane_size>
265+ static LIBC_INLINE void invoke_rpc (cpp::function<void (Buffer *, uint32_t )> fn,
266+ Packet<lane_size> &packet) {
267+ if constexpr (is_process_gpu ()) {
268+ fn (&packet.payload .slot [gpu::get_lane_id ()], gpu::get_lane_id ());
269+ } else {
270+ for (uint32_t i = 0 ; i < lane_size; i += gpu::get_lane_size ())
271+ if (packet.header .mask & 1ul << i)
272+ fn (&packet.payload .slot [i], i);
273+ }
274+ }
275+
274276// / The port provides the interface to communicate between the multiple
275277// / processes. A port is conceptually an index into the memory provided by the
276278// / underlying process that is guarded by a lock bit.
277- template <bool T, uint32_t S> struct Port {
279+ template <bool T, typename S> struct Port {
278280 LIBC_INLINE Port (Process<T, S> &process, uint64_t lane_mask, uint64_t index,
279281 uint32_t out)
280282 : process(process), lane_mask(lane_mask), index(index), out(out),
@@ -330,7 +332,7 @@ struct Client {
330332 LIBC_INLINE Client &operator =(const Client &) = delete ;
331333 LIBC_INLINE ~Client () = default ;
332334
333- using Port = rpc::Port<false , gpu::LANE_SIZE>;
335+ using Port = rpc::Port<false , Packet< gpu::LANE_SIZE> >;
334336 template <uint16_t opcode> LIBC_INLINE cpp::optional<Port> try_open ();
335337 template <uint16_t opcode> LIBC_INLINE Port open ();
336338
@@ -339,7 +341,7 @@ struct Client {
339341 }
340342
341343private:
342- Process<false , gpu::LANE_SIZE> process;
344+ Process<false , Packet< gpu::LANE_SIZE> > process;
343345};
344346
345347// / The RPC server used to respond to the client.
@@ -349,7 +351,7 @@ template <uint32_t lane_size> struct Server {
349351 LIBC_INLINE Server &operator =(const Server &) = delete ;
350352 LIBC_INLINE ~Server () = default ;
351353
352- using Port = rpc::Port<true , lane_size>;
354+ using Port = rpc::Port<true , Packet< lane_size> >;
353355 LIBC_INLINE cpp::optional<Port> try_open ();
354356 LIBC_INLINE Port open ();
355357
@@ -362,15 +364,15 @@ template <uint32_t lane_size> struct Server {
362364 }
363365
364366 LIBC_INLINE static uint64_t allocation_size (uint64_t port_count) {
365- return Process<true , lane_size>::allocation_size (port_count);
367+ return Process<true , Packet< lane_size> >::allocation_size (port_count);
366368 }
367369
368370private:
369- Process<true , lane_size> process;
371+ Process<true , Packet< lane_size> > process;
370372};
371373
372374// / Applies \p fill to the shared buffer and initiates a send operation.
373- template <bool T, uint32_t S>
375+ template <bool T, typename S>
374376template <typename F>
375377LIBC_INLINE void Port<T, S>::send(F fill) {
376378 uint32_t in = owns_buffer ? out ^ T : process.load_inbox (index);
@@ -379,14 +381,14 @@ LIBC_INLINE void Port<T, S>::send(F fill) {
379381 process.wait_for_ownership (index, out, in);
380382
381383 // Apply the \p fill function to initialize the buffer and release the memory.
382- process. invoke_rpc (fill, process.packet [index]);
384+ invoke_rpc (fill, process.packet [index]);
383385 out = process.invert_outbox (index, out);
384386 owns_buffer = false ;
385387 receive = false ;
386388}
387389
388390// / Applies \p use to the shared buffer and acknowledges the send.
389- template <bool T, uint32_t S>
391+ template <bool T, typename S>
390392template <typename U>
391393LIBC_INLINE void Port<T, S>::recv(U use) {
392394 // We only exchange ownership of the buffer during a receive if we are waiting
@@ -402,13 +404,13 @@ LIBC_INLINE void Port<T, S>::recv(U use) {
402404 process.wait_for_ownership (index, out, in);
403405
404406 // Apply the \p use function to read the memory out of the buffer.
405- process. invoke_rpc (use, process.packet [index]);
407+ invoke_rpc (use, process.packet [index]);
406408 receive = true ;
407409 owns_buffer = true ;
408410}
409411
410412// / Combines a send and receive into a single function.
411- template <bool T, uint32_t S>
413+ template <bool T, typename S>
412414template <typename F, typename U>
413415LIBC_INLINE void Port<T, S>::send_and_recv(F fill, U use) {
414416 send (fill);
@@ -418,7 +420,7 @@ LIBC_INLINE void Port<T, S>::send_and_recv(F fill, U use) {
418420// / Combines a receive and send operation into a single function. The \p work
419421// / function modifies the buffer in-place and the send is only used to initiate
420422// / the copy back.
421- template <bool T, uint32_t S>
423+ template <bool T, typename S>
422424template <typename W>
423425LIBC_INLINE void Port<T, S>::recv_and_send(W work) {
424426 recv (work);
@@ -427,7 +429,7 @@ LIBC_INLINE void Port<T, S>::recv_and_send(W work) {
427429
428430// / Helper routine to simplify the interface when sending from the GPU using
429431// / thread private pointers to the underlying value.
430- template <bool T, uint32_t S>
432+ template <bool T, typename S>
431433LIBC_INLINE void Port<T, S>::send_n(const void *src, uint64_t size) {
432434 static_assert (is_process_gpu (), " Only valid when running on the GPU" );
433435 const void **src_ptr = &src;
@@ -437,7 +439,7 @@ LIBC_INLINE void Port<T, S>::send_n(const void *src, uint64_t size) {
437439
438440// / Sends an arbitrarily sized data buffer \p src across the shared channel in
439441// / multiples of the packet length.
440- template <bool T, uint32_t S>
442+ template <bool T, typename S>
441443LIBC_INLINE void Port<T, S>::send_n(const void *const *src, uint64_t *size) {
442444 uint64_t num_sends = 0 ;
443445 send ([&](Buffer *buffer, uint32_t id) {
@@ -467,7 +469,7 @@ LIBC_INLINE void Port<T, S>::send_n(const void *const *src, uint64_t *size) {
467469// / Receives an arbitrarily sized data buffer across the shared channel in
468470// / multiples of the packet length. The \p alloc function is called with the
469471// / size of the data so that we can initialize the size of the \p dst buffer.
470- template <bool T, uint32_t S>
472+ template <bool T, typename S>
471473template <typename A>
472474LIBC_INLINE void Port<T, S>::recv_n(void **dst, uint64_t *size, A &&alloc) {
473475 uint64_t num_recvs = 0 ;
0 commit comments