juntyr
diff --git a/examples/print/src/main.rs b/examples/print/src/main.rs
Lines changed: 64 additions & 10 deletions
diff --git a/rust-cuda-kernel/src/kernel/wrapper/generate/cuda_generic_function.rs b/rust-cuda-kernel/src/kernel/wrapper/generate/cuda_generic_function.rs
Lines changed: 3 additions & 0 deletions
diff --git a/src/host/mod.rs b/src/host/mod.rs
Lines changed: 15 additions & 17 deletions
@@ -23,7 +23,15 @@ pub enum Action {
 
 #[rust_cuda::kernel::kernel(use link! for impl)]
 #[kernel(allow(ptx::local_memory_usage))]
-pub fn kernel(action: rust_cuda::kernel::param::PerThreadShallowCopy<Action>) {
+pub fn kernel<'a>(
+    action: rust_cuda::kernel::param::PerThreadShallowCopy<Action>,
+    _unused: &mut rust_cuda::kernel::param::DeepPerThreadBorrow<
+        rust_cuda::utils::aliasing::SplitSliceOverCudaThreadsConstStride<
+            rust_cuda::utils::exchange::buffer::CudaExchangeBuffer<u8, true, true>,
+            1,
+        >,
+    >,
+) {
     match action {
         Action::Print => rust_cuda::device::utils::println!("println! from CUDA kernel"),
         Action::Panic => panic!("panic! from CUDA kernel"),
@@ -36,8 +44,10 @@ pub fn kernel(action: rust_cuda::kernel::param::PerThreadShallowCopy<Action>) {
 #[cfg(not(target_os = "cuda"))]
 fn main() -> rust_cuda::deps::rustacuda::error::CudaResult<()> {
     // Link the non-generic CUDA kernel
-    struct KernelPtx;
-    link! { impl kernel for KernelPtx }
+    struct KernelPtx<'a> {
+        _marker: &'a [u8],
+    }
+    link! { impl kernel<'a> for KernelPtx }
 
     // Initialize the CUDA API
     rust_cuda::deps::rustacuda::init(rust_cuda::deps::rustacuda::CudaFlags::empty())?;
@@ -69,13 +79,57 @@ fn main() -> rust_cuda::deps::rustacuda::error::CudaResult<()> {
         ptx_jit: false,
     };
 
-    // Launch the CUDA kernel on the stream and synchronise to its completion
-    println!("Launching print kernel ...");
-    kernel.launch1(&stream, &config, Action::Print)?;
-    println!("Launching panic kernel ...");
-    kernel.launch1(&stream, &config, Action::Panic)?;
-    println!("Launching alloc error kernel ...");
-    kernel.launch1(&stream, &config, Action::AllocError)?;
+    let mut slice = rust_cuda::utils::aliasing::SplitSliceOverCudaThreadsConstStride::<_, 1>::new(
+        rust_cuda::utils::exchange::buffer::CudaExchangeBuffer::<_, true, true>::from_vec(vec![
+            1_u8, 2, 3,
+        ])?,
+    );
+
+    rust_cuda::lend::LendToCuda::lend_to_cuda_mut(&mut slice, |mut slice| {
+        // let mut slice_async = slice.as_async(&stream);
+
+        // Launch the CUDA kernel on the stream and synchronise to its completion
+
+        println!("Launching print kernel ...");
+        {
+            let mut slice_async = slice.as_async(&stream);
+            let slice_async_mut = slice_async.proj_mut();
+
+            let capture = rust_cuda::kernel::Capture;
+            let r#async =
+                kernel.launch2_async(&stream, &config, &capture, Action::Print, slice_async_mut)?;
+            r#async.synchronize()?;
+        }
+
+        println!("Launching panic kernel ...");
+        {
+            let mut slice_async = slice.as_async(&stream);
+            let slice_async_mut = slice_async.proj_mut();
+
+            let capture = rust_cuda::kernel::Capture;
+            let r#async =
+                kernel.launch2_async(&stream, &config, &capture, Action::Panic, slice_async_mut)?;
+            r#async.synchronize()?;
+        }
+
+        println!("Launching alloc error kernel ...");
+        {
+            let mut slice_async = slice.as_async(&stream);
+            let slice_async_mut = slice_async.proj_mut();
+
+            let capture = rust_cuda::kernel::Capture;
+            let r#async = kernel.launch2_async(
+                &stream,
+                &config,
+                &capture,
+                Action::AllocError,
+                slice_async_mut,
+            )?;
+            r#async.synchronize()?;
+        }
+
+        Ok(())
+    })?;
 
     Ok(())
 }
 
@@ -82,6 +82,9 @@ pub(in super::super) fn quote_cuda_generic_function(
         )
         .collect::<Vec<_>>();
 
+    let generic_start_token = generic_start_token.unwrap_or_default();
+    let generic_close_token = generic_close_token.unwrap_or_default();
+
     quote! {
         #[cfg(target_os = "cuda")]
         #(#func_attrs)*
 
@@ -22,7 +22,7 @@ use crate::{
             DeviceConstPointer, DeviceConstRef, DeviceMutPointer, DeviceMutRef, DeviceOwnedPointer,
             DeviceOwnedRef,
         },
-        r#async::{Async, NoCompletion},
+        r#async::{Async, AsyncProj, NoCompletion},
     },
 };
 
@@ -194,17 +194,16 @@ impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceMutRef<'a, T> {
     pub fn as_async<'b, 'stream>(
         &'b mut self,
         stream: &'stream Stream,
-    ) -> Async<'b, 'stream, HostAndDeviceMutRef<'b, T>, NoCompletion>
+    ) -> AsyncProj<'b, 'stream, HostAndDeviceMutRef<'b, T>>
     where
         'a: 'b,
     {
-        Async::ready(
-            HostAndDeviceMutRef {
-                device_box: self.device_box,
-                host_ref: self.host_ref,
-            },
-            stream,
-        )
+        let _ = stream;
+
+        AsyncProj::new(HostAndDeviceMutRef {
+            device_box: self.device_box,
+            host_ref: self.host_ref,
+        })
     }
 }
 
@@ -293,17 +292,16 @@ impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceConstRef<'a, T>
     pub const fn as_async<'b, 'stream>(
         &'b self,
         stream: &'stream Stream,
-    ) -> Async<'b, 'stream, HostAndDeviceConstRef<'b, T>, NoCompletion>
+    ) -> AsyncProj<'b, 'stream, HostAndDeviceConstRef<'b, T>>
     where
         'a: 'b,
     {
-        Async::ready(
-            HostAndDeviceConstRef {
-                device_box: self.device_box,
-                host_ref: self.host_ref,
-            },
-            stream,
-        )
+        let _ = stream;
+
+        AsyncProj::new(HostAndDeviceConstRef {
+            device_box: self.device_box,
+            host_ref: self.host_ref,
+        })
     }
 }
Original file line number	Diff line number	Diff line change
`@@ -82,6 +82,9 @@ pub(in super::super) fn quote_cuda_generic_function(`
`82`	`82`	`)`
`83`	`83`	`.collect::<Vec<_>>();`
`84`	`84`
	`85`	`+ let generic_start_token = generic_start_token.unwrap_or_default();`
	`86`	`+ let generic_close_token = generic_close_token.unwrap_or_default();`
	`87`	`+`
`85`	`88`	`quote! {`
`86`	`89`	`#[cfg(target_os = "cuda")]`
`87`	`90`	`#(#func_attrs)*`