@@ -69,5 +69,5 @@ def read_array(fileName, nSteps):
           pinned_mempool.n_free_blocks())
 
     nSteps = 2
-    write_array("StepsWriteReadCuPy.bp", nSteps, gpuArray, cpuArray)
-    read_array("StepsWriteReadCuPy.bp", nSteps)
+    write_array("StepsCuPyBindings.bp", nSteps, gpuArray, cpuArray)
+    read_array("StepsCuPyBindings.bp", nSteps)
examples/hello/bpStepsWriteReadCuda/bpStepsWriteReadCupy.py (new file, 40 additions, 0 deletions)
@@ -0,0 +1,40 @@
import numpy as np
import cupy as cp
from adios2 import Stream

def write_array(fileName, nSteps, gpuArray, cpuArray):
    with Stream(fileName, "w") as wStream:
        for _ in wStream.steps(nSteps):
            wStream.write("cpuArray", cpuArray, cpuArray.shape,
                          [0] * len(cpuArray.shape), cpuArray.shape)
            wStream.write("gpuArray", gpuArray, gpuArray.shape,
                          [0] * len(gpuArray.shape), gpuArray.shape)
            # update buffers
            gpuArray = gpuArray * 2
            cpuArray = cpuArray + 1
    print("Write to file %s: %s data from GPU and %s data from CPU" % (
        fileName, gpuArray.shape, cpuArray.shape))

def read_array(fileName, readGpuShape, readCpuShape):
    with Stream(fileName, "r") as rStream:
        for _ in rStream.steps():
            step = rStream.current_step()
            cpuBuffer = np.zeros(readCpuShape, dtype=np.float32)
            rStream.read("cpuArray", cpuBuffer)

            gpuBuffer = cp.zeros(readGpuShape, dtype=np.float32)
            rStream.read("gpuArray", buffer=gpuBuffer)

            print("Step %d: read GPU data\n %s" % (step, gpuBuffer))
            print("Step %d: read CPU data\n %s" % (step, cpuBuffer))


if __name__ == '__main__':
    cpuArray = np.array([[0, 1.0, 2.0], [3.0, 4.0, 5.0]], dtype=np.float32)
    gpuArray = cp.array([[0, 1.0, 2.0], [3.0, 4.0, 5.0]], dtype=np.float32)
    print("Array allocation: ", gpuArray.device)
    print("Bytes required to store the gpu array", gpuArray.nbytes)

    nSteps = 2
write_array("StepsWriteReadTorch.bp", nSteps, gpuArray, cpuArray)
read_array("StepsWriteReadTorch.bp", gpuArray.shape, cpuArray.shape)
@@ -62,5 +62,5 @@ def read_array(fileName, nSteps):
     print("Bytes required to store the gpu array", gpuArray.nbytes)
 
     nSteps = 2
-    write_array("StepsWriteReadTorch.bp", nSteps, gpuArray, cpuArray)
-    read_array("StepsWriteReadTorch.bp", nSteps)
+    write_array("StepsTorchBindings.bp", nSteps, gpuArray, cpuArray)
+    read_array("StepsTorchBindings.bp", nSteps)
examples/hello/bpStepsWriteReadCuda/bpStepsWriteReadTorch.py (new file, 43 additions, 0 deletions)
@@ -0,0 +1,43 @@
import numpy as np
import torch
from adios2 import Stream, FileReader

def write_array(fileName, nSteps, gpuArray, cpuArray):
    with Stream(fileName, "w") as wStream:
        for _ in wStream.steps(nSteps):
            wStream.write("cpuArray", cpuArray, cpuArray.shape,
                          [0] * len(cpuArray.shape), cpuArray.shape)
            wStream.write("gpuArray", gpuArray, gpuArray.shape,
                          [0] * len(gpuArray.shape), gpuArray.shape)
            # update buffers
            gpuArray = gpuArray * 2
            cpuArray = cpuArray + 1
    print("Write to file %s: %s data from GPU and %s data from CPU" % (
        fileName, gpuArray.shape, cpuArray.shape))

def read_array(fileName, readGpuShape, readCpuShape):
    with Stream(fileName, "r") as rStream:
        for _ in rStream.steps():
            step = rStream.current_step()
            cpuBuffer = np.zeros(readCpuShape, dtype=np.float32)
            rStream.read_in_buffer("cpuArray", cpuBuffer)

            cuda0 = torch.device('cuda:0')
            gpuBuffer = torch.zeros(readGpuShape, dtype=torch.float32, device=cuda0)
            rStream.read_in_buffer("gpuArray", gpuBuffer)

            print("Step %d: read GPU data\n %s" % (step, gpuBuffer))
            print("Step %d: read CPU data\n %s" % (step, cpuBuffer))


if __name__ == '__main__':
    cpuArray = np.array([[0, 1.0, 2.0], [3.0, 4.0, 5.0]], dtype=np.float32)
    cuda0 = torch.device('cuda:0')
    gpuArray = torch.tensor([[0, 1.0, 2.0], [3.0, 4.0, 5.0]],
                            dtype=torch.float32, device=cuda0)
    print("Array allocation: ", gpuArray.device)
    print("Bytes required to store the gpu array", gpuArray.nbytes)

    nSteps = 2
    write_array("StepsWriteReadTorch.bp", nSteps, gpuArray, cpuArray)
    read_array("StepsWriteReadTorch.bp", gpuArray.shape, cpuArray.shape)
python/adios2/engine.py (31 additions, 0 deletions)
@@ -5,6 +5,21 @@
 
 import numpy as np
 
+# pylint: disable=duplicate-code
+try:
+    import cupy as cp
+
+    ADIOS2_HAS_CUPY = True
+except ImportError:
+    ADIOS2_HAS_CUPY = False
+try:
+    import torch
+
+    ADIOS2_HAS_TORCH = True
+except ImportError:
+    ADIOS2_HAS_TORCH = False
+# pylint: enable=duplicate-code
+
 from adios2 import bindings
 
 
@@ -110,6 +125,14 @@ def put(self, variable, content, mode=bindings.Mode.Deferred):
             content = np.array([content])
             self.impl.Put(variable.impl, content)
         else:
+            if ADIOS2_HAS_CUPY:
+                if isinstance(content, cp.ndarray):
+                    self.impl.Put(variable.impl, content.data.ptr, mode)
+                    return
+            if ADIOS2_HAS_TORCH:
+                if isinstance(content, torch.Tensor):
+                    self.impl.Put(variable.impl, content.data_ptr(), mode)
+                    return
             raise ValueError
 
     def perform_puts(self):
@@ -140,6 +163,14 @@ def get(self, variable, content=None, mode=bindings.Mode.Sync):
             self.impl.Get(variable.impl, content, mode)
             return None
 
+        if ADIOS2_HAS_CUPY:
+            if isinstance(content, cp.ndarray):
+                self.impl.Get(variable.impl, content.data.ptr, mode)
+                return None
+        if ADIOS2_HAS_TORCH:
+            if isinstance(content, torch.Tensor):
+                self.impl.Get(variable.impl, content.data_ptr(), mode)
+                return None
         return self.impl.Get(variable.impl, mode)
 
     def perform_gets(self):
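Note (not part of the diff): both new branches follow the same dispatch pattern: probe for the optional package, then hand ADIOS2 the raw data pointer that the container exposes (content.data.ptr for CuPy, content.data_ptr() for torch). A standalone sketch of that pattern, with the helper name device_pointer chosen for illustration only:

# Illustrative helper only; it repeats the guarded imports and isinstance
# checks used by Engine.put/Engine.get in the diff above.
try:
    import cupy as cp
    ADIOS2_HAS_CUPY = True
except ImportError:
    ADIOS2_HAS_CUPY = False
try:
    import torch
    ADIOS2_HAS_TORCH = True
except ImportError:
    ADIOS2_HAS_TORCH = False


def device_pointer(content):
    """Return the raw data pointer for a CuPy array or torch tensor, else None."""
    if ADIOS2_HAS_CUPY and isinstance(content, cp.ndarray):
        return content.data.ptr      # CuPy: address held by the MemoryPointer
    if ADIOS2_HAS_TORCH and isinstance(content, torch.Tensor):
        return content.data_ptr()    # torch: address of the first element
    return None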