From d7d0b1511c00b4dbf4e2a702335176012f289a05 Mon Sep 17 00:00:00 2001 From: Adam Basfop Cavendish Date: Tue, 1 Apr 2025 19:53:11 +0800 Subject: [PATCH] refactor(cust_raw): consolidate CUDA, cuDNN, OptiX bindgen and remove find_cuda_helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Consolidation of bindgen related "*-sys" packages - Remove the common dependency of `find_cuda_helper`. Use the cargo metadata mechanism instead. - Merged all CUDA bindgen-generated code into the cust_raw crate for simplicity and maintainability. - Add CUDA Runtime API bindgen support. 2. cuDNN and OptiX Integration - Split cudnn into cudnn (high-level API) and cudnn-sys (low-level bindgens) for better abstraction. - Split optix into optix (high-level API) and optix-sys (low-level bindgens) for better abstraction. 3. CUDA 12+ Support - Updated cust to support CUDA versions >= 12. - Added compatibility for CUDA 12.3+ graph API changes: - Renamed cuGraphKernelNodeGetParams → cuGraphKernelNodeGetParams_v2. - Enabled conditional node support for CUDA >= 12.3. 4. Temporarily disable cuDNN in CI - Windows CI pipelines have no cuDNN support yet. 
Co-authored-by: Adam Basfop Cavendish Co-authored-by: Jorge Ortega --- .github/workflows/rust.yml | 6 +- Cargo.toml | 3 +- crates/blastoff/Cargo.toml | 2 +- crates/blastoff/src/context.rs | 57 +- crates/blastoff/src/error.rs | 51 +- crates/blastoff/src/lib.rs | 28 +- crates/blastoff/src/raw/level1.rs | 7 +- crates/blastoff/src/raw/level3.rs | 7 +- crates/cublas_sys/Cargo.toml | 9 - crates/cublas_sys/bindgen.sh | 16 - crates/cublas_sys/build.rs | 5 - crates/cublas_sys/src/cublasLt.rs | 684 -- crates/cublas_sys/src/cublasXt.rs | 831 -- crates/cublas_sys/src/cublas_v2.rs | 3779 ------- crates/cublas_sys/src/lib.rs | 10 - crates/cuda_builder/Cargo.toml | 1 - crates/cuda_builder/src/lib.rs | 23 +- crates/cuda_std/src/shared.rs | 5 +- crates/cudnn-sys/Cargo.toml | 15 + crates/cudnn-sys/build/cudnn_sdk.rs | 92 + crates/cudnn-sys/build/main.rs | 64 + crates/cudnn-sys/build/wrapper.h | 1 + crates/cudnn-sys/src/lib.rs | 5 + crates/cudnn/Cargo.toml | 1 + crates/cudnn/bindgen.sh | 9 - crates/cudnn/build.rs | 13 +- .../src/activation/activation_descriptor.rs | 11 +- .../cudnn/src/activation/activation_mode.rs | 4 +- crates/cudnn/src/activation/mod.rs | 15 +- .../src/attention/attention_descriptor.rs | 14 +- .../src/attention/attention_weights_kind.rs | 4 +- crates/cudnn/src/attention/mod.rs | 18 +- crates/cudnn/src/attention/seq_data_axis.rs | 16 +- .../src/attention/seq_data_descriptor.rs | 11 +- crates/cudnn/src/backend/conv_bwd_data.rs | 36 +- crates/cudnn/src/backend/conv_bwd_filter.rs | 36 +- crates/cudnn/src/backend/conv_cfg.rs | 34 +- crates/cudnn/src/backend/conv_fwd.rs | 36 +- crates/cudnn/src/backend/descriptor.rs | 29 +- crates/cudnn/src/backend/engine.rs | 12 +- crates/cudnn/src/backend/engine_cfg.rs | 8 +- crates/cudnn/src/backend/engine_heuristic.rs | 8 +- crates/cudnn/src/backend/execution_plan.rs | 8 +- crates/cudnn/src/backend/graph.rs | 12 +- crates/cudnn/src/backend/matmul.rs | 20 +- crates/cudnn/src/backend/matmul_cfg.rs | 10 +- 
crates/cudnn/src/backend/operation.rs | 5 +- crates/cudnn/src/backend/pointwise.rs | 36 +- crates/cudnn/src/backend/pointwise_cfg.rs | 46 +- crates/cudnn/src/backend/pointwise_mode.rs | 4 +- crates/cudnn/src/backend/reduction.rs | 16 +- crates/cudnn/src/backend/reduction_cfg.rs | 18 +- crates/cudnn/src/backend/reduction_mode.rs | 22 +- crates/cudnn/src/backend/tensor.rs | 38 +- crates/cudnn/src/context.rs | 16 +- .../cudnn/src/convolution/convolution_algo.rs | 129 +- .../src/convolution/convolution_descriptor.rs | 16 +- .../cudnn/src/convolution/convolution_mode.rs | 12 +- .../src/convolution/filter_descriptor.rs | 15 +- crates/cudnn/src/convolution/mod.rs | 34 +- crates/cudnn/src/data_type.rs | 14 +- crates/cudnn/src/determinism.rs | 18 +- .../cudnn/src/dropout/dropout_descriptor.rs | 5 +- crates/cudnn/src/dropout/mod.rs | 32 +- crates/cudnn/src/error.rs | 89 +- crates/cudnn/src/lib.rs | 1 - crates/cudnn/src/math_type.rs | 19 +- crates/cudnn/src/nan_propagation.rs | 11 +- crates/cudnn/src/op/mod.rs | 19 +- crates/cudnn/src/op/op_tensor_descriptor.rs | 21 +- crates/cudnn/src/op/op_tensor_op.rs | 6 +- crates/cudnn/src/pooling/mod.rs | 15 +- .../cudnn/src/pooling/pooling_descriptor.rs | 11 +- crates/cudnn/src/pooling/pooling_mode.rs | 4 +- crates/cudnn/src/reduction/indices_type.rs | 4 +- crates/cudnn/src/reduction/mod.rs | 20 +- crates/cudnn/src/reduction/reduce_indices.rs | 4 +- crates/cudnn/src/reduction/reduce_op.rs | 4 +- .../src/reduction/reduction_descriptor.rs | 10 +- crates/cudnn/src/rnn/forward_mode.rs | 9 +- crates/cudnn/src/rnn/mod.rs | 20 +- crates/cudnn/src/rnn/rnn_algo.rs | 13 +- crates/cudnn/src/rnn/rnn_bias_mode.rs | 13 +- crates/cudnn/src/rnn/rnn_clip_mode.rs | 9 +- crates/cudnn/src/rnn/rnn_data_descriptor.rs | 11 +- crates/cudnn/src/rnn/rnn_data_layout.rs | 17 +- crates/cudnn/src/rnn/rnn_descriptor.rs | 18 +- crates/cudnn/src/rnn/rnn_direction_mode.rs | 4 +- crates/cudnn/src/rnn/rnn_input_mode.rs | 9 +- crates/cudnn/src/rnn/rnn_mode.rs | 13 +- 
crates/cudnn/src/softmax/mod.rs | 11 +- crates/cudnn/src/softmax/softmax_algo.rs | 4 +- crates/cudnn/src/softmax/softmax_mode.rs | 4 +- crates/cudnn/src/sys.rs | 3862 ------- crates/cudnn/src/tensor/tensor_descriptor.rs | 19 +- crates/cudnn/src/tensor/tensor_format.rs | 12 +- crates/cudnn/src/w_grad_mode.rs | 4 +- crates/cust/Cargo.toml | 4 +- crates/cust/build.rs | 12 +- crates/cust/src/context/legacy.rs | 66 +- crates/cust/src/context/mod.rs | 79 +- crates/cust/src/device.rs | 30 +- crates/cust/src/error.rs | 8 +- crates/cust/src/event.rs | 13 +- crates/cust/src/external.rs | 24 +- crates/cust/src/function.rs | 27 +- crates/cust/src/graph.rs | 134 +- crates/cust/src/lib.rs | 8 +- crates/cust/src/link.rs | 23 +- crates/cust/src/memory/array.rs | 54 +- crates/cust/src/memory/device/device_box.rs | 59 +- .../cust/src/memory/device/device_buffer.rs | 27 +- crates/cust/src/memory/device/device_slice.rs | 76 +- crates/cust/src/memory/malloc.rs | 32 +- crates/cust/src/memory/mod.rs | 46 +- crates/cust/src/memory/pointer.rs | 6 +- crates/cust/src/memory/unified.rs | 53 +- crates/cust/src/module.rs | 64 +- crates/cust/src/stream.rs | 31 +- crates/cust/src/surface.rs | 2 +- crates/cust/src/texture.rs | 39 +- crates/cust_raw/Cargo.toml | 27 +- crates/cust_raw/README.md | 6 - crates/cust_raw/bindgen.sh | 21 - crates/cust_raw/build.rs | 3 - crates/cust_raw/build/cublas_wrapper.h | 1 + crates/cust_raw/build/cublaslt_wrapper.h | 1 + crates/cust_raw/build/cublasxt_wrapper.h | 1 + crates/cust_raw/build/cuda_sdk.rs | 342 + .../{wrapper.h => build/driver_wrapper.h} | 1 - crates/cust_raw/build/main.rs | 247 + .../cust_raw/build/nvptx_compiler_wrapper.h | 1 + crates/cust_raw/build/nvvm_wrapper.h | 1 + crates/cust_raw/build/runtime_wrapper.h | 3 + crates/cust_raw/src/cublas_sys.rs | 5 + crates/cust_raw/src/cublaslt_sys.rs | 5 + crates/cust_raw/src/cublasxt_sys.rs | 5 + crates/cust_raw/src/cuda.rs | 8989 ----------------- crates/cust_raw/src/driver_sys.rs | 5 + 
crates/cust_raw/src/lib.rs | 18 +- crates/cust_raw/src/nvptx_compiler_sys.rs | 5 + crates/cust_raw/src/nvvm_sys.rs | 7 + crates/cust_raw/src/runtime_sys.rs | 5 + crates/find_cuda_helper/Cargo.toml | 11 - crates/find_cuda_helper/src/lib.rs | 226 - crates/nvvm/Cargo.toml | 4 +- crates/nvvm/build.rs | 6 - crates/nvvm/src/lib.rs | 81 +- crates/nvvm/src/sys.rs | 305 - crates/optix-sys/Cargo.toml | 16 + crates/optix-sys/build/main.rs | 112 + crates/optix-sys/build/optix_sdk.rs | 88 + .../{optix => optix-sys/build}/optix_stubs.c | 0 .../build/wrapper.h} | 3 +- crates/optix-sys/src/lib.rs | 10 + crates/optix-sys/src/optix_sys.rs | 5 + crates/optix-sys/src/stub.rs | 5 + crates/optix/Cargo.toml | 19 +- crates/optix/build.rs | 97 +- .../optix/examples/ex02_pipeline/Cargo.toml | 6 +- crates/optix/examples/ex02_pipeline/build.rs | 31 +- crates/optix/examples/ex03_window/Cargo.toml | 8 +- crates/optix/examples/ex03_window/build.rs | 28 +- crates/optix/examples/ex04_mesh/Cargo.toml | 1 - crates/optix/src/acceleration.rs | 490 +- crates/optix/src/context.rs | 32 +- crates/optix/src/denoiser.rs | 60 +- crates/optix/src/error.rs | 173 +- crates/optix/src/lib.rs | 10 +- crates/optix/src/pipeline.rs | 277 +- crates/optix/src/shader_binding_table.rs | 27 +- crates/optix/src/sys.rs | 59 - crates/ptx_compiler/Cargo.toml | 4 +- crates/ptx_compiler/build.rs | 3 - crates/ptx_compiler/src/lib.rs | 66 +- crates/ptx_compiler/src/sys.rs | 236 - crates/rustc_codegen_nvvm/Cargo.toml | 2 +- crates/rustc_codegen_nvvm/src/nvvm.rs | 34 +- 178 files changed, 2992 insertions(+), 20991 deletions(-) delete mode 100644 crates/cublas_sys/Cargo.toml delete mode 100644 crates/cublas_sys/bindgen.sh delete mode 100644 crates/cublas_sys/build.rs delete mode 100644 crates/cublas_sys/src/cublasLt.rs delete mode 100644 crates/cublas_sys/src/cublasXt.rs delete mode 100644 crates/cublas_sys/src/cublas_v2.rs delete mode 100644 crates/cublas_sys/src/lib.rs create mode 100644 crates/cudnn-sys/Cargo.toml create mode 
100644 crates/cudnn-sys/build/cudnn_sdk.rs create mode 100644 crates/cudnn-sys/build/main.rs create mode 100644 crates/cudnn-sys/build/wrapper.h create mode 100644 crates/cudnn-sys/src/lib.rs delete mode 100755 crates/cudnn/bindgen.sh delete mode 100644 crates/cudnn/src/sys.rs delete mode 100644 crates/cust_raw/README.md delete mode 100644 crates/cust_raw/bindgen.sh delete mode 100644 crates/cust_raw/build.rs create mode 100644 crates/cust_raw/build/cublas_wrapper.h create mode 100644 crates/cust_raw/build/cublaslt_wrapper.h create mode 100644 crates/cust_raw/build/cublasxt_wrapper.h create mode 100644 crates/cust_raw/build/cuda_sdk.rs rename crates/cust_raw/{wrapper.h => build/driver_wrapper.h} (77%) create mode 100644 crates/cust_raw/build/main.rs create mode 100644 crates/cust_raw/build/nvptx_compiler_wrapper.h create mode 100644 crates/cust_raw/build/nvvm_wrapper.h create mode 100644 crates/cust_raw/build/runtime_wrapper.h create mode 100644 crates/cust_raw/src/cublas_sys.rs create mode 100644 crates/cust_raw/src/cublaslt_sys.rs create mode 100644 crates/cust_raw/src/cublasxt_sys.rs delete mode 100644 crates/cust_raw/src/cuda.rs create mode 100644 crates/cust_raw/src/driver_sys.rs create mode 100644 crates/cust_raw/src/nvptx_compiler_sys.rs create mode 100644 crates/cust_raw/src/nvvm_sys.rs create mode 100644 crates/cust_raw/src/runtime_sys.rs delete mode 100644 crates/find_cuda_helper/Cargo.toml delete mode 100644 crates/find_cuda_helper/src/lib.rs delete mode 100644 crates/nvvm/build.rs delete mode 100644 crates/nvvm/src/sys.rs create mode 100644 crates/optix-sys/Cargo.toml create mode 100644 crates/optix-sys/build/main.rs create mode 100644 crates/optix-sys/build/optix_sdk.rs rename crates/{optix => optix-sys/build}/optix_stubs.c (100%) rename crates/{optix/src/optix_wrapper.h => optix-sys/build/wrapper.h} (98%) create mode 100644 crates/optix-sys/src/lib.rs create mode 100644 crates/optix-sys/src/optix_sys.rs create mode 100644 crates/optix-sys/src/stub.rs 
delete mode 100644 crates/optix/src/sys.rs delete mode 100644 crates/ptx_compiler/build.rs delete mode 100644 crates/ptx_compiler/src/sys.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index dafff383..9ba6a215 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -102,7 +102,7 @@ jobs: run: cargo fmt --all -- --check - name: Build - run: cargo build --workspace --exclude "optix" --exclude "path_tracer" --exclude "denoiser" --exclude "add" --exclude "ex*" + run: cargo build --workspace --exclude "optix*" --exclude "path_tracer" --exclude "denoiser" --exclude "add" --exclude "ex*" --exclude "cudnn*" # Don't currently test because many tests rely on the system having a CUDA GPU # - name: Test @@ -112,9 +112,9 @@ jobs: if: contains(matrix.os, 'ubuntu') env: RUSTFLAGS: -Dwarnings - run: cargo clippy --workspace --exclude "optix" --exclude "path_tracer" --exclude "denoiser" --exclude "add" --exclude "ex*" + run: cargo clippy --workspace --exclude "optix*" --exclude "path_tracer" --exclude "denoiser" --exclude "add" --exclude "ex*" --exclude "cudnn*" - name: Check documentation env: RUSTDOCFLAGS: -Dwarnings - run: cargo doc --workspace --all-features --document-private-items --no-deps --exclude "optix" --exclude "path_tracer" --exclude "denoiser" --exclude "add" --exclude "ex*" + run: cargo doc --workspace --all-features --document-private-items --no-deps --exclude "optix*" --exclude "path_tracer" --exclude "denoiser" --exclude "add" --exclude "ex*" --exclude "cudnn*" --exclude "cust_raw" diff --git a/Cargo.toml b/Cargo.toml index 430e7ef0..7dd130f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,11 +11,10 @@ members = [ "examples/optix/*", "examples/cuda/cpu/*", "examples/cuda/gpu/*", - ] exclude = [ - "crates/optix/examples/common" + "crates/optix/examples/common", ] [profile.dev.package.rustc_codegen_nvvm] diff --git a/crates/blastoff/Cargo.toml b/crates/blastoff/Cargo.toml index 0d3f69c9..a3e377cc 100644 --- 
a/crates/blastoff/Cargo.toml +++ b/crates/blastoff/Cargo.toml @@ -7,8 +7,8 @@ repository = "https://github.com/Rust-GPU/Rust-CUDA" [dependencies] bitflags = "2.8" -cublas_sys = { version = "0.1", path = "../cublas_sys" } cust = { version = "0.3", path = "../cust", features = ["impl_num_complex"] } +cust_raw = { path = "../cust_raw", features = ["cublas"] } num-complex = "0.4.6" half = { version = "2.4.1", optional = true } diff --git a/crates/blastoff/src/context.rs b/crates/blastoff/src/context.rs index c3d87b0e..421e2d49 100644 --- a/crates/blastoff/src/context.rs +++ b/crates/blastoff/src/context.rs @@ -1,15 +1,20 @@ -use crate::{error::*, sys}; -use cust::stream::Stream; use std::ffi::CString; use std::mem::{self, MaybeUninit}; use std::os::raw::c_char; use std::ptr; -type Result = std::result::Result; +use cust::stream::Stream; +use cust_raw::cublas_sys; +use cust_raw::driver_sys; + +use super::error::DropResult; +use super::error::ToResult as _; + +type Result = std::result::Result; bitflags::bitflags! { /// Configures precision levels for the math in cuBLAS. - #[derive(Default)] + #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct MathMode: u32 { /// Highest performance mode which uses compute and intermediate storage precisions /// with at least the same number of mantissa and exponent bits as requested. Will @@ -68,7 +73,7 @@ bitflags::bitflags! 
{ /// - [Matrix Multiplication `gemm`](CublasContext::gemm) #[derive(Debug)] pub struct CublasContext { - pub(crate) raw: sys::v2::cublasHandle_t, + pub(crate) raw: cublas_sys::cublasHandle_t, } impl CublasContext { @@ -87,10 +92,10 @@ impl CublasContext { pub fn new() -> Result { let mut raw = MaybeUninit::uninit(); unsafe { - sys::v2::cublasCreate_v2(raw.as_mut_ptr()).to_result()?; - sys::v2::cublasSetPointerMode_v2( + cublas_sys::cublasCreate_v2(raw.as_mut_ptr()).to_result()?; + cublas_sys::cublasSetPointerMode_v2( raw.assume_init(), - sys::v2::cublasPointerMode_t::CUBLAS_POINTER_MODE_DEVICE, + cublas_sys::cublasPointerMode_t::CUBLAS_POINTER_MODE_DEVICE, ) .to_result()?; Ok(Self { @@ -107,7 +112,7 @@ impl CublasContext { unsafe { let inner = mem::replace(&mut ctx.raw, ptr::null_mut()); - match sys::v2::cublasDestroy_v2(inner).to_result() { + match cublas_sys::cublasDestroy_v2(inner).to_result() { Ok(()) => { mem::forget(ctx); Ok(()) @@ -122,7 +127,7 @@ impl CublasContext { let mut raw = MaybeUninit::::uninit(); unsafe { // getVersion can't fail - sys::v2::cublasGetVersion_v2(self.raw, raw.as_mut_ptr().cast()) + cublas_sys::cublasGetVersion_v2(self.raw, raw.as_mut_ptr().cast()) .to_result() .unwrap(); @@ -140,9 +145,9 @@ impl CublasContext { ) -> Result { unsafe { // cudaStream_t is the same as CUstream - sys::v2::cublasSetStream_v2( + cublas_sys::cublasSetStream_v2( self.raw, - mem::transmute::<*mut cust::sys::CUstream_st, *mut cublas_sys::v2::CUstream_st>( + mem::transmute::<*mut driver_sys::CUstream_st, *mut cublas_sys::CUstream_st>( stream.as_inner(), ), ) @@ -150,7 +155,7 @@ impl CublasContext { let res = func(self)?; // reset the stream back to NULL just in case someone calls with_stream, then drops the stream, and tries to // execute a raw sys function with the context's handle. 
- sys::v2::cublasSetStream_v2(self.raw, ptr::null_mut()).to_result()?; + cublas_sys::cublasSetStream_v2(self.raw, ptr::null_mut()).to_result()?; Ok(res) } } @@ -180,12 +185,12 @@ impl CublasContext { /// ``` pub fn set_atomics_mode(&self, allowed: bool) -> Result<()> { unsafe { - Ok(sys::v2::cublasSetAtomicsMode( + Ok(cublas_sys::cublasSetAtomicsMode( self.raw, if allowed { - sys::v2::cublasAtomicsMode_t::CUBLAS_ATOMICS_ALLOWED + cublas_sys::cublasAtomicsMode_t::CUBLAS_ATOMICS_ALLOWED } else { - sys::v2::cublasAtomicsMode_t::CUBLAS_ATOMICS_NOT_ALLOWED + cublas_sys::cublasAtomicsMode_t::CUBLAS_ATOMICS_NOT_ALLOWED }, ) .to_result()?) @@ -210,10 +215,10 @@ impl CublasContext { pub fn get_atomics_mode(&self) -> Result { let mut mode = MaybeUninit::uninit(); unsafe { - sys::v2::cublasGetAtomicsMode(self.raw, mode.as_mut_ptr()).to_result()?; + cublas_sys::cublasGetAtomicsMode(self.raw, mode.as_mut_ptr()).to_result()?; Ok(match mode.assume_init() { - sys::v2::cublasAtomicsMode_t::CUBLAS_ATOMICS_ALLOWED => true, - sys::v2::cublasAtomicsMode_t::CUBLAS_ATOMICS_NOT_ALLOWED => false, + cublas_sys::cublasAtomicsMode_t::CUBLAS_ATOMICS_ALLOWED => true, + cublas_sys::cublasAtomicsMode_t::CUBLAS_ATOMICS_NOT_ALLOWED => false, }) } } @@ -233,9 +238,9 @@ impl CublasContext { /// ``` pub fn set_math_mode(&self, math_mode: MathMode) -> Result<()> { unsafe { - Ok(sys::v2::cublasSetMathMode( + Ok(cublas_sys::cublasSetMathMode( self.raw, - mem::transmute::(math_mode.bits()), + mem::transmute::(math_mode.bits()), ) .to_result()?) 
} @@ -258,7 +263,7 @@ impl CublasContext { pub fn get_math_mode(&self) -> Result { let mut mode = MaybeUninit::uninit(); unsafe { - sys::v2::cublasGetMathMode(self.raw, mode.as_mut_ptr()).to_result()?; + cublas_sys::cublasGetMathMode(self.raw, mode.as_mut_ptr()).to_result()?; Ok(MathMode::from_bits(mode.assume_init() as u32) .expect("Invalid MathMode from cuBLAS")) } @@ -298,7 +303,7 @@ impl CublasContext { let path = log_file_name.map(|p| CString::new(p).expect("nul in log_file_name")); let path_ptr = path.map_or(ptr::null(), |s| s.as_ptr()); - sys::v2::cublasLoggerConfigure( + cublas_sys::cublasLoggerConfigure( enable as i32, log_to_stdout as i32, log_to_stderr as i32, @@ -315,7 +320,7 @@ impl CublasContext { /// /// The callback must not panic and unwind. pub unsafe fn set_logger_callback(callback: Option) { - sys::v2::cublasSetLoggerCallback(callback) + cublas_sys::cublasSetLoggerCallback(callback) .to_result() .unwrap(); } @@ -324,7 +329,7 @@ impl CublasContext { pub fn get_logger_callback() -> Option { let mut cb = MaybeUninit::uninit(); unsafe { - sys::v2::cublasGetLoggerCallback(cb.as_mut_ptr()) + cublas_sys::cublasGetLoggerCallback(cb.as_mut_ptr()) .to_result() .unwrap(); cb.assume_init() @@ -335,7 +340,7 @@ impl CublasContext { impl Drop for CublasContext { fn drop(&mut self) { unsafe { - sys::v2::cublasDestroy_v2(self.raw); + cublas_sys::cublasDestroy_v2(self.raw); } } } diff --git a/crates/blastoff/src/error.rs b/crates/blastoff/src/error.rs index 74c3058b..b4b16420 100644 --- a/crates/blastoff/src/error.rs +++ b/crates/blastoff/src/error.rs @@ -1,7 +1,8 @@ -use crate::sys; -use cust::error::CudaError; use std::{ffi::CStr, fmt::Display}; +use cust::error::CudaError; +use cust_raw::cublas_sys; + /// Result that contains the un-dropped value on error. 
pub type DropResult = std::result::Result<(), (CublasError, T)>; @@ -24,7 +25,7 @@ impl std::error::Error for CublasError {} impl Display for CublasError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { unsafe { - let ptr = sys::v2::cublasGetStatusString(self.into_raw()); + let ptr = cublas_sys::cublasGetStatusString(self.into_raw()); let cow = CStr::from_ptr(ptr).to_string_lossy(); f.write_str(cow.as_ref()) } @@ -35,39 +36,41 @@ pub trait ToResult { fn to_result(self) -> Result<(), CublasError>; } -impl ToResult for sys::v2::cublasStatus_t { +impl ToResult for cublas_sys::cublasStatus_t { fn to_result(self) -> Result<(), CublasError> { + use cust_raw::cublas_sys::cublasStatus_t::*; use CublasError::*; Err(match self { - sys::v2::cublasStatus_t::CUBLAS_STATUS_SUCCESS => return Ok(()), - sys::v2::cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => NotInitialized, - sys::v2::cublasStatus_t::CUBLAS_STATUS_ALLOC_FAILED => AllocFailed, - sys::v2::cublasStatus_t::CUBLAS_STATUS_INVALID_VALUE => InvalidValue, - sys::v2::cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => ArchMismatch, - sys::v2::cublasStatus_t::CUBLAS_STATUS_MAPPING_ERROR => MappingError, - sys::v2::cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => ExecutionFailed, - sys::v2::cublasStatus_t::CUBLAS_STATUS_INTERNAL_ERROR => InternalError, - sys::v2::cublasStatus_t::CUBLAS_STATUS_NOT_SUPPORTED => NotSupported, - sys::v2::cublasStatus_t::CUBLAS_STATUS_LICENSE_ERROR => LicenseError, + CUBLAS_STATUS_SUCCESS => return Ok(()), + CUBLAS_STATUS_NOT_INITIALIZED => NotInitialized, + CUBLAS_STATUS_ALLOC_FAILED => AllocFailed, + CUBLAS_STATUS_INVALID_VALUE => InvalidValue, + CUBLAS_STATUS_ARCH_MISMATCH => ArchMismatch, + CUBLAS_STATUS_MAPPING_ERROR => MappingError, + CUBLAS_STATUS_EXECUTION_FAILED => ExecutionFailed, + CUBLAS_STATUS_INTERNAL_ERROR => InternalError, + CUBLAS_STATUS_NOT_SUPPORTED => NotSupported, + CUBLAS_STATUS_LICENSE_ERROR => LicenseError, }) } } impl CublasError { - pub fn 
into_raw(self) -> sys::v2::cublasStatus_t { + pub fn into_raw(self) -> cublas_sys::cublasStatus_t { + use cust_raw::cublas_sys::cublasStatus_t::*; use CublasError::*; match self { - NotInitialized => sys::v2::cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED, - AllocFailed => sys::v2::cublasStatus_t::CUBLAS_STATUS_ALLOC_FAILED, - InvalidValue => sys::v2::cublasStatus_t::CUBLAS_STATUS_INVALID_VALUE, - ArchMismatch => sys::v2::cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH, - MappingError => sys::v2::cublasStatus_t::CUBLAS_STATUS_MAPPING_ERROR, - ExecutionFailed => sys::v2::cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED, - InternalError => sys::v2::cublasStatus_t::CUBLAS_STATUS_INTERNAL_ERROR, - NotSupported => sys::v2::cublasStatus_t::CUBLAS_STATUS_NOT_SUPPORTED, - LicenseError => sys::v2::cublasStatus_t::CUBLAS_STATUS_LICENSE_ERROR, + NotInitialized => CUBLAS_STATUS_NOT_INITIALIZED, + AllocFailed => CUBLAS_STATUS_ALLOC_FAILED, + InvalidValue => CUBLAS_STATUS_INVALID_VALUE, + ArchMismatch => CUBLAS_STATUS_ARCH_MISMATCH, + MappingError => CUBLAS_STATUS_MAPPING_ERROR, + ExecutionFailed => CUBLAS_STATUS_EXECUTION_FAILED, + InternalError => CUBLAS_STATUS_INTERNAL_ERROR, + NotSupported => CUBLAS_STATUS_NOT_SUPPORTED, + LicenseError => CUBLAS_STATUS_LICENSE_ERROR, } } } diff --git a/crates/blastoff/src/lib.rs b/crates/blastoff/src/lib.rs index 98fc032c..efce1dee 100644 --- a/crates/blastoff/src/lib.rs +++ b/crates/blastoff/src/lib.rs @@ -10,7 +10,7 @@ #![allow(clippy::too_many_arguments)] #![cfg_attr(docsrs, feature(doc_cfg))] -pub use cublas_sys as sys; +pub use cust_raw::cublas_sys; use num_complex::{Complex32, Complex64}; pub use context::*; @@ -39,34 +39,34 @@ pub trait BlasDatatype: private::Sealed + cust::memory::DeviceCopy { /// The corresponding float type. For complex numbers this means their backing /// precision, and for floats it is just themselves. 
type FloatTy: Float; - fn to_raw(&self) -> sys::v2::cudaDataType; + fn to_raw(&self) -> cublas_sys::cudaDataType; } impl BlasDatatype for f32 { type FloatTy = f32; - fn to_raw(&self) -> sys::v2::cudaDataType { - sys::v2::cudaDataType::CUDA_R_32F + fn to_raw(&self) -> cublas_sys::cudaDataType { + cublas_sys::cudaDataType::CUDA_R_32F } } impl BlasDatatype for f64 { type FloatTy = f64; - fn to_raw(&self) -> sys::v2::cudaDataType { - sys::v2::cudaDataType::CUDA_R_64F + fn to_raw(&self) -> cublas_sys::cudaDataType { + cublas_sys::cudaDataType::CUDA_R_64F } } impl BlasDatatype for Complex32 { type FloatTy = f32; - fn to_raw(&self) -> sys::v2::cudaDataType { - sys::v2::cudaDataType::CUDA_C_32F + fn to_raw(&self) -> cublas_sys::cudaDataType { + cublas_sys::cudaDataType::CUDA_C_32F } } impl BlasDatatype for Complex64 { type FloatTy = f64; - fn to_raw(&self) -> sys::v2::cudaDataType { - sys::v2::cudaDataType::CUDA_C_64F + fn to_raw(&self) -> cublas_sys::cudaDataType { + cublas_sys::cudaDataType::CUDA_C_64F } } @@ -106,11 +106,11 @@ pub enum MatrixOp { impl MatrixOp { /// Returns the corresponding `cublasOperation_t` for this operation. 
- pub fn to_raw(self) -> sys::v2::cublasOperation_t { + pub fn to_raw(self) -> cublas_sys::cublasOperation_t { match self { - MatrixOp::None => sys::v2::cublasOperation_t::CUBLAS_OP_N, - MatrixOp::Transpose => sys::v2::cublasOperation_t::CUBLAS_OP_T, - MatrixOp::ConjugateTranspose => sys::v2::cublasOperation_t::CUBLAS_OP_C, + MatrixOp::None => cublas_sys::cublasOperation_t::CUBLAS_OP_N, + MatrixOp::Transpose => cublas_sys::cublasOperation_t::CUBLAS_OP_T, + MatrixOp::ConjugateTranspose => cublas_sys::cublasOperation_t::CUBLAS_OP_C, } } } diff --git a/crates/blastoff/src/raw/level1.rs b/crates/blastoff/src/raw/level1.rs index 351091e6..d2167fbb 100644 --- a/crates/blastoff/src/raw/level1.rs +++ b/crates/blastoff/src/raw/level1.rs @@ -1,7 +1,10 @@ -use crate::{sys::v2::*, BlasDatatype}; -use num_complex::{Complex32, Complex64}; use std::os::raw::c_int; +use cust_raw::cublas_sys::*; +use num_complex::{Complex32, Complex64}; + +use crate::BlasDatatype; + pub trait Level1: BlasDatatype { unsafe fn amax( handle: cublasHandle_t, diff --git a/crates/blastoff/src/raw/level3.rs b/crates/blastoff/src/raw/level3.rs index 81d5a7de..5e6d8e17 100644 --- a/crates/blastoff/src/raw/level3.rs +++ b/crates/blastoff/src/raw/level3.rs @@ -1,7 +1,10 @@ -use crate::{sys::v2::*, GemmDatatype}; -use num_complex::{Complex32, Complex64}; use std::os::raw::c_int; +use cust_raw::cublas_sys::*; +use num_complex::{Complex32, Complex64}; + +use crate::GemmDatatype; + pub trait GemmOps: GemmDatatype { unsafe fn gemm( handle: cublasHandle_t, diff --git a/crates/cublas_sys/Cargo.toml b/crates/cublas_sys/Cargo.toml deleted file mode 100644 index c70f7d8f..00000000 --- a/crates/cublas_sys/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "cublas_sys" -version = "0.1.0" -edition = "2021" -authors = ["Riccardo D'Ambrosio "] -repository = "https://github.com/Rust-GPU/Rust-CUDA" - -[build-dependencies] -find_cuda_helper = { version = "0.2", path = "../find_cuda_helper" } diff --git 
a/crates/cublas_sys/bindgen.sh b/crates/cublas_sys/bindgen.sh deleted file mode 100644 index 5b1bc752..00000000 --- a/crates/cublas_sys/bindgen.sh +++ /dev/null @@ -1,16 +0,0 @@ -func_prefixes=("cublas.*" "cublasLt.*" "cublasXt.*") -var_prefixes=("CUBLAS.*" "CUBLASLT.*" "CUBLASXT.*") -iter=0 -for f in cublas_v2.h cublasLt.h cublasXt.h; do - bindgen "${CUDA_PATH}/include/$f" \ - --size_t-is-usize \ - --allowlist-type "${func_prefixes[$iter]}" \ - --allowlist-function "${func_prefixes[$iter]}" \ - --allowlist-var "${var_prefixes[$iter]}" \ - --no-layout-tests \ - --no-doc-comments \ - --default-enum-style rust \ - -- -I "${CUDA_PATH}/include" > src/${f%.*}.rs - - ((iter=iter+1)) -done \ No newline at end of file diff --git a/crates/cublas_sys/build.rs b/crates/cublas_sys/build.rs deleted file mode 100644 index ccd7bd2b..00000000 --- a/crates/cublas_sys/build.rs +++ /dev/null @@ -1,5 +0,0 @@ -fn main() { - find_cuda_helper::include_cuda(); - println!("cargo:rustc-link-lib=dylib=cublas"); - println!("cargo:rustc-link-lib=dylib=cublasLt"); -} diff --git a/crates/cublas_sys/src/cublasLt.rs b/crates/cublas_sys/src/cublasLt.rs deleted file mode 100644 index 0f781bb2..00000000 --- a/crates/cublas_sys/src/cublasLt.rs +++ /dev/null @@ -1,684 +0,0 @@ -/* automatically generated by rust-bindgen 0.59.2 */ - -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_FMA: u32 = 1; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_HMMA: u32 = 2; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_IMMA: u32 = 4; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_DMMA: u32 = 8; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_TENSOR_OP_MASK: u32 = 254; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_TYPE_MASK: u32 = 255; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_16F: u32 = 256; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32F: u32 = 512; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_64F: u32 = 1024; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32I: u32 = 2048; -pub const 
CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_TYPE_MASK: u32 = 65280; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16F: u32 = 65536; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16BF: u32 = 131072; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_TF32: u32 = 262144; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_32F: u32 = 524288; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_64F: u32 = 1048576; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8I: u32 = 2097152; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_INPUT_TYPE_MASK: u32 = 16711680; -pub const CUBLASLT_NUMERICAL_IMPL_FLAGS_GAUSSIAN: u64 = 4294967296; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUstream_st { - _unused: [u8; 0], -} -pub type cudaStream_t = *mut CUstream_st; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudaDataType_t { - CUDA_R_16F = 2, - CUDA_C_16F = 6, - CUDA_R_16BF = 14, - CUDA_C_16BF = 15, - CUDA_R_32F = 0, - CUDA_C_32F = 4, - CUDA_R_64F = 1, - CUDA_C_64F = 5, - CUDA_R_4I = 16, - CUDA_C_4I = 17, - CUDA_R_4U = 18, - CUDA_C_4U = 19, - CUDA_R_8I = 3, - CUDA_C_8I = 7, - CUDA_R_8U = 8, - CUDA_C_8U = 9, - CUDA_R_16I = 20, - CUDA_C_16I = 21, - CUDA_R_16U = 22, - CUDA_C_16U = 23, - CUDA_R_32I = 10, - CUDA_C_32I = 11, - CUDA_R_32U = 12, - CUDA_C_32U = 13, - CUDA_R_64I = 24, - CUDA_C_64I = 25, - CUDA_R_64U = 26, - CUDA_C_64U = 27, -} -pub use self::cudaDataType_t as cudaDataType; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum libraryPropertyType_t { - MAJOR_VERSION = 0, - MINOR_VERSION = 1, - PATCH_LEVEL = 2, -} -pub use self::libraryPropertyType_t as libraryPropertyType; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasStatus_t { - CUBLAS_STATUS_SUCCESS = 0, - CUBLAS_STATUS_NOT_INITIALIZED = 1, - CUBLAS_STATUS_ALLOC_FAILED = 3, - CUBLAS_STATUS_INVALID_VALUE = 7, - CUBLAS_STATUS_ARCH_MISMATCH = 8, - CUBLAS_STATUS_MAPPING_ERROR = 11, - CUBLAS_STATUS_EXECUTION_FAILED = 13, - CUBLAS_STATUS_INTERNAL_ERROR = 
14, - CUBLAS_STATUS_NOT_SUPPORTED = 15, - CUBLAS_STATUS_LICENSE_ERROR = 16, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasComputeType_t { - CUBLAS_COMPUTE_16F = 64, - CUBLAS_COMPUTE_16F_PEDANTIC = 65, - CUBLAS_COMPUTE_32F = 68, - CUBLAS_COMPUTE_32F_PEDANTIC = 69, - CUBLAS_COMPUTE_32F_FAST_16F = 74, - CUBLAS_COMPUTE_32F_FAST_16BF = 75, - CUBLAS_COMPUTE_32F_FAST_TF32 = 77, - CUBLAS_COMPUTE_64F = 70, - CUBLAS_COMPUTE_64F_PEDANTIC = 71, - CUBLAS_COMPUTE_32I = 72, - CUBLAS_COMPUTE_32I_PEDANTIC = 73, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct _iobuf { - pub _Placeholder: *mut ::std::os::raw::c_void, -} -pub type FILE = _iobuf; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cublasLtContext { - _unused: [u8; 0], -} -pub type cublasLtHandle_t = *mut cublasLtContext; -extern "C" { - pub fn cublasLtCreate(lightHandle: *mut cublasLtHandle_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtDestroy(lightHandle: cublasLtHandle_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtGetStatusName(status: cublasStatus_t) -> *const ::std::os::raw::c_char; -} -extern "C" { - pub fn cublasLtGetStatusString(status: cublasStatus_t) -> *const ::std::os::raw::c_char; -} -extern "C" { - pub fn cublasLtGetVersion() -> usize; -} -extern "C" { - pub fn cublasLtGetCudartVersion() -> usize; -} -extern "C" { - pub fn cublasLtGetProperty( - type_: libraryPropertyType, - value: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cublasLtMatrixLayoutOpaque_t { - pub data: [u64; 8usize], -} -pub type cublasLtMatrixLayout_t = *mut cublasLtMatrixLayoutOpaque_t; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cublasLtMatmulAlgo_t { - pub data: [u64; 8usize], -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cublasLtMatmulDescOpaque_t { - pub data: [u64; 11usize], -} -pub type cublasLtMatmulDesc_t = *mut cublasLtMatmulDescOpaque_t; -#[repr(C)] -#[derive(Debug, Copy, 
Clone)] -pub struct cublasLtMatrixTransformDescOpaque_t { - pub data: [u64; 8usize], -} -pub type cublasLtMatrixTransformDesc_t = *mut cublasLtMatrixTransformDescOpaque_t; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cublasLtMatmulPreferenceOpaque_t { - pub data: [u64; 10usize], -} -pub type cublasLtMatmulPreference_t = *mut cublasLtMatmulPreferenceOpaque_t; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtMatmulTile_t { - CUBLASLT_MATMUL_TILE_UNDEFINED = 0, - CUBLASLT_MATMUL_TILE_8x8 = 1, - CUBLASLT_MATMUL_TILE_8x16 = 2, - CUBLASLT_MATMUL_TILE_16x8 = 3, - CUBLASLT_MATMUL_TILE_8x32 = 4, - CUBLASLT_MATMUL_TILE_16x16 = 5, - CUBLASLT_MATMUL_TILE_32x8 = 6, - CUBLASLT_MATMUL_TILE_8x64 = 7, - CUBLASLT_MATMUL_TILE_16x32 = 8, - CUBLASLT_MATMUL_TILE_32x16 = 9, - CUBLASLT_MATMUL_TILE_64x8 = 10, - CUBLASLT_MATMUL_TILE_32x32 = 11, - CUBLASLT_MATMUL_TILE_32x64 = 12, - CUBLASLT_MATMUL_TILE_64x32 = 13, - CUBLASLT_MATMUL_TILE_32x128 = 14, - CUBLASLT_MATMUL_TILE_64x64 = 15, - CUBLASLT_MATMUL_TILE_128x32 = 16, - CUBLASLT_MATMUL_TILE_64x128 = 17, - CUBLASLT_MATMUL_TILE_128x64 = 18, - CUBLASLT_MATMUL_TILE_64x256 = 19, - CUBLASLT_MATMUL_TILE_128x128 = 20, - CUBLASLT_MATMUL_TILE_256x64 = 21, - CUBLASLT_MATMUL_TILE_64x512 = 22, - CUBLASLT_MATMUL_TILE_128x256 = 23, - CUBLASLT_MATMUL_TILE_256x128 = 24, - CUBLASLT_MATMUL_TILE_512x64 = 25, - CUBLASLT_MATMUL_TILE_64x96 = 26, - CUBLASLT_MATMUL_TILE_96x64 = 27, - CUBLASLT_MATMUL_TILE_96x128 = 28, - CUBLASLT_MATMUL_TILE_128x160 = 29, - CUBLASLT_MATMUL_TILE_160x128 = 30, - CUBLASLT_MATMUL_TILE_192x128 = 31, - CUBLASLT_MATMUL_TILE_END = 32, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtMatmulStages_t { - CUBLASLT_MATMUL_STAGES_UNDEFINED = 0, - CUBLASLT_MATMUL_STAGES_16x1 = 1, - CUBLASLT_MATMUL_STAGES_16x2 = 2, - CUBLASLT_MATMUL_STAGES_16x3 = 3, - CUBLASLT_MATMUL_STAGES_16x4 = 4, - CUBLASLT_MATMUL_STAGES_16x5 = 5, - CUBLASLT_MATMUL_STAGES_16x6 = 6, - 
CUBLASLT_MATMUL_STAGES_32x1 = 7, - CUBLASLT_MATMUL_STAGES_32x2 = 8, - CUBLASLT_MATMUL_STAGES_32x3 = 9, - CUBLASLT_MATMUL_STAGES_32x4 = 10, - CUBLASLT_MATMUL_STAGES_32x5 = 11, - CUBLASLT_MATMUL_STAGES_32x6 = 12, - CUBLASLT_MATMUL_STAGES_64x1 = 13, - CUBLASLT_MATMUL_STAGES_64x2 = 14, - CUBLASLT_MATMUL_STAGES_64x3 = 15, - CUBLASLT_MATMUL_STAGES_64x4 = 16, - CUBLASLT_MATMUL_STAGES_64x5 = 17, - CUBLASLT_MATMUL_STAGES_64x6 = 18, - CUBLASLT_MATMUL_STAGES_128x1 = 19, - CUBLASLT_MATMUL_STAGES_128x2 = 20, - CUBLASLT_MATMUL_STAGES_128x3 = 21, - CUBLASLT_MATMUL_STAGES_128x4 = 22, - CUBLASLT_MATMUL_STAGES_128x5 = 23, - CUBLASLT_MATMUL_STAGES_128x6 = 24, - CUBLASLT_MATMUL_STAGES_32x10 = 25, - CUBLASLT_MATMUL_STAGES_8x4 = 26, - CUBLASLT_MATMUL_STAGES_16x10 = 27, - CUBLASLT_MATMUL_STAGES_8x5 = 28, - CUBLASLT_MATMUL_STAGES_16x80 = 29, - CUBLASLT_MATMUL_STAGES_64x80 = 30, - CUBLASLT_MATMUL_STAGES_END = 31, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtPointerMode_t { - CUBLASLT_POINTER_MODE_HOST = 0, - CUBLASLT_POINTER_MODE_DEVICE = 1, - CUBLASLT_POINTER_MODE_DEVICE_VECTOR = 2, - CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO = 3, - CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_HOST = 4, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtPointerModeMask_t { - CUBLASLT_POINTER_MODE_MASK_NO_FILTERING = 0, - CUBLASLT_POINTER_MODE_MASK_HOST = 1, - CUBLASLT_POINTER_MODE_MASK_DEVICE = 2, - CUBLASLT_POINTER_MODE_MASK_DEVICE_VECTOR = 4, - CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_ZERO = 8, - CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_HOST = 16, -} -pub type cublasLtNumericalImplFlags_t = u64; -extern "C" { - pub fn cublasLtMatmul( - lightHandle: cublasLtHandle_t, - computeDesc: cublasLtMatmulDesc_t, - alpha: *const ::std::os::raw::c_void, - A: *const ::std::os::raw::c_void, - Adesc: cublasLtMatrixLayout_t, - B: *const ::std::os::raw::c_void, - Bdesc: cublasLtMatrixLayout_t, - beta: *const 
::std::os::raw::c_void, - C: *const ::std::os::raw::c_void, - Cdesc: cublasLtMatrixLayout_t, - D: *mut ::std::os::raw::c_void, - Ddesc: cublasLtMatrixLayout_t, - algo: *const cublasLtMatmulAlgo_t, - workspace: *mut ::std::os::raw::c_void, - workspaceSizeInBytes: usize, - stream: cudaStream_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatrixTransform( - lightHandle: cublasLtHandle_t, - transformDesc: cublasLtMatrixTransformDesc_t, - alpha: *const ::std::os::raw::c_void, - A: *const ::std::os::raw::c_void, - Adesc: cublasLtMatrixLayout_t, - beta: *const ::std::os::raw::c_void, - B: *const ::std::os::raw::c_void, - Bdesc: cublasLtMatrixLayout_t, - C: *mut ::std::os::raw::c_void, - Cdesc: cublasLtMatrixLayout_t, - stream: cudaStream_t, - ) -> cublasStatus_t; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtOrder_t { - CUBLASLT_ORDER_COL = 0, - CUBLASLT_ORDER_ROW = 1, - CUBLASLT_ORDER_COL32 = 2, - CUBLASLT_ORDER_COL4_4R2_8C = 3, - CUBLASLT_ORDER_COL32_2R_4R4 = 4, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtMatrixLayoutAttribute_t { - CUBLASLT_MATRIX_LAYOUT_TYPE = 0, - CUBLASLT_MATRIX_LAYOUT_ORDER = 1, - CUBLASLT_MATRIX_LAYOUT_ROWS = 2, - CUBLASLT_MATRIX_LAYOUT_COLS = 3, - CUBLASLT_MATRIX_LAYOUT_LD = 4, - CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT = 5, - CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET = 6, - CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET = 7, -} -extern "C" { - pub fn cublasLtMatrixLayoutInit_internal( - matLayout: cublasLtMatrixLayout_t, - size: usize, - type_: cudaDataType, - rows: u64, - cols: u64, - ld: i64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatrixLayoutCreate( - matLayout: *mut cublasLtMatrixLayout_t, - type_: cudaDataType, - rows: u64, - cols: u64, - ld: i64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatrixLayoutDestroy(matLayout: cublasLtMatrixLayout_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatrixLayoutSetAttribute( - 
matLayout: cublasLtMatrixLayout_t, - attr: cublasLtMatrixLayoutAttribute_t, - buf: *const ::std::os::raw::c_void, - sizeInBytes: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatrixLayoutGetAttribute( - matLayout: cublasLtMatrixLayout_t, - attr: cublasLtMatrixLayoutAttribute_t, - buf: *mut ::std::os::raw::c_void, - sizeInBytes: usize, - sizeWritten: *mut usize, - ) -> cublasStatus_t; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtMatmulDescAttributes_t { - CUBLASLT_MATMUL_DESC_COMPUTE_TYPE = 0, - CUBLASLT_MATMUL_DESC_SCALE_TYPE = 1, - CUBLASLT_MATMUL_DESC_POINTER_MODE = 2, - CUBLASLT_MATMUL_DESC_TRANSA = 3, - CUBLASLT_MATMUL_DESC_TRANSB = 4, - CUBLASLT_MATMUL_DESC_TRANSC = 5, - CUBLASLT_MATMUL_DESC_FILL_MODE = 6, - CUBLASLT_MATMUL_DESC_EPILOGUE = 7, - CUBLASLT_MATMUL_DESC_BIAS_POINTER = 8, - CUBLASLT_MATMUL_DESC_BIAS_BATCH_STRIDE = 10, - CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER = 11, - CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD = 12, - CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_BATCH_STRIDE = 13, - CUBLASLT_MATMUL_DESC_ALPHA_VECTOR_BATCH_STRIDE = 14, -} -extern "C" { - pub fn cublasLtMatmulDescInit_internal( - matmulDesc: cublasLtMatmulDesc_t, - size: usize, - computeType: cublasComputeType_t, - scaleType: cudaDataType_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulDescCreate( - matmulDesc: *mut cublasLtMatmulDesc_t, - computeType: cublasComputeType_t, - scaleType: cudaDataType_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulDescDestroy(matmulDesc: cublasLtMatmulDesc_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulDescSetAttribute( - matmulDesc: cublasLtMatmulDesc_t, - attr: cublasLtMatmulDescAttributes_t, - buf: *const ::std::os::raw::c_void, - sizeInBytes: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulDescGetAttribute( - matmulDesc: cublasLtMatmulDesc_t, - attr: cublasLtMatmulDescAttributes_t, - buf: *mut ::std::os::raw::c_void, - sizeInBytes: 
usize, - sizeWritten: *mut usize, - ) -> cublasStatus_t; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtMatrixTransformDescAttributes_t { - CUBLASLT_MATRIX_TRANSFORM_DESC_SCALE_TYPE = 0, - CUBLASLT_MATRIX_TRANSFORM_DESC_POINTER_MODE = 1, - CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSA = 2, - CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSB = 3, -} -extern "C" { - pub fn cublasLtMatrixTransformDescInit_internal( - transformDesc: cublasLtMatrixTransformDesc_t, - size: usize, - scaleType: cudaDataType, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatrixTransformDescCreate( - transformDesc: *mut cublasLtMatrixTransformDesc_t, - scaleType: cudaDataType, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatrixTransformDescDestroy( - transformDesc: cublasLtMatrixTransformDesc_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatrixTransformDescSetAttribute( - transformDesc: cublasLtMatrixTransformDesc_t, - attr: cublasLtMatrixTransformDescAttributes_t, - buf: *const ::std::os::raw::c_void, - sizeInBytes: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatrixTransformDescGetAttribute( - transformDesc: cublasLtMatrixTransformDesc_t, - attr: cublasLtMatrixTransformDescAttributes_t, - buf: *mut ::std::os::raw::c_void, - sizeInBytes: usize, - sizeWritten: *mut usize, - ) -> cublasStatus_t; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLt3mMode_t { - CUBLASLT_3M_MODE_DISALLOWED = 0, - CUBLASLT_3M_MODE_ALLOWED = 1, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtReductionScheme_t { - CUBLASLT_REDUCTION_SCHEME_NONE = 0, - CUBLASLT_REDUCTION_SCHEME_INPLACE = 1, - CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE = 2, - CUBLASLT_REDUCTION_SCHEME_OUTPUT_TYPE = 4, - CUBLASLT_REDUCTION_SCHEME_MASK = 7, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtEpilogue_t { - CUBLASLT_EPILOGUE_DEFAULT = 1, - 
CUBLASLT_EPILOGUE_RELU = 2, - CUBLASLT_EPILOGUE_RELU_AUX = 130, - CUBLASLT_EPILOGUE_BIAS = 4, - CUBLASLT_EPILOGUE_RELU_BIAS = 6, - CUBLASLT_EPILOGUE_RELU_AUX_BIAS = 134, - CUBLASLT_EPILOGUE_DRELU_BGRAD = 152, - CUBLASLT_EPILOGUE_GELU = 32, - CUBLASLT_EPILOGUE_GELU_AUX = 160, - CUBLASLT_EPILOGUE_GELU_BIAS = 36, - CUBLASLT_EPILOGUE_GELU_AUX_BIAS = 164, - CUBLASLT_EPILOGUE_DGELU_BGRAD = 208, - CUBLASLT_EPILOGUE_BGRADA = 256, - CUBLASLT_EPILOGUE_BGRADB = 512, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtMatmulSearch_t { - CUBLASLT_SEARCH_BEST_FIT = 0, - CUBLASLT_SEARCH_LIMITED_BY_ALGO_ID = 1, - CUBLASLT_SEARCH_RESERVED_02 = 2, - CUBLASLT_SEARCH_RESERVED_03 = 3, - CUBLASLT_SEARCH_RESERVED_04 = 4, - CUBLASLT_SEARCH_RESERVED_05 = 5, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtMatmulPreferenceAttributes_t { - CUBLASLT_MATMUL_PREF_SEARCH_MODE = 0, - CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES = 1, - CUBLASLT_MATMUL_PREF_MATH_MODE_MASK = 2, - CUBLASLT_MATMUL_PREF_REDUCTION_SCHEME_MASK = 3, - CUBLASLT_MATMUL_PREF_GAUSSIAN_MODE_MASK = 4, - CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_A_BYTES = 5, - CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_B_BYTES = 6, - CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_C_BYTES = 7, - CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_D_BYTES = 8, - CUBLASLT_MATMUL_PREF_MAX_WAVES_COUNT = 9, - CUBLASLT_MATMUL_PREF_POINTER_MODE_MASK = 10, - CUBLASLT_MATMUL_PREF_EPILOGUE_MASK = 11, - CUBLASLT_MATMUL_PREF_IMPL_MASK = 12, - CUBLASLT_MATMUL_PREF_SM_COUNT_TARGET = 13, -} -extern "C" { - pub fn cublasLtMatmulPreferenceInit_internal( - pref: cublasLtMatmulPreference_t, - size: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulPreferenceCreate(pref: *mut cublasLtMatmulPreference_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulPreferenceDestroy(pref: cublasLtMatmulPreference_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulPreferenceSetAttribute( - pref: 
cublasLtMatmulPreference_t, - attr: cublasLtMatmulPreferenceAttributes_t, - buf: *const ::std::os::raw::c_void, - sizeInBytes: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulPreferenceGetAttribute( - pref: cublasLtMatmulPreference_t, - attr: cublasLtMatmulPreferenceAttributes_t, - buf: *mut ::std::os::raw::c_void, - sizeInBytes: usize, - sizeWritten: *mut usize, - ) -> cublasStatus_t; -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cublasLtMatmulHeuristicResult_t { - pub algo: cublasLtMatmulAlgo_t, - pub workspaceSize: usize, - pub state: cublasStatus_t, - pub wavesCount: f32, - pub reserved: [::std::os::raw::c_int; 4usize], -} -extern "C" { - pub fn cublasLtMatmulAlgoGetHeuristic( - lightHandle: cublasLtHandle_t, - operationDesc: cublasLtMatmulDesc_t, - Adesc: cublasLtMatrixLayout_t, - Bdesc: cublasLtMatrixLayout_t, - Cdesc: cublasLtMatrixLayout_t, - Ddesc: cublasLtMatrixLayout_t, - preference: cublasLtMatmulPreference_t, - requestedAlgoCount: ::std::os::raw::c_int, - heuristicResultsArray: *mut cublasLtMatmulHeuristicResult_t, - returnAlgoCount: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulAlgoGetIds( - lightHandle: cublasLtHandle_t, - computeType: cublasComputeType_t, - scaleType: cudaDataType_t, - Atype: cudaDataType_t, - Btype: cudaDataType_t, - Ctype: cudaDataType_t, - Dtype: cudaDataType_t, - requestedAlgoCount: ::std::os::raw::c_int, - algoIdsArray: *mut ::std::os::raw::c_int, - returnAlgoCount: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulAlgoInit( - lightHandle: cublasLtHandle_t, - computeType: cublasComputeType_t, - scaleType: cudaDataType_t, - Atype: cudaDataType_t, - Btype: cudaDataType_t, - Ctype: cudaDataType_t, - Dtype: cudaDataType_t, - algoId: ::std::os::raw::c_int, - algo: *mut cublasLtMatmulAlgo_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulAlgoCheck( - lightHandle: cublasLtHandle_t, - operationDesc: 
cublasLtMatmulDesc_t, - Adesc: cublasLtMatrixLayout_t, - Bdesc: cublasLtMatrixLayout_t, - Cdesc: cublasLtMatrixLayout_t, - Ddesc: cublasLtMatrixLayout_t, - algo: *const cublasLtMatmulAlgo_t, - result: *mut cublasLtMatmulHeuristicResult_t, - ) -> cublasStatus_t; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtMatmulAlgoCapAttributes_t { - CUBLASLT_ALGO_CAP_SPLITK_SUPPORT = 0, - CUBLASLT_ALGO_CAP_REDUCTION_SCHEME_MASK = 1, - CUBLASLT_ALGO_CAP_CTA_SWIZZLING_SUPPORT = 2, - CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT = 3, - CUBLASLT_ALGO_CAP_OUT_OF_PLACE_RESULT_SUPPORT = 4, - CUBLASLT_ALGO_CAP_UPLO_SUPPORT = 5, - CUBLASLT_ALGO_CAP_TILE_IDS = 6, - CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX = 7, - CUBLASLT_ALGO_CAP_MATHMODE_IMPL = 8, - CUBLASLT_ALGO_CAP_GAUSSIAN_IMPL = 9, - CUBLASLT_ALGO_CAP_CUSTOM_MEMORY_ORDER = 10, - CUBLASLT_ALGO_CAP_POINTER_MODE_MASK = 11, - CUBLASLT_ALGO_CAP_EPILOGUE_MASK = 12, - CUBLASLT_ALGO_CAP_STAGES_IDS = 13, - CUBLASLT_ALGO_CAP_LD_NEGATIVE = 14, - CUBLASLT_ALGO_CAP_NUMERICAL_IMPL_FLAGS = 15, - CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_A_BYTES = 16, - CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_B_BYTES = 17, - CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_C_BYTES = 18, - CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_D_BYTES = 19, -} -extern "C" { - pub fn cublasLtMatmulAlgoCapGetAttribute( - algo: *const cublasLtMatmulAlgo_t, - attr: cublasLtMatmulAlgoCapAttributes_t, - buf: *mut ::std::os::raw::c_void, - sizeInBytes: usize, - sizeWritten: *mut usize, - ) -> cublasStatus_t; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasLtMatmulAlgoConfigAttributes_t { - CUBLASLT_ALGO_CONFIG_ID = 0, - CUBLASLT_ALGO_CONFIG_TILE_ID = 1, - CUBLASLT_ALGO_CONFIG_SPLITK_NUM = 2, - CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME = 3, - CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING = 4, - CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION = 5, - CUBLASLT_ALGO_CONFIG_STAGES_ID = 6, -} -extern "C" { - pub fn cublasLtMatmulAlgoConfigSetAttribute( - algo: *mut cublasLtMatmulAlgo_t, - attr: 
cublasLtMatmulAlgoConfigAttributes_t, - buf: *const ::std::os::raw::c_void, - sizeInBytes: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtMatmulAlgoConfigGetAttribute( - algo: *const cublasLtMatmulAlgo_t, - attr: cublasLtMatmulAlgoConfigAttributes_t, - buf: *mut ::std::os::raw::c_void, - sizeInBytes: usize, - sizeWritten: *mut usize, - ) -> cublasStatus_t; -} -pub type cublasLtLoggerCallback_t = ::std::option::Option< - unsafe extern "C" fn( - logLevel: ::std::os::raw::c_int, - functionName: *const ::std::os::raw::c_char, - message: *const ::std::os::raw::c_char, - ), ->; -extern "C" { - pub fn cublasLtLoggerSetCallback(callback: cublasLtLoggerCallback_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtLoggerSetFile(file: *mut FILE) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtLoggerOpenFile(logFile: *const ::std::os::raw::c_char) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtLoggerSetLevel(level: ::std::os::raw::c_int) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtLoggerSetMask(mask: ::std::os::raw::c_int) -> cublasStatus_t; -} -extern "C" { - pub fn cublasLtLoggerForceDisable() -> cublasStatus_t; -} diff --git a/crates/cublas_sys/src/cublasXt.rs b/crates/cublas_sys/src/cublasXt.rs deleted file mode 100644 index 2dcf22c6..00000000 --- a/crates/cublas_sys/src/cublasXt.rs +++ /dev/null @@ -1,831 +0,0 @@ -/* automatically generated by rust-bindgen 0.59.2 */ - -#[repr(C)] -#[repr(align(8))] -#[derive(Debug, Copy, Clone)] -pub struct float2 { - pub x: f32, - pub y: f32, -} -#[repr(C)] -#[repr(align(16))] -#[derive(Debug, Copy, Clone)] -pub struct double2 { - pub x: f64, - pub y: f64, -} -pub type cuFloatComplex = float2; -pub type cuDoubleComplex = double2; -pub type cuComplex = cuFloatComplex; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasStatus_t { - CUBLAS_STATUS_SUCCESS = 0, - CUBLAS_STATUS_NOT_INITIALIZED = 1, - CUBLAS_STATUS_ALLOC_FAILED = 3, - CUBLAS_STATUS_INVALID_VALUE = 7, - 
CUBLAS_STATUS_ARCH_MISMATCH = 8, - CUBLAS_STATUS_MAPPING_ERROR = 11, - CUBLAS_STATUS_EXECUTION_FAILED = 13, - CUBLAS_STATUS_INTERNAL_ERROR = 14, - CUBLAS_STATUS_NOT_SUPPORTED = 15, - CUBLAS_STATUS_LICENSE_ERROR = 16, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasFillMode_t { - CUBLAS_FILL_MODE_LOWER = 0, - CUBLAS_FILL_MODE_UPPER = 1, - CUBLAS_FILL_MODE_FULL = 2, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasDiagType_t { - CUBLAS_DIAG_NON_UNIT = 0, - CUBLAS_DIAG_UNIT = 1, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasSideMode_t { - CUBLAS_SIDE_LEFT = 0, - CUBLAS_SIDE_RIGHT = 1, -} -impl cublasOperation_t { - pub const CUBLAS_OP_HERMITAN: cublasOperation_t = cublasOperation_t::CUBLAS_OP_C; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasOperation_t { - CUBLAS_OP_N = 0, - CUBLAS_OP_T = 1, - CUBLAS_OP_C = 2, - CUBLAS_OP_CONJG = 3, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cublasXtContext { - _unused: [u8; 0], -} -pub type cublasXtHandle_t = *mut cublasXtContext; -extern "C" { - pub fn cublasXtCreate(handle: *mut cublasXtHandle_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtDestroy(handle: cublasXtHandle_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtGetNumBoards( - nbDevices: ::std::os::raw::c_int, - deviceId: *mut ::std::os::raw::c_int, - nbBoards: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtMaxBoards(nbGpuBoards: *mut ::std::os::raw::c_int) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtDeviceSelect( - handle: cublasXtHandle_t, - nbDevices: ::std::os::raw::c_int, - deviceId: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtSetBlockDim( - handle: cublasXtHandle_t, - blockDim: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtGetBlockDim( - handle: cublasXtHandle_t, - 
blockDim: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasXtPinnedMemMode_t { - CUBLASXT_PINNING_DISABLED = 0, - CUBLASXT_PINNING_ENABLED = 1, -} -extern "C" { - pub fn cublasXtGetPinningMemMode( - handle: cublasXtHandle_t, - mode: *mut cublasXtPinnedMemMode_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtSetPinningMemMode( - handle: cublasXtHandle_t, - mode: cublasXtPinnedMemMode_t, - ) -> cublasStatus_t; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasXtOpType_t { - CUBLASXT_FLOAT = 0, - CUBLASXT_DOUBLE = 1, - CUBLASXT_COMPLEX = 2, - CUBLASXT_DOUBLECOMPLEX = 3, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasXtBlasOp_t { - CUBLASXT_GEMM = 0, - CUBLASXT_SYRK = 1, - CUBLASXT_HERK = 2, - CUBLASXT_SYMM = 3, - CUBLASXT_HEMM = 4, - CUBLASXT_TRSM = 5, - CUBLASXT_SYR2K = 6, - CUBLASXT_HER2K = 7, - CUBLASXT_SPMM = 8, - CUBLASXT_SYRKX = 9, - CUBLASXT_HERKX = 10, - CUBLASXT_TRMM = 11, - CUBLASXT_ROUTINE_MAX = 12, -} -extern "C" { - pub fn cublasXtSetCpuRoutine( - handle: cublasXtHandle_t, - blasOp: cublasXtBlasOp_t, - type_: cublasXtOpType_t, - blasFunctor: *mut ::std::os::raw::c_void, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtSetCpuRatio( - handle: cublasXtHandle_t, - blasOp: cublasXtBlasOp_t, - type_: cublasXtOpType_t, - ratio: f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtSgemm( - handle: cublasXtHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: usize, - n: usize, - k: usize, - alpha: *const f32, - A: *const f32, - lda: usize, - B: *const f32, - ldb: usize, - beta: *const f32, - C: *mut f32, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtDgemm( - handle: cublasXtHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: usize, - n: usize, - k: usize, - alpha: *const f64, - A: *const f64, - lda: usize, - B: 
*const f64, - ldb: usize, - beta: *const f64, - C: *mut f64, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtCgemm( - handle: cublasXtHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: usize, - n: usize, - k: usize, - alpha: *const cuComplex, - A: *const cuComplex, - lda: usize, - B: *const cuComplex, - ldb: usize, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZgemm( - handle: cublasXtHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: usize, - n: usize, - k: usize, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: usize, - B: *const cuDoubleComplex, - ldb: usize, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtSsyrk( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const f32, - A: *const f32, - lda: usize, - beta: *const f32, - C: *mut f32, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtDsyrk( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const f64, - A: *const f64, - lda: usize, - beta: *const f64, - C: *mut f64, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtCsyrk( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const cuComplex, - A: *const cuComplex, - lda: usize, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZsyrk( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: usize, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - 
pub fn cublasXtCherk( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const f32, - A: *const cuComplex, - lda: usize, - beta: *const f32, - C: *mut cuComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZherk( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const f64, - A: *const cuDoubleComplex, - lda: usize, - beta: *const f64, - C: *mut cuDoubleComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtSsyr2k( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const f32, - A: *const f32, - lda: usize, - B: *const f32, - ldb: usize, - beta: *const f32, - C: *mut f32, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtDsyr2k( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const f64, - A: *const f64, - lda: usize, - B: *const f64, - ldb: usize, - beta: *const f64, - C: *mut f64, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtCsyr2k( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const cuComplex, - A: *const cuComplex, - lda: usize, - B: *const cuComplex, - ldb: usize, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZsyr2k( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: usize, - B: *const cuDoubleComplex, - ldb: usize, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtCherkx( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - 
k: usize, - alpha: *const cuComplex, - A: *const cuComplex, - lda: usize, - B: *const cuComplex, - ldb: usize, - beta: *const f32, - C: *mut cuComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZherkx( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: usize, - B: *const cuDoubleComplex, - ldb: usize, - beta: *const f64, - C: *mut cuDoubleComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtStrsm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: usize, - n: usize, - alpha: *const f32, - A: *const f32, - lda: usize, - B: *mut f32, - ldb: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtDtrsm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: usize, - n: usize, - alpha: *const f64, - A: *const f64, - lda: usize, - B: *mut f64, - ldb: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtCtrsm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: usize, - n: usize, - alpha: *const cuComplex, - A: *const cuComplex, - lda: usize, - B: *mut cuComplex, - ldb: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZtrsm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: usize, - n: usize, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: usize, - B: *mut cuDoubleComplex, - ldb: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtSsymm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: usize, - n: usize, - alpha: *const f32, - A: *const f32, - lda: 
usize, - B: *const f32, - ldb: usize, - beta: *const f32, - C: *mut f32, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtDsymm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: usize, - n: usize, - alpha: *const f64, - A: *const f64, - lda: usize, - B: *const f64, - ldb: usize, - beta: *const f64, - C: *mut f64, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtCsymm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: usize, - n: usize, - alpha: *const cuComplex, - A: *const cuComplex, - lda: usize, - B: *const cuComplex, - ldb: usize, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZsymm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: usize, - n: usize, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: usize, - B: *const cuDoubleComplex, - ldb: usize, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtChemm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: usize, - n: usize, - alpha: *const cuComplex, - A: *const cuComplex, - lda: usize, - B: *const cuComplex, - ldb: usize, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZhemm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: usize, - n: usize, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: usize, - B: *const cuDoubleComplex, - ldb: usize, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtSsyrkx( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const f32, - A: *const f32, - lda: usize, - B: 
*const f32, - ldb: usize, - beta: *const f32, - C: *mut f32, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtDsyrkx( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const f64, - A: *const f64, - lda: usize, - B: *const f64, - ldb: usize, - beta: *const f64, - C: *mut f64, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtCsyrkx( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const cuComplex, - A: *const cuComplex, - lda: usize, - B: *const cuComplex, - ldb: usize, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZsyrkx( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: usize, - B: *const cuDoubleComplex, - ldb: usize, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtCher2k( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const cuComplex, - A: *const cuComplex, - lda: usize, - B: *const cuComplex, - ldb: usize, - beta: *const f32, - C: *mut cuComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZher2k( - handle: cublasXtHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: usize, - k: usize, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: usize, - B: *const cuDoubleComplex, - ldb: usize, - beta: *const f64, - C: *mut cuDoubleComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtSspmm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: usize, - n: usize, - alpha: *const f32, - AP: *const f32, - B: *const f32, - ldb: usize, - beta: 
*const f32, - C: *mut f32, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtDspmm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: usize, - n: usize, - alpha: *const f64, - AP: *const f64, - B: *const f64, - ldb: usize, - beta: *const f64, - C: *mut f64, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtCspmm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: usize, - n: usize, - alpha: *const cuComplex, - AP: *const cuComplex, - B: *const cuComplex, - ldb: usize, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZspmm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: usize, - n: usize, - alpha: *const cuDoubleComplex, - AP: *const cuDoubleComplex, - B: *const cuDoubleComplex, - ldb: usize, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtStrmm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: usize, - n: usize, - alpha: *const f32, - A: *const f32, - lda: usize, - B: *const f32, - ldb: usize, - C: *mut f32, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtDtrmm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: usize, - n: usize, - alpha: *const f64, - A: *const f64, - lda: usize, - B: *const f64, - ldb: usize, - C: *mut f64, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtCtrmm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: usize, - n: usize, - alpha: *const cuComplex, - A: *const cuComplex, - lda: usize, - B: *const cuComplex, - ldb: usize, - C: *mut 
cuComplex, - ldc: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXtZtrmm( - handle: cublasXtHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: usize, - n: usize, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: usize, - B: *const cuDoubleComplex, - ldb: usize, - C: *mut cuDoubleComplex, - ldc: usize, - ) -> cublasStatus_t; -} diff --git a/crates/cublas_sys/src/cublas_v2.rs b/crates/cublas_sys/src/cublas_v2.rs deleted file mode 100644 index e43d7525..00000000 --- a/crates/cublas_sys/src/cublas_v2.rs +++ /dev/null @@ -1,3779 +0,0 @@ -/* automatically generated by rust-bindgen 0.59.2 */ - -pub const CUBLAS_VER_MAJOR: u32 = 11; -pub const CUBLAS_VER_MINOR: u32 = 7; -pub const CUBLAS_VER_PATCH: u32 = 3; -pub const CUBLAS_VER_BUILD: u32 = 1; -pub const CUBLAS_VERSION: u32 = 11703; -#[repr(C)] -#[repr(align(8))] -#[derive(Debug, Copy, Clone)] -pub struct float2 { - pub x: f32, - pub y: f32, -} -#[repr(C)] -#[repr(align(16))] -#[derive(Debug, Copy, Clone)] -pub struct double2 { - pub x: f64, - pub y: f64, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUstream_st { - _unused: [u8; 0], -} -pub type cudaStream_t = *mut CUstream_st; -pub type cuFloatComplex = float2; -pub type cuDoubleComplex = double2; -pub type cuComplex = cuFloatComplex; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudaDataType_t { - CUDA_R_16F = 2, - CUDA_C_16F = 6, - CUDA_R_16BF = 14, - CUDA_C_16BF = 15, - CUDA_R_32F = 0, - CUDA_C_32F = 4, - CUDA_R_64F = 1, - CUDA_C_64F = 5, - CUDA_R_4I = 16, - CUDA_C_4I = 17, - CUDA_R_4U = 18, - CUDA_C_4U = 19, - CUDA_R_8I = 3, - CUDA_C_8I = 7, - CUDA_R_8U = 8, - CUDA_C_8U = 9, - CUDA_R_16I = 20, - CUDA_C_16I = 21, - CUDA_R_16U = 22, - CUDA_C_16U = 23, - CUDA_R_32I = 10, - CUDA_C_32I = 11, - CUDA_R_32U = 12, - CUDA_C_32U = 13, - CUDA_R_64I = 24, - CUDA_C_64I = 25, - CUDA_R_64U = 26, - CUDA_C_64U = 27, -} -pub use 
self::cudaDataType_t as cudaDataType; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum libraryPropertyType_t { - MAJOR_VERSION = 0, - MINOR_VERSION = 1, - PATCH_LEVEL = 2, -} -pub use self::libraryPropertyType_t as libraryPropertyType; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasStatus_t { - CUBLAS_STATUS_SUCCESS = 0, - CUBLAS_STATUS_NOT_INITIALIZED = 1, - CUBLAS_STATUS_ALLOC_FAILED = 3, - CUBLAS_STATUS_INVALID_VALUE = 7, - CUBLAS_STATUS_ARCH_MISMATCH = 8, - CUBLAS_STATUS_MAPPING_ERROR = 11, - CUBLAS_STATUS_EXECUTION_FAILED = 13, - CUBLAS_STATUS_INTERNAL_ERROR = 14, - CUBLAS_STATUS_NOT_SUPPORTED = 15, - CUBLAS_STATUS_LICENSE_ERROR = 16, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasFillMode_t { - CUBLAS_FILL_MODE_LOWER = 0, - CUBLAS_FILL_MODE_UPPER = 1, - CUBLAS_FILL_MODE_FULL = 2, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasDiagType_t { - CUBLAS_DIAG_NON_UNIT = 0, - CUBLAS_DIAG_UNIT = 1, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasSideMode_t { - CUBLAS_SIDE_LEFT = 0, - CUBLAS_SIDE_RIGHT = 1, -} -impl cublasOperation_t { - pub const CUBLAS_OP_HERMITAN: cublasOperation_t = cublasOperation_t::CUBLAS_OP_C; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasOperation_t { - CUBLAS_OP_N = 0, - CUBLAS_OP_T = 1, - CUBLAS_OP_C = 2, - CUBLAS_OP_CONJG = 3, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasPointerMode_t { - CUBLAS_POINTER_MODE_HOST = 0, - CUBLAS_POINTER_MODE_DEVICE = 1, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasAtomicsMode_t { - CUBLAS_ATOMICS_NOT_ALLOWED = 0, - CUBLAS_ATOMICS_ALLOWED = 1, -} -impl cublasGemmAlgo_t { - pub const CUBLAS_GEMM_DEFAULT: cublasGemmAlgo_t = cublasGemmAlgo_t::CUBLAS_GEMM_DFALT; -} -impl cublasGemmAlgo_t { - pub const 
CUBLAS_GEMM_DFALT_TENSOR_OP: cublasGemmAlgo_t = - cublasGemmAlgo_t::CUBLAS_GEMM_DEFAULT_TENSOR_OP; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasGemmAlgo_t { - CUBLAS_GEMM_DFALT = -1, - CUBLAS_GEMM_ALGO0 = 0, - CUBLAS_GEMM_ALGO1 = 1, - CUBLAS_GEMM_ALGO2 = 2, - CUBLAS_GEMM_ALGO3 = 3, - CUBLAS_GEMM_ALGO4 = 4, - CUBLAS_GEMM_ALGO5 = 5, - CUBLAS_GEMM_ALGO6 = 6, - CUBLAS_GEMM_ALGO7 = 7, - CUBLAS_GEMM_ALGO8 = 8, - CUBLAS_GEMM_ALGO9 = 9, - CUBLAS_GEMM_ALGO10 = 10, - CUBLAS_GEMM_ALGO11 = 11, - CUBLAS_GEMM_ALGO12 = 12, - CUBLAS_GEMM_ALGO13 = 13, - CUBLAS_GEMM_ALGO14 = 14, - CUBLAS_GEMM_ALGO15 = 15, - CUBLAS_GEMM_ALGO16 = 16, - CUBLAS_GEMM_ALGO17 = 17, - CUBLAS_GEMM_ALGO18 = 18, - CUBLAS_GEMM_ALGO19 = 19, - CUBLAS_GEMM_ALGO20 = 20, - CUBLAS_GEMM_ALGO21 = 21, - CUBLAS_GEMM_ALGO22 = 22, - CUBLAS_GEMM_ALGO23 = 23, - CUBLAS_GEMM_DEFAULT_TENSOR_OP = 99, - CUBLAS_GEMM_ALGO0_TENSOR_OP = 100, - CUBLAS_GEMM_ALGO1_TENSOR_OP = 101, - CUBLAS_GEMM_ALGO2_TENSOR_OP = 102, - CUBLAS_GEMM_ALGO3_TENSOR_OP = 103, - CUBLAS_GEMM_ALGO4_TENSOR_OP = 104, - CUBLAS_GEMM_ALGO5_TENSOR_OP = 105, - CUBLAS_GEMM_ALGO6_TENSOR_OP = 106, - CUBLAS_GEMM_ALGO7_TENSOR_OP = 107, - CUBLAS_GEMM_ALGO8_TENSOR_OP = 108, - CUBLAS_GEMM_ALGO9_TENSOR_OP = 109, - CUBLAS_GEMM_ALGO10_TENSOR_OP = 110, - CUBLAS_GEMM_ALGO11_TENSOR_OP = 111, - CUBLAS_GEMM_ALGO12_TENSOR_OP = 112, - CUBLAS_GEMM_ALGO13_TENSOR_OP = 113, - CUBLAS_GEMM_ALGO14_TENSOR_OP = 114, - CUBLAS_GEMM_ALGO15_TENSOR_OP = 115, -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasMath_t { - CUBLAS_DEFAULT_MATH = 0, - CUBLAS_TENSOR_OP_MATH = 1, - CUBLAS_PEDANTIC_MATH = 2, - CUBLAS_TF32_TENSOR_OP_MATH = 3, - CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION = 16, -} -pub use self::cudaDataType as cublasDataType_t; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cublasComputeType_t { - CUBLAS_COMPUTE_16F = 64, - CUBLAS_COMPUTE_16F_PEDANTIC = 65, - CUBLAS_COMPUTE_32F = 68, - 
CUBLAS_COMPUTE_32F_PEDANTIC = 69, - CUBLAS_COMPUTE_32F_FAST_16F = 74, - CUBLAS_COMPUTE_32F_FAST_16BF = 75, - CUBLAS_COMPUTE_32F_FAST_TF32 = 77, - CUBLAS_COMPUTE_64F = 70, - CUBLAS_COMPUTE_64F_PEDANTIC = 71, - CUBLAS_COMPUTE_32I = 72, - CUBLAS_COMPUTE_32I_PEDANTIC = 73, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cublasContext { - _unused: [u8; 0], -} -pub type cublasHandle_t = *mut cublasContext; -extern "C" { - pub fn cublasCreate_v2(handle: *mut cublasHandle_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDestroy_v2(handle: cublasHandle_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetVersion_v2( - handle: cublasHandle_t, - version: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetProperty( - type_: libraryPropertyType, - value: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetCudartVersion() -> usize; -} -extern "C" { - pub fn cublasSetWorkspace_v2( - handle: cublasHandle_t, - workspace: *mut ::std::os::raw::c_void, - workspaceSizeInBytes: usize, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSetStream_v2(handle: cublasHandle_t, streamId: cudaStream_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetStream_v2( - handle: cublasHandle_t, - streamId: *mut cudaStream_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetPointerMode_v2( - handle: cublasHandle_t, - mode: *mut cublasPointerMode_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSetPointerMode_v2( - handle: cublasHandle_t, - mode: cublasPointerMode_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetAtomicsMode( - handle: cublasHandle_t, - mode: *mut cublasAtomicsMode_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSetAtomicsMode( - handle: cublasHandle_t, - mode: cublasAtomicsMode_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetMathMode(handle: cublasHandle_t, mode: *mut cublasMath_t) -> cublasStatus_t; -} -extern "C" { - pub fn 
cublasSetMathMode(handle: cublasHandle_t, mode: cublasMath_t) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetSmCountTarget( - handle: cublasHandle_t, - smCountTarget: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSetSmCountTarget( - handle: cublasHandle_t, - smCountTarget: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetStatusName(status: cublasStatus_t) -> *const ::std::os::raw::c_char; -} -extern "C" { - pub fn cublasGetStatusString(status: cublasStatus_t) -> *const ::std::os::raw::c_char; -} -pub type cublasLogCallback = - ::std::option::Option; -extern "C" { - pub fn cublasLoggerConfigure( - logIsOn: ::std::os::raw::c_int, - logToStdOut: ::std::os::raw::c_int, - logToStdErr: ::std::os::raw::c_int, - logFileName: *const ::std::os::raw::c_char, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSetLoggerCallback(userCallback: cublasLogCallback) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetLoggerCallback(userCallback: *mut cublasLogCallback) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSetVector( - n: ::std::os::raw::c_int, - elemSize: ::std::os::raw::c_int, - x: *const ::std::os::raw::c_void, - incx: ::std::os::raw::c_int, - devicePtr: *mut ::std::os::raw::c_void, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetVector( - n: ::std::os::raw::c_int, - elemSize: ::std::os::raw::c_int, - x: *const ::std::os::raw::c_void, - incx: ::std::os::raw::c_int, - y: *mut ::std::os::raw::c_void, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSetMatrix( - rows: ::std::os::raw::c_int, - cols: ::std::os::raw::c_int, - elemSize: ::std::os::raw::c_int, - A: *const ::std::os::raw::c_void, - lda: ::std::os::raw::c_int, - B: *mut ::std::os::raw::c_void, - ldb: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetMatrix( - rows: ::std::os::raw::c_int, - cols: ::std::os::raw::c_int, - 
elemSize: ::std::os::raw::c_int, - A: *const ::std::os::raw::c_void, - lda: ::std::os::raw::c_int, - B: *mut ::std::os::raw::c_void, - ldb: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSetVectorAsync( - n: ::std::os::raw::c_int, - elemSize: ::std::os::raw::c_int, - hostPtr: *const ::std::os::raw::c_void, - incx: ::std::os::raw::c_int, - devicePtr: *mut ::std::os::raw::c_void, - incy: ::std::os::raw::c_int, - stream: cudaStream_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetVectorAsync( - n: ::std::os::raw::c_int, - elemSize: ::std::os::raw::c_int, - devicePtr: *const ::std::os::raw::c_void, - incx: ::std::os::raw::c_int, - hostPtr: *mut ::std::os::raw::c_void, - incy: ::std::os::raw::c_int, - stream: cudaStream_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSetMatrixAsync( - rows: ::std::os::raw::c_int, - cols: ::std::os::raw::c_int, - elemSize: ::std::os::raw::c_int, - A: *const ::std::os::raw::c_void, - lda: ::std::os::raw::c_int, - B: *mut ::std::os::raw::c_void, - ldb: ::std::os::raw::c_int, - stream: cudaStream_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGetMatrixAsync( - rows: ::std::os::raw::c_int, - cols: ::std::os::raw::c_int, - elemSize: ::std::os::raw::c_int, - A: *const ::std::os::raw::c_void, - lda: ::std::os::raw::c_int, - B: *mut ::std::os::raw::c_void, - ldb: ::std::os::raw::c_int, - stream: cudaStream_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasXerbla(srName: *const ::std::os::raw::c_char, info: ::std::os::raw::c_int); -} -extern "C" { - pub fn cublasNrm2Ex( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const ::std::os::raw::c_void, - xType: cudaDataType, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_void, - resultType: cudaDataType, - executionType: cudaDataType, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSnrm2_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f32, - incx: ::std::os::raw::c_int, - 
result: *mut f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDnrm2_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f64, - incx: ::std::os::raw::c_int, - result: *mut f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasScnrm2_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - result: *mut f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDznrm2_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - result: *mut f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDotEx( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const ::std::os::raw::c_void, - xType: cudaDataType, - incx: ::std::os::raw::c_int, - y: *const ::std::os::raw::c_void, - yType: cudaDataType, - incy: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_void, - resultType: cudaDataType, - executionType: cudaDataType, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDotcEx( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const ::std::os::raw::c_void, - xType: cudaDataType, - incx: ::std::os::raw::c_int, - y: *const ::std::os::raw::c_void, - yType: cudaDataType, - incy: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_void, - resultType: cudaDataType, - executionType: cudaDataType, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSdot_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f32, - incx: ::std::os::raw::c_int, - y: *const f32, - incy: ::std::os::raw::c_int, - result: *mut f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDdot_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f64, - incx: ::std::os::raw::c_int, - y: *const f64, - incy: ::std::os::raw::c_int, - result: *mut f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCdotu_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuComplex, - 
incx: ::std::os::raw::c_int, - y: *const cuComplex, - incy: ::std::os::raw::c_int, - result: *mut cuComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCdotc_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - y: *const cuComplex, - incy: ::std::os::raw::c_int, - result: *mut cuComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZdotu_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *const cuDoubleComplex, - incy: ::std::os::raw::c_int, - result: *mut cuDoubleComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZdotc_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *const cuDoubleComplex, - incy: ::std::os::raw::c_int, - result: *mut cuDoubleComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasScalEx( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const ::std::os::raw::c_void, - alphaType: cudaDataType, - x: *mut ::std::os::raw::c_void, - xType: cudaDataType, - incx: ::std::os::raw::c_int, - executionType: cudaDataType, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSscal_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const f32, - x: *mut f32, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDscal_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const f64, - x: *mut f64, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCscal_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - x: *mut cuComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCsscal_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const f32, - x: *mut cuComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} 
-extern "C" { - pub fn cublasZscal_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - x: *mut cuDoubleComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZdscal_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const f64, - x: *mut cuDoubleComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasAxpyEx( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const ::std::os::raw::c_void, - alphaType: cudaDataType, - x: *const ::std::os::raw::c_void, - xType: cudaDataType, - incx: ::std::os::raw::c_int, - y: *mut ::std::os::raw::c_void, - yType: cudaDataType, - incy: ::std::os::raw::c_int, - executiontype: cudaDataType, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSaxpy_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const f32, - x: *const f32, - incx: ::std::os::raw::c_int, - y: *mut f32, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDaxpy_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const f64, - x: *const f64, - incx: ::std::os::raw::c_int, - y: *mut f64, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCaxpy_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - y: *mut cuComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZaxpy_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *mut cuDoubleComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCopyEx( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const ::std::os::raw::c_void, - xType: cudaDataType, - incx: ::std::os::raw::c_int, - y: *mut 
::std::os::raw::c_void, - yType: cudaDataType, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasScopy_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f32, - incx: ::std::os::raw::c_int, - y: *mut f32, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDcopy_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f64, - incx: ::std::os::raw::c_int, - y: *mut f64, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCcopy_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - y: *mut cuComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZcopy_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *mut cuDoubleComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSswap_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut f32, - incx: ::std::os::raw::c_int, - y: *mut f32, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDswap_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut f64, - incx: ::std::os::raw::c_int, - y: *mut f64, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCswap_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut cuComplex, - incx: ::std::os::raw::c_int, - y: *mut cuComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZswap_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *mut cuDoubleComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSwapEx( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut ::std::os::raw::c_void, - 
xType: cudaDataType, - incx: ::std::os::raw::c_int, - y: *mut ::std::os::raw::c_void, - yType: cudaDataType, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasIsamax_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f32, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasIdamax_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f64, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasIcamax_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasIzamax_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasIamaxEx( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const ::std::os::raw::c_void, - xType: cudaDataType, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasIsamin_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f32, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasIdamin_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f64, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasIcamin_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasIzamin_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const 
cuDoubleComplex, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasIaminEx( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const ::std::os::raw::c_void, - xType: cudaDataType, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasAsumEx( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const ::std::os::raw::c_void, - xType: cudaDataType, - incx: ::std::os::raw::c_int, - result: *mut ::std::os::raw::c_void, - resultType: cudaDataType, - executiontype: cudaDataType, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSasum_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f32, - incx: ::std::os::raw::c_int, - result: *mut f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDasum_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const f64, - incx: ::std::os::raw::c_int, - result: *mut f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasScasum_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - result: *mut f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDzasum_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - result: *mut f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSrot_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut f32, - incx: ::std::os::raw::c_int, - y: *mut f32, - incy: ::std::os::raw::c_int, - c: *const f32, - s: *const f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDrot_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut f64, - incx: ::std::os::raw::c_int, - y: *mut f64, - incy: ::std::os::raw::c_int, - c: *const f64, - s: *const f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCrot_v2( - handle: cublasHandle_t, - n: 
::std::os::raw::c_int, - x: *mut cuComplex, - incx: ::std::os::raw::c_int, - y: *mut cuComplex, - incy: ::std::os::raw::c_int, - c: *const f32, - s: *const cuComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCsrot_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut cuComplex, - incx: ::std::os::raw::c_int, - y: *mut cuComplex, - incy: ::std::os::raw::c_int, - c: *const f32, - s: *const f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZrot_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *mut cuDoubleComplex, - incy: ::std::os::raw::c_int, - c: *const f64, - s: *const cuDoubleComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZdrot_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *mut cuDoubleComplex, - incy: ::std::os::raw::c_int, - c: *const f64, - s: *const f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasRotEx( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut ::std::os::raw::c_void, - xType: cudaDataType, - incx: ::std::os::raw::c_int, - y: *mut ::std::os::raw::c_void, - yType: cudaDataType, - incy: ::std::os::raw::c_int, - c: *const ::std::os::raw::c_void, - s: *const ::std::os::raw::c_void, - csType: cudaDataType, - executiontype: cudaDataType, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSrotg_v2( - handle: cublasHandle_t, - a: *mut f32, - b: *mut f32, - c: *mut f32, - s: *mut f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDrotg_v2( - handle: cublasHandle_t, - a: *mut f64, - b: *mut f64, - c: *mut f64, - s: *mut f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCrotg_v2( - handle: cublasHandle_t, - a: *mut cuComplex, - b: *mut cuComplex, - c: *mut f32, - s: *mut cuComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZrotg_v2( - handle: cublasHandle_t, - a: *mut cuDoubleComplex, - b: *mut 
cuDoubleComplex, - c: *mut f64, - s: *mut cuDoubleComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasRotgEx( - handle: cublasHandle_t, - a: *mut ::std::os::raw::c_void, - b: *mut ::std::os::raw::c_void, - abType: cudaDataType, - c: *mut ::std::os::raw::c_void, - s: *mut ::std::os::raw::c_void, - csType: cudaDataType, - executiontype: cudaDataType, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSrotm_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut f32, - incx: ::std::os::raw::c_int, - y: *mut f32, - incy: ::std::os::raw::c_int, - param: *const f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDrotm_v2( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut f64, - incx: ::std::os::raw::c_int, - y: *mut f64, - incy: ::std::os::raw::c_int, - param: *const f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasRotmEx( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - x: *mut ::std::os::raw::c_void, - xType: cudaDataType, - incx: ::std::os::raw::c_int, - y: *mut ::std::os::raw::c_void, - yType: cudaDataType, - incy: ::std::os::raw::c_int, - param: *const ::std::os::raw::c_void, - paramType: cudaDataType, - executiontype: cudaDataType, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSrotmg_v2( - handle: cublasHandle_t, - d1: *mut f32, - d2: *mut f32, - x1: *mut f32, - y1: *const f32, - param: *mut f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDrotmg_v2( - handle: cublasHandle_t, - d1: *mut f64, - d2: *mut f64, - x1: *mut f64, - y1: *const f64, - param: *mut f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasRotmgEx( - handle: cublasHandle_t, - d1: *mut ::std::os::raw::c_void, - d1Type: cudaDataType, - d2: *mut ::std::os::raw::c_void, - d2Type: cudaDataType, - x1: *mut ::std::os::raw::c_void, - x1Type: cudaDataType, - y1: *const ::std::os::raw::c_void, - y1Type: cudaDataType, - param: *mut ::std::os::raw::c_void, - paramType: cudaDataType, - executiontype: cudaDataType, - ) 
-> cublasStatus_t; -} -extern "C" { - pub fn cublasSgemv_v2( - handle: cublasHandle_t, - trans: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - x: *const f32, - incx: ::std::os::raw::c_int, - beta: *const f32, - y: *mut f32, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDgemv_v2( - handle: cublasHandle_t, - trans: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - x: *const f64, - incx: ::std::os::raw::c_int, - beta: *const f64, - y: *mut f64, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgemv_v2( - handle: cublasHandle_t, - trans: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - beta: *const cuComplex, - y: *mut cuComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgemv_v2( - handle: cublasHandle_t, - trans: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - y: *mut cuDoubleComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSgbmv_v2( - handle: cublasHandle_t, - trans: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - kl: ::std::os::raw::c_int, - ku: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - x: *const f32, - incx: ::std::os::raw::c_int, - beta: *const f32, - y: *mut f32, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDgbmv_v2( - 
handle: cublasHandle_t, - trans: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - kl: ::std::os::raw::c_int, - ku: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - x: *const f64, - incx: ::std::os::raw::c_int, - beta: *const f64, - y: *mut f64, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgbmv_v2( - handle: cublasHandle_t, - trans: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - kl: ::std::os::raw::c_int, - ku: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - beta: *const cuComplex, - y: *mut cuComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgbmv_v2( - handle: cublasHandle_t, - trans: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - kl: ::std::os::raw::c_int, - ku: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - y: *mut cuDoubleComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasStrmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - A: *const f32, - lda: ::std::os::raw::c_int, - x: *mut f32, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDtrmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - A: *const f64, - lda: ::std::os::raw::c_int, - x: *mut f64, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCtrmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - 
diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - x: *mut cuComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZtrmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - x: *mut cuDoubleComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasStbmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - A: *const f32, - lda: ::std::os::raw::c_int, - x: *mut f32, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDtbmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - A: *const f64, - lda: ::std::os::raw::c_int, - x: *mut f64, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCtbmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - x: *mut cuComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZtbmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - x: *mut cuDoubleComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasStpmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - AP: 
*const f32, - x: *mut f32, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDtpmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - AP: *const f64, - x: *mut f64, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCtpmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - AP: *const cuComplex, - x: *mut cuComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZtpmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - AP: *const cuDoubleComplex, - x: *mut cuDoubleComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasStrsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - A: *const f32, - lda: ::std::os::raw::c_int, - x: *mut f32, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDtrsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - A: *const f64, - lda: ::std::os::raw::c_int, - x: *mut f64, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCtrsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - x: *mut cuComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZtrsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - A: *const cuDoubleComplex, 
- lda: ::std::os::raw::c_int, - x: *mut cuDoubleComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasStpsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - AP: *const f32, - x: *mut f32, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDtpsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - AP: *const f64, - x: *mut f64, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCtpsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - AP: *const cuComplex, - x: *mut cuComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZtpsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - AP: *const cuDoubleComplex, - x: *mut cuDoubleComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasStbsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - A: *const f32, - lda: ::std::os::raw::c_int, - x: *mut f32, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDtbsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - A: *const f64, - lda: ::std::os::raw::c_int, - x: *mut f64, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCtbsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: 
::std::os::raw::c_int, - k: ::std::os::raw::c_int, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - x: *mut cuComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZtbsv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - x: *mut cuDoubleComplex, - incx: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSsymv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - x: *const f32, - incx: ::std::os::raw::c_int, - beta: *const f32, - y: *mut f32, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDsymv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - x: *const f64, - incx: ::std::os::raw::c_int, - beta: *const f64, - y: *mut f64, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCsymv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - beta: *const cuComplex, - y: *mut cuComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZsymv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - y: *mut cuDoubleComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasChemv_v2( - handle: cublasHandle_t, - uplo: 
cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - beta: *const cuComplex, - y: *mut cuComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZhemv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - y: *mut cuDoubleComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSsbmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - x: *const f32, - incx: ::std::os::raw::c_int, - beta: *const f32, - y: *mut f32, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDsbmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - x: *const f64, - incx: ::std::os::raw::c_int, - beta: *const f64, - y: *mut f64, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasChbmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - beta: *const cuComplex, - y: *mut cuComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZhbmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: 
::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - y: *mut cuDoubleComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSspmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f32, - AP: *const f32, - x: *const f32, - incx: ::std::os::raw::c_int, - beta: *const f32, - y: *mut f32, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDspmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f64, - AP: *const f64, - x: *const f64, - incx: ::std::os::raw::c_int, - beta: *const f64, - y: *mut f64, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasChpmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - AP: *const cuComplex, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - beta: *const cuComplex, - y: *mut cuComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZhpmv_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - AP: *const cuDoubleComplex, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - y: *mut cuDoubleComplex, - incy: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSger_v2( - handle: cublasHandle_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f32, - x: *const f32, - incx: ::std::os::raw::c_int, - y: *const f32, - incy: ::std::os::raw::c_int, - A: *mut f32, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDger_v2( - handle: cublasHandle_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f64, - x: *const f64, - incx: ::std::os::raw::c_int, - y: *const f64, - 
incy: ::std::os::raw::c_int, - A: *mut f64, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgeru_v2( - handle: cublasHandle_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - y: *const cuComplex, - incy: ::std::os::raw::c_int, - A: *mut cuComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgerc_v2( - handle: cublasHandle_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - y: *const cuComplex, - incy: ::std::os::raw::c_int, - A: *mut cuComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgeru_v2( - handle: cublasHandle_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *const cuDoubleComplex, - incy: ::std::os::raw::c_int, - A: *mut cuDoubleComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgerc_v2( - handle: cublasHandle_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *const cuDoubleComplex, - incy: ::std::os::raw::c_int, - A: *mut cuDoubleComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSsyr_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f32, - x: *const f32, - incx: ::std::os::raw::c_int, - A: *mut f32, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDsyr_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f64, - x: *const f64, - incx: ::std::os::raw::c_int, - A: *mut f64, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} 
-extern "C" { - pub fn cublasCsyr_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - A: *mut cuComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZsyr_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - A: *mut cuDoubleComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCher_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f32, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - A: *mut cuComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZher_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f64, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - A: *mut cuDoubleComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSspr_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f32, - x: *const f32, - incx: ::std::os::raw::c_int, - AP: *mut f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDspr_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f64, - x: *const f64, - incx: ::std::os::raw::c_int, - AP: *mut f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasChpr_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f32, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - AP: *mut cuComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZhpr_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f64, - x: *const cuDoubleComplex, - incx: 
::std::os::raw::c_int, - AP: *mut cuDoubleComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSsyr2_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f32, - x: *const f32, - incx: ::std::os::raw::c_int, - y: *const f32, - incy: ::std::os::raw::c_int, - A: *mut f32, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDsyr2_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f64, - x: *const f64, - incx: ::std::os::raw::c_int, - y: *const f64, - incy: ::std::os::raw::c_int, - A: *mut f64, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCsyr2_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - y: *const cuComplex, - incy: ::std::os::raw::c_int, - A: *mut cuComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZsyr2_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *const cuDoubleComplex, - incy: ::std::os::raw::c_int, - A: *mut cuDoubleComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCher2_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - y: *const cuComplex, - incy: ::std::os::raw::c_int, - A: *mut cuComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZher2_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *const cuDoubleComplex, - incy: ::std::os::raw::c_int, - A: *mut 
cuDoubleComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSspr2_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f32, - x: *const f32, - incx: ::std::os::raw::c_int, - y: *const f32, - incy: ::std::os::raw::c_int, - AP: *mut f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDspr2_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const f64, - x: *const f64, - incx: ::std::os::raw::c_int, - y: *const f64, - incy: ::std::os::raw::c_int, - AP: *mut f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasChpr2_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - y: *const cuComplex, - incy: ::std::os::raw::c_int, - AP: *mut cuComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZhpr2_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - y: *const cuDoubleComplex, - incy: ::std::os::raw::c_int, - AP: *mut cuDoubleComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSgemm_v2( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - B: *const f32, - ldb: ::std::os::raw::c_int, - beta: *const f32, - C: *mut f32, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDgemm_v2( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - B: *const f64, - ldb: ::std::os::raw::c_int, - beta: 
*const f64, - C: *mut f64, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgemm_v2( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgemm3m( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgemm3mEx( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const ::std::os::raw::c_void, - Atype: cudaDataType, - lda: ::std::os::raw::c_int, - B: *const ::std::os::raw::c_void, - Btype: cudaDataType, - ldb: ::std::os::raw::c_int, - beta: *const cuComplex, - C: *mut ::std::os::raw::c_void, - Ctype: cudaDataType, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgemm_v2( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - B: *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} 
-extern "C" { - pub fn cublasZgemm3m( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - B: *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSgemmEx( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f32, - A: *const ::std::os::raw::c_void, - Atype: cudaDataType, - lda: ::std::os::raw::c_int, - B: *const ::std::os::raw::c_void, - Btype: cudaDataType, - ldb: ::std::os::raw::c_int, - beta: *const f32, - C: *mut ::std::os::raw::c_void, - Ctype: cudaDataType, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGemmEx( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const ::std::os::raw::c_void, - A: *const ::std::os::raw::c_void, - Atype: cudaDataType, - lda: ::std::os::raw::c_int, - B: *const ::std::os::raw::c_void, - Btype: cudaDataType, - ldb: ::std::os::raw::c_int, - beta: *const ::std::os::raw::c_void, - C: *mut ::std::os::raw::c_void, - Ctype: cudaDataType, - ldc: ::std::os::raw::c_int, - computeType: cublasComputeType_t, - algo: cublasGemmAlgo_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgemmEx( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const ::std::os::raw::c_void, - Atype: cudaDataType, - lda: ::std::os::raw::c_int, - B: *const 
::std::os::raw::c_void, - Btype: cudaDataType, - ldb: ::std::os::raw::c_int, - beta: *const cuComplex, - C: *mut ::std::os::raw::c_void, - Ctype: cudaDataType, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasUint8gemmBias( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - transc: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - A: *const ::std::os::raw::c_uchar, - A_bias: ::std::os::raw::c_int, - lda: ::std::os::raw::c_int, - B: *const ::std::os::raw::c_uchar, - B_bias: ::std::os::raw::c_int, - ldb: ::std::os::raw::c_int, - C: *mut ::std::os::raw::c_uchar, - C_bias: ::std::os::raw::c_int, - ldc: ::std::os::raw::c_int, - C_mult: ::std::os::raw::c_int, - C_shift: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSsyrk_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - beta: *const f32, - C: *mut f32, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDsyrk_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - beta: *const f64, - C: *mut f64, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCsyrk_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZsyrk_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: 
::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCsyrkEx( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const ::std::os::raw::c_void, - Atype: cudaDataType, - lda: ::std::os::raw::c_int, - beta: *const cuComplex, - C: *mut ::std::os::raw::c_void, - Ctype: cudaDataType, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCsyrk3mEx( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const ::std::os::raw::c_void, - Atype: cudaDataType, - lda: ::std::os::raw::c_int, - beta: *const cuComplex, - C: *mut ::std::os::raw::c_void, - Ctype: cudaDataType, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCherk_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f32, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - beta: *const f32, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZherk_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f64, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - beta: *const f64, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCherkEx( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - 
alpha: *const f32, - A: *const ::std::os::raw::c_void, - Atype: cudaDataType, - lda: ::std::os::raw::c_int, - beta: *const f32, - C: *mut ::std::os::raw::c_void, - Ctype: cudaDataType, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCherk3mEx( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f32, - A: *const ::std::os::raw::c_void, - Atype: cudaDataType, - lda: ::std::os::raw::c_int, - beta: *const f32, - C: *mut ::std::os::raw::c_void, - Ctype: cudaDataType, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSsyr2k_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - B: *const f32, - ldb: ::std::os::raw::c_int, - beta: *const f32, - C: *mut f32, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDsyr2k_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - B: *const f64, - ldb: ::std::os::raw::c_int, - beta: *const f64, - C: *mut f64, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCsyr2k_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZsyr2k_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: 
::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - B: *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCher2k_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - beta: *const f32, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZher2k_v2( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - B: *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - beta: *const f64, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSsyrkx( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - B: *const f32, - ldb: ::std::os::raw::c_int, - beta: *const f32, - C: *mut f32, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDsyrkx( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - B: *const f64, - ldb: ::std::os::raw::c_int, - beta: *const f64, - C: *mut f64, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCsyrkx( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, 
- n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZsyrkx( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - B: *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCherkx( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - beta: *const f32, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZherkx( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - B: *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - beta: *const f64, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSsymm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - B: *const f32, - ldb: ::std::os::raw::c_int, - beta: *const f32, - C: *mut f32, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDsymm_v2( - handle: 
cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - B: *const f64, - ldb: ::std::os::raw::c_int, - beta: *const f64, - C: *mut f64, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCsymm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZsymm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - B: *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasChemm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZhemm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - B: *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, 
- ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasStrsm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - B: *mut f32, - ldb: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDtrsm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - B: *mut f64, - ldb: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCtrsm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - B: *mut cuComplex, - ldb: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZtrsm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - B: *mut cuDoubleComplex, - ldb: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasStrmm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - B: *const f32, - ldb: ::std::os::raw::c_int, - C: *mut f32, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDtrmm_v2( - 
handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - B: *const f64, - ldb: ::std::os::raw::c_int, - C: *mut f64, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCtrmm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZtrmm_v2( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - B: *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSgemmBatched( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f32, - Aarray: *const *const f32, - lda: ::std::os::raw::c_int, - Barray: *const *const f32, - ldb: ::std::os::raw::c_int, - beta: *const f32, - Carray: *const *mut f32, - ldc: ::std::os::raw::c_int, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDgemmBatched( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f64, - 
Aarray: *const *const f64, - lda: ::std::os::raw::c_int, - Barray: *const *const f64, - ldb: ::std::os::raw::c_int, - beta: *const f64, - Carray: *const *mut f64, - ldc: ::std::os::raw::c_int, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgemmBatched( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - Aarray: *const *const cuComplex, - lda: ::std::os::raw::c_int, - Barray: *const *const cuComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuComplex, - Carray: *const *mut cuComplex, - ldc: ::std::os::raw::c_int, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgemm3mBatched( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - Aarray: *const *const cuComplex, - lda: ::std::os::raw::c_int, - Barray: *const *const cuComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuComplex, - Carray: *const *mut cuComplex, - ldc: ::std::os::raw::c_int, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgemmBatched( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - Aarray: *const *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - Barray: *const *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - Carray: *const *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGemmBatchedEx( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: 
::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const ::std::os::raw::c_void, - Aarray: *const *const ::std::os::raw::c_void, - Atype: cudaDataType, - lda: ::std::os::raw::c_int, - Barray: *const *const ::std::os::raw::c_void, - Btype: cudaDataType, - ldb: ::std::os::raw::c_int, - beta: *const ::std::os::raw::c_void, - Carray: *const *mut ::std::os::raw::c_void, - Ctype: cudaDataType, - ldc: ::std::os::raw::c_int, - batchCount: ::std::os::raw::c_int, - computeType: cublasComputeType_t, - algo: cublasGemmAlgo_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasGemmStridedBatchedEx( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const ::std::os::raw::c_void, - A: *const ::std::os::raw::c_void, - Atype: cudaDataType, - lda: ::std::os::raw::c_int, - strideA: ::std::os::raw::c_longlong, - B: *const ::std::os::raw::c_void, - Btype: cudaDataType, - ldb: ::std::os::raw::c_int, - strideB: ::std::os::raw::c_longlong, - beta: *const ::std::os::raw::c_void, - C: *mut ::std::os::raw::c_void, - Ctype: cudaDataType, - ldc: ::std::os::raw::c_int, - strideC: ::std::os::raw::c_longlong, - batchCount: ::std::os::raw::c_int, - computeType: cublasComputeType_t, - algo: cublasGemmAlgo_t, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSgemmStridedBatched( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - strideA: ::std::os::raw::c_longlong, - B: *const f32, - ldb: ::std::os::raw::c_int, - strideB: ::std::os::raw::c_longlong, - beta: *const f32, - C: *mut f32, - ldc: ::std::os::raw::c_int, - strideC: ::std::os::raw::c_longlong, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn 
cublasDgemmStridedBatched( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - strideA: ::std::os::raw::c_longlong, - B: *const f64, - ldb: ::std::os::raw::c_int, - strideB: ::std::os::raw::c_longlong, - beta: *const f64, - C: *mut f64, - ldc: ::std::os::raw::c_int, - strideC: ::std::os::raw::c_longlong, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgemmStridedBatched( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - strideA: ::std::os::raw::c_longlong, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - strideB: ::std::os::raw::c_longlong, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - strideC: ::std::os::raw::c_longlong, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgemm3mStridedBatched( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - strideA: ::std::os::raw::c_longlong, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - strideB: ::std::os::raw::c_longlong, - beta: *const cuComplex, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - strideC: ::std::os::raw::c_longlong, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgemmStridedBatched( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - k: ::std::os::raw::c_int, - alpha: 
*const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - strideA: ::std::os::raw::c_longlong, - B: *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - strideB: ::std::os::raw::c_longlong, - beta: *const cuDoubleComplex, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - strideC: ::std::os::raw::c_longlong, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSgeam( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f32, - A: *const f32, - lda: ::std::os::raw::c_int, - beta: *const f32, - B: *const f32, - ldb: ::std::os::raw::c_int, - C: *mut f32, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDgeam( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f64, - A: *const f64, - lda: ::std::os::raw::c_int, - beta: *const f64, - B: *const f64, - ldb: ::std::os::raw::c_int, - C: *mut f64, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgeam( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - beta: *const cuComplex, - B: *const cuComplex, - ldb: ::std::os::raw::c_int, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgeam( - handle: cublasHandle_t, - transa: cublasOperation_t, - transb: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - beta: *const cuDoubleComplex, - B: *const cuDoubleComplex, - ldb: ::std::os::raw::c_int, - C: *mut cuDoubleComplex, - ldc: 
::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSgetrfBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *mut f32, - lda: ::std::os::raw::c_int, - P: *mut ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDgetrfBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *mut f64, - lda: ::std::os::raw::c_int, - P: *mut ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgetrfBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *mut cuComplex, - lda: ::std::os::raw::c_int, - P: *mut ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgetrfBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *mut cuDoubleComplex, - lda: ::std::os::raw::c_int, - P: *mut ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSgetriBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *const f32, - lda: ::std::os::raw::c_int, - P: *const ::std::os::raw::c_int, - C: *const *mut f32, - ldc: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDgetriBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *const f64, - lda: ::std::os::raw::c_int, - P: *const ::std::os::raw::c_int, - C: *const *mut f64, - ldc: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgetriBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *const cuComplex, - lda: 
::std::os::raw::c_int, - P: *const ::std::os::raw::c_int, - C: *const *mut cuComplex, - ldc: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgetriBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - P: *const ::std::os::raw::c_int, - C: *const *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSgetrsBatched( - handle: cublasHandle_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - nrhs: ::std::os::raw::c_int, - Aarray: *const *const f32, - lda: ::std::os::raw::c_int, - devIpiv: *const ::std::os::raw::c_int, - Barray: *const *mut f32, - ldb: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDgetrsBatched( - handle: cublasHandle_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - nrhs: ::std::os::raw::c_int, - Aarray: *const *const f64, - lda: ::std::os::raw::c_int, - devIpiv: *const ::std::os::raw::c_int, - Barray: *const *mut f64, - ldb: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgetrsBatched( - handle: cublasHandle_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - nrhs: ::std::os::raw::c_int, - Aarray: *const *const cuComplex, - lda: ::std::os::raw::c_int, - devIpiv: *const ::std::os::raw::c_int, - Barray: *const *mut cuComplex, - ldb: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgetrsBatched( - handle: cublasHandle_t, - trans: cublasOperation_t, - n: ::std::os::raw::c_int, - nrhs: ::std::os::raw::c_int, - Aarray: *const 
*const cuDoubleComplex, - lda: ::std::os::raw::c_int, - devIpiv: *const ::std::os::raw::c_int, - Barray: *const *mut cuDoubleComplex, - ldb: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasStrsmBatched( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f32, - A: *const *const f32, - lda: ::std::os::raw::c_int, - B: *const *mut f32, - ldb: ::std::os::raw::c_int, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDtrsmBatched( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const f64, - A: *const *const f64, - lda: ::std::os::raw::c_int, - B: *const *mut f64, - ldb: ::std::os::raw::c_int, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCtrsmBatched( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuComplex, - A: *const *const cuComplex, - lda: ::std::os::raw::c_int, - B: *const *mut cuComplex, - ldb: ::std::os::raw::c_int, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZtrsmBatched( - handle: cublasHandle_t, - side: cublasSideMode_t, - uplo: cublasFillMode_t, - trans: cublasOperation_t, - diag: cublasDiagType_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - alpha: *const cuDoubleComplex, - A: *const *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - B: *const *mut cuDoubleComplex, - ldb: ::std::os::raw::c_int, - batchCount: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} 
-extern "C" { - pub fn cublasSmatinvBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *const f32, - lda: ::std::os::raw::c_int, - Ainv: *const *mut f32, - lda_inv: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDmatinvBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *const f64, - lda: ::std::os::raw::c_int, - Ainv: *const *mut f64, - lda_inv: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCmatinvBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *const cuComplex, - lda: ::std::os::raw::c_int, - Ainv: *const *mut cuComplex, - lda_inv: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZmatinvBatched( - handle: cublasHandle_t, - n: ::std::os::raw::c_int, - A: *const *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - Ainv: *const *mut cuDoubleComplex, - lda_inv: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSgeqrfBatched( - handle: cublasHandle_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - Aarray: *const *mut f32, - lda: ::std::os::raw::c_int, - TauArray: *const *mut f32, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDgeqrfBatched( - handle: cublasHandle_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - Aarray: *const *mut f64, - lda: ::std::os::raw::c_int, - TauArray: *const *mut f64, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgeqrfBatched( - handle: cublasHandle_t, - m: ::std::os::raw::c_int, - n: 
::std::os::raw::c_int, - Aarray: *const *mut cuComplex, - lda: ::std::os::raw::c_int, - TauArray: *const *mut cuComplex, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgeqrfBatched( - handle: cublasHandle_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - Aarray: *const *mut cuDoubleComplex, - lda: ::std::os::raw::c_int, - TauArray: *const *mut cuDoubleComplex, - info: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSgelsBatched( - handle: cublasHandle_t, - trans: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - nrhs: ::std::os::raw::c_int, - Aarray: *const *mut f32, - lda: ::std::os::raw::c_int, - Carray: *const *mut f32, - ldc: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - devInfoArray: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDgelsBatched( - handle: cublasHandle_t, - trans: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - nrhs: ::std::os::raw::c_int, - Aarray: *const *mut f64, - lda: ::std::os::raw::c_int, - Carray: *const *mut f64, - ldc: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - devInfoArray: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCgelsBatched( - handle: cublasHandle_t, - trans: cublasOperation_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - nrhs: ::std::os::raw::c_int, - Aarray: *const *mut cuComplex, - lda: ::std::os::raw::c_int, - Carray: *const *mut cuComplex, - ldc: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - devInfoArray: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZgelsBatched( - handle: cublasHandle_t, - trans: cublasOperation_t, - m: 
::std::os::raw::c_int, - n: ::std::os::raw::c_int, - nrhs: ::std::os::raw::c_int, - Aarray: *const *mut cuDoubleComplex, - lda: ::std::os::raw::c_int, - Carray: *const *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - info: *mut ::std::os::raw::c_int, - devInfoArray: *mut ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasSdgmm( - handle: cublasHandle_t, - mode: cublasSideMode_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - A: *const f32, - lda: ::std::os::raw::c_int, - x: *const f32, - incx: ::std::os::raw::c_int, - C: *mut f32, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDdgmm( - handle: cublasHandle_t, - mode: cublasSideMode_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - A: *const f64, - lda: ::std::os::raw::c_int, - x: *const f64, - incx: ::std::os::raw::c_int, - C: *mut f64, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCdgmm( - handle: cublasHandle_t, - mode: cublasSideMode_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - x: *const cuComplex, - incx: ::std::os::raw::c_int, - C: *mut cuComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZdgmm( - handle: cublasHandle_t, - mode: cublasSideMode_t, - m: ::std::os::raw::c_int, - n: ::std::os::raw::c_int, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - x: *const cuDoubleComplex, - incx: ::std::os::raw::c_int, - C: *mut cuDoubleComplex, - ldc: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasStpttr( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - AP: *const f32, - A: *mut f32, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDtpttr( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - AP: *const f64, - 
A: *mut f64, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCtpttr( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - AP: *const cuComplex, - A: *mut cuComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZtpttr( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - AP: *const cuDoubleComplex, - A: *mut cuDoubleComplex, - lda: ::std::os::raw::c_int, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasStrttp( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - A: *const f32, - lda: ::std::os::raw::c_int, - AP: *mut f32, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasDtrttp( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - A: *const f64, - lda: ::std::os::raw::c_int, - AP: *mut f64, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasCtrttp( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - A: *const cuComplex, - lda: ::std::os::raw::c_int, - AP: *mut cuComplex, - ) -> cublasStatus_t; -} -extern "C" { - pub fn cublasZtrttp( - handle: cublasHandle_t, - uplo: cublasFillMode_t, - n: ::std::os::raw::c_int, - A: *const cuDoubleComplex, - lda: ::std::os::raw::c_int, - AP: *mut cuDoubleComplex, - ) -> cublasStatus_t; -} diff --git a/crates/cublas_sys/src/lib.rs b/crates/cublas_sys/src/lib.rs deleted file mode 100644 index 8cd8c8bc..00000000 --- a/crates/cublas_sys/src/lib.rs +++ /dev/null @@ -1,10 +0,0 @@ -//! Raw bindings to cublas_v2, cublasLt, and cublasXt. 
- -#![allow(warnings)] - -#[path = "./cublasLt.rs"] -pub mod lt; -#[path = "./cublas_v2.rs"] -pub mod v2; -#[path = "./cublasXt.rs"] -pub mod xt; diff --git a/crates/cuda_builder/Cargo.toml b/crates/cuda_builder/Cargo.toml index 953fef98..8095b79e 100644 --- a/crates/cuda_builder/Cargo.toml +++ b/crates/cuda_builder/Cargo.toml @@ -13,4 +13,3 @@ rustc_codegen_nvvm = { version = "0.3", path = "../rustc_codegen_nvvm" } nvvm = { path = "../nvvm", version = "0.1" } serde = { version = "1.0.217", features = ["derive"] } serde_json = "1.0.138" -find_cuda_helper = { version = "0.2", path = "../find_cuda_helper" } diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs index c82d4806..12f8c0f0 100644 --- a/crates/cuda_builder/src/lib.rs +++ b/crates/cuda_builder/src/lib.rs @@ -4,9 +4,7 @@ pub use nvvm::*; use serde::Deserialize; use std::{ borrow::Borrow, - env, - ffi::OsString, - fmt, + env, fmt, path::{Path, PathBuf}, process::{Command, Stdio}, }; @@ -340,21 +338,6 @@ fn find_rustc_codegen_nvvm() -> PathBuf { panic!("Could not find {} in library path", filename); } -fn get_new_path_var() -> OsString { - let split_paths = env::var_os(dylib_path_envvar()).unwrap_or_default(); - let mut paths = env::split_paths(&split_paths).collect::>(); - let possible_paths = if cfg!(target_os = "windows") { - vec![find_cuda_helper::find_cuda_root() - .unwrap() - .join("nvvm") - .join("bin")] - } else { - find_cuda_helper::find_cuda_lib_dirs() - }; - paths.extend(possible_paths); - env::join_paths(&paths).expect("Failed to join paths for PATH") -} - /// Joins strings together while ensuring none of the strings contain the separator. 
fn join_checking_for_separators(strings: Vec>, sep: &str) -> String { for s in &strings { @@ -374,8 +357,6 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result { // on what this does let rustc_codegen_nvvm = find_rustc_codegen_nvvm(); - let new_path = get_new_path_var(); - let mut rustflags = vec![ format!("-Zcodegen-backend={}", rustc_codegen_nvvm.display()), "-Zcrate-attr=feature(register_tool)".into(), @@ -445,8 +426,6 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result { cargo.args(&builder.build_args); - cargo.env(dylib_path_envvar(), new_path); - if builder.release { cargo.arg("--release"); } diff --git a/crates/cuda_std/src/shared.rs b/crates/cuda_std/src/shared.rs index 907e0ab6..107f4510 100644 --- a/crates/cuda_std/src/shared.rs +++ b/crates/cuda_std/src/shared.rs @@ -41,7 +41,10 @@ use crate::gpu_only; /// # Examples /// /// ```no_run -/// #[kernel] +/// # use cuda_std::kernel; +/// # use cuda_std::shared_array; +/// # use cuda_std::thread; +/// ##[kernel] /// pub unsafe fn reverse_array(d: *mut i32, n: usize) { /// let s = shared_array![i32; 64]; /// let t = thread::thread_idx_x() as usize; diff --git a/crates/cudnn-sys/Cargo.toml b/crates/cudnn-sys/Cargo.toml new file mode 100644 index 00000000..0c48edf6 --- /dev/null +++ b/crates/cudnn-sys/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "cudnn-sys" +version = "0.1.0" +edition = "2024" +license = "MIT OR Apache-2.0" +repository = "https://github.com/Rust-GPU/Rust-CUDA" +readme = "../../README.md" +links = "cudnn" +build = "build/main.rs" + +[dependencies] +cust_raw = { path = "../cust_raw", default-features = false, features = ["driver"] } + +[build-dependencies] +bindgen = "0.71.1" diff --git a/crates/cudnn-sys/build/cudnn_sdk.rs b/crates/cudnn-sys/build/cudnn_sdk.rs new file mode 100644 index 00000000..2974aa74 --- /dev/null +++ b/crates/cudnn-sys/build/cudnn_sdk.rs @@ -0,0 +1,92 @@ +use std::error; +use std::fs; +use std::path; + +/// Represents the cuDNN SDK installation. 
+#[derive(Debug, Clone)] +pub struct CudnnSdk { + /// cuDNN related paths and version numbers. + cudnn_include_path: path::PathBuf, + cudnn_version: [u32; 3], +} + +impl CudnnSdk { + /// Creates a new `cuDNN` instance by locating the cuDNN SDK installation + /// and parsing its version from the `cudnn_version.h` header file. + pub fn new() -> Result> { + // Retrieve the cuDNN include paths. + let cudnn_include_path = Self::find_cudnn_include_dir()?; + // Retrieve the cuDNN version. + let header_path = cudnn_include_path.join("cudnn_version.h"); + let header_content = fs::read_to_string(header_path)?; + let cudnn_version = Self::parse_cudnn_version(header_content.as_str())?; + Ok(Self { + cudnn_include_path, + cudnn_version, + }) + } + + pub fn cudnn_include_path(&self) -> &path::Path { + self.cudnn_include_path.as_path() + } + + /// Returns the full version of cuDNN as an integer. + /// For example, cuDNN 9.8.0 is represented as 90800. + pub fn cudnn_version(&self) -> u32 { + let [major, minor, patch] = self.cudnn_version; + major * 10000 + minor * 100 + patch + } + + pub fn cudnn_version_major(&self) -> u32 { + self.cudnn_version[0] + } + + pub fn cudnn_version_minor(&self) -> u32 { + self.cudnn_version[1] + } + + pub fn cudnn_version_patch(&self) -> u32 { + self.cudnn_version[2] + } + + /// Checks if the given path is a valid cuDNN installation by verifying + /// the existence of cuDNN header files. 
+ fn is_cudnn_include_path>(path: P) -> bool { + let p = path.as_ref(); + p.join("cudnn.h").is_file() && p.join("cudnn_version.h").is_file() + } + + fn find_cudnn_include_dir() -> Result> { + #[cfg(not(target_os = "windows"))] + const CUDNN_DEFAULT_PATHS: &[&str] = &["/usr/include", "/usr/local/include"]; + #[cfg(target_os = "windows")] + const CUDNN_DEFAULT_PATHS: &[&str] = &[ + "C:/Program Files/NVIDIA/CUDNN/v9.x/include", + "C:/Program Files/NVIDIA/CUDNN/v8.x/include", + ]; + CUDNN_DEFAULT_PATHS + .iter() + .find(|s| Self::is_cudnn_include_path(s)) + .map(path::PathBuf::from) + .ok_or("Cannot find cuDNN include directory.".into()) + } + + fn parse_cudnn_version(header_content: &str) -> Result<[u32; 3], Box> { + let [major, minor, patch] = ["CUDNN_MAJOR", "CUDNN_MINOR", "CUDNN_PATCHLEVEL"] + .into_iter() + .map(|macro_name| { + let version = header_content + .lines() + .find(|line| line.contains(format!("#define {macro_name}").as_str())) + .and_then(|line| line.split_whitespace().last()) + .ok_or(format!("Cannot find {macro_name} from cuDNN header file.").as_str())?; + version + .parse::() + .map_err(|_| format!("Cannot parse {macro_name} as u32: '{}'", version)) + }) + .collect::, _>>()? + .try_into() + .map_err(|_| "Invalid cuDNN version length.")?; + Ok([major, minor, patch]) + } +} diff --git a/crates/cudnn-sys/build/main.rs b/crates/cudnn-sys/build/main.rs new file mode 100644 index 00000000..6b6aba02 --- /dev/null +++ b/crates/cudnn-sys/build/main.rs @@ -0,0 +1,64 @@ +use std::env; +use std::path; + +pub mod cudnn_sdk; + +fn main() { + let sdk = cudnn_sdk::CudnnSdk::new().expect("Cannot create cuDNN SDK instance."); + let cuda_include_paths = env::var_os("DEP_CUDA_CUDA_INCLUDE") + .map(|s| env::split_paths(s.as_os_str()).collect::>()) + .expect("Cannot find transitive metadata 'cuda_include' from cust_raw package."); + + println!("cargo::rerun-if-changed=build"); + // Emit metadata for the build script. 
+ let (version, version_major, version_minor, version_patch) = ( + sdk.cudnn_version(), + sdk.cudnn_version_major(), + sdk.cudnn_version_minor(), + sdk.cudnn_version_patch(), + ); + let include_dir = sdk.cudnn_include_path().display().to_string(); + println!("cargo::metadata=version={version}"); + println!("cargo::metadata=version_major={version_major}"); + println!("cargo::metadata=version_minor={version_minor}"); + println!("cargo::metadata=version_patch={version_patch}"); + println!("cargo::metadata=include_dir={include_dir}",); + + // Generate bindings and link to the library. + create_cudnn_bindings(&sdk, &cuda_include_paths); + println!("cargo::rustc-link-lib=dylib=cudnn"); +} + +fn create_cudnn_bindings(sdk: &cudnn_sdk::CudnnSdk, cuda_include_paths: &[path::PathBuf]) { + println!("cargo::rerun-if-changed=build/wrapper.h"); + let outdir = path::PathBuf::from( + env::var("OUT_DIR").expect("OUT_DIR environment variable should be set by cargo."), + ); + let bindgen_path = path::PathBuf::from(format!("{}/cudnn_sys.rs", outdir.display())); + let bindings = bindgen::Builder::default() + .header("build/wrapper.h") + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .clang_arg(format!("-I{}", sdk.cudnn_include_path().display())) + .clang_args( + cuda_include_paths + .iter() + .map(|p| format!("-I{}", p.display())), + ) + .allowlist_function("^cudnn.*") + .allowlist_type("^cudnn.*") + .allowlist_var("^CUDNN.*") + .default_enum_style(bindgen::EnumVariation::Rust { + non_exhaustive: false, + }) + .derive_default(true) + .derive_eq(true) + .derive_hash(true) + .derive_ord(true) + .size_t_is_usize(true) + .layout_tests(true) + .generate() + .expect("Unable to generate cuDNN bindings."); + bindings + .write_to_file(bindgen_path.as_path()) + .expect("Cannot write cuDNN bindgen output to file."); +} diff --git a/crates/cudnn-sys/build/wrapper.h b/crates/cudnn-sys/build/wrapper.h new file mode 100644 index 00000000..f5843457 --- /dev/null +++ 
b/crates/cudnn-sys/build/wrapper.h @@ -0,0 +1 @@ +#include "cudnn.h" \ No newline at end of file diff --git a/crates/cudnn-sys/src/lib.rs b/crates/cudnn-sys/src/lib.rs new file mode 100644 index 00000000..6bc9ed93 --- /dev/null +++ b/crates/cudnn-sys/src/lib.rs @@ -0,0 +1,5 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +include!(concat!(env!("OUT_DIR"), "/cudnn_sys.rs")); diff --git a/crates/cudnn/Cargo.toml b/crates/cudnn/Cargo.toml index f7ba0b9d..05de479f 100644 --- a/crates/cudnn/Cargo.toml +++ b/crates/cudnn/Cargo.toml @@ -7,3 +7,4 @@ version = "0.1.0" [dependencies] bitflags = "2.8" cust = {version = "0.3.2", path = "../cust"} +cudnn-sys = { path = "../cudnn-sys" } diff --git a/crates/cudnn/bindgen.sh b/crates/cudnn/bindgen.sh deleted file mode 100755 index 6dc69272..00000000 --- a/crates/cudnn/bindgen.sh +++ /dev/null @@ -1,9 +0,0 @@ -bindgen "${HOME}/local/include/cudnn.h" \ - --size_t-is-usize \ - --allowlist-type "cudnn.*" \ - --allowlist-function "cudnn.*" \ - --allowlist-var "CUDNN.*" \ - --no-layout-tests \ - --no-doc-comments \ - --default-enum-style rust \ - -- -I "/usr/local/cuda/include" > src/sys.rs diff --git a/crates/cudnn/build.rs b/crates/cudnn/build.rs index e1f2c284..5e799a5e 100644 --- a/crates/cudnn/build.rs +++ b/crates/cudnn/build.rs @@ -1,4 +1,13 @@ +use std::env; + fn main() { - println!("cargo:rustc-link-lib=dylib=cudnn"); - println!("cargo:rerun-if-changed=build.rs"); + let cudnn_version = env::var("DEP_CUDNN_VERSION") + .expect("Cannot find transitive metadata 'version' from cudnn-sys package.") + .parse::() + .expect("Failed to parse cuDNN version"); + + println!("cargo::rustc-check-cfg=cfg(cudnn9)"); + if cudnn_version >= 90000 { + println!("cargo::rustc-cfg=cudnn9"); + } } diff --git a/crates/cudnn/src/activation/activation_descriptor.rs b/crates/cudnn/src/activation/activation_descriptor.rs index d3ca372f..6c46669a 100644 --- 
a/crates/cudnn/src/activation/activation_descriptor.rs +++ b/crates/cudnn/src/activation/activation_descriptor.rs @@ -1,10 +1,11 @@ -use crate::{sys, ActivationMode, CudnnError, IntoResult, NanPropagation}; use std::mem::MaybeUninit; +use crate::{ActivationMode, CudnnError, IntoResult, NanPropagation}; + /// The descriptor of a neuron activation operation. #[derive(Debug, PartialEq, Eq, Hash)] pub struct ActivationDescriptor { - pub(crate) raw: sys::cudnnActivationDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnActivationDescriptor_t, } impl ActivationDescriptor { @@ -47,7 +48,7 @@ impl ActivationDescriptor { let mut raw = MaybeUninit::uninit(); unsafe { - sys::cudnnCreateActivationDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateActivationDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); @@ -56,7 +57,7 @@ impl ActivationDescriptor { _ => 1.0, }); - sys::cudnnSetActivationDescriptor(raw, mode.into(), nan_opt.into(), coefficient) + cudnn_sys::cudnnSetActivationDescriptor(raw, mode.into(), nan_opt.into(), coefficient) .into_result()?; Ok(Self { raw }) @@ -67,7 +68,7 @@ impl ActivationDescriptor { impl Drop for ActivationDescriptor { fn drop(&mut self) { unsafe { - sys::cudnnDestroyActivationDescriptor(self.raw); + cudnn_sys::cudnnDestroyActivationDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/activation/activation_mode.rs b/crates/cudnn/src/activation/activation_mode.rs index 5e996e97..77e62c5d 100644 --- a/crates/cudnn/src/activation/activation_mode.rs +++ b/crates/cudnn/src/activation/activation_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Specifies a neuron activation function. 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnActivationMode_t) @@ -26,7 +24,7 @@ pub enum ActivationMode { Identity, } -impl From for sys::cudnnActivationMode_t { +impl From for cudnn_sys::cudnnActivationMode_t { fn from(mode: ActivationMode) -> Self { match mode { ActivationMode::Sigmoid => Self::CUDNN_ACTIVATION_SIGMOID, diff --git a/crates/cudnn/src/activation/mod.rs b/crates/cudnn/src/activation/mod.rs index c3efc37d..c5193204 100644 --- a/crates/cudnn/src/activation/mod.rs +++ b/crates/cudnn/src/activation/mod.rs @@ -1,14 +1,15 @@ +use cust::memory::GpuBuffer; + +use crate::{ + private, CudnnContext, CudnnError, DataType, IntoResult, ScalingDataType, TensorDescriptor, +}; + mod activation_descriptor; mod activation_mode; pub use activation_descriptor::*; pub use activation_mode::*; -use crate::{ - private, sys, CudnnContext, CudnnError, DataType, IntoResult, ScalingDataType, TensorDescriptor, -}; -use cust::memory::GpuBuffer; - impl CudnnContext { /// Applies a specific neuron activation functions element wise of the provided /// tensor. 
@@ -87,7 +88,7 @@ impl CudnnContext { let y_ptr = y.as_device_ptr().as_mut_ptr() as *mut _; unsafe { - sys::cudnnActivationForward( + cudnn_sys::cudnnActivationForward( self.raw, activation_desc.raw, alpha_ptr, @@ -155,7 +156,7 @@ impl CudnnContext { let dx_ptr = dx.as_device_ptr().as_mut_ptr() as *mut _; unsafe { - sys::cudnnActivationBackward( + cudnn_sys::cudnnActivationBackward( self.raw, activation_desc.raw, alpha_ptr, diff --git a/crates/cudnn/src/attention/attention_descriptor.rs b/crates/cudnn/src/attention/attention_descriptor.rs index 10371aa7..4864cd7c 100644 --- a/crates/cudnn/src/attention/attention_descriptor.rs +++ b/crates/cudnn/src/attention/attention_descriptor.rs @@ -1,7 +1,9 @@ -use crate::{sys, CudnnError, DataType, DropoutDescriptor, IntoResult, MathType, SeqDataType}; -use cust::memory::GpuBuffer; use std::{marker::PhantomData, mem::MaybeUninit}; +use cust::memory::GpuBuffer; + +use crate::{CudnnError, DataType, DropoutDescriptor, IntoResult, MathType, SeqDataType}; + bitflags::bitflags! { /// Miscellaneous switches for configuring auxiliary multi-head attention features. 
pub struct AttnModeFlags: u32 { @@ -29,7 +31,7 @@ where D1: GpuBuffer, D2: GpuBuffer, { - pub(crate) raw: sys::cudnnAttnDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnAttnDescriptor_t, data_type: PhantomData, math_prec: PhantomData, attn_dropout_desc: DropoutDescriptor, @@ -110,11 +112,11 @@ where let mut raw = MaybeUninit::uninit(); unsafe { - sys::cudnnCreateAttnDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateAttnDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); - sys::cudnnSetAttnDescriptor( + cudnn_sys::cudnnSetAttnDescriptor( raw, mode.bits(), n_heads, @@ -158,7 +160,7 @@ where { fn drop(&mut self) { unsafe { - sys::cudnnDestroyAttnDescriptor(self.raw); + cudnn_sys::cudnnDestroyAttnDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/attention/attention_weights_kind.rs b/crates/cudnn/src/attention/attention_weights_kind.rs index 48ef86af..490de02a 100644 --- a/crates/cudnn/src/attention/attention_weights_kind.rs +++ b/crates/cudnn/src/attention/attention_weights_kind.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Specifies a group of weights or biases for the multi-head attention layer. 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnMultiHeadAttnWeightKind_t) @@ -24,7 +22,7 @@ pub enum AttnWeight { OBiases, } -impl From for sys::cudnnMultiHeadAttnWeightKind_t { +impl From for cudnn_sys::cudnnMultiHeadAttnWeightKind_t { fn from(kind: AttnWeight) -> Self { match kind { AttnWeight::QWeights => Self::CUDNN_MH_ATTN_Q_WEIGHTS, diff --git a/crates/cudnn/src/attention/mod.rs b/crates/cudnn/src/attention/mod.rs index fc2665db..e9403f6c 100644 --- a/crates/cudnn/src/attention/mod.rs +++ b/crates/cudnn/src/attention/mod.rs @@ -1,3 +1,9 @@ +use std::mem::MaybeUninit; + +use cust::memory::GpuBuffer; + +use crate::{CudnnContext, CudnnError, IntoResult, WGradMode}; + mod attention_descriptor; mod attention_weights_kind; mod seq_data_axis; @@ -8,10 +14,6 @@ pub use attention_weights_kind::*; pub use seq_data_axis::*; pub use seq_data_descriptor::*; -use crate::{sys, CudnnContext, CudnnError, IntoResult, WGradMode}; -use cust::memory::GpuBuffer; -use std::mem::MaybeUninit; - impl CudnnContext { /// This function computes weight, work, and reserve space buffer sizes used by the /// following functions: @@ -46,7 +48,7 @@ impl CudnnContext { let mut reserve_space_size = MaybeUninit::uninit(); unsafe { - sys::cudnnGetMultiHeadAttnBuffers( + cudnn_sys::cudnnGetMultiHeadAttnBuffers( self.raw, desc.raw, weight_space_size.as_mut_ptr(), @@ -149,7 +151,7 @@ impl CudnnContext { }); unsafe { - sys::cudnnMultiHeadAttnForward( + cudnn_sys::cudnnMultiHeadAttnForward( self.raw, attn_desc.raw, current_idx, @@ -294,7 +296,7 @@ impl CudnnContext { let reserve_space_ptr = reserve_space.as_device_ptr().as_mut_ptr() as *mut _; unsafe { - sys::cudnnMultiHeadAttnBackwardData( + cudnn_sys::cudnnMultiHeadAttnBackwardData( self.raw, attn_desc.raw, lo_win_idx.as_ptr(), @@ -408,7 +410,7 @@ impl CudnnContext { let reserve_space_ptr = reserve_space.as_device_ptr().as_mut_ptr() as *mut _; unsafe { - sys::cudnnMultiHeadAttnBackwardWeights( + 
cudnn_sys::cudnnMultiHeadAttnBackwardWeights( self.raw, attn_desc.raw, grad_mode.into(), diff --git a/crates/cudnn/src/attention/seq_data_axis.rs b/crates/cudnn/src/attention/seq_data_axis.rs index 57612635..d295ee34 100644 --- a/crates/cudnn/src/attention/seq_data_axis.rs +++ b/crates/cudnn/src/attention/seq_data_axis.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Describes and indexes active dimensions in the `SeqDataDescriptor` `dim` field. This enum is /// also used in the `axis` argument of the `SeqDataDescriptor` constructor to define the layout /// of the sequence data buffer in memory. @@ -18,13 +16,13 @@ pub enum SeqDataAxis { VectDim, } -impl From for sys::cudnnSeqDataAxis_t { +impl From for cudnn_sys::cudnnSeqDataAxis_t { fn from(axis: SeqDataAxis) -> Self { match axis { - SeqDataAxis::TimeDim => sys::cudnnSeqDataAxis_t::CUDNN_SEQDATA_TIME_DIM, - SeqDataAxis::BatchDim => sys::cudnnSeqDataAxis_t::CUDNN_SEQDATA_BATCH_DIM, - SeqDataAxis::BeamDim => sys::cudnnSeqDataAxis_t::CUDNN_SEQDATA_BEAM_DIM, - SeqDataAxis::VectDim => sys::cudnnSeqDataAxis_t::CUDNN_SEQDATA_VECT_DIM, + SeqDataAxis::TimeDim => cudnn_sys::cudnnSeqDataAxis_t::CUDNN_SEQDATA_TIME_DIM, + SeqDataAxis::BatchDim => cudnn_sys::cudnnSeqDataAxis_t::CUDNN_SEQDATA_BATCH_DIM, + SeqDataAxis::BeamDim => cudnn_sys::cudnnSeqDataAxis_t::CUDNN_SEQDATA_BEAM_DIM, + SeqDataAxis::VectDim => cudnn_sys::cudnnSeqDataAxis_t::CUDNN_SEQDATA_VECT_DIM, } } } @@ -33,14 +31,14 @@ impl std::ops::Index for [T; 4] { type Output = T; fn index(&self, index: SeqDataAxis) -> &Self::Output { - let raw: sys::cudnnSeqDataAxis_t = index.into(); + let raw: cudnn_sys::cudnnSeqDataAxis_t = index.into(); self.index(raw as usize) } } impl std::ops::IndexMut for [T; 4] { fn index_mut(&mut self, index: SeqDataAxis) -> &mut Self::Output { - let raw: sys::cudnnSeqDataAxis_t = index.into(); + let raw: cudnn_sys::cudnnSeqDataAxis_t = index.into(); self.index_mut(raw as usize) } } diff --git a/crates/cudnn/src/attention/seq_data_descriptor.rs 
b/crates/cudnn/src/attention/seq_data_descriptor.rs index f7018eb7..9c47e53f 100644 --- a/crates/cudnn/src/attention/seq_data_descriptor.rs +++ b/crates/cudnn/src/attention/seq_data_descriptor.rs @@ -1,6 +1,7 @@ -use crate::{private, sys, CudnnError, DataType, IntoResult, SeqDataAxis}; use std::{marker::PhantomData, mem::MaybeUninit}; +use crate::{private, CudnnError, DataType, IntoResult, SeqDataAxis}; + /// Specifies the allowed types for the sequential data buffer. /// /// As stated in the [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetSeqataDescriptor) @@ -15,7 +16,7 @@ pub struct SeqDataDescriptor where T: SeqDataType, { - pub(crate) raw: sys::cudnnSeqDataDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnSeqDataDescriptor_t, data_type: PhantomData, } @@ -123,13 +124,13 @@ where let mut raw = MaybeUninit::uninit(); unsafe { - sys::cudnnCreateSeqDataDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateSeqDataDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); let raw_axes = axes.map(SeqDataAxis::into); - sys::cudnnSetSeqDataDescriptor( + cudnn_sys::cudnnSetSeqDataDescriptor( raw, T::into_raw(), 4_i32, @@ -155,7 +156,7 @@ where { fn drop(&mut self) { unsafe { - sys::cudnnDestroySeqDataDescriptor(self.raw); + cudnn_sys::cudnnDestroySeqDataDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/backend/conv_bwd_data.rs b/crates/cudnn/src/backend/conv_bwd_data.rs index c011a38b..793e8659 100644 --- a/crates/cudnn/src/backend/conv_bwd_data.rs +++ b/crates/cudnn/src/backend/conv_bwd_data.rs @@ -1,6 +1,6 @@ use crate::{ backend::{ConvCfg, Descriptor, FloatDataType, Operation, Real, Tensor}, - sys, CudnnError, DataType, IntoResult, + CudnnError, DataType, IntoResult, }; pub struct ConvBwdDataBuilder { @@ -61,12 +61,12 @@ impl ConvBwdDataBuilder { unsafe { let mut raw = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR, + 
cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &cfg.raw.inner(), ) @@ -74,14 +74,14 @@ impl ConvBwdDataBuilder { match self.alpha { Some(Real::Float(ref alpha)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, 1, alpha, )?, Some(Real::Double(ref alpha)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, alpha, )?, @@ -90,14 +90,14 @@ impl ConvBwdDataBuilder { match self.beta { Some(Real::Float(ref beta)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, 1, beta, )?, Some(Real::Double(ref beta)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA, + 
cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, beta, )?, @@ -105,22 +105,22 @@ impl ConvBwdDataBuilder { } raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &w.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &dx.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &dy.raw.inner(), )?; diff --git a/crates/cudnn/src/backend/conv_bwd_filter.rs b/crates/cudnn/src/backend/conv_bwd_filter.rs index 64c98223..5ddce6f8 100644 --- a/crates/cudnn/src/backend/conv_bwd_filter.rs +++ b/crates/cudnn/src/backend/conv_bwd_filter.rs @@ -1,6 +1,6 @@ use crate::{ backend::{ConvCfg, Descriptor, FloatDataType, Operation, Real, Tensor}, - sys, CudnnError, DataType, IntoResult, + CudnnError, DataType, IntoResult, }; pub struct ConvBwdFilterBuilder { @@ -60,26 +60,26 @@ impl ConvBwdFilterBuilder { unsafe { let mut raw = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR, )?; raw.set_attribute( - 
sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &cfg.raw.inner(), )?; match self.alpha { Some(Real::Float(ref alpha)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, 1, alpha, )?, Some(Real::Double(ref alpha)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, alpha, )?, @@ -88,14 +88,14 @@ impl ConvBwdFilterBuilder { match self.beta { Some(Real::Float(ref beta)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, 1, beta, )?, Some(Real::Double(ref beta)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, beta, )?, @@ -103,22 +103,22 @@ impl ConvBwdFilterBuilder { } raw.set_attribute( - 
sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &dw.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &x.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &dy.raw.inner(), )?; diff --git a/crates/cudnn/src/backend/conv_cfg.rs b/crates/cudnn/src/backend/conv_cfg.rs index 9e93a9d6..f89d99d1 100644 --- a/crates/cudnn/src/backend/conv_cfg.rs +++ b/crates/cudnn/src/backend/conv_cfg.rs @@ -1,8 +1,8 @@ -use crate::{backend::Descriptor, sys, ConvMode, CudnnError, DataType, IntoResult}; +use crate::{backend::Descriptor, ConvMode, CudnnError, DataType, IntoResult}; #[derive(Default, Clone, PartialEq, Eq, Hash, Debug)] pub struct ConvCfgBuilder<'a> { - comp_type: Option, + comp_type: Option, mode: Option, dilations: Option<&'a [i64]>, strides: Option<&'a [i64]>, @@ -54,54 +54,54 @@ impl<'a> ConvCfgBuilder<'a> { unsafe { let mut raw = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_COMP_TYPE, - 
sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_COMP_TYPE, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE, 1, &comp_type, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_CONV_MODE, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_CONVOLUTION_MODE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_CONV_MODE, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_CONVOLUTION_MODE, 1, &mode, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_DILATIONS, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_DILATIONS, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, dilations.len() as i64, dilations, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_DILATIONS, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_DILATIONS, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, dilations.len() as i64, dilations, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, strides.len() as i64, strides, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, pre_paddings.len() as i64, pre_paddings, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_POST_PADDINGS, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + 
cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_CONVOLUTION_POST_PADDINGS, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, post_paddings.len() as i64, post_paddings, )?; diff --git a/crates/cudnn/src/backend/conv_fwd.rs b/crates/cudnn/src/backend/conv_fwd.rs index 7909afca..ffde1de2 100644 --- a/crates/cudnn/src/backend/conv_fwd.rs +++ b/crates/cudnn/src/backend/conv_fwd.rs @@ -1,6 +1,6 @@ use crate::{ backend::{ConvCfg, Descriptor, FloatDataType, Operation, Real, Tensor}, - sys, CudnnError, DataType, IntoResult, + CudnnError, DataType, IntoResult, }; pub struct ConvFwdBuilder { @@ -61,26 +61,26 @@ impl ConvFwdBuilder { unsafe { let mut raw = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &cfg.raw.inner(), )?; match self.alpha { Some(Real::Float(ref alpha)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, 1, alpha, )?, Some(Real::Double(ref alpha)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, alpha, )?, @@ -89,14 +89,14 @@ impl 
ConvFwdBuilder { match self.beta { Some(Real::Float(ref beta)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, 1, beta, )?, Some(Real::Double(ref beta)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, beta, )?, @@ -104,22 +104,22 @@ impl ConvFwdBuilder { } raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &w.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &x.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &y.raw.inner(), )?; diff --git a/crates/cudnn/src/backend/descriptor.rs b/crates/cudnn/src/backend/descriptor.rs index a780c29c..9418d4e7 100644 --- a/crates/cudnn/src/backend/descriptor.rs +++ 
b/crates/cudnn/src/backend/descriptor.rs @@ -1,15 +1,16 @@ -use crate::{sys, CudnnError, IntoResult}; use std::{mem::MaybeUninit, rc::Rc}; +use crate::{CudnnError, IntoResult}; + #[derive(PartialEq, Eq, Hash, Debug)] pub(crate) struct Inner { - pub(crate) raw: sys::cudnnBackendDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnBackendDescriptor_t, } impl Drop for Inner { fn drop(&mut self) { unsafe { - sys::cudnnBackendDestroyDescriptor(self.raw); + cudnn_sys::cudnnBackendDestroyDescriptor(self.raw); } } } @@ -18,10 +19,12 @@ impl Drop for Inner { pub struct Descriptor(Rc); impl Descriptor { - pub(crate) unsafe fn new(dtype: sys::cudnnBackendDescriptorType_t) -> Result { + pub(crate) unsafe fn new( + dtype: cudnn_sys::cudnnBackendDescriptorType_t, + ) -> Result { let mut raw = MaybeUninit::uninit(); - sys::cudnnBackendCreateDescriptor(dtype, raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnBackendCreateDescriptor(dtype, raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); @@ -29,29 +32,29 @@ impl Descriptor { } pub(crate) unsafe fn finalize(&mut self) -> Result<(), CudnnError> { - sys::cudnnBackendFinalize(self.0.raw).into_result() + cudnn_sys::cudnnBackendFinalize(self.0.raw).into_result() } pub(crate) unsafe fn set_attribute( &mut self, - aname: sys::cudnnBackendAttributeName_t, - atype: sys::cudnnBackendAttributeType_t, + aname: cudnn_sys::cudnnBackendAttributeName_t, + atype: cudnn_sys::cudnnBackendAttributeType_t, count: i64, val: &T, ) -> Result<(), CudnnError> { let ptr = val as *const T as *const std::ffi::c_void; - sys::cudnnBackendSetAttribute(self.0.raw, aname, atype, count, ptr).into_result() + cudnn_sys::cudnnBackendSetAttribute(self.0.raw, aname, atype, count, ptr).into_result() } pub(crate) unsafe fn get_attribute_count( &self, - aname: sys::cudnnBackendAttributeName_t, - atype: sys::cudnnBackendAttributeType_t, + aname: cudnn_sys::cudnnBackendAttributeName_t, + atype: cudnn_sys::cudnnBackendAttributeType_t, ) -> Result { let mut count = 
MaybeUninit::::uninit(); - sys::cudnnBackendGetAttribute( + cudnn_sys::cudnnBackendGetAttribute( self.0.raw, aname, atype, @@ -64,7 +67,7 @@ impl Descriptor { Ok(count.assume_init()) } - pub(crate) fn inner(&self) -> sys::cudnnBackendDescriptor_t { + pub(crate) fn inner(&self) -> cudnn_sys::cudnnBackendDescriptor_t { self.0.raw } } diff --git a/crates/cudnn/src/backend/engine.rs b/crates/cudnn/src/backend/engine.rs index b1b4aad3..9eb3d726 100644 --- a/crates/cudnn/src/backend/engine.rs +++ b/crates/cudnn/src/backend/engine.rs @@ -1,6 +1,6 @@ use crate::{ backend::{Descriptor, Graph}, - sys, CudnnError, IntoResult, + CudnnError, IntoResult, }; #[derive(Default, Debug, PartialEq)] @@ -26,19 +26,19 @@ impl EngineBuilder { unsafe { let mut descriptor = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_ENGINE_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_ENGINE_DESCRIPTOR, )?; descriptor.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_ENGINE_OPERATION_GRAPH, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_ENGINE_OPERATION_GRAPH, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &graph.descriptor.inner(), )?; descriptor.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_ENGINE_GLOBAL_INDEX, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_ENGINE_GLOBAL_INDEX, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, 1, &global_index, )?; diff --git a/crates/cudnn/src/backend/engine_cfg.rs b/crates/cudnn/src/backend/engine_cfg.rs index 93bbdd50..9a8b39a1 100644 --- a/crates/cudnn/src/backend/engine_cfg.rs +++ b/crates/cudnn/src/backend/engine_cfg.rs @@ -1,6 +1,6 @@ use crate::{ backend::{Descriptor, Engine}, - sys, CudnnError, IntoResult, + CudnnError, IntoResult, }; #[derive(Default, PartialEq, Debug)] @@ -26,14 +26,14 @@ impl EngineCfgBuilder { 
unsafe { let mut descriptor = match self.descriptor { None => Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, )?, Some(descriptor) => descriptor, }; descriptor.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_ENGINECFG_ENGINE, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_ENGINECFG_ENGINE, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &engine.descriptor.inner(), )?; diff --git a/crates/cudnn/src/backend/engine_heuristic.rs b/crates/cudnn/src/backend/engine_heuristic.rs index 322e5756..ac7c4fe0 100644 --- a/crates/cudnn/src/backend/engine_heuristic.rs +++ b/crates/cudnn/src/backend/engine_heuristic.rs @@ -1,6 +1,6 @@ use crate::{ backend::{Descriptor, EngineCfgBuilder, Graph}, - sys, CudnnContext, CudnnError, IntoResult, + CudnnContext, CudnnError, IntoResult, }; pub enum HeuristicMode { @@ -8,11 +8,11 @@ pub enum HeuristicMode { B, } -impl From for sys::cudnnBackendHeurMode_t { +impl From for cudnn_sys::cudnnBackendHeurMode_t { fn from(mode: HeuristicMode) -> Self { match mode { - HeuristicMode::A => sys::cudnnBackendHeurMode_t::CUDNN_HEUR_MODE_A, - HeuristicMode::B => sys::cudnnBackendHeurMode_t::CUDNN_HEUR_MODE_B, + HeuristicMode::A => cudnn_sys::cudnnBackendHeurMode_t::CUDNN_HEUR_MODE_A, + HeuristicMode::B => cudnn_sys::cudnnBackendHeurMode_t::CUDNN_HEUR_MODE_B, } } } diff --git a/crates/cudnn/src/backend/execution_plan.rs b/crates/cudnn/src/backend/execution_plan.rs index 9ab50da1..14f313cf 100644 --- a/crates/cudnn/src/backend/execution_plan.rs +++ b/crates/cudnn/src/backend/execution_plan.rs @@ -1,6 +1,6 @@ use crate::{ backend::{Descriptor, EngineCfg}, - sys, CudnnContext, CudnnError, IntoResult, + CudnnContext, CudnnError, IntoResult, }; #[derive(Default, PartialEq, Debug)] @@ -19,12 +19,12 @@ impl ExecutionPlanBuilder { 
unsafe { let mut descriptor = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR, )?; descriptor.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &engine_cfg.descriptor.inner(), )?; diff --git a/crates/cudnn/src/backend/graph.rs b/crates/cudnn/src/backend/graph.rs index 2226c571..a30af78f 100644 --- a/crates/cudnn/src/backend/graph.rs +++ b/crates/cudnn/src/backend/graph.rs @@ -1,6 +1,6 @@ use crate::{ backend::{Descriptor, Operation}, - sys, CudnnContext, CudnnError, + CudnnContext, CudnnError, }; #[derive(Default, PartialEq, Debug)] @@ -26,12 +26,12 @@ impl GraphBuilder { unsafe { let mut descriptor = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR, )?; descriptor.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATIONGRAPH_HANDLE, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_HANDLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATIONGRAPH_HANDLE, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_HANDLE, 1, &context.raw, )?; @@ -50,8 +50,8 @@ impl GraphBuilder { .collect::>(); descriptor.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATIONGRAPH_OPS, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATIONGRAPH_OPS, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, descriptors.len() as i64, descriptors.as_slice(), )?; diff --git a/crates/cudnn/src/backend/matmul.rs b/crates/cudnn/src/backend/matmul.rs 
index a4ed499c..14badc8b 100644 --- a/crates/cudnn/src/backend/matmul.rs +++ b/crates/cudnn/src/backend/matmul.rs @@ -2,7 +2,7 @@ use cust::memory::bytemuck::Contiguous; use crate::{ backend::{Descriptor, MatMulCfg, Operation, Tensor}, - sys, CudnnError, DataType, IntoResult, + CudnnError, DataType, IntoResult, }; #[derive(Clone, Default, PartialEq, Eq, Hash, Debug)] @@ -42,33 +42,33 @@ impl MatMulBuilder { unsafe { let mut raw = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_MATMUL_ADESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_MATMUL_ADESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &a.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_MATMUL_BDESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_MATMUL_BDESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &b.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_MATMUL_CDESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_MATMUL_CDESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &c.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_MATMUL_DESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_MATMUL_DESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &cfg.raw.inner(), )?; diff --git a/crates/cudnn/src/backend/matmul_cfg.rs 
b/crates/cudnn/src/backend/matmul_cfg.rs index fe3043ad..3fba7107 100644 --- a/crates/cudnn/src/backend/matmul_cfg.rs +++ b/crates/cudnn/src/backend/matmul_cfg.rs @@ -1,8 +1,8 @@ -use crate::{backend::Descriptor, sys, CudnnError, DataType, IntoResult}; +use crate::{backend::Descriptor, CudnnError, DataType, IntoResult}; #[derive(Clone, Default, PartialEq, Eq, Hash, Debug)] pub struct MatMulCfgBuilder { - compt_type: Option, + compt_type: Option, } impl MatMulCfgBuilder { @@ -19,12 +19,12 @@ impl MatMulCfgBuilder { unsafe { let mut raw = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_MATMUL_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_MATMUL_DESCRIPTOR, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_MATMUL_COMP_TYPE, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_MATMUL_COMP_TYPE, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE, 1, &compt_type, )?; diff --git a/crates/cudnn/src/backend/operation.rs b/crates/cudnn/src/backend/operation.rs index 3890e0cf..97a9b8ef 100644 --- a/crates/cudnn/src/backend/operation.rs +++ b/crates/cudnn/src/backend/operation.rs @@ -1,7 +1,4 @@ -use crate::{ - backend::{ConvCfg, Descriptor, MatMulCfg, PointwiseCfg, Real, ReductionCfg, Tensor}, - sys, -}; +use crate::backend::{ConvCfg, Descriptor, MatMulCfg, PointwiseCfg, Real, ReductionCfg, Tensor}; #[non_exhaustive] #[derive(Clone, PartialEq, Debug)] diff --git a/crates/cudnn/src/backend/pointwise.rs b/crates/cudnn/src/backend/pointwise.rs index 075f267c..baa0347e 100644 --- a/crates/cudnn/src/backend/pointwise.rs +++ b/crates/cudnn/src/backend/pointwise.rs @@ -1,6 +1,6 @@ use crate::{ backend::{Descriptor, FloatDataType, Operation, PointwiseCfg, PointwiseMode, Real, Tensor}, - sys, CudnnError, DataType, IntoResult, NanPropagation, + CudnnError, DataType, IntoResult, NanPropagation, }; #[derive(Clone, Default, Debug, PartialEq)] @@ -57,49 
+57,49 @@ impl PointwiseBuilder { unsafe { let mut raw = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &cfg.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_XDESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_XDESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &x.raw.inner(), )?; if let Some(ref b) = self.b { raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_BDESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_BDESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &b.raw.inner(), )?; } raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_YDESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_YDESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &y.raw.inner(), )?; match self.alpha { Some(Real::Float(ref alpha)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, 1, alpha, )?, 
Some(Real::Double(ref alpha)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, alpha, )?, @@ -108,14 +108,14 @@ impl PointwiseBuilder { match self.beta { Some(Real::Float(ref beta)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_FLOAT, 1, beta, )?, Some(Real::Double(ref beta)) => raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, beta, )?, diff --git a/crates/cudnn/src/backend/pointwise_cfg.rs b/crates/cudnn/src/backend/pointwise_cfg.rs index 554df8fb..bae13e72 100644 --- a/crates/cudnn/src/backend/pointwise_cfg.rs +++ b/crates/cudnn/src/backend/pointwise_cfg.rs @@ -1,11 +1,11 @@ use crate::{ backend::{Descriptor, PointwiseMode}, - sys, CudnnError, DataType, IntoResult, NanPropagation, + CudnnError, DataType, IntoResult, NanPropagation, }; #[derive(Clone, Default, PartialEq, Debug)] pub struct PointwiseCfgBuilder { - math_precision: Option, + math_precision: Option, mode: Option, nan_propagation: Option, relu_lower_clip: Option, @@ -66,36 +66,36 @@ impl PointwiseCfgBuilder { } pub fn build(&mut self) -> Result { - let mode: sys::cudnnPointwiseMode_t = + let mode: cudnn_sys::cudnnPointwiseMode_t = self.mode.expect("pointwise mode is required.").into(); let math_precision = self .math_precision - .unwrap_or(sys::cudnnDataType_t::CUDNN_DATA_FLOAT); + 
.unwrap_or(cudnn_sys::cudnnDataType_t::CUDNN_DATA_FLOAT); unsafe { let mut raw = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_POINTWISE_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_POINTWISE_DESCRIPTOR, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_MATH_PREC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_MATH_PREC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE, 1, &math_precision, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_MODE, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_POINTWISE_MODE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_MODE, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_POINTWISE_MODE, 1, &mode, )?; if let Some(ref nan_propagation) = self.nan_propagation { raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_NAN_PROPAGATION, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_NAN_PROPOGATION, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_NAN_PROPAGATION, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_NAN_PROPOGATION, 1, nan_propagation, )?; @@ -103,8 +103,8 @@ impl PointwiseCfgBuilder { if let Some(ref relu_lower_clip) = self.relu_lower_clip { raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, relu_lower_clip, )?; @@ -112,8 +112,8 @@ impl PointwiseCfgBuilder { if let Some(ref relu_upper_clip) = self.relu_upper_clip { raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP, + 
cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, relu_upper_clip, )?; @@ -121,8 +121,8 @@ impl PointwiseCfgBuilder { if let Some(ref relu_lower_clip_slope) = self.relu_lower_clip_slope { raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, relu_lower_clip_slope, )?; @@ -130,8 +130,8 @@ impl PointwiseCfgBuilder { if let Some(ref elu_alpha) = self.elu_alpha { raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_ELU_ALPHA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_ELU_ALPHA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, elu_alpha, )?; @@ -139,8 +139,8 @@ impl PointwiseCfgBuilder { if let Some(ref softplus_beta) = self.softplus_beta { raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, softplus_beta, )?; @@ -148,8 +148,8 @@ impl PointwiseCfgBuilder { if let Some(ref swish_beta) = self.swish_beta { raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_SWISH_BETA, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_POINTWISE_SWISH_BETA, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DOUBLE, 1, swish_beta, )?; diff --git a/crates/cudnn/src/backend/pointwise_mode.rs b/crates/cudnn/src/backend/pointwise_mode.rs index 658536e8..bddc7eda 100644 --- a/crates/cudnn/src/backend/pointwise_mode.rs +++ b/crates/cudnn/src/backend/pointwise_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - 
#[non_exhaustive] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum PointwiseMode { @@ -48,7 +46,7 @@ pub enum PointwiseMode { TanhFwd, } -impl From for sys::cudnnPointwiseMode_t { +impl From for cudnn_sys::cudnnPointwiseMode_t { fn from(mode: PointwiseMode) -> Self { match mode { PointwiseMode::Abs => Self::CUDNN_POINTWISE_ABS, diff --git a/crates/cudnn/src/backend/reduction.rs b/crates/cudnn/src/backend/reduction.rs index e34c9a72..3a05b767 100644 --- a/crates/cudnn/src/backend/reduction.rs +++ b/crates/cudnn/src/backend/reduction.rs @@ -1,6 +1,6 @@ use crate::{ backend::{Descriptor, Operation, ReductionCfg, Tensor}, - sys, CudnnError, IntoResult, + CudnnError, IntoResult, }; #[derive(Default, Clone, Debug, PartialEq, Eq, Hash)] @@ -33,26 +33,26 @@ impl ReductionBuilder { unsafe { let mut raw = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_REDUCTION_DESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_REDUCTION_DESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &cfg.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_REDUCTION_XDESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_REDUCTION_XDESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &x.raw.inner(), )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_REDUCTION_YDESC, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_OPERATION_REDUCTION_YDESC, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, 
&y.raw.inner(), )?; diff --git a/crates/cudnn/src/backend/reduction_cfg.rs b/crates/cudnn/src/backend/reduction_cfg.rs index d0e18be6..bb1689e3 100644 --- a/crates/cudnn/src/backend/reduction_cfg.rs +++ b/crates/cudnn/src/backend/reduction_cfg.rs @@ -1,11 +1,11 @@ use crate::{ backend::{Descriptor, ReductionMode}, - sys, CudnnError, DataType, IntoResult, + CudnnError, DataType, IntoResult, }; #[derive(Clone, Default, PartialEq, Eq, Hash, Debug)] pub struct ReductionCfgBuilder { - math_precision: Option, + math_precision: Option, mode: Option, } @@ -26,26 +26,26 @@ impl ReductionCfgBuilder { pub fn build(self) -> Result { let math_precision = self .math_precision - .unwrap_or(sys::cudnnDataType_t::CUDNN_DATA_FLOAT); + .unwrap_or(cudnn_sys::cudnnDataType_t::CUDNN_DATA_FLOAT); - let mode: sys::cudnnReduceTensorOp_t = + let mode: cudnn_sys::cudnnReduceTensorOp_t = self.mode.expect("reduction mode is required.").into(); unsafe { let mut raw = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_REDUCTION_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_REDUCTION_DESCRIPTOR, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_REDUCTION_COMP_TYPE, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_REDUCTION_COMP_TYPE, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE, 1, &math_precision, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_REDUCTION_OPERATOR, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_REDUCTION_OPERATOR_TYPE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_REDUCTION_OPERATOR, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_REDUCTION_OPERATOR_TYPE, 1, &mode, )?; diff --git a/crates/cudnn/src/backend/reduction_mode.rs b/crates/cudnn/src/backend/reduction_mode.rs index 72c4864e..ceaff616 100644 --- a/crates/cudnn/src/backend/reduction_mode.rs +++ b/crates/cudnn/src/backend/reduction_mode.rs @@ -1,5 +1,3 
@@ -use crate::sys; - #[non_exhaustive] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum ReductionMode { @@ -14,20 +12,20 @@ pub enum ReductionMode { Norm2, } -impl From for sys::cudnnReduceTensorOp_t { +impl From for cudnn_sys::cudnnReduceTensorOp_t { fn from(mode: ReductionMode) -> Self { match mode { - ReductionMode::Add => sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_ADD, - ReductionMode::Amax => sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_AMAX, - ReductionMode::Avg => sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_AVG, - ReductionMode::Max => sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_MAX, - ReductionMode::Min => sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_MIN, - ReductionMode::Mul => sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_MUL, + ReductionMode::Add => cudnn_sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_ADD, + ReductionMode::Amax => cudnn_sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_AMAX, + ReductionMode::Avg => cudnn_sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_AVG, + ReductionMode::Max => cudnn_sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_MAX, + ReductionMode::Min => cudnn_sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_MIN, + ReductionMode::Mul => cudnn_sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_MUL, ReductionMode::MulNoZeros => { - sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS + cudnn_sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS } - ReductionMode::Norm1 => sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_NORM1, - ReductionMode::Norm2 => sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_NORM2, + ReductionMode::Norm1 => cudnn_sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_NORM1, + ReductionMode::Norm2 => cudnn_sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_NORM2, } } } diff --git a/crates/cudnn/src/backend/tensor.rs b/crates/cudnn/src/backend/tensor.rs index 4c8c9dfd..1b9c7e88 100644 --- a/crates/cudnn/src/backend/tensor.rs +++ b/crates/cudnn/src/backend/tensor.rs @@ -1,9 +1,9 
@@ -use crate::{backend::Descriptor, sys, CudnnError, DataType, IntoResult}; +use crate::{backend::Descriptor, CudnnError, DataType, IntoResult}; #[derive(Clone, Default, Debug, PartialEq, Eq, Hash)] pub struct TensorBuilder<'a> { id: Option, - data_type: Option, + data_type: Option, byte_alignment: Option, dimensions: Option<&'a [i64]>, strides: Option<&'a [i64]>, @@ -61,48 +61,48 @@ impl<'a> TensorBuilder<'a> { unsafe { let mut raw = Descriptor::new( - sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_TENSOR_DESCRIPTOR, + cudnn_sys::cudnnBackendDescriptorType_t::CUDNN_BACKEND_TENSOR_DESCRIPTOR, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_UNIQUE_ID, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_UNIQUE_ID, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, 1, &id, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_DATA_TYPE, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_DATA_TYPE, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_DATA_TYPE, 1, &data_type, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, 1, &byte_alignment, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_DIMENSIONS, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_DIMENSIONS, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, dimensions.len() as i64, dimensions, )?; raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_STRIDES, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + 
cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_STRIDES, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, strides.len() as i64, strides, )?; if let Some(vector_count) = self.vector_count { raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_VECTOR_COUNT, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_VECTOR_COUNT, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, 1, &vector_count, )?; @@ -113,8 +113,8 @@ impl<'a> TensorBuilder<'a> { .expect("vectorized_dimension is required when vector_count > 1"); raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_INT64, 1, &vectorized_dimension, )?; @@ -122,8 +122,8 @@ impl<'a> TensorBuilder<'a> { } raw.set_attribute( - sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_IS_VIRTUAL, - sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BOOLEAN, + cudnn_sys::cudnnBackendAttributeName_t::CUDNN_ATTR_TENSOR_IS_VIRTUAL, + cudnn_sys::cudnnBackendAttributeType_t::CUDNN_TYPE_BOOLEAN, 1, &self.is_virtual, )?; diff --git a/crates/cudnn/src/context.rs b/crates/cudnn/src/context.rs index 5429049e..777b1c0b 100644 --- a/crates/cudnn/src/context.rs +++ b/crates/cudnn/src/context.rs @@ -1,6 +1,7 @@ -use crate::{sys, CudnnError, IntoResult}; use std::mem::MaybeUninit; +use crate::{CudnnError, IntoResult}; + /// cuDNN library context. It's the central structure required to interact with cuDNN. /// It holds and manages internal memory allocations. /// @@ -24,7 +25,7 @@ use std::mem::MaybeUninit; /// You should generally create and drop context outside of performance critical code paths. 
#[derive(PartialEq, Eq, Hash, Debug)] pub struct CudnnContext { - pub(crate) raw: sys::cudnnHandle_t, + pub(crate) raw: cudnn_sys::cudnnHandle_t, } impl CudnnContext { @@ -49,7 +50,7 @@ impl CudnnContext { let mut raw = MaybeUninit::uninit(); unsafe { - sys::cudnnCreate(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreate(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); Ok(Self { raw }) @@ -63,7 +64,7 @@ impl CudnnContext { pub fn version(&self) -> (u32, u32, u32) { unsafe { // cudnnGetVersion does not return a state as it never fails. - let version = sys::cudnnGetVersion(); + let version = cudnn_sys::cudnnGetVersion(); ( (version / 1000) as u32, ((version % 1000) / 100) as u32, @@ -81,7 +82,7 @@ impl CudnnContext { pub fn cuda_version(&self) -> (u32, u32, u32) { unsafe { // cudnnGetCudartVersion does not return a state as it never fails. - let version = sys::cudnnGetCudartVersion(); + let version = cudnn_sys::cudnnGetCudartVersion(); ( (version / 1000) as u32, ((version % 1000) / 100) as u32, @@ -112,7 +113,8 @@ impl CudnnContext { /// stream and the cuDNN handle context. pub fn set_stream(&mut self, stream: &cust::stream::Stream) -> Result<(), CudnnError> { unsafe { - sys::cudnnSetStream(self.raw, stream.as_inner() as sys::cudaStream_t).into_result() + cudnn_sys::cudnnSetStream(self.raw, stream.as_inner() as cudnn_sys::cudaStream_t) + .into_result() } } } @@ -122,7 +124,7 @@ impl Drop for CudnnContext { unsafe { // This can be either a valid cuDNN handle or a null pointer. // Since it's getting dropped we shouldn't bother much. 
- sys::cudnnDestroy(self.raw); + cudnn_sys::cudnnDestroy(self.raw); } } } diff --git a/crates/cudnn/src/convolution/convolution_algo.rs b/crates/cudnn/src/convolution/convolution_algo.rs index bf5f312f..9fa545b3 100644 --- a/crates/cudnn/src/convolution/convolution_algo.rs +++ b/crates/cudnn/src/convolution/convolution_algo.rs @@ -1,4 +1,4 @@ -use crate::{sys, CudnnError, Determinism, IntoResult, MathType}; +use crate::{CudnnError, Determinism, IntoResult, MathType}; /// The best suited algorithm according to the layer specifications obtained through a heuristic. #[derive(Debug, Clone, Copy, PartialEq)] @@ -73,7 +73,7 @@ pub enum ConvFwdAlgo { WinogradNonFused, } -impl From for sys::cudnnConvolutionFwdAlgo_t { +impl From for cudnn_sys::cudnnConvolutionFwdAlgo_t { fn from(algo: ConvFwdAlgo) -> Self { match algo { ConvFwdAlgo::ImplicitGemm => Self::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, @@ -90,40 +90,29 @@ impl From for sys::cudnnConvolutionFwdAlgo_t { } } -impl From for ConvFwdAlgo { - fn from(algo: sys::cudnnConvolutionFwdAlgo_t) -> Self { +impl From for ConvFwdAlgo { + fn from(algo: cudnn_sys::cudnnConvolutionFwdAlgo_t) -> Self { + use cudnn_sys::cudnnConvolutionFwdAlgo_t::*; match algo { - sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM => { - ConvFwdAlgo::ImplicitGemm - } - sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM => { - ConvFwdAlgo::ImplicitPrecompGemm - } - sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_GEMM => ConvFwdAlgo::Gemm, - sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_DIRECT => { - ConvFwdAlgo::Direct - } - sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_FFT => ConvFwdAlgo::Fft, - sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING => { - ConvFwdAlgo::FftTiling - } - sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD => { - ConvFwdAlgo::Winograd - } - 
sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED => { - ConvFwdAlgo::WinogradNonFused - } - sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_COUNT => unreachable!(), + CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM => ConvFwdAlgo::ImplicitGemm, + CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM => ConvFwdAlgo::ImplicitPrecompGemm, + CUDNN_CONVOLUTION_FWD_ALGO_GEMM => ConvFwdAlgo::Gemm, + CUDNN_CONVOLUTION_FWD_ALGO_DIRECT => ConvFwdAlgo::Direct, + CUDNN_CONVOLUTION_FWD_ALGO_FFT => ConvFwdAlgo::Fft, + CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING => ConvFwdAlgo::FftTiling, + CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD => ConvFwdAlgo::Winograd, + CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED => ConvFwdAlgo::WinogradNonFused, + CUDNN_CONVOLUTION_FWD_ALGO_COUNT => unreachable!(), } } } /// BestHeuristic for the forward convolution algorithm. -impl TryFrom for BestHeuristic { +impl TryFrom for BestHeuristic { type Error = CudnnError; - fn try_from(raw: sys::cudnnConvolutionFwdAlgoPerf_t) -> Result { - let sys::cudnnConvolutionFwdAlgoPerf_t { + fn try_from(raw: cudnn_sys::cudnnConvolutionFwdAlgoPerf_t) -> Result { + let cudnn_sys::cudnnConvolutionFwdAlgoPerf_t { algo, status, time, @@ -168,7 +157,7 @@ pub enum ConvBwdDataAlgo { WinogradNonFused, } -impl From for sys::cudnnConvolutionBwdDataAlgo_t { +impl From for cudnn_sys::cudnnConvolutionBwdDataAlgo_t { fn from(algo: ConvBwdDataAlgo) -> Self { match algo { ConvBwdDataAlgo::Algo0 => Self::CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, @@ -183,26 +172,27 @@ impl From for sys::cudnnConvolutionBwdDataAlgo_t { } } -impl From for ConvBwdDataAlgo { - fn from(algo: sys::cudnnConvolutionBwdDataAlgo_t) -> Self { +impl From for ConvBwdDataAlgo { + fn from(algo: cudnn_sys::cudnnConvolutionBwdDataAlgo_t) -> Self { + use cudnn_sys::cudnnConvolutionBwdDataAlgo_t::*; match algo { - sys::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 => Self::Algo0, - 
sys::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 => Self::Algo1, - sys::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT => Self::Fft, - sys::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING => Self::FftTiling, - sys::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD => Self::Winograd, - sys::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED => Self::WinogradNonFused, - sys::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT => unreachable!() + CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 => Self::Algo0, + CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 => Self::Algo1, + CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT => Self::Fft, + CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING => Self::FftTiling, + CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD => Self::Winograd, + CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED => Self::WinogradNonFused, + CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT => unreachable!(), } } } /// BestHeuristic for the backward data convolution algorithm. 
-impl TryFrom for BestHeuristic { +impl TryFrom for BestHeuristic { type Error = CudnnError; - fn try_from(raw: sys::cudnnConvolutionBwdDataAlgoPerf_t) -> Result { - let sys::cudnnConvolutionBwdDataAlgoPerf_t { + fn try_from(raw: cudnn_sys::cudnnConvolutionBwdDataAlgoPerf_t) -> Result { + let cudnn_sys::cudnnConvolutionBwdDataAlgoPerf_t { algo, status, time, @@ -250,69 +240,56 @@ pub enum ConvBwdFilterAlgo { WinogradNonFused, } -impl From for sys::cudnnConvolutionBwdFilterAlgo_t { +impl From for cudnn_sys::cudnnConvolutionBwdFilterAlgo_t { fn from(algo: ConvBwdFilterAlgo) -> Self { match algo { ConvBwdFilterAlgo::Algo0 => { - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 + cudnn_sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 } ConvBwdFilterAlgo::Algo1 => { - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 + cudnn_sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 } ConvBwdFilterAlgo::Algo3 => { - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 + cudnn_sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 } ConvBwdFilterAlgo::Fft => { - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT + cudnn_sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT } ConvBwdFilterAlgo::FftTiling => { - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING + cudnn_sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING } ConvBwdFilterAlgo::Winograd => { - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD + cudnn_sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD } ConvBwdFilterAlgo::WinogradNonFused => { - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED + 
cudnn_sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED } } } } -impl From for ConvBwdFilterAlgo { - fn from(algo: sys::cudnnConvolutionBwdFilterAlgo_t) -> Self { +impl From for ConvBwdFilterAlgo { + fn from(algo: cudnn_sys::cudnnConvolutionBwdFilterAlgo_t) -> Self { + use cudnn_sys::cudnnConvolutionBwdFilterAlgo_t::*; match algo { - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 => { - Self::Algo0 - } - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 => { - Self::Algo1 - } - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 => { - Self::Algo3 - } - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT => { - Self::Fft - } - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING => { - Self::FftTiling - } - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD => { - Self::Winograd - } - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED => { - Self::WinogradNonFused - } - sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT => unreachable!() + CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 => Self::Algo0, + CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 => Self::Algo1, + CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 => Self::Algo3, + CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT => Self::Fft, + CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING => Self::FftTiling, + CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD => Self::Winograd, + CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED => Self::WinogradNonFused, + CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT => unreachable!(), } } } /// BestHeuristic for the backward filter convolution algorithm. 
-impl TryFrom for BestHeuristic { +impl TryFrom for BestHeuristic { type Error = CudnnError; - fn try_from(raw: sys::cudnnConvolutionBwdFilterAlgoPerf_t) -> Result { - let sys::cudnnConvolutionBwdFilterAlgoPerf_t { + fn try_from(raw: cudnn_sys::cudnnConvolutionBwdFilterAlgoPerf_t) -> Result { + let cudnn_sys::cudnnConvolutionBwdFilterAlgoPerf_t { algo, status, time, diff --git a/crates/cudnn/src/convolution/convolution_descriptor.rs b/crates/cudnn/src/convolution/convolution_descriptor.rs index f70fbb21..4f07821a 100644 --- a/crates/cudnn/src/convolution/convolution_descriptor.rs +++ b/crates/cudnn/src/convolution/convolution_descriptor.rs @@ -1,7 +1,7 @@ -use crate::{sys, ConvMode, CudnnError, DataType, IntoResult, MathType}; - use std::{marker::PhantomData, mem::MaybeUninit}; +use crate::{ConvMode, CudnnError, DataType, IntoResult, MathType}; + /// A generic description of an n-dimensional convolution. /// /// **Do note** that N can be either 2 or 3, respectively for a 2-d or a 3-d convolution, and that @@ -9,7 +9,7 @@ use std::{marker::PhantomData, mem::MaybeUninit}; /// the same layer. 
#[derive(Debug, PartialEq, Eq, Hash)] pub struct ConvDescriptor { - pub(crate) raw: sys::cudnnConvolutionDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnConvolutionDescriptor_t, comp_type: PhantomData, } @@ -96,14 +96,14 @@ impl ConvDescriptor { let mut raw = MaybeUninit::uninit(); unsafe { - sys::cudnnCreateConvolutionDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateConvolutionDescriptor(raw.as_mut_ptr()).into_result()?; let conv_desc = Self { raw: raw.assume_init(), comp_type: PhantomData, }; - sys::cudnnSetConvolutionNdDescriptor( + cudnn_sys::cudnnSetConvolutionNdDescriptor( conv_desc.raw, N as i32, padding.as_ptr(), @@ -152,7 +152,7 @@ impl ConvDescriptor { /// # } /// ``` pub fn set_math_type(&mut self, math_type: MathType) -> Result<(), CudnnError> { - unsafe { sys::cudnnSetConvolutionMathType(self.raw, math_type.into()).into_result() } + unsafe { cudnn_sys::cudnnSetConvolutionMathType(self.raw, math_type.into()).into_result() } } /// Sets the group count for this convolution descriptor instance. @@ -187,14 +187,14 @@ impl ConvDescriptor { /// # } /// ``` pub fn set_group_count(&mut self, groups: i32) -> Result<(), CudnnError> { - unsafe { sys::cudnnSetConvolutionGroupCount(self.raw, groups) }.into_result() + unsafe { cudnn_sys::cudnnSetConvolutionGroupCount(self.raw, groups) }.into_result() } } impl Drop for ConvDescriptor { fn drop(&mut self) { unsafe { - sys::cudnnDestroyConvolutionDescriptor(self.raw); + cudnn_sys::cudnnDestroyConvolutionDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/convolution/convolution_mode.rs b/crates/cudnn/src/convolution/convolution_mode.rs index 477bf41e..d57e1628 100644 --- a/crates/cudnn/src/convolution/convolution_mode.rs +++ b/crates/cudnn/src/convolution/convolution_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Enum used to configure a convolution descriptor. 
/// /// The filter used for the convolution can be applied in two different ways, corresponding @@ -17,11 +15,13 @@ pub enum ConvMode { CrossCorrelation, } -impl From for sys::cudnnConvolutionMode_t { - fn from(convolution_mode: ConvMode) -> sys::cudnnConvolutionMode_t { +impl From for cudnn_sys::cudnnConvolutionMode_t { + fn from(convolution_mode: ConvMode) -> cudnn_sys::cudnnConvolutionMode_t { match convolution_mode { - ConvMode::Convolution => sys::cudnnConvolutionMode_t::CUDNN_CONVOLUTION, - ConvMode::CrossCorrelation => sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION, + ConvMode::Convolution => cudnn_sys::cudnnConvolutionMode_t::CUDNN_CONVOLUTION, + ConvMode::CrossCorrelation => { + cudnn_sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION + } } } } diff --git a/crates/cudnn/src/convolution/filter_descriptor.rs b/crates/cudnn/src/convolution/filter_descriptor.rs index ce379e8c..f08223ed 100644 --- a/crates/cudnn/src/convolution/filter_descriptor.rs +++ b/crates/cudnn/src/convolution/filter_descriptor.rs @@ -1,13 +1,14 @@ -use crate::{sys, CudnnError, DataType, IntoResult, ScalarC, TensorFormat, VecType}; use std::{marker::PhantomData, mem::MaybeUninit}; +use crate::{CudnnError, DataType, IntoResult, ScalarC, TensorFormat, VecType}; + /// A generic description of an n-dimensional filter dataset. 
#[derive(Debug, PartialEq, Eq, Hash)] pub struct FilterDescriptor where T: DataType, { - pub(crate) raw: sys::cudnnFilterDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnFilterDescriptor_t, data_type: PhantomData, } @@ -59,11 +60,11 @@ where let ndims = shape.len(); unsafe { - sys::cudnnCreateFilterDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateFilterDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); - sys::cudnnSetFilterNdDescriptor( + cudnn_sys::cudnnSetFilterNdDescriptor( raw, T::into_raw(), format.into(), @@ -112,11 +113,11 @@ where let format = TensorFormat::NchwVectC; unsafe { - sys::cudnnCreateFilterDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateFilterDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); - sys::cudnnSetFilterNdDescriptor( + cudnn_sys::cudnnSetFilterNdDescriptor( raw, V::into_raw(), format.into(), @@ -139,7 +140,7 @@ where { fn drop(&mut self) { unsafe { - sys::cudnnDestroyFilterDescriptor(self.raw); + cudnn_sys::cudnnDestroyFilterDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/convolution/mod.rs b/crates/cudnn/src/convolution/mod.rs index 1a07b0d1..928e2973 100644 --- a/crates/cudnn/src/convolution/mod.rs +++ b/crates/cudnn/src/convolution/mod.rs @@ -1,3 +1,11 @@ +use std::mem::MaybeUninit; + +use cust::memory::GpuBuffer; + +use crate::{ + ActivationDescriptor, CudnnContext, CudnnError, DataType, IntoResult, TensorDescriptor, +}; + mod convolution_algo; mod convolution_config; mod convolution_descriptor; @@ -10,12 +18,6 @@ pub use convolution_descriptor::*; pub use convolution_mode::*; pub use filter_descriptor::*; -use crate::{ - sys, ActivationDescriptor, CudnnContext, CudnnError, DataType, IntoResult, TensorDescriptor, -}; -use cust::memory::GpuBuffer; -use std::mem::MaybeUninit; - impl CudnnContext { /// This function serves as a heuristic for obtaining the best suited algorithm for /// `convolution_forward()` for the given layer 
specifications. @@ -92,7 +94,7 @@ impl CudnnContext { let mut perf_results = MaybeUninit::uninit(); unsafe { - sys::cudnnGetConvolutionForwardAlgorithm_v7( + cudnn_sys::cudnnGetConvolutionForwardAlgorithm_v7( self.raw, x_desc.raw, w_desc.raw, @@ -207,7 +209,7 @@ impl CudnnContext { let mut perf_results = MaybeUninit::uninit(); unsafe { - sys::cudnnGetConvolutionBackwardDataAlgorithm_v7( + cudnn_sys::cudnnGetConvolutionBackwardDataAlgorithm_v7( self.raw, w_desc.raw, dy_desc.raw, @@ -322,7 +324,7 @@ impl CudnnContext { let mut perf_results = MaybeUninit::uninit(); unsafe { - sys::cudnnGetConvolutionBackwardFilterAlgorithm_v7( + cudnn_sys::cudnnGetConvolutionBackwardFilterAlgorithm_v7( self.raw, x_desc.raw, dy_desc.raw, @@ -453,7 +455,7 @@ impl CudnnContext { let mut size = MaybeUninit::uninit(); unsafe { - sys::cudnnGetConvolutionForwardWorkspaceSize( + cudnn_sys::cudnnGetConvolutionForwardWorkspaceSize( self.raw, x_desc.raw, w_desc.raw, @@ -564,7 +566,7 @@ impl CudnnContext { let mut size = MaybeUninit::uninit(); unsafe { - sys::cudnnGetConvolutionBackwardDataWorkspaceSize( + cudnn_sys::cudnnGetConvolutionBackwardDataWorkspaceSize( self.raw, w_desc.raw, dy_desc.raw, @@ -675,7 +677,7 @@ impl CudnnContext { let mut size = MaybeUninit::uninit(); unsafe { - sys::cudnnGetConvolutionBackwardFilterWorkspaceSize( + cudnn_sys::cudnnGetConvolutionBackwardFilterWorkspaceSize( self.raw, x_desc.raw, dy_desc.raw, @@ -828,7 +830,7 @@ impl CudnnContext { }; unsafe { - sys::cudnnConvolutionForward( + cudnn_sys::cudnnConvolutionForward( self.raw, alpha, x_desc.raw, @@ -1008,7 +1010,7 @@ impl CudnnContext { }; unsafe { - sys::cudnnConvolutionBiasActivationForward( + cudnn_sys::cudnnConvolutionBiasActivationForward( self.raw, alpha, x_desc.raw, @@ -1162,7 +1164,7 @@ impl CudnnContext { }; unsafe { - sys::cudnnConvolutionBackwardData( + cudnn_sys::cudnnConvolutionBackwardData( self.raw, alpha, w_desc.raw, @@ -1312,7 +1314,7 @@ impl CudnnContext { }; unsafe { - 
sys::cudnnConvolutionBackwardFilter( + cudnn_sys::cudnnConvolutionBackwardFilter( self.raw, alpha, x_desc.raw, diff --git a/crates/cudnn/src/data_type.rs b/crates/cudnn/src/data_type.rs index aba15889..801ef014 100644 --- a/crates/cudnn/src/data_type.rs +++ b/crates/cudnn/src/data_type.rs @@ -1,8 +1,8 @@ -use crate::{private, sys}; +use crate::private; pub trait DataType: private::Sealed + cust::memory::DeviceCopy { /// Returns the corresponding raw cuDNN data type. - fn into_raw() -> sys::cudnnDataType_t; + fn into_raw() -> cudnn_sys::cudnnDataType_t; } macro_rules! impl_cudnn_data_type { @@ -10,8 +10,8 @@ macro_rules! impl_cudnn_data_type { impl private::Sealed for $safe_type {} impl DataType for $safe_type { - fn into_raw() -> sys::cudnnDataType_t { - sys::cudnnDataType_t::$raw_type + fn into_raw() -> cudnn_sys::cudnnDataType_t { + cudnn_sys::cudnnDataType_t::$raw_type } } }; @@ -36,7 +36,7 @@ where T: DataType, { /// Return the corresponding raw cuDNN data type. - fn into_raw() -> sys::cudnnDataType_t; + fn into_raw() -> cudnn_sys::cudnnDataType_t; } impl private::Sealed for Vec4 {} @@ -46,8 +46,8 @@ impl private::Sealed for Vec32 {} macro_rules! impl_cudnn_vec_type { ($type:ident, $safe_type:ident, $raw_type:ident) => { impl VecType<$safe_type> for $type { - fn into_raw() -> sys::cudnnDataType_t { - sys::cudnnDataType_t::$raw_type + fn into_raw() -> cudnn_sys::cudnnDataType_t { + cudnn_sys::cudnnDataType_t::$raw_type } } }; diff --git a/crates/cudnn/src/determinism.rs b/crates/cudnn/src/determinism.rs index f62c05d7..8415316d 100644 --- a/crates/cudnn/src/determinism.rs +++ b/crates/cudnn/src/determinism.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Enum stating whether or not the computed results are deterministic (reproducible). 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnDeterminism_t) @@ -12,20 +10,22 @@ pub enum Determinism { NonDeterministic, } -impl From for Determinism { - fn from(raw: sys::cudnnDeterminism_t) -> Self { +impl From for Determinism { + fn from(raw: cudnn_sys::cudnnDeterminism_t) -> Self { + use cudnn_sys::cudnnDeterminism_t::*; match raw { - sys::cudnnDeterminism_t::CUDNN_DETERMINISTIC => Self::Deterministic, - sys::cudnnDeterminism_t::CUDNN_NON_DETERMINISTIC => Self::NonDeterministic, + CUDNN_DETERMINISTIC => Self::Deterministic, + CUDNN_NON_DETERMINISTIC => Self::NonDeterministic, } } } -impl From for sys::cudnnDeterminism_t { +impl From for cudnn_sys::cudnnDeterminism_t { fn from(determinism: Determinism) -> Self { + use cudnn_sys::cudnnDeterminism_t::*; match determinism { - Determinism::Deterministic => sys::cudnnDeterminism_t::CUDNN_DETERMINISTIC, - Determinism::NonDeterministic => sys::cudnnDeterminism_t::CUDNN_NON_DETERMINISTIC, + Determinism::Deterministic => CUDNN_DETERMINISTIC, + Determinism::NonDeterministic => CUDNN_NON_DETERMINISTIC, } } } diff --git a/crates/cudnn/src/dropout/dropout_descriptor.rs b/crates/cudnn/src/dropout/dropout_descriptor.rs index ec4a74dd..673687c4 100644 --- a/crates/cudnn/src/dropout/dropout_descriptor.rs +++ b/crates/cudnn/src/dropout/dropout_descriptor.rs @@ -1,4 +1,3 @@ -use crate::sys; use cust::memory::GpuBuffer; /// The descriptor of a dropout operation. 
@@ -7,7 +6,7 @@ pub struct DropoutDescriptor where T: GpuBuffer, { - pub(crate) raw: sys::cudnnDropoutDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnDropoutDescriptor_t, pub(crate) states: T, } @@ -17,7 +16,7 @@ where { fn drop(&mut self) { unsafe { - sys::cudnnDestroyDropoutDescriptor(self.raw); + cudnn_sys::cudnnDestroyDropoutDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/dropout/mod.rs b/crates/cudnn/src/dropout/mod.rs index 3353808d..ac38b525 100644 --- a/crates/cudnn/src/dropout/mod.rs +++ b/crates/cudnn/src/dropout/mod.rs @@ -1,11 +1,13 @@ +use std::mem::MaybeUninit; + +use cust::memory::GpuBuffer; + +use crate::{CudnnContext, CudnnError, DataType, IntoResult, TensorDescriptor}; + mod dropout_descriptor; pub use dropout_descriptor::DropoutDescriptor; -use crate::{sys, CudnnContext, CudnnError, DataType, IntoResult, TensorDescriptor}; -use cust::memory::GpuBuffer; -use std::mem::MaybeUninit; - impl CudnnContext { /// This function is used to query the amount of space required to store the states of the /// random number generators. 
@@ -35,7 +37,7 @@ impl CudnnContext { let mut size = MaybeUninit::uninit(); unsafe { - sys::cudnnDropoutGetStatesSize(self.raw, size.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnDropoutGetStatesSize(self.raw, size.as_mut_ptr()).into_result()?; Ok(size.assume_init()) } @@ -88,7 +90,8 @@ impl CudnnContext { let mut size = MaybeUninit::uninit(); unsafe { - sys::cudnnDropoutGetReserveSpaceSize(desc.raw, size.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnDropoutGetReserveSpaceSize(desc.raw, size.as_mut_ptr()) + .into_result()?; Ok(size.assume_init()) } @@ -148,12 +151,19 @@ impl CudnnContext { let states_size = states.len(); unsafe { - sys::cudnnCreateDropoutDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateDropoutDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); - sys::cudnnSetDropoutDescriptor(raw, self.raw, dropout, states_ptr, states_size, seed) - .into_result()?; + cudnn_sys::cudnnSetDropoutDescriptor( + raw, + self.raw, + dropout, + states_ptr, + states_size, + seed, + ) + .into_result()?; Ok(DropoutDescriptor { raw, states }) } @@ -245,7 +255,7 @@ impl CudnnContext { let reserve_space_size = reserve_space.len(); unsafe { - sys::cudnnDropoutForward( + cudnn_sys::cudnnDropoutForward( self.raw, dropout_desc.raw, x_desc.raw, @@ -345,7 +355,7 @@ impl CudnnContext { let reserve_space_size = reserve_space.len(); unsafe { - sys::cudnnDropoutBackward( + cudnn_sys::cudnnDropoutBackward( self.raw, dropout_desc.raw, dy_desc.raw, diff --git a/crates/cudnn/src/error.rs b/crates/cudnn/src/error.rs index 6305e03d..1c60e741 100644 --- a/crates/cudnn/src/error.rs +++ b/crates/cudnn/src/error.rs @@ -1,4 +1,3 @@ -use crate::sys; use std::{error::Error, ffi::CStr, fmt::Display}; /// Enum encapsulating function status returns. All cuDNN library functions return their status. @@ -17,6 +16,7 @@ pub enum CudnnError { NotInitialized, /// Resource allocation failed inside the cuDNN library. 
This is usually caused by an internal /// `cudaMalloc()` failure. + #[cfg(not(cudnn9))] AllocFailed, /// An incorrect value or parameter was passed to the function. BadParam, @@ -25,9 +25,11 @@ pub enum CudnnError { InvalidValue, /// The function requires a feature absent from the current GPU device. Note that cuDNN only /// supports devices with compute capabilities greater than or equal to 3.0. + #[cfg(not(cudnn9))] ArchMismatch, /// An access to GPU memory space failed, which is usually caused by a failure to bind a /// texture. + #[cfg(not(cudnn9))] MappingError, /// The GPU program failed to execute. This is usually caused by a failure to launch some /// cuDNN kernel on the GPU, which can occur for multiple reasons. @@ -42,34 +44,40 @@ pub enum CudnnError { /// These libraries are libcuda.so (nvcuda.dll) and libnvrtc.so /// (nvrtc64_Major Release Version Minor Release Version_0.dll and /// nvrtc-builtins64_Major Release Version Minor Release Version.dll). + #[cfg(not(cudnn9))] RuntimePrerequisiteMissing, /// Some tasks in the user stream are not completed. RuntimeInProgress, /// Numerical overflow occurred during the GPU kernel execution. RuntimeFpOverflow, + #[cfg(not(cudnn9))] VersionMismatch, } impl CudnnError { /// Converts the `CudnnError` into the corresponding raw variant. 
- pub fn into_raw(self) -> sys::cudnnStatus_t { + pub fn into_raw(self) -> cudnn_sys::cudnnStatus_t { + use cudnn_sys::cudnnStatus_t::*; match self { - CudnnError::NotInitialized => sys::cudnnStatus_t::CUDNN_STATUS_NOT_INITIALIZED, - CudnnError::AllocFailed => sys::cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED, - CudnnError::BadParam => sys::cudnnStatus_t::CUDNN_STATUS_BAD_PARAM, - CudnnError::InternalError => sys::cudnnStatus_t::CUDNN_STATUS_INTERNAL_ERROR, - CudnnError::InvalidValue => sys::cudnnStatus_t::CUDNN_STATUS_INVALID_VALUE, - CudnnError::ArchMismatch => sys::cudnnStatus_t::CUDNN_STATUS_ARCH_MISMATCH, - CudnnError::MappingError => sys::cudnnStatus_t::CUDNN_STATUS_MAPPING_ERROR, - CudnnError::ExecutionFailed => sys::cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED, - CudnnError::NotSupported => sys::cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED, - CudnnError::LicenseError => sys::cudnnStatus_t::CUDNN_STATUS_LICENSE_ERROR, - CudnnError::RuntimePrerequisiteMissing => { - sys::cudnnStatus_t::CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING - } - CudnnError::RuntimeInProgress => sys::cudnnStatus_t::CUDNN_STATUS_RUNTIME_IN_PROGRESS, - CudnnError::RuntimeFpOverflow => sys::cudnnStatus_t::CUDNN_STATUS_RUNTIME_FP_OVERFLOW, - CudnnError::VersionMismatch => sys::cudnnStatus_t::CUDNN_STATUS_VERSION_MISMATCH, + CudnnError::NotInitialized => CUDNN_STATUS_NOT_INITIALIZED, + #[cfg(not(cudnn9))] + CudnnError::AllocFailed => CUDNN_STATUS_ALLOC_FAILED, + CudnnError::BadParam => CUDNN_STATUS_BAD_PARAM, + CudnnError::InternalError => CUDNN_STATUS_INTERNAL_ERROR, + CudnnError::InvalidValue => CUDNN_STATUS_INVALID_VALUE, + #[cfg(not(cudnn9))] + CudnnError::ArchMismatch => CUDNN_STATUS_ARCH_MISMATCH, + #[cfg(not(cudnn9))] + CudnnError::MappingError => CUDNN_STATUS_MAPPING_ERROR, + CudnnError::ExecutionFailed => CUDNN_STATUS_EXECUTION_FAILED, + CudnnError::NotSupported => CUDNN_STATUS_NOT_SUPPORTED, + CudnnError::LicenseError => CUDNN_STATUS_LICENSE_ERROR, + #[cfg(not(cudnn9))] + 
CudnnError::RuntimePrerequisiteMissing => CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING, + CudnnError::RuntimeInProgress => CUDNN_STATUS_RUNTIME_IN_PROGRESS, + CudnnError::RuntimeFpOverflow => CUDNN_STATUS_RUNTIME_FP_OVERFLOW, + #[cfg(not(cudnn9))] + CudnnError::VersionMismatch => CUDNN_STATUS_VERSION_MISMATCH, } } } @@ -77,7 +85,7 @@ impl CudnnError { impl Display for CudnnError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { unsafe { - let ptr = sys::cudnnGetErrorString(self.into_raw()); + let ptr = cudnn_sys::cudnnGetErrorString(self.into_raw()); let cow = CStr::from_ptr(ptr).to_string_lossy(); f.write_str(cow.as_ref()) } @@ -90,27 +98,34 @@ pub trait IntoResult { fn into_result(self) -> Result<(), CudnnError>; } -impl IntoResult for sys::cudnnStatus_t { +impl IntoResult for cudnn_sys::cudnnStatus_t { /// Converts the raw status into a result. fn into_result(self) -> Result<(), CudnnError> { + use cudnn_sys::cudnnStatus_t::*; + Err(match self { - sys::cudnnStatus_t::CUDNN_STATUS_SUCCESS => return Ok(()), - sys::cudnnStatus_t::CUDNN_STATUS_NOT_INITIALIZED => CudnnError::NotInitialized, - sys::cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => CudnnError::AllocFailed, - sys::cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => CudnnError::BadParam, - sys::cudnnStatus_t::CUDNN_STATUS_INTERNAL_ERROR => CudnnError::InternalError, - sys::cudnnStatus_t::CUDNN_STATUS_INVALID_VALUE => CudnnError::InvalidValue, - sys::cudnnStatus_t::CUDNN_STATUS_ARCH_MISMATCH => CudnnError::ArchMismatch, - sys::cudnnStatus_t::CUDNN_STATUS_MAPPING_ERROR => CudnnError::MappingError, - sys::cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => CudnnError::ExecutionFailed, - sys::cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => CudnnError::NotSupported, - sys::cudnnStatus_t::CUDNN_STATUS_LICENSE_ERROR => CudnnError::LicenseError, - sys::cudnnStatus_t::CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING => { - CudnnError::RuntimePrerequisiteMissing - } - sys::cudnnStatus_t::CUDNN_STATUS_RUNTIME_IN_PROGRESS => 
CudnnError::RuntimeInProgress, - sys::cudnnStatus_t::CUDNN_STATUS_RUNTIME_FP_OVERFLOW => CudnnError::RuntimeFpOverflow, - sys::cudnnStatus_t::CUDNN_STATUS_VERSION_MISMATCH => CudnnError::VersionMismatch, + CUDNN_STATUS_SUCCESS => return Ok(()), + CUDNN_STATUS_NOT_INITIALIZED => CudnnError::NotInitialized, + #[cfg(not(cudnn9))] + CUDNN_STATUS_ALLOC_FAILED => CudnnError::AllocFailed, + CUDNN_STATUS_BAD_PARAM => CudnnError::BadParam, + CUDNN_STATUS_INTERNAL_ERROR => CudnnError::InternalError, + CUDNN_STATUS_INVALID_VALUE => CudnnError::InvalidValue, + #[cfg(not(cudnn9))] + CUDNN_STATUS_ARCH_MISMATCH => CudnnError::ArchMismatch, + #[cfg(not(cudnn9))] + CUDNN_STATUS_MAPPING_ERROR => CudnnError::MappingError, + CUDNN_STATUS_EXECUTION_FAILED => CudnnError::ExecutionFailed, + CUDNN_STATUS_NOT_SUPPORTED => CudnnError::NotSupported, + CUDNN_STATUS_LICENSE_ERROR => CudnnError::LicenseError, + #[cfg(not(cudnn9))] + CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING => CudnnError::RuntimePrerequisiteMissing, + CUDNN_STATUS_RUNTIME_IN_PROGRESS => CudnnError::RuntimeInProgress, + CUDNN_STATUS_RUNTIME_FP_OVERFLOW => CudnnError::RuntimeFpOverflow, + #[cfg(not(cudnn9))] + CUDNN_STATUS_VERSION_MISMATCH => CudnnError::VersionMismatch, + // TODO(adamcavendish): implement cuDNN 9 error codes. + _ => todo!(), }) } } diff --git a/crates/cudnn/src/lib.rs b/crates/cudnn/src/lib.rs index 0d9e2ebc..3939d399 100644 --- a/crates/cudnn/src/lib.rs +++ b/crates/cudnn/src/lib.rs @@ -16,7 +16,6 @@ mod pooling; mod reduction; mod rnn; mod softmax; -mod sys; mod tensor; mod w_grad_mode; diff --git a/crates/cudnn/src/math_type.rs b/crates/cudnn/src/math_type.rs index e0dbdf33..4806de48 100644 --- a/crates/cudnn/src/math_type.rs +++ b/crates/cudnn/src/math_type.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Enum stating whether the use of tensor core operations is permitted in a given library routine. 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnMathType_t) @@ -20,20 +18,19 @@ pub enum MathType { Fma, } -impl From for MathType { - fn from(raw: sys::cudnnMathType_t) -> Self { +impl From for MathType { + fn from(raw: cudnn_sys::cudnnMathType_t) -> Self { + use cudnn_sys::cudnnMathType_t::*; match raw { - sys::cudnnMathType_t::CUDNN_DEFAULT_MATH => Self::Default, - sys::cudnnMathType_t::CUDNN_TENSOR_OP_MATH => Self::TensorOp, - sys::cudnnMathType_t::CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION => { - Self::TensorOpAllowConversion - } - sys::cudnnMathType_t::CUDNN_FMA_MATH => Self::Fma, + CUDNN_DEFAULT_MATH => Self::Default, + CUDNN_TENSOR_OP_MATH => Self::TensorOp, + CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION => Self::TensorOpAllowConversion, + CUDNN_FMA_MATH => Self::Fma, } } } -impl From for sys::cudnnMathType_t { +impl From for cudnn_sys::cudnnMathType_t { fn from(math_type: MathType) -> Self { match math_type { MathType::Default => Self::CUDNN_DEFAULT_MATH, diff --git a/crates/cudnn/src/nan_propagation.rs b/crates/cudnn/src/nan_propagation.rs index 474e41ed..87a60b0e 100644 --- a/crates/cudnn/src/nan_propagation.rs +++ b/crates/cudnn/src/nan_propagation.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Indicates whether a given cuDNN routine should propagate Nan numbers. 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnNanPropagation_t) @@ -12,11 +10,12 @@ pub enum NanPropagation { PropagateNaN, } -impl From for sys::cudnnNanPropagation_t { - fn from(nan_propagation: NanPropagation) -> sys::cudnnNanPropagation_t { +impl From for cudnn_sys::cudnnNanPropagation_t { + fn from(nan_propagation: NanPropagation) -> cudnn_sys::cudnnNanPropagation_t { + use cudnn_sys::cudnnNanPropagation_t::*; match nan_propagation { - NanPropagation::NotPropagateNaN => sys::cudnnNanPropagation_t::CUDNN_NOT_PROPAGATE_NAN, - NanPropagation::PropagateNaN => sys::cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN, + NanPropagation::NotPropagateNaN => CUDNN_NOT_PROPAGATE_NAN, + NanPropagation::PropagateNaN => CUDNN_PROPAGATE_NAN, } } } diff --git a/crates/cudnn/src/op/mod.rs b/crates/cudnn/src/op/mod.rs index c8bca544..f9b6fa47 100644 --- a/crates/cudnn/src/op/mod.rs +++ b/crates/cudnn/src/op/mod.rs @@ -1,14 +1,13 @@ +use cust::memory::GpuBuffer; + +use crate::{CudnnContext, CudnnError, DataType, IntoResult, ScalingDataType, TensorDescriptor}; + mod op_tensor_descriptor; mod op_tensor_op; pub use op_tensor_descriptor::*; pub use op_tensor_op::*; -use crate::{ - sys, CudnnContext, CudnnError, DataType, IntoResult, ScalingDataType, TensorDescriptor, -}; -use cust::memory::GpuBuffer; - impl CudnnContext { /// This function computes a binary element-wise tensor core operation according to the /// following equation: @@ -108,7 +107,7 @@ impl CudnnContext { let gamma = &gamma as *const CompT as *const std::ffi::c_void; unsafe { - sys::cudnnOpTensor( + cudnn_sys::cudnnOpTensor( self.raw, op_desc.raw, alpha, @@ -215,7 +214,7 @@ impl CudnnContext { unsafe { // The second tensor and the second scaling factors here are ignored. // We use the left operand twice to make cuDNN happy, as it won't accept a null pointer. 
- sys::cudnnOpTensor( + cudnn_sys::cudnnOpTensor( self.raw, op_desc.raw, alpha, @@ -310,7 +309,7 @@ impl CudnnContext { let gamma = &gamma as *const CompT as *const std::ffi::c_void; unsafe { - sys::cudnnAddTensor( + cudnn_sys::cudnnAddTensor( self.raw, alpha, a_desc.raw, a_data, gamma, c_desc.raw, c_data, ) .into_result() @@ -371,7 +370,7 @@ impl CudnnContext { let value = &value as *const CompT as *const std::ffi::c_void; - unsafe { sys::cudnnSetTensor(self.raw, desc.raw, data, value).into_result() } + unsafe { cudnn_sys::cudnnSetTensor(self.raw, desc.raw, data, value).into_result() } } /// This function scales all the element of a tensor by a given value. @@ -428,6 +427,6 @@ impl CudnnContext { let value = &value as *const CompT as *const std::ffi::c_void; - unsafe { sys::cudnnScaleTensor(self.raw, desc.raw, data, value).into_result() } + unsafe { cudnn_sys::cudnnScaleTensor(self.raw, desc.raw, data, value).into_result() } } } diff --git a/crates/cudnn/src/op/op_tensor_descriptor.rs b/crates/cudnn/src/op/op_tensor_descriptor.rs index b5bf11fd..11e1a89a 100644 --- a/crates/cudnn/src/op/op_tensor_descriptor.rs +++ b/crates/cudnn/src/op/op_tensor_descriptor.rs @@ -1,6 +1,7 @@ -use crate::{sys, BinaryOp, CudnnError, DataType, IntoResult, NanPropagation, UnaryOp}; use std::{marker::PhantomData, mem::MaybeUninit}; +use crate::{BinaryOp, CudnnError, DataType, IntoResult, NanPropagation, UnaryOp}; + /// Initializes an op tensor descriptor. /// /// # Arguments @@ -9,16 +10,16 @@ use std::{marker::PhantomData, mem::MaybeUninit}; /// * `op` - raw operation type. /// * `nan_opt` - raw nan propagation policy. 
unsafe fn init_raw_op_descriptor( - op: sys::cudnnOpTensorOp_t, - nan_opt: sys::cudnnNanPropagation_t, -) -> Result { + op: cudnn_sys::cudnnOpTensorOp_t, + nan_opt: cudnn_sys::cudnnNanPropagation_t, +) -> Result { let mut raw = MaybeUninit::uninit(); - sys::cudnnCreateOpTensorDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateOpTensorDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); - sys::cudnnSetOpTensorDescriptor(raw, op, T::into_raw(), nan_opt).into_result()?; + cudnn_sys::cudnnSetOpTensorDescriptor(raw, op, T::into_raw(), nan_opt).into_result()?; Ok(raw) } @@ -29,7 +30,7 @@ unsafe fn init_raw_op_descriptor( /// respectively. #[derive(Debug, PartialEq, Eq, Hash)] pub struct UnaryOpTensorDescriptor { - pub(crate) raw: sys::cudnnOpTensorDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnOpTensorDescriptor_t, comp_type: PhantomData, op: UnaryOp, } @@ -81,7 +82,7 @@ where impl Drop for UnaryOpTensorDescriptor { fn drop(&mut self) { unsafe { - sys::cudnnDestroyOpTensorDescriptor(self.raw); + cudnn_sys::cudnnDestroyOpTensorDescriptor(self.raw); } } } @@ -94,7 +95,7 @@ impl Drop for UnaryOpTensorDescriptor { /// respectively. #[derive(Debug, PartialEq, Eq, Hash)] pub struct BinaryOpTensorDescriptor { - pub(crate) raw: sys::cudnnOpTensorDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnOpTensorDescriptor_t, comp_type: PhantomData, op: BinaryOp, } @@ -145,7 +146,7 @@ where impl Drop for BinaryOpTensorDescriptor { fn drop(&mut self) { unsafe { - sys::cudnnDestroyOpTensorDescriptor(self.raw); + cudnn_sys::cudnnDestroyOpTensorDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/op/op_tensor_op.rs b/crates/cudnn/src/op/op_tensor_op.rs index 55774b0f..4aecb0f7 100644 --- a/crates/cudnn/src/op/op_tensor_op.rs +++ b/crates/cudnn/src/op/op_tensor_op.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// A unary tensor core operation. 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnOpTensorOp_t) @@ -11,7 +9,7 @@ pub enum UnaryOp { Not, } -impl From for sys::cudnnOpTensorOp_t { +impl From for cudnn_sys::cudnnOpTensorOp_t { fn from(op: UnaryOp) -> Self { match op { UnaryOp::Sqrt => Self::CUDNN_OP_TENSOR_SQRT, @@ -33,7 +31,7 @@ pub enum BinaryOp { Max, } -impl From for sys::cudnnOpTensorOp_t { +impl From for cudnn_sys::cudnnOpTensorOp_t { fn from(op: BinaryOp) -> Self { match op { BinaryOp::Add => Self::CUDNN_OP_TENSOR_ADD, diff --git a/crates/cudnn/src/pooling/mod.rs b/crates/cudnn/src/pooling/mod.rs index dc2db8be..5953d2b7 100644 --- a/crates/cudnn/src/pooling/mod.rs +++ b/crates/cudnn/src/pooling/mod.rs @@ -1,14 +1,15 @@ +use cust::memory::GpuBuffer; + +use crate::{ + private, CudnnContext, CudnnError, DataType, IntoResult, ScalingDataType, TensorDescriptor, +}; + mod pooling_descriptor; mod pooling_mode; pub use pooling_descriptor::*; pub use pooling_mode::*; -use crate::{ - private, sys, CudnnContext, CudnnError, DataType, IntoResult, ScalingDataType, TensorDescriptor, -}; -use cust::memory::GpuBuffer; - impl CudnnContext { /// This function computes the pooling of the input tensor and produces a smaller /// tensor in output. 
@@ -53,7 +54,7 @@ impl CudnnContext { let y_ptr = y.as_device_ptr().as_mut_ptr() as *mut _; unsafe { - sys::cudnnPoolingForward( + cudnn_sys::cudnnPoolingForward( self.raw, pooling_desc.raw, alpha_ptr, @@ -121,7 +122,7 @@ impl CudnnContext { let dx_ptr = dx.as_device_ptr().as_mut_ptr() as *mut _; unsafe { - sys::cudnnPoolingBackward( + cudnn_sys::cudnnPoolingBackward( self.raw, pooling_desc.raw, alpha_ptr, diff --git a/crates/cudnn/src/pooling/pooling_descriptor.rs b/crates/cudnn/src/pooling/pooling_descriptor.rs index c568db50..889a2c53 100644 --- a/crates/cudnn/src/pooling/pooling_descriptor.rs +++ b/crates/cudnn/src/pooling/pooling_descriptor.rs @@ -1,10 +1,11 @@ -use crate::{sys, CudnnError, IntoResult, NanPropagation, PoolingMode}; use std::mem::MaybeUninit; +use crate::{CudnnError, IntoResult, NanPropagation, PoolingMode}; + /// The descriptor of a pooling operation. #[derive(Debug, PartialEq, Eq, Hash)] pub struct PoolingDescriptor { - pub(crate) raw: sys::cudnnPoolingDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnPoolingDescriptor_t, } impl PoolingDescriptor { @@ -56,11 +57,11 @@ impl PoolingDescriptor { let mut raw = MaybeUninit::uninit(); unsafe { - sys::cudnnCreatePoolingDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreatePoolingDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); - sys::cudnnSetPoolingNdDescriptor( + cudnn_sys::cudnnSetPoolingNdDescriptor( raw, mode.into(), nan_opt.into(), @@ -79,7 +80,7 @@ impl PoolingDescriptor { impl Drop for PoolingDescriptor { fn drop(&mut self) { unsafe { - sys::cudnnDestroyPoolingDescriptor(self.raw); + cudnn_sys::cudnnDestroyPoolingDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/pooling/pooling_mode.rs b/crates/cudnn/src/pooling/pooling_mode.rs index 013f9e97..64074366 100644 --- a/crates/cudnn/src/pooling/pooling_mode.rs +++ b/crates/cudnn/src/pooling/pooling_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Specifies the pooling method. 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnPoolingMode_t) @@ -18,7 +16,7 @@ pub enum PoolingMode { MaxDeterministic, } -impl From for sys::cudnnPoolingMode_t { +impl From for cudnn_sys::cudnnPoolingMode_t { fn from(mode: PoolingMode) -> Self { match mode { PoolingMode::Max => Self::CUDNN_POOLING_MAX, diff --git a/crates/cudnn/src/reduction/indices_type.rs b/crates/cudnn/src/reduction/indices_type.rs index 4fd5ea02..bb2569d8 100644 --- a/crates/cudnn/src/reduction/indices_type.rs +++ b/crates/cudnn/src/reduction/indices_type.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Indicates the data type of the indices computed by a reduction operation. #[non_exhaustive] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -10,7 +8,7 @@ pub enum IndicesType { U64, } -impl From for sys::cudnnIndicesType_t { +impl From for cudnn_sys::cudnnIndicesType_t { fn from(mode: IndicesType) -> Self { match mode { IndicesType::U8 => Self::CUDNN_8BIT_INDICES, diff --git a/crates/cudnn/src/reduction/mod.rs b/crates/cudnn/src/reduction/mod.rs index e91d774a..e73b79fc 100644 --- a/crates/cudnn/src/reduction/mod.rs +++ b/crates/cudnn/src/reduction/mod.rs @@ -1,3 +1,9 @@ +use std::mem::MaybeUninit; + +use cust::memory::GpuBuffer; + +use crate::{CudnnContext, CudnnError, DataType, IntoResult, ScalingDataType, TensorDescriptor}; + mod indices_type; mod reduce_indices; mod reduce_op; @@ -8,14 +14,6 @@ pub use reduce_indices::*; pub use reduce_op::*; pub use reduction_descriptor::*; -use std::mem::MaybeUninit; - -use cust::memory::GpuBuffer; - -use crate::{ - sys, CudnnContext, CudnnError, DataType, IntoResult, ScalingDataType, TensorDescriptor, -}; - impl CudnnContext { /// Returns the minimum size of the workspace to be passed to the reduction given /// the input and output tensors. 
@@ -39,7 +37,7 @@ impl CudnnContext { let mut size = MaybeUninit::uninit(); unsafe { - sys::cudnnGetReductionWorkspaceSize( + cudnn_sys::cudnnGetReductionWorkspaceSize( self.raw, desc.raw, a_desc.raw, @@ -74,7 +72,7 @@ impl CudnnContext { let mut size = MaybeUninit::uninit(); unsafe { - sys::cudnnGetReductionIndicesSize( + cudnn_sys::cudnnGetReductionIndicesSize( self.raw, desc.raw, a_desc.raw, @@ -190,7 +188,7 @@ impl CudnnContext { let gamma = &gamma as *const CompT as _; unsafe { - sys::cudnnReduceTensor( + cudnn_sys::cudnnReduceTensor( self.raw, desc.raw, indices_ptr, diff --git a/crates/cudnn/src/reduction/reduce_indices.rs b/crates/cudnn/src/reduction/reduce_indices.rs index ccf2b7c5..4bdb4443 100644 --- a/crates/cudnn/src/reduction/reduce_indices.rs +++ b/crates/cudnn/src/reduction/reduce_indices.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Indicates whether a reduction operation should compute indices or not. #[non_exhaustive] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -11,7 +9,7 @@ pub enum ReduceIndices { Flattened, } -impl From for sys::cudnnReduceTensorIndices_t { +impl From for cudnn_sys::cudnnReduceTensorIndices_t { fn from(mode: ReduceIndices) -> Self { match mode { ReduceIndices::None => Self::CUDNN_REDUCE_TENSOR_NO_INDICES, diff --git a/crates/cudnn/src/reduction/reduce_op.rs b/crates/cudnn/src/reduction/reduce_op.rs index 862157dc..e8e9f3d6 100644 --- a/crates/cudnn/src/reduction/reduce_op.rs +++ b/crates/cudnn/src/reduction/reduce_op.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Tensor reduction operation. 
#[non_exhaustive] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -15,7 +13,7 @@ pub enum ReduceOp { MulNoZeros, } -impl From for sys::cudnnReduceTensorOp_t { +impl From for cudnn_sys::cudnnReduceTensorOp_t { fn from(op: ReduceOp) -> Self { match op { ReduceOp::Add => Self::CUDNN_REDUCE_TENSOR_ADD, diff --git a/crates/cudnn/src/reduction/reduction_descriptor.rs b/crates/cudnn/src/reduction/reduction_descriptor.rs index 260a0046..78c09020 100644 --- a/crates/cudnn/src/reduction/reduction_descriptor.rs +++ b/crates/cudnn/src/reduction/reduction_descriptor.rs @@ -1,7 +1,7 @@ use std::{marker::PhantomData, mem::MaybeUninit}; use crate::{ - sys, CudnnError, DataType, IndicesType, IntoResult, NanPropagation, ReduceIndices, ReduceOp, + CudnnError, DataType, IndicesType, IntoResult, NanPropagation, ReduceIndices, ReduceOp, }; /// Descriptor of a tensor reduction operation. @@ -9,7 +9,7 @@ pub struct ReductionDescriptor where T: DataType, { - pub(crate) raw: sys::cudnnReduceTensorDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnReduceTensorDescriptor_t, comp_type: PhantomData, } @@ -61,10 +61,10 @@ where let indices_type = indices_type.into().unwrap_or(IndicesType::U8); unsafe { - sys::cudnnCreateReduceTensorDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateReduceTensorDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); - sys::cudnnSetReduceTensorDescriptor( + cudnn_sys::cudnnSetReduceTensorDescriptor( raw, op.into(), T::into_raw(), @@ -88,7 +88,7 @@ where { fn drop(&mut self) { unsafe { - sys::cudnnDestroyReduceTensorDescriptor(self.raw); + cudnn_sys::cudnnDestroyReduceTensorDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/rnn/forward_mode.rs b/crates/cudnn/src/rnn/forward_mode.rs index e0e68c74..7f083636 100644 --- a/crates/cudnn/src/rnn/forward_mode.rs +++ b/crates/cudnn/src/rnn/forward_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Specifies inference or training mode in RNN API. 
/// /// This parameter allows the cuDNN library to tune more precisely the size of the workspace buffer @@ -15,11 +13,12 @@ pub enum ForwardMode { Training, } -impl From for sys::cudnnForwardMode_t { +impl From for cudnn_sys::cudnnForwardMode_t { fn from(mode: ForwardMode) -> Self { + use cudnn_sys::cudnnForwardMode_t::*; match mode { - ForwardMode::Training => sys::cudnnForwardMode_t::CUDNN_FWD_MODE_TRAINING, - ForwardMode::Inference => sys::cudnnForwardMode_t::CUDNN_FWD_MODE_INFERENCE, + ForwardMode::Training => CUDNN_FWD_MODE_TRAINING, + ForwardMode::Inference => CUDNN_FWD_MODE_INFERENCE, } } } diff --git a/crates/cudnn/src/rnn/mod.rs b/crates/cudnn/src/rnn/mod.rs index a623ae5b..ec2f0217 100644 --- a/crates/cudnn/src/rnn/mod.rs +++ b/crates/cudnn/src/rnn/mod.rs @@ -1,3 +1,9 @@ +use std::mem::MaybeUninit; + +use cust::memory::GpuBuffer; + +use crate::{CudnnContext, CudnnError, IntoResult, TensorDescriptor, WGradMode}; + mod forward_mode; mod rnn_algo; mod rnn_bias_mode; @@ -20,10 +26,6 @@ pub use rnn_direction_mode::*; pub use rnn_input_mode::*; pub use rnn_mode::*; -use crate::{sys, CudnnContext, CudnnError, IntoResult, TensorDescriptor, WGradMode}; -use cust::memory::GpuBuffer; -use std::mem::MaybeUninit; - impl CudnnContext { /// Computes the work and reserve space buffer sizes based on the RNN network /// geometry stored in `rnn_desc`, designated usage (inference or training) defined @@ -64,7 +66,7 @@ impl CudnnContext { let mut reserve_space_size = MaybeUninit::uninit(); unsafe { - sys::cudnnGetRNNTempSpaceSizes( + cudnn_sys::cudnnGetRNNTempSpaceSizes( self.raw, rnn_desc.raw, forward_mode.into(), @@ -104,7 +106,7 @@ impl CudnnContext { let mut size = MaybeUninit::uninit(); unsafe { - sys::cudnnGetRNNWeightSpaceSize(self.raw, rnn_desc.raw, size.as_mut_ptr()) + cudnn_sys::cudnnGetRNNWeightSpaceSize(self.raw, rnn_desc.raw, size.as_mut_ptr()) .into_result()?; Ok(size.assume_init()) @@ -256,7 +258,7 @@ impl CudnnContext { }); unsafe { - sys::cudnnRNNForward( + 
cudnn_sys::cudnnRNNForward( self.raw, rnn_desc.raw, forward_mode.into(), @@ -462,7 +464,7 @@ impl CudnnContext { let reserve_space_ptr = reserve_space.as_device_ptr().as_mut_ptr() as *mut std::ffi::c_void; unsafe { - sys::cudnnRNNBackwardData_v8( + cudnn_sys::cudnnRNNBackwardData_v8( self.raw, rnn_desc.raw, device_sequence_lengths_ptr, @@ -576,7 +578,7 @@ impl CudnnContext { let reserve_space_ptr = reserve_space.as_device_ptr().as_mut_ptr() as *mut std::ffi::c_void; unsafe { - sys::cudnnRNNBackwardWeights_v8( + cudnn_sys::cudnnRNNBackwardWeights_v8( self.raw, rnn_desc.raw, grad_mode.into(), diff --git a/crates/cudnn/src/rnn/rnn_algo.rs b/crates/cudnn/src/rnn/rnn_algo.rs index 50e78876..6147851e 100644 --- a/crates/cudnn/src/rnn/rnn_algo.rs +++ b/crates/cudnn/src/rnn/rnn_algo.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// A recurrent neural network algorithm. /// /// **Do note** that double precision is only supported by `RnnAlgo::Standard`. @@ -15,13 +13,14 @@ pub enum RnnAlgo { PersistStaticSmallH, } -impl From for sys::cudnnRNNAlgo_t { +impl From for cudnn_sys::cudnnRNNAlgo_t { fn from(algo: RnnAlgo) -> Self { + use cudnn_sys::cudnnRNNAlgo_t::*; match algo { - RnnAlgo::Standard => sys::cudnnRNNAlgo_t::CUDNN_RNN_ALGO_STANDARD, - RnnAlgo::PersistStatic => sys::cudnnRNNAlgo_t::CUDNN_RNN_ALGO_PERSIST_STATIC, - RnnAlgo::PersistDynamic => sys::cudnnRNNAlgo_t::CUDNN_RNN_ALGO_PERSIST_DYNAMIC, - RnnAlgo::PersistStaticSmallH => sys::cudnnRNNAlgo_t::CUDNN_RNN_ALGO_PERSIST_STATIC, + RnnAlgo::Standard => CUDNN_RNN_ALGO_STANDARD, + RnnAlgo::PersistStatic => CUDNN_RNN_ALGO_PERSIST_STATIC, + RnnAlgo::PersistDynamic => CUDNN_RNN_ALGO_PERSIST_DYNAMIC, + RnnAlgo::PersistStaticSmallH => CUDNN_RNN_ALGO_PERSIST_STATIC, } } } diff --git a/crates/cudnn/src/rnn/rnn_bias_mode.rs b/crates/cudnn/src/rnn/rnn_bias_mode.rs index 1c234b0b..44c42029 100644 --- a/crates/cudnn/src/rnn/rnn_bias_mode.rs +++ b/crates/cudnn/src/rnn/rnn_bias_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Specifies the 
number of bias vectors for a recurrent neural network function. /// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNBiasMode_t) @@ -19,13 +17,14 @@ pub enum RnnBiasMode { SingleRecurrentBias, } -impl From for sys::cudnnRNNBiasMode_t { +impl From for cudnn_sys::cudnnRNNBiasMode_t { fn from(mode: RnnBiasMode) -> Self { + use cudnn_sys::cudnnRNNBiasMode_t::*; match mode { - RnnBiasMode::NoBias => sys::cudnnRNNBiasMode_t::CUDNN_RNN_NO_BIAS, - RnnBiasMode::SingleInpBias => sys::cudnnRNNBiasMode_t::CUDNN_RNN_SINGLE_INP_BIAS, - RnnBiasMode::DoubleBias => sys::cudnnRNNBiasMode_t::CUDNN_RNN_DOUBLE_BIAS, - RnnBiasMode::SingleRecurrentBias => sys::cudnnRNNBiasMode_t::CUDNN_RNN_SINGLE_REC_BIAS, + RnnBiasMode::NoBias => CUDNN_RNN_NO_BIAS, + RnnBiasMode::SingleInpBias => CUDNN_RNN_SINGLE_INP_BIAS, + RnnBiasMode::DoubleBias => CUDNN_RNN_DOUBLE_BIAS, + RnnBiasMode::SingleRecurrentBias => CUDNN_RNN_SINGLE_REC_BIAS, } } } diff --git a/crates/cudnn/src/rnn/rnn_clip_mode.rs b/crates/cudnn/src/rnn/rnn_clip_mode.rs index ab5917b4..2d06daf0 100644 --- a/crates/cudnn/src/rnn/rnn_clip_mode.rs +++ b/crates/cudnn/src/rnn/rnn_clip_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Selects the LSTM cell clipping mode. 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNClipMode_t) @@ -13,11 +11,12 @@ pub enum RnnClipMode { ClipMinMax, } -impl From for sys::cudnnRNNClipMode_t { +impl From for cudnn_sys::cudnnRNNClipMode_t { fn from(mode: RnnClipMode) -> Self { + use cudnn_sys::cudnnRNNClipMode_t::*; match mode { - RnnClipMode::ClipNone => sys::cudnnRNNClipMode_t::CUDNN_RNN_CLIP_NONE, - RnnClipMode::ClipMinMax => sys::cudnnRNNClipMode_t::CUDNN_RNN_CLIP_MINMAX, + RnnClipMode::ClipNone => CUDNN_RNN_CLIP_NONE, + RnnClipMode::ClipMinMax => CUDNN_RNN_CLIP_MINMAX, } } } diff --git a/crates/cudnn/src/rnn/rnn_data_descriptor.rs b/crates/cudnn/src/rnn/rnn_data_descriptor.rs index 8ebeeb9e..696f8d8b 100644 --- a/crates/cudnn/src/rnn/rnn_data_descriptor.rs +++ b/crates/cudnn/src/rnn/rnn_data_descriptor.rs @@ -1,6 +1,7 @@ -use crate::{private, sys, CudnnError, DataType, IntoResult, RnnDataLayout}; use std::{marker::PhantomData, mem::MaybeUninit}; +use crate::{private, CudnnError, DataType, IntoResult, RnnDataLayout}; + /// Specifies the allowed types for the recurrent neural network inputs and outputs. 
/// /// As stated in the [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetRNNDataDescriptor) @@ -16,7 +17,7 @@ pub struct RnnDataDescriptor where T: RnnDataType, { - pub(crate) raw: sys::cudnnRNNDataDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnRNNDataDescriptor_t, data_type: PhantomData, } @@ -114,7 +115,7 @@ where let mut raw = MaybeUninit::uninit(); unsafe { - sys::cudnnCreateRNNDataDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateRNNDataDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); @@ -122,7 +123,7 @@ where .into() .map_or(std::ptr::null_mut(), |mut el| &mut el as *mut T); - sys::cudnnSetRNNDataDescriptor( + cudnn_sys::cudnnSetRNNDataDescriptor( raw, T::into_raw(), layout.into(), @@ -148,7 +149,7 @@ where { fn drop(&mut self) { unsafe { - sys::cudnnDestroyRNNDataDescriptor(self.raw); + cudnn_sys::cudnnDestroyRNNDataDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/rnn/rnn_data_layout.rs b/crates/cudnn/src/rnn/rnn_data_layout.rs index 8a6bf558..011a4ec4 100644 --- a/crates/cudnn/src/rnn/rnn_data_layout.rs +++ b/crates/cudnn/src/rnn/rnn_data_layout.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// The data layout for input and output of a recurrent neural network. 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNDataLayout_t) @@ -14,18 +12,13 @@ pub enum RnnDataLayout { BatchMajorUnpacked, } -impl From for sys::cudnnRNNDataLayout_t { +impl From for cudnn_sys::cudnnRNNDataLayout_t { fn from(rnn_data_layout: RnnDataLayout) -> Self { + use cudnn_sys::cudnnRNNDataLayout_t::*; match rnn_data_layout { - RnnDataLayout::SeqMajorUnpacked => { - sys::cudnnRNNDataLayout_t::CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED - } - RnnDataLayout::SeqMajorPacked => { - sys::cudnnRNNDataLayout_t::CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED - } - RnnDataLayout::BatchMajorUnpacked => { - sys::cudnnRNNDataLayout_t::CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED - } + RnnDataLayout::SeqMajorUnpacked => CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED, + RnnDataLayout::SeqMajorPacked => CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED, + RnnDataLayout::BatchMajorUnpacked => CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED, } } } diff --git a/crates/cudnn/src/rnn/rnn_descriptor.rs b/crates/cudnn/src/rnn/rnn_descriptor.rs index f5d33bce..24564c61 100644 --- a/crates/cudnn/src/rnn/rnn_descriptor.rs +++ b/crates/cudnn/src/rnn/rnn_descriptor.rs @@ -1,9 +1,11 @@ +use std::{marker::PhantomData, mem::MaybeUninit}; + +use cust::memory::GpuBuffer; + use crate::{ - sys, CudnnError, DataType, DropoutDescriptor, IntoResult, MathType, NanPropagation, RnnAlgo, + CudnnError, DataType, DropoutDescriptor, IntoResult, MathType, NanPropagation, RnnAlgo, RnnBiasMode, RnnClipMode, RnnDirectionMode, RnnInputMode, RnnMode, }; -use cust::memory::GpuBuffer; -use std::{marker::PhantomData, mem::MaybeUninit}; bitflags::bitflags! { /// Miscellaneous switches for configuring auxiliary recurrent neural network features. 
@@ -27,7 +29,7 @@ where T: DataType, U: SupportedRnn, { - pub(crate) raw: sys::cudnnRNNDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnRNNDescriptor_t, data_type: PhantomData, math_prec: PhantomData, } @@ -157,14 +159,14 @@ where let mut raw = MaybeUninit::uninit(); unsafe { - sys::cudnnCreateRNNDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateRNNDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); let proj_size = projection_size.into().unwrap_or(0); let dropout_desc = dropout_desc.map_or(std::ptr::null_mut(), |desc| desc.raw); - sys::cudnnSetRNNDescriptor_v8( + cudnn_sys::cudnnSetRNNDescriptor_v8( raw, algo.into(), cell_mode.into(), @@ -227,7 +229,7 @@ where right_clip: f64, ) -> Result<(), CudnnError> { unsafe { - sys::cudnnRNNSetClip_v8( + cudnn_sys::cudnnRNNSetClip_v8( self.raw, clip_mode.into(), nan_opt.into(), @@ -246,7 +248,7 @@ where { fn drop(&mut self) { unsafe { - sys::cudnnDestroyRNNDescriptor(self.raw); + cudnn_sys::cudnnDestroyRNNDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/rnn/rnn_direction_mode.rs b/crates/cudnn/src/rnn/rnn_direction_mode.rs index cf1cfc59..da7edc05 100644 --- a/crates/cudnn/src/rnn/rnn_direction_mode.rs +++ b/crates/cudnn/src/rnn/rnn_direction_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Specifies the recurrence pattern for a recurrent neural network. 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnDirectionMode_t) @@ -14,7 +12,7 @@ pub enum RnnDirectionMode { Bidirectional, } -impl From for sys::cudnnDirectionMode_t { +impl From for cudnn_sys::cudnnDirectionMode_t { fn from(mode: RnnDirectionMode) -> Self { match mode { RnnDirectionMode::Unidirectional => Self::CUDNN_UNIDIRECTIONAL, diff --git a/crates/cudnn/src/rnn/rnn_input_mode.rs b/crates/cudnn/src/rnn/rnn_input_mode.rs index 9eb83348..cd5cb041 100644 --- a/crates/cudnn/src/rnn/rnn_input_mode.rs +++ b/crates/cudnn/src/rnn/rnn_input_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Specifies the behavior of the first layer in a recurrent neural network. /// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNInputMode_t) @@ -14,11 +12,12 @@ pub enum RnnInputMode { SkipInput, } -impl From for sys::cudnnRNNInputMode_t { +impl From for cudnn_sys::cudnnRNNInputMode_t { fn from(mode: RnnInputMode) -> Self { + use cudnn_sys::cudnnRNNInputMode_t::*; match mode { - RnnInputMode::LinearInput => sys::cudnnRNNInputMode_t::CUDNN_LINEAR_INPUT, - RnnInputMode::SkipInput => sys::cudnnRNNInputMode_t::CUDNN_SKIP_INPUT, + RnnInputMode::LinearInput => CUDNN_LINEAR_INPUT, + RnnInputMode::SkipInput => CUDNN_SKIP_INPUT, } } } diff --git a/crates/cudnn/src/rnn/rnn_mode.rs b/crates/cudnn/src/rnn/rnn_mode.rs index 91e7c4bb..5fce8535 100644 --- a/crates/cudnn/src/rnn/rnn_mode.rs +++ b/crates/cudnn/src/rnn/rnn_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Specifies the type of recurrent neural network used. 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNMode_t) @@ -16,13 +14,14 @@ pub enum RnnMode { Gru, } -impl From for sys::cudnnRNNMode_t { +impl From for cudnn_sys::cudnnRNNMode_t { fn from(mode: RnnMode) -> Self { + use cudnn_sys::cudnnRNNMode_t::*; match mode { - RnnMode::RnnReLu => sys::cudnnRNNMode_t::CUDNN_RNN_RELU, - RnnMode::RnnTanh => sys::cudnnRNNMode_t::CUDNN_RNN_TANH, - RnnMode::Lstm => sys::cudnnRNNMode_t::CUDNN_LSTM, - RnnMode::Gru => sys::cudnnRNNMode_t::CUDNN_GRU, + RnnMode::RnnReLu => CUDNN_RNN_RELU, + RnnMode::RnnTanh => CUDNN_RNN_TANH, + RnnMode::Lstm => CUDNN_LSTM, + RnnMode::Gru => CUDNN_GRU, } } } diff --git a/crates/cudnn/src/softmax/mod.rs b/crates/cudnn/src/softmax/mod.rs index 5bd65bbf..ed979fc6 100644 --- a/crates/cudnn/src/softmax/mod.rs +++ b/crates/cudnn/src/softmax/mod.rs @@ -1,12 +1,13 @@ +use cust::memory::GpuBuffer; + +use crate::{private, CudnnContext, CudnnError, DataType, IntoResult, TensorDescriptor}; + mod softmax_algo; mod softmax_mode; pub use softmax_algo::*; pub use softmax_mode::*; -use crate::{private, sys, CudnnContext, CudnnError, DataType, IntoResult, TensorDescriptor}; -use cust::memory::GpuBuffer; - impl CudnnContext { /// Computes the softmax function. /// @@ -58,7 +59,7 @@ impl CudnnContext { let y_ptr = y.as_device_ptr().as_mut_ptr() as *mut _; unsafe { - sys::cudnnSoftmaxForward( + cudnn_sys::cudnnSoftmaxForward( self.raw, algo.into(), mode.into(), @@ -122,7 +123,7 @@ impl CudnnContext { let dx_ptr = dx.as_device_ptr().as_mut_ptr() as *mut _; unsafe { - sys::cudnnSoftmaxBackward( + cudnn_sys::cudnnSoftmaxBackward( self.raw, algo.into(), mode.into(), diff --git a/crates/cudnn/src/softmax/softmax_algo.rs b/crates/cudnn/src/softmax/softmax_algo.rs index 99652f4c..d812f0ac 100644 --- a/crates/cudnn/src/softmax/softmax_algo.rs +++ b/crates/cudnn/src/softmax/softmax_algo.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Specifies the implementation of the softmax function. 
/// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSoftmaxAlgorithm_t) @@ -16,7 +14,7 @@ pub enum SoftmaxAlgo { Log, } -impl From for sys::cudnnSoftmaxAlgorithm_t { +impl From for cudnn_sys::cudnnSoftmaxAlgorithm_t { fn from(algo: SoftmaxAlgo) -> Self { match algo { SoftmaxAlgo::Fast => Self::CUDNN_SOFTMAX_FAST, diff --git a/crates/cudnn/src/softmax/softmax_mode.rs b/crates/cudnn/src/softmax/softmax_mode.rs index 8d730bf9..46644128 100644 --- a/crates/cudnn/src/softmax/softmax_mode.rs +++ b/crates/cudnn/src/softmax/softmax_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Specifies how the softmax input must be processed. /// /// cuDNN [docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSoftmaxMode_t) @@ -13,7 +11,7 @@ pub enum SoftmaxMode { Channel, } -impl From for sys::cudnnSoftmaxMode_t { +impl From for cudnn_sys::cudnnSoftmaxMode_t { fn from(mode: SoftmaxMode) -> Self { match mode { SoftmaxMode::Channel => Self::CUDNN_SOFTMAX_MODE_CHANNEL, diff --git a/crates/cudnn/src/sys.rs b/crates/cudnn/src/sys.rs deleted file mode 100644 index 3fb58c91..00000000 --- a/crates/cudnn/src/sys.rs +++ /dev/null @@ -1,3862 +0,0 @@ -/* automatically generated by rust-bindgen 0.59.2 */ -#![allow(warnings)] - -pub const CUDNN_MAJOR: u32 = 8; -pub const CUDNN_MINOR: u32 = 3; -pub const CUDNN_PATCHLEVEL: u32 = 2; -pub const CUDNN_VERSION: u32 = 8302; -pub const CUDNN_OPS_INFER_MAJOR: u32 = 8; -pub const CUDNN_OPS_INFER_MINOR: u32 = 3; -pub const CUDNN_OPS_INFER_PATCH: u32 = 2; -pub const CUDNN_DIM_MAX: u32 = 8; -pub const CUDNN_LRN_MIN_N: u32 = 1; -pub const CUDNN_LRN_MAX_N: u32 = 16; -pub const CUDNN_LRN_MIN_K: f64 = 0.00001; -pub const CUDNN_LRN_MIN_BETA: f64 = 0.01; -pub const CUDNN_BN_MIN_EPSILON: f64 = 0.0; -pub const CUDNN_OPS_TRAIN_MAJOR: u32 = 8; -pub const CUDNN_OPS_TRAIN_MINOR: u32 = 3; -pub const CUDNN_OPS_TRAIN_PATCH: u32 = 2; -pub const CUDNN_ADV_INFER_MAJOR: u32 = 8; -pub const CUDNN_ADV_INFER_MINOR: u32 = 3; -pub const 
CUDNN_ADV_INFER_PATCH: u32 = 2; -pub const CUDNN_RNN_PADDED_IO_DISABLED: u32 = 0; -pub const CUDNN_RNN_PADDED_IO_ENABLED: u32 = 1; -pub const CUDNN_SEQDATA_DIM_COUNT: u32 = 4; -pub const CUDNN_ATTN_QUERYMAP_ALL_TO_ONE: u32 = 0; -pub const CUDNN_ATTN_QUERYMAP_ONE_TO_ONE: u32 = 1; -pub const CUDNN_ATTN_DISABLE_PROJ_BIASES: u32 = 0; -pub const CUDNN_ATTN_ENABLE_PROJ_BIASES: u32 = 2; -pub const CUDNN_ATTN_WKIND_COUNT: u32 = 8; -pub const CUDNN_ADV_TRAIN_MAJOR: u32 = 8; -pub const CUDNN_ADV_TRAIN_MINOR: u32 = 3; -pub const CUDNN_ADV_TRAIN_PATCH: u32 = 2; -pub const CUDNN_CNN_INFER_MAJOR: u32 = 8; -pub const CUDNN_CNN_INFER_MINOR: u32 = 3; -pub const CUDNN_CNN_INFER_PATCH: u32 = 2; -pub const CUDNN_CNN_TRAIN_MAJOR: u32 = 8; -pub const CUDNN_CNN_TRAIN_MINOR: u32 = 3; -pub const CUDNN_CNN_TRAIN_PATCH: u32 = 2; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUstream_st { - _unused: [u8; 0], -} -pub type cudaStream_t = *mut CUstream_st; -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum libraryPropertyType_t { - MAJOR_VERSION = 0, - MINOR_VERSION = 1, - PATCH_LEVEL = 2, -} -pub use self::libraryPropertyType_t as libraryPropertyType; -pub type __int32_t = ::std::os::raw::c_int; -pub type __uint32_t = ::std::os::raw::c_uint; -pub type __int64_t = ::std::os::raw::c_long; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnContext { - _unused: [u8; 0], -} -pub type cudnnHandle_t = *mut cudnnContext; -extern "C" { - pub fn cudnnGetVersion() -> usize; -} -extern "C" { - pub fn cudnnGetCudartVersion() -> usize; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnStatus_t { - CUDNN_STATUS_SUCCESS = 0, - CUDNN_STATUS_NOT_INITIALIZED = 1, - CUDNN_STATUS_ALLOC_FAILED = 2, - CUDNN_STATUS_BAD_PARAM = 3, - CUDNN_STATUS_INTERNAL_ERROR = 4, - CUDNN_STATUS_INVALID_VALUE = 5, - CUDNN_STATUS_ARCH_MISMATCH = 6, - CUDNN_STATUS_MAPPING_ERROR = 7, - CUDNN_STATUS_EXECUTION_FAILED = 8, - CUDNN_STATUS_NOT_SUPPORTED = 9, - 
CUDNN_STATUS_LICENSE_ERROR = 10, - CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11, - CUDNN_STATUS_RUNTIME_IN_PROGRESS = 12, - CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 13, - CUDNN_STATUS_VERSION_MISMATCH = 14, -} -extern "C" { - pub fn cudnnGetErrorString(status: cudnnStatus_t) -> *const ::std::os::raw::c_char; -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnRuntimeTag_t { - _unused: [u8; 0], -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnErrQueryMode_t { - CUDNN_ERRQUERY_RAWCODE = 0, - CUDNN_ERRQUERY_NONBLOCKING = 1, - CUDNN_ERRQUERY_BLOCKING = 2, -} -extern "C" { - pub fn cudnnQueryRuntimeError( - handle: cudnnHandle_t, - rstatus: *mut cudnnStatus_t, - mode: cudnnErrQueryMode_t, - tag: *mut cudnnRuntimeTag_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetProperty( - type_: libraryPropertyType, - value: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCreate(handle: *mut cudnnHandle_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroy(handle: cudnnHandle_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetStream(handle: cudnnHandle_t, streamId: cudaStream_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetStream(handle: cudnnHandle_t, streamId: *mut cudaStream_t) -> cudnnStatus_t; -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnTensorStruct { - _unused: [u8; 0], -} -pub type cudnnTensorDescriptor_t = *mut cudnnTensorStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnPoolingStruct { - _unused: [u8; 0], -} -pub type cudnnPoolingDescriptor_t = *mut cudnnPoolingStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnFilterStruct { - _unused: [u8; 0], -} -pub type cudnnFilterDescriptor_t = *mut cudnnFilterStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnLRNStruct { - _unused: [u8; 0], -} -pub type cudnnLRNDescriptor_t = *mut cudnnLRNStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct 
cudnnActivationStruct { - _unused: [u8; 0], -} -pub type cudnnActivationDescriptor_t = *mut cudnnActivationStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnSpatialTransformerStruct { - _unused: [u8; 0], -} -pub type cudnnSpatialTransformerDescriptor_t = *mut cudnnSpatialTransformerStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnOpTensorStruct { - _unused: [u8; 0], -} -pub type cudnnOpTensorDescriptor_t = *mut cudnnOpTensorStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnReduceTensorStruct { - _unused: [u8; 0], -} -pub type cudnnReduceTensorDescriptor_t = *mut cudnnReduceTensorStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnCTCLossStruct { - _unused: [u8; 0], -} -pub type cudnnCTCLossDescriptor_t = *mut cudnnCTCLossStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnTensorTransformStruct { - _unused: [u8; 0], -} -pub type cudnnTensorTransformDescriptor_t = *mut cudnnTensorTransformStruct; -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnDataType_t { - CUDNN_DATA_FLOAT = 0, - CUDNN_DATA_DOUBLE = 1, - CUDNN_DATA_HALF = 2, - CUDNN_DATA_INT8 = 3, - CUDNN_DATA_INT32 = 4, - CUDNN_DATA_INT8x4 = 5, - CUDNN_DATA_UINT8 = 6, - CUDNN_DATA_UINT8x4 = 7, - CUDNN_DATA_INT8x32 = 8, - CUDNN_DATA_BFLOAT16 = 9, - CUDNN_DATA_INT64 = 10, - CUDNN_DATA_BOOLEAN = 11, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnMathType_t { - CUDNN_DEFAULT_MATH = 0, - CUDNN_TENSOR_OP_MATH = 1, - CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2, - CUDNN_FMA_MATH = 3, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnNanPropagation_t { - CUDNN_NOT_PROPAGATE_NAN = 0, - CUDNN_PROPAGATE_NAN = 1, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnDeterminism_t { - CUDNN_NON_DETERMINISTIC = 0, - CUDNN_DETERMINISTIC = 1, -} -extern "C" { - pub fn cudnnCreateTensorDescriptor(tensorDesc: *mut 
cudnnTensorDescriptor_t) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnTensorFormat_t { - CUDNN_TENSOR_NCHW = 0, - CUDNN_TENSOR_NHWC = 1, - CUDNN_TENSOR_NCHW_VECT_C = 2, -} -extern "C" { - pub fn cudnnSetTensor4dDescriptor( - tensorDesc: cudnnTensorDescriptor_t, - format: cudnnTensorFormat_t, - dataType: cudnnDataType_t, - n: ::std::os::raw::c_int, - c: ::std::os::raw::c_int, - h: ::std::os::raw::c_int, - w: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetTensor4dDescriptorEx( - tensorDesc: cudnnTensorDescriptor_t, - dataType: cudnnDataType_t, - n: ::std::os::raw::c_int, - c: ::std::os::raw::c_int, - h: ::std::os::raw::c_int, - w: ::std::os::raw::c_int, - nStride: ::std::os::raw::c_int, - cStride: ::std::os::raw::c_int, - hStride: ::std::os::raw::c_int, - wStride: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetTensor4dDescriptor( - tensorDesc: cudnnTensorDescriptor_t, - dataType: *mut cudnnDataType_t, - n: *mut ::std::os::raw::c_int, - c: *mut ::std::os::raw::c_int, - h: *mut ::std::os::raw::c_int, - w: *mut ::std::os::raw::c_int, - nStride: *mut ::std::os::raw::c_int, - cStride: *mut ::std::os::raw::c_int, - hStride: *mut ::std::os::raw::c_int, - wStride: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetTensorNdDescriptor( - tensorDesc: cudnnTensorDescriptor_t, - dataType: cudnnDataType_t, - nbDims: ::std::os::raw::c_int, - dimA: *const ::std::os::raw::c_int, - strideA: *const ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetTensorNdDescriptorEx( - tensorDesc: cudnnTensorDescriptor_t, - format: cudnnTensorFormat_t, - dataType: cudnnDataType_t, - nbDims: ::std::os::raw::c_int, - dimA: *const ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetTensorNdDescriptor( - tensorDesc: cudnnTensorDescriptor_t, - nbDimsRequested: ::std::os::raw::c_int, - dataType: *mut 
cudnnDataType_t, - nbDims: *mut ::std::os::raw::c_int, - dimA: *mut ::std::os::raw::c_int, - strideA: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetTensorSizeInBytes( - tensorDesc: cudnnTensorDescriptor_t, - size: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyTensorDescriptor(tensorDesc: cudnnTensorDescriptor_t) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnFoldingDirection_t { - CUDNN_TRANSFORM_FOLD = 0, - CUDNN_TRANSFORM_UNFOLD = 1, -} -extern "C" { - pub fn cudnnInitTransformDest( - transformDesc: cudnnTensorTransformDescriptor_t, - srcDesc: cudnnTensorDescriptor_t, - destDesc: cudnnTensorDescriptor_t, - destSizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCreateTensorTransformDescriptor( - transformDesc: *mut cudnnTensorTransformDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetTensorTransformDescriptor( - transformDesc: cudnnTensorTransformDescriptor_t, - nbDims: u32, - destFormat: cudnnTensorFormat_t, - padBeforeA: *const i32, - padAfterA: *const i32, - foldA: *const u32, - direction: cudnnFoldingDirection_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetTensorTransformDescriptor( - transformDesc: cudnnTensorTransformDescriptor_t, - nbDimsRequested: u32, - destFormat: *mut cudnnTensorFormat_t, - padBeforeA: *mut i32, - padAfterA: *mut i32, - foldA: *mut u32, - direction: *mut cudnnFoldingDirection_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyTensorTransformDescriptor( - transformDesc: cudnnTensorTransformDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnTransformTensor( - handle: cudnnHandle_t, - alpha: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - 
pub fn cudnnTransformTensorEx( - handle: cudnnHandle_t, - transDesc: cudnnTensorTransformDescriptor_t, - alpha: *const ::std::os::raw::c_void, - srcDesc: cudnnTensorDescriptor_t, - srcData: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - destDesc: cudnnTensorDescriptor_t, - destData: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnAddTensor( - handle: cudnnHandle_t, - alpha: *const ::std::os::raw::c_void, - aDesc: cudnnTensorDescriptor_t, - A: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - cDesc: cudnnTensorDescriptor_t, - C: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnOpTensorOp_t { - CUDNN_OP_TENSOR_ADD = 0, - CUDNN_OP_TENSOR_MUL = 1, - CUDNN_OP_TENSOR_MIN = 2, - CUDNN_OP_TENSOR_MAX = 3, - CUDNN_OP_TENSOR_SQRT = 4, - CUDNN_OP_TENSOR_NOT = 5, -} -extern "C" { - pub fn cudnnCreateOpTensorDescriptor( - opTensorDesc: *mut cudnnOpTensorDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetOpTensorDescriptor( - opTensorDesc: cudnnOpTensorDescriptor_t, - opTensorOp: cudnnOpTensorOp_t, - opTensorCompType: cudnnDataType_t, - opTensorNanOpt: cudnnNanPropagation_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetOpTensorDescriptor( - opTensorDesc: cudnnOpTensorDescriptor_t, - opTensorOp: *mut cudnnOpTensorOp_t, - opTensorCompType: *mut cudnnDataType_t, - opTensorNanOpt: *mut cudnnNanPropagation_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyOpTensorDescriptor(opTensorDesc: cudnnOpTensorDescriptor_t) - -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnOpTensor( - handle: cudnnHandle_t, - opTensorDesc: cudnnOpTensorDescriptor_t, - alpha1: *const ::std::os::raw::c_void, - aDesc: cudnnTensorDescriptor_t, - A: *const ::std::os::raw::c_void, - alpha2: *const ::std::os::raw::c_void, - bDesc: cudnnTensorDescriptor_t, - B: *const ::std::os::raw::c_void, - beta: *const 
::std::os::raw::c_void, - cDesc: cudnnTensorDescriptor_t, - C: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnReduceTensorOp_t { - CUDNN_REDUCE_TENSOR_ADD = 0, - CUDNN_REDUCE_TENSOR_MUL = 1, - CUDNN_REDUCE_TENSOR_MIN = 2, - CUDNN_REDUCE_TENSOR_MAX = 3, - CUDNN_REDUCE_TENSOR_AMAX = 4, - CUDNN_REDUCE_TENSOR_AVG = 5, - CUDNN_REDUCE_TENSOR_NORM1 = 6, - CUDNN_REDUCE_TENSOR_NORM2 = 7, - CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnReduceTensorIndices_t { - CUDNN_REDUCE_TENSOR_NO_INDICES = 0, - CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnIndicesType_t { - CUDNN_32BIT_INDICES = 0, - CUDNN_64BIT_INDICES = 1, - CUDNN_16BIT_INDICES = 2, - CUDNN_8BIT_INDICES = 3, -} -extern "C" { - pub fn cudnnCreateReduceTensorDescriptor( - reduceTensorDesc: *mut cudnnReduceTensorDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetReduceTensorDescriptor( - reduceTensorDesc: cudnnReduceTensorDescriptor_t, - reduceTensorOp: cudnnReduceTensorOp_t, - reduceTensorCompType: cudnnDataType_t, - reduceTensorNanOpt: cudnnNanPropagation_t, - reduceTensorIndices: cudnnReduceTensorIndices_t, - reduceTensorIndicesType: cudnnIndicesType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetReduceTensorDescriptor( - reduceTensorDesc: cudnnReduceTensorDescriptor_t, - reduceTensorOp: *mut cudnnReduceTensorOp_t, - reduceTensorCompType: *mut cudnnDataType_t, - reduceTensorNanOpt: *mut cudnnNanPropagation_t, - reduceTensorIndices: *mut cudnnReduceTensorIndices_t, - reduceTensorIndicesType: *mut cudnnIndicesType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyReduceTensorDescriptor( - reduceTensorDesc: cudnnReduceTensorDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetReductionIndicesSize( - handle: cudnnHandle_t, 
- reduceTensorDesc: cudnnReduceTensorDescriptor_t, - aDesc: cudnnTensorDescriptor_t, - cDesc: cudnnTensorDescriptor_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetReductionWorkspaceSize( - handle: cudnnHandle_t, - reduceTensorDesc: cudnnReduceTensorDescriptor_t, - aDesc: cudnnTensorDescriptor_t, - cDesc: cudnnTensorDescriptor_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnReduceTensor( - handle: cudnnHandle_t, - reduceTensorDesc: cudnnReduceTensorDescriptor_t, - indices: *mut ::std::os::raw::c_void, - indicesSizeInBytes: usize, - workspace: *mut ::std::os::raw::c_void, - workspaceSizeInBytes: usize, - alpha: *const ::std::os::raw::c_void, - aDesc: cudnnTensorDescriptor_t, - A: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - cDesc: cudnnTensorDescriptor_t, - C: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetTensor( - handle: cudnnHandle_t, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - valuePtr: *const ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnScaleTensor( - handle: cudnnHandle_t, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - alpha: *const ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCreateFilterDescriptor(filterDesc: *mut cudnnFilterDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetFilter4dDescriptor( - filterDesc: cudnnFilterDescriptor_t, - dataType: cudnnDataType_t, - format: cudnnTensorFormat_t, - k: ::std::os::raw::c_int, - c: ::std::os::raw::c_int, - h: ::std::os::raw::c_int, - w: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetFilter4dDescriptor( - filterDesc: cudnnFilterDescriptor_t, - dataType: *mut cudnnDataType_t, - format: *mut cudnnTensorFormat_t, - k: *mut ::std::os::raw::c_int, - c: *mut ::std::os::raw::c_int, - h: *mut ::std::os::raw::c_int, - w: *mut 
::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetFilterNdDescriptor( - filterDesc: cudnnFilterDescriptor_t, - dataType: cudnnDataType_t, - format: cudnnTensorFormat_t, - nbDims: ::std::os::raw::c_int, - filterDimA: *const ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetFilterNdDescriptor( - filterDesc: cudnnFilterDescriptor_t, - nbDimsRequested: ::std::os::raw::c_int, - dataType: *mut cudnnDataType_t, - format: *mut cudnnTensorFormat_t, - nbDims: *mut ::std::os::raw::c_int, - filterDimA: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetFilterSizeInBytes( - filterDesc: cudnnFilterDescriptor_t, - size: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnTransformFilter( - handle: cudnnHandle_t, - transDesc: cudnnTensorTransformDescriptor_t, - alpha: *const ::std::os::raw::c_void, - srcDesc: cudnnFilterDescriptor_t, - srcData: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - destDesc: cudnnFilterDescriptor_t, - destData: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyFilterDescriptor(filterDesc: cudnnFilterDescriptor_t) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnSoftmaxAlgorithm_t { - CUDNN_SOFTMAX_FAST = 0, - CUDNN_SOFTMAX_ACCURATE = 1, - CUDNN_SOFTMAX_LOG = 2, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnSoftmaxMode_t { - CUDNN_SOFTMAX_MODE_INSTANCE = 0, - CUDNN_SOFTMAX_MODE_CHANNEL = 1, -} -extern "C" { - pub fn cudnnSoftmaxForward( - handle: cudnnHandle_t, - algo: cudnnSoftmaxAlgorithm_t, - mode: cudnnSoftmaxMode_t, - alpha: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, 
Hash, PartialEq, Eq)] -pub enum cudnnPoolingMode_t { - CUDNN_POOLING_MAX = 0, - CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, - CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, - CUDNN_POOLING_MAX_DETERMINISTIC = 3, -} -extern "C" { - pub fn cudnnCreatePoolingDescriptor( - poolingDesc: *mut cudnnPoolingDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetPooling2dDescriptor( - poolingDesc: cudnnPoolingDescriptor_t, - mode: cudnnPoolingMode_t, - maxpoolingNanOpt: cudnnNanPropagation_t, - windowHeight: ::std::os::raw::c_int, - windowWidth: ::std::os::raw::c_int, - verticalPadding: ::std::os::raw::c_int, - horizontalPadding: ::std::os::raw::c_int, - verticalStride: ::std::os::raw::c_int, - horizontalStride: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetPooling2dDescriptor( - poolingDesc: cudnnPoolingDescriptor_t, - mode: *mut cudnnPoolingMode_t, - maxpoolingNanOpt: *mut cudnnNanPropagation_t, - windowHeight: *mut ::std::os::raw::c_int, - windowWidth: *mut ::std::os::raw::c_int, - verticalPadding: *mut ::std::os::raw::c_int, - horizontalPadding: *mut ::std::os::raw::c_int, - verticalStride: *mut ::std::os::raw::c_int, - horizontalStride: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetPoolingNdDescriptor( - poolingDesc: cudnnPoolingDescriptor_t, - mode: cudnnPoolingMode_t, - maxpoolingNanOpt: cudnnNanPropagation_t, - nbDims: ::std::os::raw::c_int, - windowDimA: *const ::std::os::raw::c_int, - paddingA: *const ::std::os::raw::c_int, - strideA: *const ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetPoolingNdDescriptor( - poolingDesc: cudnnPoolingDescriptor_t, - nbDimsRequested: ::std::os::raw::c_int, - mode: *mut cudnnPoolingMode_t, - maxpoolingNanOpt: *mut cudnnNanPropagation_t, - nbDims: *mut ::std::os::raw::c_int, - windowDimA: *mut ::std::os::raw::c_int, - paddingA: *mut ::std::os::raw::c_int, - strideA: *mut ::std::os::raw::c_int, - ) -> 
cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetPoolingNdForwardOutputDim( - poolingDesc: cudnnPoolingDescriptor_t, - inputTensorDesc: cudnnTensorDescriptor_t, - nbDims: ::std::os::raw::c_int, - outputTensorDimA: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetPooling2dForwardOutputDim( - poolingDesc: cudnnPoolingDescriptor_t, - inputTensorDesc: cudnnTensorDescriptor_t, - n: *mut ::std::os::raw::c_int, - c: *mut ::std::os::raw::c_int, - h: *mut ::std::os::raw::c_int, - w: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyPoolingDescriptor(poolingDesc: cudnnPoolingDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnPoolingForward( - handle: cudnnHandle_t, - poolingDesc: cudnnPoolingDescriptor_t, - alpha: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnActivationMode_t { - CUDNN_ACTIVATION_SIGMOID = 0, - CUDNN_ACTIVATION_RELU = 1, - CUDNN_ACTIVATION_TANH = 2, - CUDNN_ACTIVATION_CLIPPED_RELU = 3, - CUDNN_ACTIVATION_ELU = 4, - CUDNN_ACTIVATION_IDENTITY = 5, - CUDNN_ACTIVATION_SWISH = 6, -} -extern "C" { - pub fn cudnnCreateActivationDescriptor( - activationDesc: *mut cudnnActivationDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetActivationDescriptor( - activationDesc: cudnnActivationDescriptor_t, - mode: cudnnActivationMode_t, - reluNanOpt: cudnnNanPropagation_t, - coef: f64, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetActivationDescriptor( - activationDesc: cudnnActivationDescriptor_t, - mode: *mut cudnnActivationMode_t, - reluNanOpt: *mut cudnnNanPropagation_t, - coef: *mut f64, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetActivationDescriptorSwishBeta( - activationDesc: 
cudnnActivationDescriptor_t, - swish_beta: f64, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetActivationDescriptorSwishBeta( - activationDesc: cudnnActivationDescriptor_t, - swish_beta: *mut f64, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyActivationDescriptor( - activationDesc: cudnnActivationDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnActivationForward( - handle: cudnnHandle_t, - activationDesc: cudnnActivationDescriptor_t, - alpha: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCreateLRNDescriptor(normDesc: *mut cudnnLRNDescriptor_t) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnLRNMode_t { - CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, -} -extern "C" { - pub fn cudnnSetLRNDescriptor( - normDesc: cudnnLRNDescriptor_t, - lrnN: ::std::os::raw::c_uint, - lrnAlpha: f64, - lrnBeta: f64, - lrnK: f64, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetLRNDescriptor( - normDesc: cudnnLRNDescriptor_t, - lrnN: *mut ::std::os::raw::c_uint, - lrnAlpha: *mut f64, - lrnBeta: *mut f64, - lrnK: *mut f64, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyLRNDescriptor(lrnDesc: cudnnLRNDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnLRNCrossChannelForward( - handle: cudnnHandle_t, - normDesc: cudnnLRNDescriptor_t, - lrnMode: cudnnLRNMode_t, - alpha: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnDivNormMode_t { - CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0, -} -extern "C" { - pub fn 
cudnnDivisiveNormalizationForward( - handle: cudnnHandle_t, - normDesc: cudnnLRNDescriptor_t, - mode: cudnnDivNormMode_t, - alpha: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - means: *const ::std::os::raw::c_void, - temp: *mut ::std::os::raw::c_void, - temp2: *mut ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnBatchNormMode_t { - CUDNN_BATCHNORM_PER_ACTIVATION = 0, - CUDNN_BATCHNORM_SPATIAL = 1, - CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2, -} -extern "C" { - pub fn cudnnDeriveBNTensorDescriptor( - derivedBnDesc: cudnnTensorDescriptor_t, - xDesc: cudnnTensorDescriptor_t, - mode: cudnnBatchNormMode_t, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnBatchNormOps_t { - CUDNN_BATCHNORM_OPS_BN = 0, - CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1, - CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, -} -extern "C" { - pub fn cudnnBatchNormalizationForwardInference( - handle: cudnnHandle_t, - mode: cudnnBatchNormMode_t, - alpha: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - bnScaleBiasMeanVarDesc: cudnnTensorDescriptor_t, - bnScale: *const ::std::os::raw::c_void, - bnBias: *const ::std::os::raw::c_void, - estimatedMean: *const ::std::os::raw::c_void, - estimatedVariance: *const ::std::os::raw::c_void, - epsilon: f64, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnNormMode_t { - CUDNN_NORM_PER_ACTIVATION = 0, - CUDNN_NORM_PER_CHANNEL = 1, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnNormAlgo_t { - CUDNN_NORM_ALGO_STANDARD = 0, - 
CUDNN_NORM_ALGO_PERSIST = 1, -} -extern "C" { - pub fn cudnnDeriveNormTensorDescriptor( - derivedNormScaleBiasDesc: cudnnTensorDescriptor_t, - derivedNormMeanVarDesc: cudnnTensorDescriptor_t, - xDesc: cudnnTensorDescriptor_t, - mode: cudnnNormMode_t, - groupCnt: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnNormOps_t { - CUDNN_NORM_OPS_NORM = 0, - CUDNN_NORM_OPS_NORM_ACTIVATION = 1, - CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, -} -extern "C" { - pub fn cudnnNormalizationForwardInference( - handle: cudnnHandle_t, - mode: cudnnNormMode_t, - normOps: cudnnNormOps_t, - algo: cudnnNormAlgo_t, - alpha: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - normScaleBiasDesc: cudnnTensorDescriptor_t, - normScale: *const ::std::os::raw::c_void, - normBias: *const ::std::os::raw::c_void, - normMeanVarDesc: cudnnTensorDescriptor_t, - estimatedMean: *const ::std::os::raw::c_void, - estimatedVariance: *const ::std::os::raw::c_void, - zDesc: cudnnTensorDescriptor_t, - z: *const ::std::os::raw::c_void, - activationDesc: cudnnActivationDescriptor_t, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - epsilon: f64, - groupCnt: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnSamplerType_t { - CUDNN_SAMPLER_BILINEAR = 0, -} -extern "C" { - pub fn cudnnCreateSpatialTransformerDescriptor( - stDesc: *mut cudnnSpatialTransformerDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetSpatialTransformerNdDescriptor( - stDesc: cudnnSpatialTransformerDescriptor_t, - samplerType: cudnnSamplerType_t, - dataType: cudnnDataType_t, - nbDims: ::std::os::raw::c_int, - dimA: *const ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroySpatialTransformerDescriptor( - stDesc: 
cudnnSpatialTransformerDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSpatialTfGridGeneratorForward( - handle: cudnnHandle_t, - stDesc: cudnnSpatialTransformerDescriptor_t, - theta: *const ::std::os::raw::c_void, - grid: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSpatialTfSamplerForward( - handle: cudnnHandle_t, - stDesc: cudnnSpatialTransformerDescriptor_t, - alpha: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - grid: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnDropoutStruct { - _unused: [u8; 0], -} -pub type cudnnDropoutDescriptor_t = *mut cudnnDropoutStruct; -extern "C" { - pub fn cudnnCreateDropoutDescriptor( - dropoutDesc: *mut cudnnDropoutDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyDropoutDescriptor(dropoutDesc: cudnnDropoutDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDropoutGetStatesSize( - handle: cudnnHandle_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDropoutGetReserveSpaceSize( - xdesc: cudnnTensorDescriptor_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetDropoutDescriptor( - dropoutDesc: cudnnDropoutDescriptor_t, - handle: cudnnHandle_t, - dropout: f32, - states: *mut ::std::os::raw::c_void, - stateSizeInBytes: usize, - seed: ::std::os::raw::c_ulonglong, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRestoreDropoutDescriptor( - dropoutDesc: cudnnDropoutDescriptor_t, - handle: cudnnHandle_t, - dropout: f32, - states: *mut ::std::os::raw::c_void, - stateSizeInBytes: usize, - seed: ::std::os::raw::c_ulonglong, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetDropoutDescriptor( - dropoutDesc: cudnnDropoutDescriptor_t, - handle: 
cudnnHandle_t, - dropout: *mut f32, - states: *mut *mut ::std::os::raw::c_void, - seed: *mut ::std::os::raw::c_ulonglong, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDropoutForward( - handle: cudnnHandle_t, - dropoutDesc: cudnnDropoutDescriptor_t, - xdesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - ydesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnAlgorithmStruct { - _unused: [u8; 0], -} -pub type cudnnAlgorithmDescriptor_t = *mut cudnnAlgorithmStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnAlgorithmPerformanceStruct { - _unused: [u8; 0], -} -pub type cudnnAlgorithmPerformance_t = *mut cudnnAlgorithmPerformanceStruct; -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnConvolutionFwdAlgo_t { - CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0, - CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1, - CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2, - CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3, - CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4, - CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5, - CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6, - CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7, - CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnConvolutionBwdFilterAlgo_t { - CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, - CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1, - CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2, - CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3, - CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4, - CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5, - CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6, - CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnConvolutionBwdDataAlgo_t { - 
CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, - CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1, - CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2, - CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3, - CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4, - CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5, - CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnRNNAlgo_t { - CUDNN_RNN_ALGO_STANDARD = 0, - CUDNN_RNN_ALGO_PERSIST_STATIC = 1, - CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2, - CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H = 3, - CUDNN_RNN_ALGO_COUNT = 4, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnCTCLossAlgo_t { - CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, - CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct cudnnAlgorithmUnionStruct { - pub algo: cudnnAlgorithmUnionStruct_Algorithm, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union cudnnAlgorithmUnionStruct_Algorithm { - pub convFwdAlgo: cudnnConvolutionFwdAlgo_t, - pub convBwdFilterAlgo: cudnnConvolutionBwdFilterAlgo_t, - pub convBwdDataAlgo: cudnnConvolutionBwdDataAlgo_t, - pub RNNAlgo: cudnnRNNAlgo_t, - pub CTCLossAlgo: cudnnCTCLossAlgo_t, -} -pub type cudnnAlgorithm_t = cudnnAlgorithmUnionStruct; -extern "C" { - pub fn cudnnCreateAlgorithmDescriptor( - algoDesc: *mut cudnnAlgorithmDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetAlgorithmDescriptor( - algoDesc: cudnnAlgorithmDescriptor_t, - algorithm: cudnnAlgorithm_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetAlgorithmDescriptor( - algoDesc: cudnnAlgorithmDescriptor_t, - algorithm: *mut cudnnAlgorithm_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCopyAlgorithmDescriptor( - src: cudnnAlgorithmDescriptor_t, - dest: cudnnAlgorithmDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyAlgorithmDescriptor(algoDesc: cudnnAlgorithmDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn 
cudnnCreateAlgorithmPerformance( - algoPerf: *mut cudnnAlgorithmPerformance_t, - numberToCreate: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetAlgorithmPerformance( - algoPerf: cudnnAlgorithmPerformance_t, - algoDesc: cudnnAlgorithmDescriptor_t, - status: cudnnStatus_t, - time: f32, - memory: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetAlgorithmPerformance( - algoPerf: cudnnAlgorithmPerformance_t, - algoDesc: *mut cudnnAlgorithmDescriptor_t, - status: *mut cudnnStatus_t, - time: *mut f32, - memory: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyAlgorithmPerformance( - algoPerf: *mut cudnnAlgorithmPerformance_t, - numberToDestroy: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetAlgorithmSpaceSize( - handle: cudnnHandle_t, - algoDesc: cudnnAlgorithmDescriptor_t, - algoSpaceSizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSaveAlgorithm( - handle: cudnnHandle_t, - algoDesc: cudnnAlgorithmDescriptor_t, - algoSpace: *mut ::std::os::raw::c_void, - algoSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRestoreAlgorithm( - handle: cudnnHandle_t, - algoSpace: *mut ::std::os::raw::c_void, - algoSpaceSizeInBytes: usize, - algoDesc: cudnnAlgorithmDescriptor_t, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnSeverity_t { - CUDNN_SEV_FATAL = 0, - CUDNN_SEV_ERROR = 1, - CUDNN_SEV_WARNING = 2, - CUDNN_SEV_INFO = 3, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnDebugStruct { - pub cudnn_version: ::std::os::raw::c_uint, - pub cudnnStatus: cudnnStatus_t, - pub time_sec: ::std::os::raw::c_uint, - pub time_usec: ::std::os::raw::c_uint, - pub time_delta: ::std::os::raw::c_uint, - pub handle: cudnnHandle_t, - pub stream: cudaStream_t, - pub pid: ::std::os::raw::c_ulonglong, - pub tid: ::std::os::raw::c_ulonglong, - pub cudaDeviceId: 
::std::os::raw::c_int, - pub reserved: [::std::os::raw::c_int; 15usize], -} -pub type cudnnDebug_t = cudnnDebugStruct; -pub type cudnnCallback_t = ::std::option::Option< - unsafe extern "C" fn( - sev: cudnnSeverity_t, - udata: *mut ::std::os::raw::c_void, - dbg: *const cudnnDebug_t, - msg: *const ::std::os::raw::c_char, - ), ->; -extern "C" { - pub fn cudnnSetCallback( - mask: ::std::os::raw::c_uint, - udata: *mut ::std::os::raw::c_void, - fptr: cudnnCallback_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetCallback( - mask: *mut ::std::os::raw::c_uint, - udata: *mut *mut ::std::os::raw::c_void, - fptr: *mut cudnnCallback_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnOpsInferVersionCheck() -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSoftmaxBackward( - handle: cudnnHandle_t, - algo: cudnnSoftmaxAlgorithm_t, - mode: cudnnSoftmaxMode_t, - alpha: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - dxDesc: cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnPoolingBackward( - handle: cudnnHandle_t, - poolingDesc: cudnnPoolingDescriptor_t, - alpha: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - dxDesc: cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnActivationBackward( - handle: cudnnHandle_t, - activationDesc: cudnnActivationDescriptor_t, - alpha: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - xDesc: 
cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - dxDesc: cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnLRNCrossChannelBackward( - handle: cudnnHandle_t, - normDesc: cudnnLRNDescriptor_t, - lrnMode: cudnnLRNMode_t, - alpha: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - dxDesc: cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDivisiveNormalizationBackward( - handle: cudnnHandle_t, - normDesc: cudnnLRNDescriptor_t, - mode: cudnnDivNormMode_t, - alpha: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - means: *const ::std::os::raw::c_void, - dy: *const ::std::os::raw::c_void, - temp: *mut ::std::os::raw::c_void, - temp2: *mut ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - dXdMeansDesc: cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - dMeans: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( - handle: cudnnHandle_t, - mode: cudnnBatchNormMode_t, - bnOps: cudnnBatchNormOps_t, - xDesc: cudnnTensorDescriptor_t, - zDesc: cudnnTensorDescriptor_t, - yDesc: cudnnTensorDescriptor_t, - bnScaleBiasMeanVarDesc: cudnnTensorDescriptor_t, - activationDesc: cudnnActivationDescriptor_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetBatchNormalizationBackwardExWorkspaceSize( - handle: cudnnHandle_t, - mode: cudnnBatchNormMode_t, - bnOps: cudnnBatchNormOps_t, - xDesc: cudnnTensorDescriptor_t, - yDesc: cudnnTensorDescriptor_t, - dyDesc: cudnnTensorDescriptor_t, - dzDesc: 
cudnnTensorDescriptor_t, - dxDesc: cudnnTensorDescriptor_t, - dBnScaleBiasDesc: cudnnTensorDescriptor_t, - activationDesc: cudnnActivationDescriptor_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetBatchNormalizationTrainingExReserveSpaceSize( - handle: cudnnHandle_t, - mode: cudnnBatchNormMode_t, - bnOps: cudnnBatchNormOps_t, - activationDesc: cudnnActivationDescriptor_t, - xDesc: cudnnTensorDescriptor_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnBatchNormalizationForwardTraining( - handle: cudnnHandle_t, - mode: cudnnBatchNormMode_t, - alpha: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - bnScaleBiasMeanVarDesc: cudnnTensorDescriptor_t, - bnScale: *const ::std::os::raw::c_void, - bnBias: *const ::std::os::raw::c_void, - exponentialAverageFactor: f64, - resultRunningMean: *mut ::std::os::raw::c_void, - resultRunningVariance: *mut ::std::os::raw::c_void, - epsilon: f64, - resultSaveMean: *mut ::std::os::raw::c_void, - resultSaveInvVariance: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnBatchNormalizationForwardTrainingEx( - handle: cudnnHandle_t, - mode: cudnnBatchNormMode_t, - bnOps: cudnnBatchNormOps_t, - alpha: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - xData: *const ::std::os::raw::c_void, - zDesc: cudnnTensorDescriptor_t, - zData: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - yData: *mut ::std::os::raw::c_void, - bnScaleBiasMeanVarDesc: cudnnTensorDescriptor_t, - bnScale: *const ::std::os::raw::c_void, - bnBias: *const ::std::os::raw::c_void, - exponentialAverageFactor: f64, - resultRunningMean: *mut ::std::os::raw::c_void, - resultRunningVariance: *mut ::std::os::raw::c_void, - epsilon: f64, - 
resultSaveMean: *mut ::std::os::raw::c_void, - resultSaveInvVariance: *mut ::std::os::raw::c_void, - activationDesc: cudnnActivationDescriptor_t, - workspace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnBatchNormalizationBackward( - handle: cudnnHandle_t, - mode: cudnnBatchNormMode_t, - alphaDataDiff: *const ::std::os::raw::c_void, - betaDataDiff: *const ::std::os::raw::c_void, - alphaParamDiff: *const ::std::os::raw::c_void, - betaParamDiff: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - dxDesc: cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - dBnScaleBiasDesc: cudnnTensorDescriptor_t, - bnScale: *const ::std::os::raw::c_void, - dBnScaleResult: *mut ::std::os::raw::c_void, - dBnBiasResult: *mut ::std::os::raw::c_void, - epsilon: f64, - savedMean: *const ::std::os::raw::c_void, - savedInvVariance: *const ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnBatchNormalizationBackwardEx( - handle: cudnnHandle_t, - mode: cudnnBatchNormMode_t, - bnOps: cudnnBatchNormOps_t, - alphaDataDiff: *const ::std::os::raw::c_void, - betaDataDiff: *const ::std::os::raw::c_void, - alphaParamDiff: *const ::std::os::raw::c_void, - betaParamDiff: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - xData: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - yData: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dyData: *const ::std::os::raw::c_void, - dzDesc: cudnnTensorDescriptor_t, - dzData: *mut ::std::os::raw::c_void, - dxDesc: cudnnTensorDescriptor_t, - dxData: *mut ::std::os::raw::c_void, - dBnScaleBiasDesc: cudnnTensorDescriptor_t, - bnScaleData: *const ::std::os::raw::c_void, - bnBiasData: *const ::std::os::raw::c_void, - 
dBnScaleData: *mut ::std::os::raw::c_void, - dBnBiasData: *mut ::std::os::raw::c_void, - epsilon: f64, - savedMean: *const ::std::os::raw::c_void, - savedInvVariance: *const ::std::os::raw::c_void, - activationDesc: cudnnActivationDescriptor_t, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetNormalizationForwardTrainingWorkspaceSize( - handle: cudnnHandle_t, - mode: cudnnNormMode_t, - normOps: cudnnNormOps_t, - algo: cudnnNormAlgo_t, - xDesc: cudnnTensorDescriptor_t, - zDesc: cudnnTensorDescriptor_t, - yDesc: cudnnTensorDescriptor_t, - normScaleBiasDesc: cudnnTensorDescriptor_t, - activationDesc: cudnnActivationDescriptor_t, - normMeanVarDesc: cudnnTensorDescriptor_t, - sizeInBytes: *mut usize, - groupCnt: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetNormalizationBackwardWorkspaceSize( - handle: cudnnHandle_t, - mode: cudnnNormMode_t, - normOps: cudnnNormOps_t, - algo: cudnnNormAlgo_t, - xDesc: cudnnTensorDescriptor_t, - yDesc: cudnnTensorDescriptor_t, - dyDesc: cudnnTensorDescriptor_t, - dzDesc: cudnnTensorDescriptor_t, - dxDesc: cudnnTensorDescriptor_t, - dNormScaleBiasDesc: cudnnTensorDescriptor_t, - activationDesc: cudnnActivationDescriptor_t, - normMeanVarDesc: cudnnTensorDescriptor_t, - sizeInBytes: *mut usize, - groupCnt: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetNormalizationTrainingReserveSpaceSize( - handle: cudnnHandle_t, - mode: cudnnNormMode_t, - normOps: cudnnNormOps_t, - algo: cudnnNormAlgo_t, - activationDesc: cudnnActivationDescriptor_t, - xDesc: cudnnTensorDescriptor_t, - sizeInBytes: *mut usize, - groupCnt: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnNormalizationForwardTraining( - handle: cudnnHandle_t, - mode: cudnnNormMode_t, - normOps: cudnnNormOps_t, - algo: cudnnNormAlgo_t, - alpha: 
*const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - xData: *const ::std::os::raw::c_void, - normScaleBiasDesc: cudnnTensorDescriptor_t, - normScale: *const ::std::os::raw::c_void, - normBias: *const ::std::os::raw::c_void, - exponentialAverageFactor: f64, - normMeanVarDesc: cudnnTensorDescriptor_t, - resultRunningMean: *mut ::std::os::raw::c_void, - resultRunningVariance: *mut ::std::os::raw::c_void, - epsilon: f64, - resultSaveMean: *mut ::std::os::raw::c_void, - resultSaveInvVariance: *mut ::std::os::raw::c_void, - activationDesc: cudnnActivationDescriptor_t, - zDesc: cudnnTensorDescriptor_t, - zData: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - yData: *mut ::std::os::raw::c_void, - workspace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - groupCnt: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnNormalizationBackward( - handle: cudnnHandle_t, - mode: cudnnNormMode_t, - normOps: cudnnNormOps_t, - algo: cudnnNormAlgo_t, - alphaDataDiff: *const ::std::os::raw::c_void, - betaDataDiff: *const ::std::os::raw::c_void, - alphaParamDiff: *const ::std::os::raw::c_void, - betaParamDiff: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - xData: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - yData: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dyData: *const ::std::os::raw::c_void, - dzDesc: cudnnTensorDescriptor_t, - dzData: *mut ::std::os::raw::c_void, - dxDesc: cudnnTensorDescriptor_t, - dxData: *mut ::std::os::raw::c_void, - dNormScaleBiasDesc: cudnnTensorDescriptor_t, - normScaleData: *const ::std::os::raw::c_void, - normBiasData: *const ::std::os::raw::c_void, - dNormScaleData: *mut ::std::os::raw::c_void, - dNormBiasData: *mut ::std::os::raw::c_void, - epsilon: f64, - normMeanVarDesc: cudnnTensorDescriptor_t, - 
savedMean: *const ::std::os::raw::c_void, - savedInvVariance: *const ::std::os::raw::c_void, - activationDesc: cudnnActivationDescriptor_t, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - groupCnt: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSpatialTfGridGeneratorBackward( - handle: cudnnHandle_t, - stDesc: cudnnSpatialTransformerDescriptor_t, - dgrid: *const ::std::os::raw::c_void, - dtheta: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSpatialTfSamplerBackward( - handle: cudnnHandle_t, - stDesc: cudnnSpatialTransformerDescriptor_t, - alpha: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - dxDesc: cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - alphaDgrid: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - grid: *const ::std::os::raw::c_void, - betaDgrid: *const ::std::os::raw::c_void, - dgrid: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDropoutBackward( - handle: cudnnHandle_t, - dropoutDesc: cudnnDropoutDescriptor_t, - dydesc: cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - dxdesc: cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnOpsTrainVersionCheck() -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnForwardMode_t { - CUDNN_FWD_MODE_INFERENCE = 0, - CUDNN_FWD_MODE_TRAINING = 1, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnRNNMode_t { - CUDNN_RNN_RELU = 0, - CUDNN_RNN_TANH = 1, - CUDNN_LSTM = 2, - CUDNN_GRU = 3, -} -#[repr(u32)] -#[derive(Debug, Copy, 
Clone, Hash, PartialEq, Eq)] -pub enum cudnnRNNBiasMode_t { - CUDNN_RNN_NO_BIAS = 0, - CUDNN_RNN_SINGLE_INP_BIAS = 1, - CUDNN_RNN_DOUBLE_BIAS = 2, - CUDNN_RNN_SINGLE_REC_BIAS = 3, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnDirectionMode_t { - CUDNN_UNIDIRECTIONAL = 0, - CUDNN_BIDIRECTIONAL = 1, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnRNNInputMode_t { - CUDNN_LINEAR_INPUT = 0, - CUDNN_SKIP_INPUT = 1, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnRNNClipMode_t { - CUDNN_RNN_CLIP_NONE = 0, - CUDNN_RNN_CLIP_MINMAX = 1, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnRNNDataLayout_t { - CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED = 0, - CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED = 1, - CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED = 2, -} -pub type cudnnRNNPaddingMode_t = ::std::os::raw::c_uint; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnRNNStruct { - _unused: [u8; 0], -} -pub type cudnnRNNDescriptor_t = *mut cudnnRNNStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnPersistentRNNPlan { - _unused: [u8; 0], -} -pub type cudnnPersistentRNNPlan_t = *mut cudnnPersistentRNNPlan; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnRNNDataStruct { - _unused: [u8; 0], -} -pub type cudnnRNNDataDescriptor_t = *mut cudnnRNNDataStruct; -extern "C" { - pub fn cudnnCreateRNNDescriptor(rnnDesc: *mut cudnnRNNDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyRNNDescriptor(rnnDesc: cudnnRNNDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetRNNDescriptor_v8( - rnnDesc: cudnnRNNDescriptor_t, - algo: cudnnRNNAlgo_t, - cellMode: cudnnRNNMode_t, - biasMode: cudnnRNNBiasMode_t, - dirMode: cudnnDirectionMode_t, - inputMode: cudnnRNNInputMode_t, - dataType: cudnnDataType_t, - mathPrec: cudnnDataType_t, - mathType: cudnnMathType_t, - inputSize: i32, - hiddenSize: i32, - 
projSize: i32, - numLayers: i32, - dropoutDesc: cudnnDropoutDescriptor_t, - auxFlags: u32, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNDescriptor_v8( - rnnDesc: cudnnRNNDescriptor_t, - algo: *mut cudnnRNNAlgo_t, - cellMode: *mut cudnnRNNMode_t, - biasMode: *mut cudnnRNNBiasMode_t, - dirMode: *mut cudnnDirectionMode_t, - inputMode: *mut cudnnRNNInputMode_t, - dataType: *mut cudnnDataType_t, - mathPrec: *mut cudnnDataType_t, - mathType: *mut cudnnMathType_t, - inputSize: *mut i32, - hiddenSize: *mut i32, - projSize: *mut i32, - numLayers: *mut i32, - dropoutDesc: *mut cudnnDropoutDescriptor_t, - auxFlags: *mut u32, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetRNNDescriptor_v6( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - hiddenSize: ::std::os::raw::c_int, - numLayers: ::std::os::raw::c_int, - dropoutDesc: cudnnDropoutDescriptor_t, - inputMode: cudnnRNNInputMode_t, - direction: cudnnDirectionMode_t, - cellMode: cudnnRNNMode_t, - algo: cudnnRNNAlgo_t, - mathPrec: cudnnDataType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNDescriptor_v6( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - hiddenSize: *mut ::std::os::raw::c_int, - numLayers: *mut ::std::os::raw::c_int, - dropoutDesc: *mut cudnnDropoutDescriptor_t, - inputMode: *mut cudnnRNNInputMode_t, - direction: *mut cudnnDirectionMode_t, - cellMode: *mut cudnnRNNMode_t, - algo: *mut cudnnRNNAlgo_t, - mathPrec: *mut cudnnDataType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetRNNMatrixMathType( - rnnDesc: cudnnRNNDescriptor_t, - mType: cudnnMathType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNMatrixMathType( - rnnDesc: cudnnRNNDescriptor_t, - mType: *mut cudnnMathType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetRNNBiasMode( - rnnDesc: cudnnRNNDescriptor_t, - biasMode: cudnnRNNBiasMode_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNBiasMode( - rnnDesc: cudnnRNNDescriptor_t, - biasMode: *mut 
cudnnRNNBiasMode_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNSetClip_v8( - rnnDesc: cudnnRNNDescriptor_t, - clipMode: cudnnRNNClipMode_t, - clipNanOpt: cudnnNanPropagation_t, - lclip: f64, - rclip: f64, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNGetClip_v8( - rnnDesc: cudnnRNNDescriptor_t, - clipMode: *mut cudnnRNNClipMode_t, - clipNanOpt: *mut cudnnNanPropagation_t, - lclip: *mut f64, - rclip: *mut f64, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNSetClip( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - clipMode: cudnnRNNClipMode_t, - clipNanOpt: cudnnNanPropagation_t, - lclip: f64, - rclip: f64, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNGetClip( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - clipMode: *mut cudnnRNNClipMode_t, - clipNanOpt: *mut cudnnNanPropagation_t, - lclip: *mut f64, - rclip: *mut f64, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetRNNProjectionLayers( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - recProjSize: ::std::os::raw::c_int, - outProjSize: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNProjectionLayers( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - recProjSize: *mut ::std::os::raw::c_int, - outProjSize: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCreatePersistentRNNPlan( - rnnDesc: cudnnRNNDescriptor_t, - minibatch: ::std::os::raw::c_int, - dataType: cudnnDataType_t, - plan: *mut cudnnPersistentRNNPlan_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyPersistentRNNPlan(plan: cudnnPersistentRNNPlan_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetPersistentRNNPlan( - rnnDesc: cudnnRNNDescriptor_t, - plan: cudnnPersistentRNNPlan_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnBuildRNNDynamic( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - miniBatch: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub 
fn cudnnGetRNNWorkspaceSize( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - seqLength: ::std::os::raw::c_int, - xDesc: *const cudnnTensorDescriptor_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNTrainingReserveSize( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - seqLength: ::std::os::raw::c_int, - xDesc: *const cudnnTensorDescriptor_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNTempSpaceSizes( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - fMode: cudnnForwardMode_t, - xDesc: cudnnRNNDataDescriptor_t, - workSpaceSize: *mut usize, - reserveSpaceSize: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNParamsSize( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - xDesc: cudnnTensorDescriptor_t, - sizeInBytes: *mut usize, - dataType: cudnnDataType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNWeightSpaceSize( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - weightSpaceSize: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNLinLayerMatrixParams( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - pseudoLayer: ::std::os::raw::c_int, - xDesc: cudnnTensorDescriptor_t, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - linLayerID: ::std::os::raw::c_int, - linLayerMatDesc: cudnnFilterDescriptor_t, - linLayerMat: *mut *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNLinLayerBiasParams( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - pseudoLayer: ::std::os::raw::c_int, - xDesc: cudnnTensorDescriptor_t, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - linLayerID: ::std::os::raw::c_int, - linLayerBiasDesc: cudnnFilterDescriptor_t, - linLayerBias: *mut *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNWeightParams( - handle: cudnnHandle_t, - 
rnnDesc: cudnnRNNDescriptor_t, - pseudoLayer: i32, - weightSpaceSize: usize, - weightSpace: *const ::std::os::raw::c_void, - linLayerID: i32, - mDesc: cudnnTensorDescriptor_t, - mAddr: *mut *mut ::std::os::raw::c_void, - bDesc: cudnnTensorDescriptor_t, - bAddr: *mut *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNForwardInference( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - seqLength: ::std::os::raw::c_int, - xDesc: *const cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - cxDesc: cudnnTensorDescriptor_t, - cx: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - yDesc: *const cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - hyDesc: cudnnTensorDescriptor_t, - hy: *mut ::std::os::raw::c_void, - cyDesc: cudnnTensorDescriptor_t, - cy: *mut ::std::os::raw::c_void, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetRNNPaddingMode( - rnnDesc: cudnnRNNDescriptor_t, - paddingMode: ::std::os::raw::c_uint, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNPaddingMode( - rnnDesc: cudnnRNNDescriptor_t, - paddingMode: *mut ::std::os::raw::c_uint, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCreateRNNDataDescriptor( - rnnDataDesc: *mut cudnnRNNDataDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyRNNDataDescriptor(rnnDataDesc: cudnnRNNDataDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetRNNDataDescriptor( - rnnDataDesc: cudnnRNNDataDescriptor_t, - dataType: cudnnDataType_t, - layout: cudnnRNNDataLayout_t, - maxSeqLength: ::std::os::raw::c_int, - batchSize: ::std::os::raw::c_int, - vectorSize: ::std::os::raw::c_int, - seqLengthArray: *const ::std::os::raw::c_int, - paddingFill: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub 
fn cudnnGetRNNDataDescriptor( - rnnDataDesc: cudnnRNNDataDescriptor_t, - dataType: *mut cudnnDataType_t, - layout: *mut cudnnRNNDataLayout_t, - maxSeqLength: *mut ::std::os::raw::c_int, - batchSize: *mut ::std::os::raw::c_int, - vectorSize: *mut ::std::os::raw::c_int, - arrayLengthRequested: ::std::os::raw::c_int, - seqLengthArray: *mut ::std::os::raw::c_int, - paddingFill: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNForwardInferenceEx( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - xDesc: cudnnRNNDataDescriptor_t, - x: *const ::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - cxDesc: cudnnTensorDescriptor_t, - cx: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - yDesc: cudnnRNNDataDescriptor_t, - y: *mut ::std::os::raw::c_void, - hyDesc: cudnnTensorDescriptor_t, - hy: *mut ::std::os::raw::c_void, - cyDesc: cudnnTensorDescriptor_t, - cy: *mut ::std::os::raw::c_void, - kDesc: cudnnRNNDataDescriptor_t, - keys: *const ::std::os::raw::c_void, - cDesc: cudnnRNNDataDescriptor_t, - cAttn: *mut ::std::os::raw::c_void, - iDesc: cudnnRNNDataDescriptor_t, - iAttn: *mut ::std::os::raw::c_void, - qDesc: cudnnRNNDataDescriptor_t, - queries: *mut ::std::os::raw::c_void, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNForward( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - fwdMode: cudnnForwardMode_t, - devSeqLengths: *const i32, - xDesc: cudnnRNNDataDescriptor_t, - x: *const ::std::os::raw::c_void, - yDesc: cudnnRNNDataDescriptor_t, - y: *mut ::std::os::raw::c_void, - hDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - hy: *mut ::std::os::raw::c_void, - cDesc: cudnnTensorDescriptor_t, - cx: *const ::std::os::raw::c_void, - cy: *mut ::std::os::raw::c_void, - weightSpaceSize: usize, - weightSpace: *const 
::std::os::raw::c_void, - workSpaceSize: usize, - workSpace: *mut ::std::os::raw::c_void, - reserveSpaceSize: usize, - reserveSpace: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetRNNAlgorithmDescriptor( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - algoDesc: cudnnAlgorithmDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNForwardInferenceAlgorithmMaxCount( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - count: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnFindRNNForwardInferenceAlgorithmEx( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - seqLength: ::std::os::raw::c_int, - xDesc: *const cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - cxDesc: cudnnTensorDescriptor_t, - cx: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - yDesc: *const cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - hyDesc: cudnnTensorDescriptor_t, - hy: *mut ::std::os::raw::c_void, - cyDesc: cudnnTensorDescriptor_t, - cy: *mut ::std::os::raw::c_void, - findIntensity: f32, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnAlgorithmPerformance_t, - workspace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnSeqDataAxis_t { - CUDNN_SEQDATA_TIME_DIM = 0, - CUDNN_SEQDATA_BATCH_DIM = 1, - CUDNN_SEQDATA_BEAM_DIM = 2, - CUDNN_SEQDATA_VECT_DIM = 3, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnSeqDataStruct { - _unused: [u8; 0], -} -pub type cudnnSeqDataDescriptor_t = *mut cudnnSeqDataStruct; -extern "C" { - pub fn cudnnCreateSeqDataDescriptor( - seqDataDesc: *mut cudnnSeqDataDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" 
{ - pub fn cudnnDestroySeqDataDescriptor(seqDataDesc: cudnnSeqDataDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetSeqDataDescriptor( - seqDataDesc: cudnnSeqDataDescriptor_t, - dataType: cudnnDataType_t, - nbDims: ::std::os::raw::c_int, - dimA: *const ::std::os::raw::c_int, - axes: *const cudnnSeqDataAxis_t, - seqLengthArraySize: usize, - seqLengthArray: *const ::std::os::raw::c_int, - paddingFill: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetSeqDataDescriptor( - seqDataDesc: cudnnSeqDataDescriptor_t, - dataType: *mut cudnnDataType_t, - nbDims: *mut ::std::os::raw::c_int, - nbDimsRequested: ::std::os::raw::c_int, - dimA: *mut ::std::os::raw::c_int, - axes: *mut cudnnSeqDataAxis_t, - seqLengthArraySize: *mut usize, - seqLengthSizeRequested: usize, - seqLengthArray: *mut ::std::os::raw::c_int, - paddingFill: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -pub type cudnnAttnQueryMap_t = ::std::os::raw::c_uint; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnAttnStruct { - _unused: [u8; 0], -} -pub type cudnnAttnDescriptor_t = *mut cudnnAttnStruct; -extern "C" { - pub fn cudnnCreateAttnDescriptor(attnDesc: *mut cudnnAttnDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyAttnDescriptor(attnDesc: cudnnAttnDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetAttnDescriptor( - attnDesc: cudnnAttnDescriptor_t, - attnMode: ::std::os::raw::c_uint, - nHeads: ::std::os::raw::c_int, - smScaler: f64, - dataType: cudnnDataType_t, - computePrec: cudnnDataType_t, - mathType: cudnnMathType_t, - attnDropoutDesc: cudnnDropoutDescriptor_t, - postDropoutDesc: cudnnDropoutDescriptor_t, - qSize: ::std::os::raw::c_int, - kSize: ::std::os::raw::c_int, - vSize: ::std::os::raw::c_int, - qProjSize: ::std::os::raw::c_int, - kProjSize: ::std::os::raw::c_int, - vProjSize: ::std::os::raw::c_int, - oProjSize: ::std::os::raw::c_int, - qoMaxSeqLength: ::std::os::raw::c_int, - kvMaxSeqLength: 
::std::os::raw::c_int, - maxBatchSize: ::std::os::raw::c_int, - maxBeamSize: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetAttnDescriptor( - attnDesc: cudnnAttnDescriptor_t, - attnMode: *mut ::std::os::raw::c_uint, - nHeads: *mut ::std::os::raw::c_int, - smScaler: *mut f64, - dataType: *mut cudnnDataType_t, - computePrec: *mut cudnnDataType_t, - mathType: *mut cudnnMathType_t, - attnDropoutDesc: *mut cudnnDropoutDescriptor_t, - postDropoutDesc: *mut cudnnDropoutDescriptor_t, - qSize: *mut ::std::os::raw::c_int, - kSize: *mut ::std::os::raw::c_int, - vSize: *mut ::std::os::raw::c_int, - qProjSize: *mut ::std::os::raw::c_int, - kProjSize: *mut ::std::os::raw::c_int, - vProjSize: *mut ::std::os::raw::c_int, - oProjSize: *mut ::std::os::raw::c_int, - qoMaxSeqLength: *mut ::std::os::raw::c_int, - kvMaxSeqLength: *mut ::std::os::raw::c_int, - maxBatchSize: *mut ::std::os::raw::c_int, - maxBeamSize: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetMultiHeadAttnBuffers( - handle: cudnnHandle_t, - attnDesc: cudnnAttnDescriptor_t, - weightSizeInBytes: *mut usize, - workSpaceSizeInBytes: *mut usize, - reserveSpaceSizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnMultiHeadAttnWeightKind_t { - CUDNN_MH_ATTN_Q_WEIGHTS = 0, - CUDNN_MH_ATTN_K_WEIGHTS = 1, - CUDNN_MH_ATTN_V_WEIGHTS = 2, - CUDNN_MH_ATTN_O_WEIGHTS = 3, - CUDNN_MH_ATTN_Q_BIASES = 4, - CUDNN_MH_ATTN_K_BIASES = 5, - CUDNN_MH_ATTN_V_BIASES = 6, - CUDNN_MH_ATTN_O_BIASES = 7, -} -extern "C" { - pub fn cudnnGetMultiHeadAttnWeights( - handle: cudnnHandle_t, - attnDesc: cudnnAttnDescriptor_t, - wKind: cudnnMultiHeadAttnWeightKind_t, - weightSizeInBytes: usize, - weights: *const ::std::os::raw::c_void, - wDesc: cudnnTensorDescriptor_t, - wAddr: *mut *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnMultiHeadAttnForward( - handle: cudnnHandle_t, - 
attnDesc: cudnnAttnDescriptor_t, - currIdx: ::std::os::raw::c_int, - loWinIdx: *const ::std::os::raw::c_int, - hiWinIdx: *const ::std::os::raw::c_int, - devSeqLengthsQO: *const ::std::os::raw::c_int, - devSeqLengthsKV: *const ::std::os::raw::c_int, - qDesc: cudnnSeqDataDescriptor_t, - queries: *const ::std::os::raw::c_void, - residuals: *const ::std::os::raw::c_void, - kDesc: cudnnSeqDataDescriptor_t, - keys: *const ::std::os::raw::c_void, - vDesc: cudnnSeqDataDescriptor_t, - values: *const ::std::os::raw::c_void, - oDesc: cudnnSeqDataDescriptor_t, - out: *mut ::std::os::raw::c_void, - weightSizeInBytes: usize, - weights: *const ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - workSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnAdvInferVersionCheck() -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnWgradMode_t { - CUDNN_WGRAD_MODE_ADD = 0, - CUDNN_WGRAD_MODE_SET = 1, -} -extern "C" { - pub fn cudnnRNNForwardTraining( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - seqLength: ::std::os::raw::c_int, - xDesc: *const cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - cxDesc: cudnnTensorDescriptor_t, - cx: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - yDesc: *const cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - hyDesc: cudnnTensorDescriptor_t, - hy: *mut ::std::os::raw::c_void, - cyDesc: cudnnTensorDescriptor_t, - cy: *mut ::std::os::raw::c_void, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNBackwardData( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - 
seqLength: ::std::os::raw::c_int, - yDesc: *const cudnnTensorDescriptor_t, - y: *const ::std::os::raw::c_void, - dyDesc: *const cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - dhyDesc: cudnnTensorDescriptor_t, - dhy: *const ::std::os::raw::c_void, - dcyDesc: cudnnTensorDescriptor_t, - dcy: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - cxDesc: cudnnTensorDescriptor_t, - cx: *const ::std::os::raw::c_void, - dxDesc: *const cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - dhxDesc: cudnnTensorDescriptor_t, - dhx: *mut ::std::os::raw::c_void, - dcxDesc: cudnnTensorDescriptor_t, - dcx: *mut ::std::os::raw::c_void, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNBackwardData_v8( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - devSeqLengths: *const i32, - yDesc: cudnnRNNDataDescriptor_t, - y: *const ::std::os::raw::c_void, - dy: *const ::std::os::raw::c_void, - xDesc: cudnnRNNDataDescriptor_t, - dx: *mut ::std::os::raw::c_void, - hDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - dhy: *const ::std::os::raw::c_void, - dhx: *mut ::std::os::raw::c_void, - cDesc: cudnnTensorDescriptor_t, - cx: *const ::std::os::raw::c_void, - dcy: *const ::std::os::raw::c_void, - dcx: *mut ::std::os::raw::c_void, - weightSpaceSize: usize, - weightSpace: *const ::std::os::raw::c_void, - workSpaceSize: usize, - workSpace: *mut ::std::os::raw::c_void, - reserveSpaceSize: usize, - reserveSpace: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNBackwardWeights( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - seqLength: ::std::os::raw::c_int, - xDesc: *const cudnnTensorDescriptor_t, - x: *const 
::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - yDesc: *const cudnnTensorDescriptor_t, - y: *const ::std::os::raw::c_void, - workSpace: *const ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - dwDesc: cudnnFilterDescriptor_t, - dw: *mut ::std::os::raw::c_void, - reserveSpace: *const ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNBackwardWeights_v8( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - addGrad: cudnnWgradMode_t, - devSeqLengths: *const i32, - xDesc: cudnnRNNDataDescriptor_t, - x: *const ::std::os::raw::c_void, - hDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - yDesc: cudnnRNNDataDescriptor_t, - y: *const ::std::os::raw::c_void, - weightSpaceSize: usize, - dweightSpace: *mut ::std::os::raw::c_void, - workSpaceSize: usize, - workSpace: *mut ::std::os::raw::c_void, - reserveSpaceSize: usize, - reserveSpace: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNForwardTrainingEx( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - xDesc: cudnnRNNDataDescriptor_t, - x: *const ::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - cxDesc: cudnnTensorDescriptor_t, - cx: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - yDesc: cudnnRNNDataDescriptor_t, - y: *mut ::std::os::raw::c_void, - hyDesc: cudnnTensorDescriptor_t, - hy: *mut ::std::os::raw::c_void, - cyDesc: cudnnTensorDescriptor_t, - cy: *mut ::std::os::raw::c_void, - kDesc: cudnnRNNDataDescriptor_t, - keys: *const ::std::os::raw::c_void, - cDesc: cudnnRNNDataDescriptor_t, - cAttn: *mut ::std::os::raw::c_void, - iDesc: cudnnRNNDataDescriptor_t, - iAttn: *mut ::std::os::raw::c_void, - qDesc: cudnnRNNDataDescriptor_t, - queries: *mut ::std::os::raw::c_void, - workSpace: *mut ::std::os::raw::c_void, - 
workSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNBackwardDataEx( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - yDesc: cudnnRNNDataDescriptor_t, - y: *const ::std::os::raw::c_void, - dyDesc: cudnnRNNDataDescriptor_t, - dy: *const ::std::os::raw::c_void, - dcDesc: cudnnRNNDataDescriptor_t, - dcAttn: *const ::std::os::raw::c_void, - dhyDesc: cudnnTensorDescriptor_t, - dhy: *const ::std::os::raw::c_void, - dcyDesc: cudnnTensorDescriptor_t, - dcy: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - cxDesc: cudnnTensorDescriptor_t, - cx: *const ::std::os::raw::c_void, - dxDesc: cudnnRNNDataDescriptor_t, - dx: *mut ::std::os::raw::c_void, - dhxDesc: cudnnTensorDescriptor_t, - dhx: *mut ::std::os::raw::c_void, - dcxDesc: cudnnTensorDescriptor_t, - dcx: *mut ::std::os::raw::c_void, - dkDesc: cudnnRNNDataDescriptor_t, - dkeys: *mut ::std::os::raw::c_void, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnRNNBackwardWeightsEx( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - xDesc: cudnnRNNDataDescriptor_t, - x: *const ::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - yDesc: cudnnRNNDataDescriptor_t, - y: *const ::std::os::raw::c_void, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - dwDesc: cudnnFilterDescriptor_t, - dw: *mut ::std::os::raw::c_void, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNForwardTrainingAlgorithmMaxCount( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - count: *mut 
::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnFindRNNForwardTrainingAlgorithmEx( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - seqLength: ::std::os::raw::c_int, - xDesc: *const cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - cxDesc: cudnnTensorDescriptor_t, - cx: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - yDesc: *const cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - hyDesc: cudnnTensorDescriptor_t, - hy: *mut ::std::os::raw::c_void, - cyDesc: cudnnTensorDescriptor_t, - cy: *mut ::std::os::raw::c_void, - findIntensity: f32, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnAlgorithmPerformance_t, - workspace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNBackwardDataAlgorithmMaxCount( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - count: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnFindRNNBackwardDataAlgorithmEx( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - seqLength: ::std::os::raw::c_int, - yDesc: *const cudnnTensorDescriptor_t, - y: *const ::std::os::raw::c_void, - dyDesc: *const cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - dhyDesc: cudnnTensorDescriptor_t, - dhy: *const ::std::os::raw::c_void, - dcyDesc: cudnnTensorDescriptor_t, - dcy: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - cxDesc: cudnnTensorDescriptor_t, - cx: *const ::std::os::raw::c_void, - dxDesc: *const cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - dhxDesc: 
cudnnTensorDescriptor_t, - dhx: *mut ::std::os::raw::c_void, - dcxDesc: cudnnTensorDescriptor_t, - dcx: *mut ::std::os::raw::c_void, - findIntensity: f32, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnAlgorithmPerformance_t, - workspace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetRNNBackwardWeightsAlgorithmMaxCount( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - count: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnFindRNNBackwardWeightsAlgorithmEx( - handle: cudnnHandle_t, - rnnDesc: cudnnRNNDescriptor_t, - seqLength: ::std::os::raw::c_int, - xDesc: *const cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - hxDesc: cudnnTensorDescriptor_t, - hx: *const ::std::os::raw::c_void, - yDesc: *const cudnnTensorDescriptor_t, - y: *const ::std::os::raw::c_void, - findIntensity: f32, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnAlgorithmPerformance_t, - workspace: *const ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - dwDesc: cudnnFilterDescriptor_t, - dw: *mut ::std::os::raw::c_void, - reserveSpace: *const ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnMultiHeadAttnBackwardData( - handle: cudnnHandle_t, - attnDesc: cudnnAttnDescriptor_t, - loWinIdx: *const ::std::os::raw::c_int, - hiWinIdx: *const ::std::os::raw::c_int, - devSeqLengthsDQDO: *const ::std::os::raw::c_int, - devSeqLengthsDKDV: *const ::std::os::raw::c_int, - doDesc: cudnnSeqDataDescriptor_t, - dout: *const ::std::os::raw::c_void, - dqDesc: cudnnSeqDataDescriptor_t, - dqueries: *mut ::std::os::raw::c_void, - queries: *const ::std::os::raw::c_void, - dkDesc: cudnnSeqDataDescriptor_t, - 
dkeys: *mut ::std::os::raw::c_void, - keys: *const ::std::os::raw::c_void, - dvDesc: cudnnSeqDataDescriptor_t, - dvalues: *mut ::std::os::raw::c_void, - values: *const ::std::os::raw::c_void, - weightSizeInBytes: usize, - weights: *const ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - workSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnMultiHeadAttnBackwardWeights( - handle: cudnnHandle_t, - attnDesc: cudnnAttnDescriptor_t, - addGrad: cudnnWgradMode_t, - qDesc: cudnnSeqDataDescriptor_t, - queries: *const ::std::os::raw::c_void, - kDesc: cudnnSeqDataDescriptor_t, - keys: *const ::std::os::raw::c_void, - vDesc: cudnnSeqDataDescriptor_t, - values: *const ::std::os::raw::c_void, - doDesc: cudnnSeqDataDescriptor_t, - dout: *const ::std::os::raw::c_void, - weightSizeInBytes: usize, - weights: *const ::std::os::raw::c_void, - dweights: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - workSpace: *mut ::std::os::raw::c_void, - reserveSpaceSizeInBytes: usize, - reserveSpace: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnLossNormalizationMode_t { - CUDNN_LOSS_NORMALIZATION_NONE = 0, - CUDNN_LOSS_NORMALIZATION_SOFTMAX = 1, -} -extern "C" { - pub fn cudnnCreateCTCLossDescriptor( - ctcLossDesc: *mut cudnnCTCLossDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetCTCLossDescriptor( - ctcLossDesc: cudnnCTCLossDescriptor_t, - compType: cudnnDataType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetCTCLossDescriptorEx( - ctcLossDesc: cudnnCTCLossDescriptor_t, - compType: cudnnDataType_t, - normMode: cudnnLossNormalizationMode_t, - gradMode: cudnnNanPropagation_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetCTCLossDescriptor_v8( - ctcLossDesc: cudnnCTCLossDescriptor_t, - compType: cudnnDataType_t, - normMode: 
cudnnLossNormalizationMode_t, - gradMode: cudnnNanPropagation_t, - maxLabelLength: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetCTCLossDescriptor( - ctcLossDesc: cudnnCTCLossDescriptor_t, - compType: *mut cudnnDataType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetCTCLossDescriptorEx( - ctcLossDesc: cudnnCTCLossDescriptor_t, - compType: *mut cudnnDataType_t, - normMode: *mut cudnnLossNormalizationMode_t, - gradMode: *mut cudnnNanPropagation_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetCTCLossDescriptor_v8( - ctcLossDesc: cudnnCTCLossDescriptor_t, - compType: *mut cudnnDataType_t, - normMode: *mut cudnnLossNormalizationMode_t, - gradMode: *mut cudnnNanPropagation_t, - maxLabelLength: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyCTCLossDescriptor(ctcLossDesc: cudnnCTCLossDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCTCLoss( - handle: cudnnHandle_t, - probsDesc: cudnnTensorDescriptor_t, - probs: *const ::std::os::raw::c_void, - hostLabels: *const ::std::os::raw::c_int, - hostLabelLengths: *const ::std::os::raw::c_int, - hostInputLengths: *const ::std::os::raw::c_int, - costs: *mut ::std::os::raw::c_void, - gradientsDesc: cudnnTensorDescriptor_t, - gradients: *mut ::std::os::raw::c_void, - algo: cudnnCTCLossAlgo_t, - ctcLossDesc: cudnnCTCLossDescriptor_t, - workspace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCTCLoss_v8( - handle: cudnnHandle_t, - algo: cudnnCTCLossAlgo_t, - ctcLossDesc: cudnnCTCLossDescriptor_t, - probsDesc: cudnnTensorDescriptor_t, - probs: *const ::std::os::raw::c_void, - labels: *const ::std::os::raw::c_int, - labelLengths: *const ::std::os::raw::c_int, - inputLengths: *const ::std::os::raw::c_int, - costs: *mut ::std::os::raw::c_void, - gradientsDesc: cudnnTensorDescriptor_t, - gradients: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - 
workspace: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetCTCLossWorkspaceSize( - handle: cudnnHandle_t, - probsDesc: cudnnTensorDescriptor_t, - gradientsDesc: cudnnTensorDescriptor_t, - labels: *const ::std::os::raw::c_int, - labelLengths: *const ::std::os::raw::c_int, - inputLengths: *const ::std::os::raw::c_int, - algo: cudnnCTCLossAlgo_t, - ctcLossDesc: cudnnCTCLossDescriptor_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetCTCLossWorkspaceSize_v8( - handle: cudnnHandle_t, - algo: cudnnCTCLossAlgo_t, - ctcLossDesc: cudnnCTCLossDescriptor_t, - probsDesc: cudnnTensorDescriptor_t, - gradientsDesc: cudnnTensorDescriptor_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnAdvTrainVersionCheck() -> cudnnStatus_t; -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnConvolutionStruct { - _unused: [u8; 0], -} -pub type cudnnConvolutionDescriptor_t = *mut cudnnConvolutionStruct; -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnConvolutionMode_t { - CUDNN_CONVOLUTION = 0, - CUDNN_CROSS_CORRELATION = 1, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnReorderType_t { - CUDNN_DEFAULT_REORDER = 0, - CUDNN_NO_REORDER = 1, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnConvolutionFwdAlgoPerfStruct { - pub algo: cudnnConvolutionFwdAlgo_t, - pub status: cudnnStatus_t, - pub time: f32, - pub memory: usize, - pub determinism: cudnnDeterminism_t, - pub mathType: cudnnMathType_t, - pub reserved: [::std::os::raw::c_int; 3usize], -} -pub type cudnnConvolutionFwdAlgoPerf_t = cudnnConvolutionFwdAlgoPerfStruct; -extern "C" { - pub fn cudnnCreateConvolutionDescriptor( - convDesc: *mut cudnnConvolutionDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyConvolutionDescriptor( - convDesc: cudnnConvolutionDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn 
cudnnSetConvolutionMathType( - convDesc: cudnnConvolutionDescriptor_t, - mathType: cudnnMathType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionMathType( - convDesc: cudnnConvolutionDescriptor_t, - mathType: *mut cudnnMathType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetConvolutionGroupCount( - convDesc: cudnnConvolutionDescriptor_t, - groupCount: ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionGroupCount( - convDesc: cudnnConvolutionDescriptor_t, - groupCount: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetConvolutionReorderType( - convDesc: cudnnConvolutionDescriptor_t, - reorderType: cudnnReorderType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionReorderType( - convDesc: cudnnConvolutionDescriptor_t, - reorderType: *mut cudnnReorderType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetConvolution2dDescriptor( - convDesc: cudnnConvolutionDescriptor_t, - pad_h: ::std::os::raw::c_int, - pad_w: ::std::os::raw::c_int, - u: ::std::os::raw::c_int, - v: ::std::os::raw::c_int, - dilation_h: ::std::os::raw::c_int, - dilation_w: ::std::os::raw::c_int, - mode: cudnnConvolutionMode_t, - computeType: cudnnDataType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolution2dDescriptor( - convDesc: cudnnConvolutionDescriptor_t, - pad_h: *mut ::std::os::raw::c_int, - pad_w: *mut ::std::os::raw::c_int, - u: *mut ::std::os::raw::c_int, - v: *mut ::std::os::raw::c_int, - dilation_h: *mut ::std::os::raw::c_int, - dilation_w: *mut ::std::os::raw::c_int, - mode: *mut cudnnConvolutionMode_t, - computeType: *mut cudnnDataType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetConvolutionNdDescriptor( - convDesc: cudnnConvolutionDescriptor_t, - arrayLength: ::std::os::raw::c_int, - padA: *const ::std::os::raw::c_int, - filterStrideA: *const ::std::os::raw::c_int, - dilationA: *const ::std::os::raw::c_int, - mode: 
cudnnConvolutionMode_t, - computeType: cudnnDataType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionNdDescriptor( - convDesc: cudnnConvolutionDescriptor_t, - arrayLengthRequested: ::std::os::raw::c_int, - arrayLength: *mut ::std::os::raw::c_int, - padA: *mut ::std::os::raw::c_int, - strideA: *mut ::std::os::raw::c_int, - dilationA: *mut ::std::os::raw::c_int, - mode: *mut cudnnConvolutionMode_t, - computeType: *mut cudnnDataType_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolution2dForwardOutputDim( - convDesc: cudnnConvolutionDescriptor_t, - inputTensorDesc: cudnnTensorDescriptor_t, - filterDesc: cudnnFilterDescriptor_t, - n: *mut ::std::os::raw::c_int, - c: *mut ::std::os::raw::c_int, - h: *mut ::std::os::raw::c_int, - w: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionNdForwardOutputDim( - convDesc: cudnnConvolutionDescriptor_t, - inputTensorDesc: cudnnTensorDescriptor_t, - filterDesc: cudnnFilterDescriptor_t, - nbDims: ::std::os::raw::c_int, - tensorOuputDimA: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionForwardAlgorithmMaxCount( - handle: cudnnHandle_t, - count: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionForwardAlgorithm_v7( - handle: cudnnHandle_t, - srcDesc: cudnnTensorDescriptor_t, - filterDesc: cudnnFilterDescriptor_t, - convDesc: cudnnConvolutionDescriptor_t, - destDesc: cudnnTensorDescriptor_t, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnConvolutionFwdAlgoPerf_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnFindConvolutionForwardAlgorithm( - handle: cudnnHandle_t, - xDesc: cudnnTensorDescriptor_t, - wDesc: cudnnFilterDescriptor_t, - convDesc: cudnnConvolutionDescriptor_t, - yDesc: cudnnTensorDescriptor_t, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut 
::std::os::raw::c_int, - perfResults: *mut cudnnConvolutionFwdAlgoPerf_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnFindConvolutionForwardAlgorithmEx( - handle: cudnnHandle_t, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - convDesc: cudnnConvolutionDescriptor_t, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnConvolutionFwdAlgoPerf_t, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnIm2Col( - handle: cudnnHandle_t, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - convDesc: cudnnConvolutionDescriptor_t, - colBuffer: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnReorderFilterAndBias( - handle: cudnnHandle_t, - filterDesc: cudnnFilterDescriptor_t, - reorderType: cudnnReorderType_t, - filterData: *const ::std::os::raw::c_void, - reorderedFilterData: *mut ::std::os::raw::c_void, - reorderBias: ::std::os::raw::c_int, - biasData: *const ::std::os::raw::c_void, - reorderedBiasData: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionForwardWorkspaceSize( - handle: cudnnHandle_t, - xDesc: cudnnTensorDescriptor_t, - wDesc: cudnnFilterDescriptor_t, - convDesc: cudnnConvolutionDescriptor_t, - yDesc: cudnnTensorDescriptor_t, - algo: cudnnConvolutionFwdAlgo_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnConvolutionForward( - handle: cudnnHandle_t, - alpha: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - convDesc: cudnnConvolutionDescriptor_t, - algo: 
cudnnConvolutionFwdAlgo_t, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - beta: *const ::std::os::raw::c_void, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnConvolutionBiasActivationForward( - handle: cudnnHandle_t, - alpha1: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - convDesc: cudnnConvolutionDescriptor_t, - algo: cudnnConvolutionFwdAlgo_t, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - alpha2: *const ::std::os::raw::c_void, - zDesc: cudnnTensorDescriptor_t, - z: *const ::std::os::raw::c_void, - biasDesc: cudnnTensorDescriptor_t, - bias: *const ::std::os::raw::c_void, - activationDesc: cudnnActivationDescriptor_t, - yDesc: cudnnTensorDescriptor_t, - y: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnConvolutionBwdDataAlgoPerfStruct { - pub algo: cudnnConvolutionBwdDataAlgo_t, - pub status: cudnnStatus_t, - pub time: f32, - pub memory: usize, - pub determinism: cudnnDeterminism_t, - pub mathType: cudnnMathType_t, - pub reserved: [::std::os::raw::c_int; 3usize], -} -pub type cudnnConvolutionBwdDataAlgoPerf_t = cudnnConvolutionBwdDataAlgoPerfStruct; -extern "C" { - pub fn cudnnGetConvolutionBackwardDataAlgorithmMaxCount( - handle: cudnnHandle_t, - count: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnFindConvolutionBackwardDataAlgorithm( - handle: cudnnHandle_t, - wDesc: cudnnFilterDescriptor_t, - dyDesc: cudnnTensorDescriptor_t, - convDesc: cudnnConvolutionDescriptor_t, - dxDesc: cudnnTensorDescriptor_t, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnConvolutionBwdDataAlgoPerf_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn 
cudnnFindConvolutionBackwardDataAlgorithmEx( - handle: cudnnHandle_t, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - convDesc: cudnnConvolutionDescriptor_t, - dxDesc: cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnConvolutionBwdDataAlgoPerf_t, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionBackwardDataAlgorithm_v7( - handle: cudnnHandle_t, - filterDesc: cudnnFilterDescriptor_t, - diffDesc: cudnnTensorDescriptor_t, - convDesc: cudnnConvolutionDescriptor_t, - gradDesc: cudnnTensorDescriptor_t, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnConvolutionBwdDataAlgoPerf_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionBackwardDataWorkspaceSize( - handle: cudnnHandle_t, - wDesc: cudnnFilterDescriptor_t, - dyDesc: cudnnTensorDescriptor_t, - convDesc: cudnnConvolutionDescriptor_t, - dxDesc: cudnnTensorDescriptor_t, - algo: cudnnConvolutionBwdDataAlgo_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnConvolutionBackwardData( - handle: cudnnHandle_t, - alpha: *const ::std::os::raw::c_void, - wDesc: cudnnFilterDescriptor_t, - w: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - convDesc: cudnnConvolutionDescriptor_t, - algo: cudnnConvolutionBwdDataAlgo_t, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - beta: *const ::std::os::raw::c_void, - dxDesc: cudnnTensorDescriptor_t, - dx: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetFoldedConvBackwardDataDescriptors( - handle: cudnnHandle_t, - filterDesc: 
cudnnFilterDescriptor_t, - diffDesc: cudnnTensorDescriptor_t, - convDesc: cudnnConvolutionDescriptor_t, - gradDesc: cudnnTensorDescriptor_t, - transformFormat: cudnnTensorFormat_t, - foldedFilterDesc: cudnnFilterDescriptor_t, - paddedDiffDesc: cudnnTensorDescriptor_t, - foldedConvDesc: cudnnConvolutionDescriptor_t, - foldedGradDesc: cudnnTensorDescriptor_t, - filterFoldTransDesc: cudnnTensorTransformDescriptor_t, - diffPadTransDesc: cudnnTensorTransformDescriptor_t, - gradFoldTransDesc: cudnnTensorTransformDescriptor_t, - gradUnfoldTransDesc: cudnnTensorTransformDescriptor_t, - ) -> cudnnStatus_t; -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnFusedOpsConstParamStruct { - _unused: [u8; 0], -} -pub type cudnnFusedOpsConstParamPack_t = *mut cudnnFusedOpsConstParamStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnFusedOpsVariantParamStruct { - _unused: [u8; 0], -} -pub type cudnnFusedOpsVariantParamPack_t = *mut cudnnFusedOpsVariantParamStruct; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnFusedOpsPlanStruct { - _unused: [u8; 0], -} -pub type cudnnFusedOpsPlan_t = *mut cudnnFusedOpsPlanStruct; -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnFusedOps_t { - CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0, - CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1, - CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2, - CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3, - CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4, - CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5, - CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnFusedOpsConstParamLabel_t { - CUDNN_PARAM_XDESC = 0, - CUDNN_PARAM_XDATA_PLACEHOLDER = 1, - CUDNN_PARAM_BN_MODE = 2, - CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3, - CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4, - CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5, - CUDNN_PARAM_ACTIVATION_DESC = 6, - CUDNN_PARAM_CONV_DESC = 7, - 
CUDNN_PARAM_WDESC = 8, - CUDNN_PARAM_WDATA_PLACEHOLDER = 9, - CUDNN_PARAM_DWDESC = 10, - CUDNN_PARAM_DWDATA_PLACEHOLDER = 11, - CUDNN_PARAM_YDESC = 12, - CUDNN_PARAM_YDATA_PLACEHOLDER = 13, - CUDNN_PARAM_DYDESC = 14, - CUDNN_PARAM_DYDATA_PLACEHOLDER = 15, - CUDNN_PARAM_YSTATS_DESC = 16, - CUDNN_PARAM_YSUM_PLACEHOLDER = 17, - CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18, - CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19, - CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20, - CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21, - CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22, - CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23, - CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24, - CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25, - CUDNN_PARAM_ZDESC = 26, - CUDNN_PARAM_ZDATA_PLACEHOLDER = 27, - CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28, - CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29, - CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30, - CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31, - CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32, - CUDNN_PARAM_DXDESC = 33, - CUDNN_PARAM_DXDATA_PLACEHOLDER = 34, - CUDNN_PARAM_DZDESC = 35, - CUDNN_PARAM_DZDATA_PLACEHOLDER = 36, - CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37, - CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnFusedOpsPointerPlaceHolder_t { - CUDNN_PTR_NULL = 0, - CUDNN_PTR_ELEM_ALIGNED = 1, - CUDNN_PTR_16B_ALIGNED = 2, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnFusedOpsVariantParamLabel_t { - CUDNN_PTR_XDATA = 0, - CUDNN_PTR_BN_EQSCALE = 1, - CUDNN_PTR_BN_EQBIAS = 2, - CUDNN_PTR_WDATA = 3, - CUDNN_PTR_DWDATA = 4, - CUDNN_PTR_YDATA = 5, - CUDNN_PTR_DYDATA = 6, - CUDNN_PTR_YSUM = 7, - CUDNN_PTR_YSQSUM = 8, - CUDNN_PTR_WORKSPACE = 9, - CUDNN_PTR_BN_SCALE = 10, - CUDNN_PTR_BN_BIAS = 11, - CUDNN_PTR_BN_SAVED_MEAN = 12, - CUDNN_PTR_BN_SAVED_INVSTD = 13, - CUDNN_PTR_BN_RUNNING_MEAN = 14, - CUDNN_PTR_BN_RUNNING_VAR = 15, - CUDNN_PTR_ZDATA = 16, - CUDNN_PTR_BN_Z_EQSCALE = 17, - 
CUDNN_PTR_BN_Z_EQBIAS = 18, - CUDNN_PTR_ACTIVATION_BITMASK = 19, - CUDNN_PTR_DXDATA = 20, - CUDNN_PTR_DZDATA = 21, - CUDNN_PTR_BN_DSCALE = 22, - CUDNN_PTR_BN_DBIAS = 23, - CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100, - CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101, - CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102, - CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103, -} -extern "C" { - pub fn cudnnCnnInferVersionCheck() -> cudnnStatus_t; -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct cudnnConvolutionBwdFilterAlgoPerfStruct { - pub algo: cudnnConvolutionBwdFilterAlgo_t, - pub status: cudnnStatus_t, - pub time: f32, - pub memory: usize, - pub determinism: cudnnDeterminism_t, - pub mathType: cudnnMathType_t, - pub reserved: [::std::os::raw::c_int; 3usize], -} -pub type cudnnConvolutionBwdFilterAlgoPerf_t = cudnnConvolutionBwdFilterAlgoPerfStruct; -extern "C" { - pub fn cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( - handle: cudnnHandle_t, - count: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnFindConvolutionBackwardFilterAlgorithm( - handle: cudnnHandle_t, - xDesc: cudnnTensorDescriptor_t, - dyDesc: cudnnTensorDescriptor_t, - convDesc: cudnnConvolutionDescriptor_t, - dwDesc: cudnnFilterDescriptor_t, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnConvolutionBwdFilterAlgoPerf_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnFindConvolutionBackwardFilterAlgorithmEx( - handle: cudnnHandle_t, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - y: *const ::std::os::raw::c_void, - convDesc: cudnnConvolutionDescriptor_t, - dwDesc: cudnnFilterDescriptor_t, - dw: *mut ::std::os::raw::c_void, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnConvolutionBwdFilterAlgoPerf_t, - workSpace: *mut ::std::os::raw::c_void, - 
workSpaceSizeInBytes: usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionBackwardFilterAlgorithm_v7( - handle: cudnnHandle_t, - srcDesc: cudnnTensorDescriptor_t, - diffDesc: cudnnTensorDescriptor_t, - convDesc: cudnnConvolutionDescriptor_t, - gradDesc: cudnnFilterDescriptor_t, - requestedAlgoCount: ::std::os::raw::c_int, - returnedAlgoCount: *mut ::std::os::raw::c_int, - perfResults: *mut cudnnConvolutionBwdFilterAlgoPerf_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetConvolutionBackwardFilterWorkspaceSize( - handle: cudnnHandle_t, - xDesc: cudnnTensorDescriptor_t, - dyDesc: cudnnTensorDescriptor_t, - convDesc: cudnnConvolutionDescriptor_t, - gradDesc: cudnnFilterDescriptor_t, - algo: cudnnConvolutionBwdFilterAlgo_t, - sizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnConvolutionBackwardFilter( - handle: cudnnHandle_t, - alpha: *const ::std::os::raw::c_void, - xDesc: cudnnTensorDescriptor_t, - x: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - convDesc: cudnnConvolutionDescriptor_t, - algo: cudnnConvolutionBwdFilterAlgo_t, - workSpace: *mut ::std::os::raw::c_void, - workSpaceSizeInBytes: usize, - beta: *const ::std::os::raw::c_void, - dwDesc: cudnnFilterDescriptor_t, - dw: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnConvolutionBackwardBias( - handle: cudnnHandle_t, - alpha: *const ::std::os::raw::c_void, - dyDesc: cudnnTensorDescriptor_t, - dy: *const ::std::os::raw::c_void, - beta: *const ::std::os::raw::c_void, - dbDesc: cudnnTensorDescriptor_t, - db: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCreateFusedOpsConstParamPack( - constPack: *mut cudnnFusedOpsConstParamPack_t, - ops: cudnnFusedOps_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyFusedOpsConstParamPack( - constPack: cudnnFusedOpsConstParamPack_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn 
cudnnSetFusedOpsConstParamPackAttribute( - constPack: cudnnFusedOpsConstParamPack_t, - paramLabel: cudnnFusedOpsConstParamLabel_t, - param: *const ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetFusedOpsConstParamPackAttribute( - constPack: cudnnFusedOpsConstParamPack_t, - paramLabel: cudnnFusedOpsConstParamLabel_t, - param: *mut ::std::os::raw::c_void, - isNULL: *mut ::std::os::raw::c_int, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCreateFusedOpsVariantParamPack( - varPack: *mut cudnnFusedOpsVariantParamPack_t, - ops: cudnnFusedOps_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyFusedOpsVariantParamPack( - varPack: cudnnFusedOpsVariantParamPack_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnSetFusedOpsVariantParamPackAttribute( - varPack: cudnnFusedOpsVariantParamPack_t, - paramLabel: cudnnFusedOpsVariantParamLabel_t, - ptr: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnGetFusedOpsVariantParamPackAttribute( - varPack: cudnnFusedOpsVariantParamPack_t, - paramLabel: cudnnFusedOpsVariantParamLabel_t, - ptr: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCreateFusedOpsPlan( - plan: *mut cudnnFusedOpsPlan_t, - ops: cudnnFusedOps_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnDestroyFusedOpsPlan(plan: cudnnFusedOpsPlan_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnMakeFusedOpsPlan( - handle: cudnnHandle_t, - plan: cudnnFusedOpsPlan_t, - constPack: cudnnFusedOpsConstParamPack_t, - workspaceSizeInBytes: *mut usize, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnFusedOpsExecute( - handle: cudnnHandle_t, - plan: cudnnFusedOpsPlan_t, - varPack: cudnnFusedOpsVariantParamPack_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnCnnTrainVersionCheck() -> cudnnStatus_t; -} -pub type cudnnBackendDescriptor_t = *mut ::std::os::raw::c_void; -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum 
cudnnPointwiseMode_t { - CUDNN_POINTWISE_ADD = 0, - CUDNN_POINTWISE_ADD_SQUARE = 5, - CUDNN_POINTWISE_DIV = 6, - CUDNN_POINTWISE_MAX = 3, - CUDNN_POINTWISE_MIN = 2, - CUDNN_POINTWISE_MOD = 7, - CUDNN_POINTWISE_MUL = 1, - CUDNN_POINTWISE_POW = 8, - CUDNN_POINTWISE_SUB = 9, - CUDNN_POINTWISE_ABS = 10, - CUDNN_POINTWISE_CEIL = 11, - CUDNN_POINTWISE_COS = 12, - CUDNN_POINTWISE_EXP = 13, - CUDNN_POINTWISE_FLOOR = 14, - CUDNN_POINTWISE_LOG = 15, - CUDNN_POINTWISE_NEG = 16, - CUDNN_POINTWISE_RSQRT = 17, - CUDNN_POINTWISE_SIN = 18, - CUDNN_POINTWISE_SQRT = 4, - CUDNN_POINTWISE_TAN = 19, - CUDNN_POINTWISE_RELU_FWD = 100, - CUDNN_POINTWISE_TANH_FWD = 101, - CUDNN_POINTWISE_SIGMOID_FWD = 102, - CUDNN_POINTWISE_ELU_FWD = 103, - CUDNN_POINTWISE_GELU_FWD = 104, - CUDNN_POINTWISE_SOFTPLUS_FWD = 105, - CUDNN_POINTWISE_SWISH_FWD = 106, - CUDNN_POINTWISE_RELU_BWD = 200, - CUDNN_POINTWISE_TANH_BWD = 201, - CUDNN_POINTWISE_SIGMOID_BWD = 202, - CUDNN_POINTWISE_ELU_BWD = 203, - CUDNN_POINTWISE_GELU_BWD = 204, - CUDNN_POINTWISE_SOFTPLUS_BWD = 205, - CUDNN_POINTWISE_SWISH_BWD = 206, - CUDNN_POINTWISE_CMP_EQ = 300, - CUDNN_POINTWISE_CMP_NEQ = 301, - CUDNN_POINTWISE_CMP_GT = 302, - CUDNN_POINTWISE_CMP_GE = 303, - CUDNN_POINTWISE_CMP_LT = 304, - CUDNN_POINTWISE_CMP_LE = 305, - CUDNN_POINTWISE_LOGICAL_AND = 400, - CUDNN_POINTWISE_LOGICAL_OR = 401, - CUDNN_POINTWISE_LOGICAL_NOT = 402, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnResampleMode_t { - CUDNN_RESAMPLE_NEAREST = 0, - CUDNN_RESAMPLE_BILINEAR = 1, - CUDNN_RESAMPLE_AVGPOOL = 2, - CUDNN_RESAMPLE_MAXPOOL = 3, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnGenStatsMode_t { - CUDNN_GENSTATS_SUM_SQSUM = 0, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnBnFinalizeStatsMode_t { - CUDNN_BN_FINALIZE_STATISTICS_TRAINING = 0, - CUDNN_BN_FINALIZE_STATISTICS_INFERENCE = 1, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, 
Eq)] -pub enum cudnnBackendAttributeName_t { - CUDNN_ATTR_POINTWISE_MODE = 0, - CUDNN_ATTR_POINTWISE_MATH_PREC = 1, - CUDNN_ATTR_POINTWISE_NAN_PROPAGATION = 2, - CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP = 3, - CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP = 4, - CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE = 5, - CUDNN_ATTR_POINTWISE_ELU_ALPHA = 6, - CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA = 7, - CUDNN_ATTR_POINTWISE_SWISH_BETA = 8, - CUDNN_ATTR_CONVOLUTION_COMP_TYPE = 100, - CUDNN_ATTR_CONVOLUTION_CONV_MODE = 101, - CUDNN_ATTR_CONVOLUTION_DILATIONS = 102, - CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES = 103, - CUDNN_ATTR_CONVOLUTION_POST_PADDINGS = 104, - CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS = 105, - CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS = 106, - CUDNN_ATTR_ENGINEHEUR_MODE = 200, - CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH = 201, - CUDNN_ATTR_ENGINEHEUR_RESULTS = 202, - CUDNN_ATTR_ENGINECFG_ENGINE = 300, - CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO = 301, - CUDNN_ATTR_ENGINECFG_KNOB_CHOICES = 302, - CUDNN_ATTR_EXECUTION_PLAN_HANDLE = 400, - CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG = 401, - CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE = 402, - CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS = 403, - CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404, - CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500, - CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501, - CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS = 502, - CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES = 503, - CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE = 600, - CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE = 601, - CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA = 700, - CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA = 701, - CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC = 702, - CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W = 703, - CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X = 704, - CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y = 705, - CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA = 706, - CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA = 707, - 
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC = 708, - CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W = 709, - CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX = 710, - CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY = 711, - CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA = 712, - CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA = 713, - CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC = 714, - CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW = 715, - CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X = 716, - CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY = 717, - CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR = 750, - CUDNN_ATTR_OPERATION_POINTWISE_XDESC = 751, - CUDNN_ATTR_OPERATION_POINTWISE_BDESC = 752, - CUDNN_ATTR_OPERATION_POINTWISE_YDESC = 753, - CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1 = 754, - CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2 = 755, - CUDNN_ATTR_OPERATION_POINTWISE_DXDESC = 756, - CUDNN_ATTR_OPERATION_POINTWISE_DYDESC = 757, - CUDNN_ATTR_OPERATION_POINTWISE_TDESC = 758, - CUDNN_ATTR_OPERATION_GENSTATS_MODE = 770, - CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC = 771, - CUDNN_ATTR_OPERATION_GENSTATS_XDESC = 772, - CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC = 773, - CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC = 774, - CUDNN_ATTR_OPERATION_BN_FINALIZE_STATS_MODE = 780, - CUDNN_ATTR_OPERATION_BN_FINALIZE_MATH_PREC = 781, - CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC = 782, - CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC = 783, - CUDNN_ATTR_OPERATION_BN_FINALIZE_SCALE_DESC = 784, - CUDNN_ATTR_OPERATION_BN_FINALIZE_BIAS_DESC = 785, - CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_MEAN_DESC = 786, - CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_VAR_DESC = 787, - CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_MEAN_DESC = 788, - CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC = 789, - CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_MEAN_DESC = 790, - CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_INV_STD_DESC = 791, - CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_SCALE_DESC = 792, - 
CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_BIAS_DESC = 793, - CUDNN_ATTR_OPERATION_BN_FINALIZE_ACCUM_COUNT_DESC = 794, - CUDNN_ATTR_OPERATION_BN_FINALIZE_EPSILON_DESC = 795, - CUDNN_ATTR_OPERATION_BN_FINALIZE_EXP_AVERATE_FACTOR_DESC = 796, - CUDNN_ATTR_OPERATIONGRAPH_HANDLE = 800, - CUDNN_ATTR_OPERATIONGRAPH_OPS = 801, - CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT = 802, - CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT = 900, - CUDNN_ATTR_TENSOR_DATA_TYPE = 901, - CUDNN_ATTR_TENSOR_DIMENSIONS = 902, - CUDNN_ATTR_TENSOR_STRIDES = 903, - CUDNN_ATTR_TENSOR_VECTOR_COUNT = 904, - CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION = 905, - CUDNN_ATTR_TENSOR_UNIQUE_ID = 906, - CUDNN_ATTR_TENSOR_IS_VIRTUAL = 907, - CUDNN_ATTR_TENSOR_IS_BY_VALUE = 908, - CUDNN_ATTR_TENSOR_REORDERING_MODE = 909, - CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS = 1000, - CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS = 1001, - CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES = 1002, - CUDNN_ATTR_VARIANT_PACK_WORKSPACE = 1003, - CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID = 1100, - CUDNN_ATTR_LAYOUT_INFO_TYPES = 1101, - CUDNN_ATTR_KNOB_INFO_TYPE = 1200, - CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE = 1201, - CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE = 1202, - CUDNN_ATTR_KNOB_INFO_STRIDE = 1203, - CUDNN_ATTR_ENGINE_OPERATION_GRAPH = 1300, - CUDNN_ATTR_ENGINE_GLOBAL_INDEX = 1301, - CUDNN_ATTR_ENGINE_KNOB_INFO = 1302, - CUDNN_ATTR_ENGINE_NUMERICAL_NOTE = 1303, - CUDNN_ATTR_ENGINE_LAYOUT_INFO = 1304, - CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE = 1305, - CUDNN_ATTR_MATMUL_COMP_TYPE = 1500, - CUDNN_ATTR_OPERATION_MATMUL_ADESC = 1520, - CUDNN_ATTR_OPERATION_MATMUL_BDESC = 1521, - CUDNN_ATTR_OPERATION_MATMUL_CDESC = 1522, - CUDNN_ATTR_OPERATION_MATMUL_DESC = 1523, - CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT = 1524, - CUDNN_ATTR_REDUCTION_OPERATOR = 1600, - CUDNN_ATTR_REDUCTION_COMP_TYPE = 1601, - CUDNN_ATTR_OPERATION_REDUCTION_XDESC = 1610, - CUDNN_ATTR_OPERATION_REDUCTION_YDESC = 1611, - CUDNN_ATTR_OPERATION_REDUCTION_DESC = 1612, - CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MATH_PREC = 
1620, - CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MEAN_DESC = 1621, - CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_INVSTD_DESC = 1622, - CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_BN_SCALE_DESC = 1623, - CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_X_DESC = 1624, - CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DY_DESC = 1625, - CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_SCALE_DESC = 1626, - CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_BIAS_DESC = 1627, - CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC = 1628, - CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC = 1629, - CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS = 1630, - CUDNN_ATTR_RESAMPLE_MODE = 1700, - CUDNN_ATTR_RESAMPLE_COMP_TYPE = 1701, - CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS = 1702, - CUDNN_ATTR_RESAMPLE_POST_PADDINGS = 1703, - CUDNN_ATTR_RESAMPLE_PRE_PADDINGS = 1704, - CUDNN_ATTR_RESAMPLE_STRIDES = 1705, - CUDNN_ATTR_RESAMPLE_WINDOW_DIMS = 1706, - CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION = 1707, - CUDNN_ATTR_RESAMPLE_PADDING_MODE = 1708, - CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC = 1710, - CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC = 1711, - CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC = 1712, - CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA = 1713, - CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA = 1714, - CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC = 1716, - CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC = 1720, - CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC = 1721, - CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC = 1722, - CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA = 1723, - CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA = 1724, - CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC = 1725, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnBackendAttributeType_t { - CUDNN_TYPE_HANDLE = 0, - CUDNN_TYPE_DATA_TYPE = 1, - CUDNN_TYPE_BOOLEAN = 2, - CUDNN_TYPE_INT64 = 3, - CUDNN_TYPE_FLOAT = 4, - CUDNN_TYPE_DOUBLE = 5, - CUDNN_TYPE_VOID_PTR = 6, - CUDNN_TYPE_CONVOLUTION_MODE = 7, - CUDNN_TYPE_HEUR_MODE = 8, - CUDNN_TYPE_KNOB_TYPE = 9, - CUDNN_TYPE_NAN_PROPOGATION = 10, - 
CUDNN_TYPE_NUMERICAL_NOTE = 11, - CUDNN_TYPE_LAYOUT_TYPE = 12, - CUDNN_TYPE_ATTRIB_NAME = 13, - CUDNN_TYPE_POINTWISE_MODE = 14, - CUDNN_TYPE_BACKEND_DESCRIPTOR = 15, - CUDNN_TYPE_GENSTATS_MODE = 16, - CUDNN_TYPE_BN_FINALIZE_STATS_MODE = 17, - CUDNN_TYPE_REDUCTION_OPERATOR_TYPE = 18, - CUDNN_TYPE_BEHAVIOR_NOTE = 19, - CUDNN_TYPE_TENSOR_REORDERING_MODE = 20, - CUDNN_TYPE_RESAMPLE_MODE = 21, - CUDNN_TYPE_PADDING_MODE = 22, - CUDNN_TYPE_INT32 = 23, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnBackendDescriptorType_t { - CUDNN_BACKEND_POINTWISE_DESCRIPTOR = 0, - CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR = 1, - CUDNN_BACKEND_ENGINE_DESCRIPTOR = 2, - CUDNN_BACKEND_ENGINECFG_DESCRIPTOR = 3, - CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR = 4, - CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR = 5, - CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR = 6, - CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR = 7, - CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR = 8, - CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR = 9, - CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR = 10, - CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR = 11, - CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR = 12, - CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR = 13, - CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR = 14, - CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR = 15, - CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR = 16, - CUDNN_BACKEND_TENSOR_DESCRIPTOR = 17, - CUDNN_BACKEND_MATMUL_DESCRIPTOR = 18, - CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR = 19, - CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR = 20, - CUDNN_BACKEND_REDUCTION_DESCRIPTOR = 21, - CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR = 22, - CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR = 23, - CUDNN_BACKEND_RESAMPLE_DESCRIPTOR = 24, - CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR = 25, - CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR = 26, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum 
cudnnBackendNumericalNote_t { - CUDNN_NUMERICAL_NOTE_TENSOR_CORE = 0, - CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS = 1, - CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION = 2, - CUDNN_NUMERICAL_NOTE_FFT = 3, - CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC = 4, - CUDNN_NUMERICAL_NOTE_WINOGRAD = 5, - CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4 = 6, - CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6 = 7, - CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13 = 8, - CUDNN_NUMERICAL_NOTE_TYPE_COUNT = 9, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnBackendBehaviorNote_t { - CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION = 0, - CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER = 1, - CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER = 2, - CUDNN_BEHAVIOR_NOTE_TYPE_COUNT = 3, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnBackendKnobType_t { - CUDNN_KNOB_TYPE_SPLIT_K = 0, - CUDNN_KNOB_TYPE_SWIZZLE = 1, - CUDNN_KNOB_TYPE_TILE_SIZE = 2, - CUDNN_KNOB_TYPE_USE_TEX = 3, - CUDNN_KNOB_TYPE_EDGE = 4, - CUDNN_KNOB_TYPE_KBLOCK = 5, - CUDNN_KNOB_TYPE_LDGA = 6, - CUDNN_KNOB_TYPE_LDGB = 7, - CUDNN_KNOB_TYPE_CHUNK_K = 8, - CUDNN_KNOB_TYPE_SPLIT_H = 9, - CUDNN_KNOB_TYPE_WINO_TILE = 10, - CUDNN_KNOB_TYPE_MULTIPLY = 11, - CUDNN_KNOB_TYPE_SPLIT_K_BUF = 12, - CUDNN_KNOB_TYPE_TILEK = 13, - CUDNN_KNOB_TYPE_STAGES = 14, - CUDNN_KNOB_TYPE_REDUCTION_MODE = 15, - CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE = 16, - CUDNN_KNOB_TYPE_SPLIT_K_SLC = 17, - CUDNN_KNOB_TYPE_IDX_MODE = 18, - CUDNN_KNOB_TYPE_SLICED = 19, - CUDNN_KNOB_TYPE_SPLIT_RS = 20, - CUDNN_KNOB_TYPE_SINGLEBUFFER = 21, - CUDNN_KNOB_TYPE_LDGC = 22, - CUDNN_KNOB_TYPE_SPECFILT = 23, - CUDNN_KNOB_TYPE_KERNEL_CFG = 24, - CUDNN_KNOB_TYPE_COUNTS = 25, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnBackendLayoutType_t { - CUDNN_LAYOUT_TYPE_PREFERRED_NCHW = 0, - CUDNN_LAYOUT_TYPE_PREFERRED_NHWC = 1, - CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK = 2, - CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK = 3, - 
CUDNN_LAYOUT_TYPE_COUNT = 4, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnBackendHeurMode_t { - CUDNN_HEUR_MODE_INSTANT = 0, - CUDNN_HEUR_MODE_B = 1, - CUDNN_HEUR_MODE_FALLBACK = 2, - CUDNN_HEUR_MODE_A = 3, - CUDNN_HEUR_MODES_COUNT = 4, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnBackendTensorReordering_t { - CUDNN_TENSOR_REORDERING_NONE = 0, - CUDNN_TENSOR_REORDERING_INT8x32 = 1, -} -#[repr(u32)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum cudnnPaddingMode_t { - CUDNN_ZERO_PAD = 0, - CUDNN_NEG_INF_PAD = 1, - CUDNN_EDGE_VAL_PAD = 2, -} -extern "C" { - pub fn cudnnBackendCreateDescriptor( - descriptorType: cudnnBackendDescriptorType_t, - descriptor: *mut cudnnBackendDescriptor_t, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnBackendDestroyDescriptor(descriptor: cudnnBackendDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnBackendInitialize(descriptor: cudnnBackendDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnBackendFinalize(descriptor: cudnnBackendDescriptor_t) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnBackendSetAttribute( - descriptor: cudnnBackendDescriptor_t, - attributeName: cudnnBackendAttributeName_t, - attributeType: cudnnBackendAttributeType_t, - elementCount: i64, - arrayOfElements: *const ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnBackendGetAttribute( - descriptor: cudnnBackendDescriptor_t, - attributeName: cudnnBackendAttributeName_t, - attributeType: cudnnBackendAttributeType_t, - requestedElementCount: i64, - elementCount: *mut i64, - arrayOfElements: *mut ::std::os::raw::c_void, - ) -> cudnnStatus_t; -} -extern "C" { - pub fn cudnnBackendExecute( - handle: cudnnHandle_t, - executionPlan: cudnnBackendDescriptor_t, - variantPack: cudnnBackendDescriptor_t, - ) -> cudnnStatus_t; -} diff --git a/crates/cudnn/src/tensor/tensor_descriptor.rs b/crates/cudnn/src/tensor/tensor_descriptor.rs 
index 3d141b92..9259e86b 100644 --- a/crates/cudnn/src/tensor/tensor_descriptor.rs +++ b/crates/cudnn/src/tensor/tensor_descriptor.rs @@ -1,13 +1,14 @@ -use crate::{sys, CudnnError, DataType, IntoResult, ScalarC, TensorFormat, VecType}; use std::{marker::PhantomData, mem::MaybeUninit}; +use crate::{CudnnError, DataType, IntoResult, ScalarC, TensorFormat, VecType}; + /// A generic description of an n-dimensional dataset. #[derive(Debug, PartialEq, Eq, Hash)] pub struct TensorDescriptor where T: DataType, { - pub(crate) raw: sys::cudnnTensorDescriptor_t, + pub(crate) raw: cudnn_sys::cudnnTensorDescriptor_t, data_type: PhantomData, } @@ -53,10 +54,10 @@ where ); unsafe { - sys::cudnnCreateTensorDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateTensorDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); - sys::cudnnSetTensorNdDescriptor( + cudnn_sys::cudnnSetTensorNdDescriptor( raw, T::into_raw(), ndims as i32, @@ -104,10 +105,10 @@ where let ndims = shape.len(); unsafe { - sys::cudnnCreateTensorDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateTensorDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); - sys::cudnnSetTensorNdDescriptorEx( + cudnn_sys::cudnnSetTensorNdDescriptorEx( raw, format.into(), T::into_raw(), @@ -155,10 +156,10 @@ where let format = TensorFormat::NchwVectC; unsafe { - sys::cudnnCreateTensorDescriptor(raw.as_mut_ptr()).into_result()?; + cudnn_sys::cudnnCreateTensorDescriptor(raw.as_mut_ptr()).into_result()?; let raw = raw.assume_init(); - sys::cudnnSetTensorNdDescriptorEx( + cudnn_sys::cudnnSetTensorNdDescriptorEx( raw, format.into(), V::into_raw(), @@ -181,7 +182,7 @@ where { fn drop(&mut self) { unsafe { - sys::cudnnDestroyTensorDescriptor(self.raw); + cudnn_sys::cudnnDestroyTensorDescriptor(self.raw); } } } diff --git a/crates/cudnn/src/tensor/tensor_format.rs b/crates/cudnn/src/tensor/tensor_format.rs index 707b201d..a11194c1 100644 --- 
a/crates/cudnn/src/tensor/tensor_format.rs +++ b/crates/cudnn/src/tensor/tensor_format.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Tensor formats in which each element of the tensor has scalar value. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ScalarC { @@ -15,11 +13,11 @@ pub enum ScalarC { Nhwc, } -impl From for sys::cudnnTensorFormat_t { +impl From for cudnn_sys::cudnnTensorFormat_t { fn from(tensor_format: ScalarC) -> Self { match tensor_format { - ScalarC::Nchw => sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, - ScalarC::Nhwc => sys::cudnnTensorFormat_t::CUDNN_TENSOR_NHWC, + ScalarC::Nchw => cudnn_sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, + ScalarC::Nhwc => cudnn_sys::cudnnTensorFormat_t::CUDNN_TENSOR_NHWC, } } } @@ -50,11 +48,11 @@ impl From for TensorFormat { } } -impl From for sys::cudnnTensorFormat_t { +impl From for cudnn_sys::cudnnTensorFormat_t { fn from(tensor_format: TensorFormat) -> Self { match tensor_format { TensorFormat::ScalarC(fmt) => fmt.into(), - TensorFormat::NchwVectC => sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW_VECT_C, + TensorFormat::NchwVectC => cudnn_sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW_VECT_C, } } } diff --git a/crates/cudnn/src/w_grad_mode.rs b/crates/cudnn/src/w_grad_mode.rs index e6466bb4..d6a053ee 100644 --- a/crates/cudnn/src/w_grad_mode.rs +++ b/crates/cudnn/src/w_grad_mode.rs @@ -1,5 +1,3 @@ -use crate::sys; - /// Selects how buffers holding gradients of the loss function, computed with respect to trainable /// parameters, are updated. 
/// @@ -17,7 +15,7 @@ pub enum WGradMode { Add, } -impl From for sys::cudnnWgradMode_t { +impl From for cudnn_sys::cudnnWgradMode_t { fn from(mode: WGradMode) -> Self { match mode { WGradMode::Set => Self::CUDNN_WGRAD_MODE_SET, diff --git a/crates/cust/Cargo.toml b/crates/cust/Cargo.toml index 5a609176..5dd045e9 100644 --- a/crates/cust/Cargo.toml +++ b/crates/cust/Cargo.toml @@ -14,7 +14,7 @@ readme = "../../README.md" [dependencies] cust_core = { path = "../cust_core", version = "0.1.0"} -cust_raw = { path = "../cust_raw", version = "0.11.2"} +cust_raw = { path = "../cust_raw", default-features = false, features = ["driver"] } bitflags = "2.8" cust_derive = { path = "../cust_derive", version = "0.2" } glam = { version = "0.29.2", features=["cuda"], optional = true } @@ -32,7 +32,7 @@ impl_half = ["cust_core/half"] impl_num_complex = ["cust_core/num-complex", "num-complex"] [build-dependencies] -find_cuda_helper = { path = "../find_cuda_helper", version = "0.2" } +serde_json = "1.0.140" [dev-dependencies] image = "0.25.5" diff --git a/crates/cust/build.rs b/crates/cust/build.rs index a6c47b89..77fe3b1a 100644 --- a/crates/cust/build.rs +++ b/crates/cust/build.rs @@ -1,3 +1,13 @@ +use std::env; + fn main() { - find_cuda_helper::include_cuda(); + let driver_version = env::var("DEP_CUDA_DRIVER_VERSION") + .expect("Cannot find transitive metadata 'driver_version' from cust_raw package.") + .parse::() + .expect("Failed to parse CUDA driver version"); + + println!("cargo::rustc-check-cfg=cfg(conditional_node)"); + if driver_version >= 12030 { + println!("cargo::rustc-cfg=conditional_node"); + } } diff --git a/crates/cust/src/context/legacy.rs b/crates/cust/src/context/legacy.rs index ea948ea3..838d6d77 100644 --- a/crates/cust/src/context/legacy.rs +++ b/crates/cust/src/context/legacy.rs @@ -114,15 +114,18 @@ //! # } //! 
``` +use std::mem; +use std::mem::transmute; +use std::ptr; + +use cust_raw::driver_sys; +use cust_raw::driver_sys::CUcontext; + use crate::context::ContextHandle; use crate::device::Device; use crate::error::{CudaResult, DropResult, ToResult}; use crate::private::Sealed; -use crate::sys::{self as cuda, CUcontext}; use crate::CudaApiVersion; -use std::mem; -use std::mem::transmute; -use std::ptr; /// This enumeration represents configuration settings for devices which share hardware resources /// between L1 cache and shared memory. @@ -259,7 +262,7 @@ impl Context { // lifetime guarantees so we create-and-push, then pop, then the programmer has to // push again. let mut ctx: CUcontext = ptr::null_mut(); - cuda::cuCtxCreate_v2(&mut ctx as *mut CUcontext, flags.bits(), device.as_raw()) + driver_sys::cuCtxCreate_v2(&mut ctx as *mut CUcontext, flags.bits(), device.as_raw()) .to_result()?; Ok(Context { inner: ctx }) } @@ -287,7 +290,7 @@ impl Context { pub fn get_api_version(&self) -> CudaResult { unsafe { let mut api_version = 0u32; - cuda::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32).to_result()?; + driver_sys::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32).to_result()?; Ok(CudaApiVersion { version: api_version as i32, }) @@ -351,7 +354,7 @@ impl Context { unsafe { let inner = mem::replace(&mut ctx.inner, ptr::null_mut()); - match cuda::cuCtxDestroy_v2(inner).to_result() { + match driver_sys::cuCtxDestroy_v2(inner).to_result() { Ok(()) => { mem::forget(ctx); Ok(()) @@ -369,7 +372,7 @@ impl Drop for Context { unsafe { let inner = mem::replace(&mut self.inner, ptr::null_mut()); - cuda::cuCtxDestroy_v2(inner); + driver_sys::cuCtxDestroy_v2(inner); } } } @@ -419,7 +422,7 @@ impl UnownedContext { pub fn get_api_version(&self) -> CudaResult { unsafe { let mut api_version = 0u32; - cuda::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32).to_result()?; + driver_sys::cuCtxGetApiVersion(self.inner, &mut api_version as *mut 
u32).to_result()?; Ok(CudaApiVersion { version: api_version as i32, }) @@ -453,7 +456,7 @@ impl ContextStack { pub fn pop() -> CudaResult { unsafe { let mut ctx: CUcontext = ptr::null_mut(); - cuda::cuCtxPopCurrent_v2(&mut ctx as *mut CUcontext).to_result()?; + driver_sys::cuCtxPopCurrent_v2(&mut ctx as *mut CUcontext).to_result()?; Ok(UnownedContext { inner: ctx }) } } @@ -478,7 +481,7 @@ impl ContextStack { /// ``` pub fn push(ctx: &C) -> CudaResult<()> { unsafe { - cuda::cuCtxPushCurrent_v2(ctx.get_inner()).to_result()?; + driver_sys::cuCtxPushCurrent_v2(ctx.get_inner()).to_result()?; Ok(()) } } @@ -525,8 +528,10 @@ impl CurrentContext { pub fn get_cache_config() -> CudaResult { unsafe { let mut config = CacheConfig::PreferNone; - cuda::cuCtxGetCacheConfig(&mut config as *mut CacheConfig as *mut cuda::CUfunc_cache) - .to_result()?; + driver_sys::cuCtxGetCacheConfig( + &mut config as *mut CacheConfig as *mut driver_sys::CUfunc_cache, + ) + .to_result()?; Ok(config) } } @@ -551,7 +556,8 @@ impl CurrentContext { pub fn get_device() -> CudaResult { unsafe { let mut device = Device { device: 0 }; - cuda::cuCtxGetDevice(&mut device.device as *mut cuda::CUdevice).to_result()?; + driver_sys::cuCtxGetDevice(&mut device.device as *mut driver_sys::CUdevice) + .to_result()?; Ok(device) } } @@ -576,7 +582,7 @@ impl CurrentContext { pub fn get_flags() -> CudaResult { unsafe { let mut flags = 0u32; - cuda::cuCtxGetFlags(&mut flags as *mut u32).to_result()?; + driver_sys::cuCtxGetFlags(&mut flags as *mut u32).to_result()?; Ok(ContextFlags::from_bits_truncate(flags)) } } @@ -601,9 +607,9 @@ impl CurrentContext { pub fn get_resource_limit(resource: ResourceLimit) -> CudaResult { unsafe { let mut limit: usize = 0; - cuda::cuCtxGetLimit( + driver_sys::cuCtxGetLimit( &mut limit as *mut usize, - transmute::(resource), + transmute::(resource), ) .to_result()?; Ok(limit) @@ -630,8 +636,8 @@ impl CurrentContext { pub fn get_shared_memory_config() -> CudaResult { unsafe { let mut cfg = 
SharedMemoryConfig::DefaultBankSize; - cuda::cuCtxGetSharedMemConfig( - &mut cfg as *mut SharedMemoryConfig as *mut cuda::CUsharedconfig, + driver_sys::cuCtxGetSharedMemConfig( + &mut cfg as *mut SharedMemoryConfig as *mut driver_sys::CUsharedconfig, ) .to_result()?; Ok(cfg) @@ -665,7 +671,7 @@ impl CurrentContext { least: 0, greatest: 0, }; - cuda::cuCtxGetStreamPriorityRange( + driver_sys::cuCtxGetStreamPriorityRange( &mut range.least as *mut i32, &mut range.greatest as *mut i32, ) @@ -701,8 +707,10 @@ impl CurrentContext { /// ``` pub fn set_cache_config(cfg: CacheConfig) -> CudaResult<()> { unsafe { - cuda::cuCtxSetCacheConfig(transmute::(cfg)) - .to_result() + driver_sys::cuCtxSetCacheConfig( + transmute::(cfg), + ) + .to_result() } } @@ -750,8 +758,8 @@ impl CurrentContext { /// ``` pub fn set_resource_limit(resource: ResourceLimit, limit: usize) -> CudaResult<()> { unsafe { - cuda::cuCtxSetLimit( - transmute::(resource), + driver_sys::cuCtxSetLimit( + transmute::(resource), limit, ) .to_result()?; @@ -781,9 +789,9 @@ impl CurrentContext { /// ``` pub fn set_shared_memory_config(cfg: SharedMemoryConfig) -> CudaResult<()> { unsafe { - cuda::cuCtxSetSharedMemConfig(transmute::< + driver_sys::cuCtxSetSharedMemConfig(transmute::< SharedMemoryConfig, - cust_raw::CUsharedconfig_enum, + driver_sys::CUsharedconfig_enum, >(cfg)) .to_result() } @@ -809,7 +817,7 @@ impl CurrentContext { pub fn get_current() -> CudaResult { unsafe { let mut ctx: CUcontext = ptr::null_mut(); - cuda::cuCtxGetCurrent(&mut ctx as *mut CUcontext).to_result()?; + driver_sys::cuCtxGetCurrent(&mut ctx as *mut CUcontext).to_result()?; Ok(UnownedContext { inner: ctx }) } } @@ -837,7 +845,7 @@ impl CurrentContext { /// ``` pub fn set_current(c: &C) -> CudaResult<()> { unsafe { - cuda::cuCtxSetCurrent(c.get_inner()).to_result()?; + driver_sys::cuCtxSetCurrent(c.get_inner()).to_result()?; Ok(()) } } @@ -845,7 +853,7 @@ impl CurrentContext { /// Block to wait for a context's tasks to complete. 
pub fn synchronize() -> CudaResult<()> { unsafe { - cuda::cuCtxSynchronize().to_result()?; + driver_sys::cuCtxSynchronize().to_result()?; Ok(()) } } diff --git a/crates/cust/src/context/mod.rs b/crates/cust/src/context/mod.rs index 8820150f..6b2551bd 100644 --- a/crates/cust/src/context/mod.rs +++ b/crates/cust/src/context/mod.rs @@ -33,27 +33,30 @@ //! Primary contexts are the default in cust, you can use the old legacy context handling //! with the [`legacy`] module. +use std::{ + mem::{self, transmute, MaybeUninit}, + ptr, +}; + +use cust_raw::driver_sys; + /// Legacy context handling. pub mod legacy; +use crate::context::legacy::StreamPriorityRange; use crate::{ device::Device, error::{CudaResult, DropResult, ToResult}, private::Sealed, - sys as cuda, CudaApiVersion, -}; -use legacy::StreamPriorityRange; -use std::{ - mem::{self, transmute, MaybeUninit}, - ptr, + CudaApiVersion, }; pub trait ContextHandle: Sealed { - fn get_inner(&self) -> cuda::CUcontext; + fn get_inner(&self) -> driver_sys::CUcontext; } impl Sealed for Context {} impl ContextHandle for Context { - fn get_inner(&self) -> cuda::CUcontext { + fn get_inner(&self) -> driver_sys::CUcontext { self.inner } } @@ -162,8 +165,8 @@ bitflags::bitflags! 
{ #[derive(Debug)] pub struct Context { - inner: cuda::CUcontext, - device: cuda::CUdevice, + inner: driver_sys::CUcontext, + device: driver_sys::CUdevice, } unsafe impl Send for Context {} @@ -191,9 +194,10 @@ impl Context { pub fn new(device: Device) -> CudaResult { let mut inner = MaybeUninit::uninit(); unsafe { - cuda::cuDevicePrimaryCtxRetain(inner.as_mut_ptr(), device.as_raw()).to_result()?; + driver_sys::cuDevicePrimaryCtxRetain(inner.as_mut_ptr(), device.as_raw()) + .to_result()?; let inner = inner.assume_init(); - cuda::cuCtxSetCurrent(inner); + driver_sys::cuCtxSetCurrent(inner); Ok(Self { inner, device: device.as_raw(), @@ -211,17 +215,17 @@ impl Context { /// Nothing else should be using the primary context for this device, otherwise, /// spurious errors or segfaults will occur. pub unsafe fn reset(device: &Device) -> CudaResult<()> { - cuda::cuDevicePrimaryCtxReset_v2(device.as_raw()).to_result() + driver_sys::cuDevicePrimaryCtxReset_v2(device.as_raw()).to_result() } /// Sets the flags for the device context, these flags will apply to any user of the primary /// context associated with this device. pub fn set_flags(&self, flags: ContextFlags) -> CudaResult<()> { - unsafe { cuda::cuDevicePrimaryCtxSetFlags_v2(self.device, flags.bits()).to_result() } + unsafe { driver_sys::cuDevicePrimaryCtxSetFlags_v2(self.device, flags.bits()).to_result() } } /// Returns the raw handle to this context. 
- pub fn as_raw(&self) -> cuda::CUcontext { + pub fn as_raw(&self) -> driver_sys::CUcontext { self.inner } @@ -247,7 +251,7 @@ impl Context { pub fn get_api_version(&self) -> CudaResult { unsafe { let mut api_version = 0u32; - cuda::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32).to_result()?; + driver_sys::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32).to_result()?; Ok(CudaApiVersion { version: api_version as i32, }) @@ -287,7 +291,7 @@ impl Context { unsafe { let inner = mem::replace(&mut ctx.inner, ptr::null_mut()); - match cuda::cuDevicePrimaryCtxRelease_v2(ctx.device).to_result() { + match driver_sys::cuDevicePrimaryCtxRelease_v2(ctx.device).to_result() { Ok(()) => { mem::forget(ctx); Ok(()) @@ -312,7 +316,7 @@ impl Drop for Context { unsafe { self.inner = ptr::null_mut(); - cuda::cuDevicePrimaryCtxRelease_v2(self.device); + driver_sys::cuDevicePrimaryCtxRelease_v2(self.device); } } } @@ -346,8 +350,10 @@ impl CurrentContext { pub fn get_cache_config() -> CudaResult { unsafe { let mut config = CacheConfig::PreferNone; - cuda::cuCtxGetCacheConfig(&mut config as *mut CacheConfig as *mut cuda::CUfunc_cache) - .to_result()?; + driver_sys::cuCtxGetCacheConfig( + &mut config as *mut CacheConfig as *mut driver_sys::CUfunc_cache, + ) + .to_result()?; Ok(config) } } @@ -372,7 +378,8 @@ impl CurrentContext { pub fn get_device() -> CudaResult { unsafe { let mut device = Device { device: 0 }; - cuda::cuCtxGetDevice(&mut device.device as *mut cuda::CUdevice).to_result()?; + driver_sys::cuCtxGetDevice(&mut device.device as *mut driver_sys::CUdevice) + .to_result()?; Ok(device) } } @@ -397,7 +404,7 @@ impl CurrentContext { pub fn get_flags() -> CudaResult { unsafe { let mut flags = 0u32; - cuda::cuCtxGetFlags(&mut flags as *mut u32).to_result()?; + driver_sys::cuCtxGetFlags(&mut flags as *mut u32).to_result()?; Ok(ContextFlags::from_bits_truncate(flags)) } } @@ -422,9 +429,9 @@ impl CurrentContext { pub fn get_resource_limit(resource: 
ResourceLimit) -> CudaResult { unsafe { let mut limit: usize = 0; - cuda::cuCtxGetLimit( + driver_sys::cuCtxGetLimit( &mut limit as *mut usize, - transmute::(resource), + transmute::(resource), ) .to_result()?; Ok(limit) @@ -451,8 +458,8 @@ impl CurrentContext { pub fn get_shared_memory_config() -> CudaResult { unsafe { let mut cfg = SharedMemoryConfig::DefaultBankSize; - cuda::cuCtxGetSharedMemConfig( - &mut cfg as *mut SharedMemoryConfig as *mut cuda::CUsharedconfig, + driver_sys::cuCtxGetSharedMemConfig( + &mut cfg as *mut SharedMemoryConfig as *mut driver_sys::CUsharedconfig, ) .to_result()?; Ok(cfg) @@ -486,7 +493,7 @@ impl CurrentContext { least: 0, greatest: 0, }; - cuda::cuCtxGetStreamPriorityRange( + driver_sys::cuCtxGetStreamPriorityRange( &mut range.least as *mut i32, &mut range.greatest as *mut i32, ) @@ -522,8 +529,10 @@ impl CurrentContext { /// ``` pub fn set_cache_config(cfg: CacheConfig) -> CudaResult<()> { unsafe { - cuda::cuCtxSetCacheConfig(transmute::(cfg)) - .to_result() + driver_sys::cuCtxSetCacheConfig( + transmute::(cfg), + ) + .to_result() } } @@ -572,8 +581,8 @@ impl CurrentContext { /// ``` pub fn set_resource_limit(resource: ResourceLimit, limit: usize) -> CudaResult<()> { unsafe { - cuda::cuCtxSetLimit( - transmute::(resource), + driver_sys::cuCtxSetLimit( + transmute::(resource), limit, ) .to_result()?; @@ -603,9 +612,9 @@ impl CurrentContext { /// ``` pub fn set_shared_memory_config(cfg: SharedMemoryConfig) -> CudaResult<()> { unsafe { - cuda::cuCtxSetSharedMemConfig(transmute::< + driver_sys::cuCtxSetSharedMemConfig(transmute::< SharedMemoryConfig, - cust_raw::CUsharedconfig_enum, + driver_sys::CUsharedconfig_enum, >(cfg)) .to_result() } @@ -630,7 +639,7 @@ impl CurrentContext { /// ``` pub fn set_current(c: &C) -> CudaResult<()> { unsafe { - cuda::cuCtxSetCurrent(c.get_inner()).to_result()?; + driver_sys::cuCtxSetCurrent(c.get_inner()).to_result()?; Ok(()) } } @@ -638,7 +647,7 @@ impl CurrentContext { /// Block to wait for a 
context's tasks to complete. pub fn synchronize() -> CudaResult<()> { unsafe { - cuda::cuCtxSynchronize().to_result()?; + driver_sys::cuCtxSynchronize().to_result()?; Ok(()) } } diff --git a/crates/cust/src/device.rs b/crates/cust/src/device.rs index d8ef916f..fb345c86 100644 --- a/crates/cust/src/device.rs +++ b/crates/cust/src/device.rs @@ -1,10 +1,12 @@ //! Functions and types for enumerating CUDA devices and retrieving information about them. -use crate::error::{CudaResult, ToResult}; -use crate::sys::*; use std::ffi::CStr; use std::ops::Range; +use cust_raw::driver_sys; + +use crate::error::{CudaResult, ToResult}; + /// All supported device attributes for [Device::get_attribute](struct.Device.html#method.get_attribute) #[repr(u32)] #[non_exhaustive] @@ -198,7 +200,7 @@ pub enum DeviceAttribute { /// Opaque handle to a CUDA device. #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)] pub struct Device { - pub(crate) device: CUdevice, + pub(crate) device: driver_sys::CUdevice, } impl Device { /// Get the number of CUDA-capable devices. 
@@ -221,7 +223,7 @@ impl Device { pub fn num_devices() -> CudaResult { unsafe { let mut num_devices = 0i32; - cuDeviceGetCount(&mut num_devices as *mut i32).to_result()?; + driver_sys::cuDeviceGetCount(&mut num_devices as *mut i32).to_result()?; Ok(num_devices as u32) } } @@ -245,7 +247,11 @@ impl Device { pub fn get_device(ordinal: u32) -> CudaResult { unsafe { let mut device = Device { device: 0 }; - cuDeviceGet(&mut device.device as *mut CUdevice, ordinal as i32).to_result()?; + driver_sys::cuDeviceGet( + &mut device.device as *mut driver_sys::CUdevice, + ordinal as i32, + ) + .to_result()?; Ok(device) } } @@ -289,7 +295,7 @@ impl Device { pub fn total_memory(self) -> CudaResult { unsafe { let mut memory = 0; - cuDeviceTotalMem_v2(&mut memory as *mut usize, self.device).to_result()?; + driver_sys::cuDeviceTotalMem_v2(&mut memory as *mut usize, self.device).to_result()?; Ok(memory) } } @@ -311,7 +317,7 @@ impl Device { pub fn name(self) -> CudaResult { unsafe { let mut name = [0u8; 128]; // Hopefully this is big enough... - cuDeviceGetName( + driver_sys::cuDeviceGetName( &mut name[0] as *mut u8 as *mut ::std::os::raw::c_char, 128, self.device, @@ -342,9 +348,9 @@ impl Device { /// # } /// ``` pub fn uuid(self) -> CudaResult<[u8; 16]> { - let mut cu_uuid = CUuuid { bytes: [0; 16] }; + let mut cu_uuid = driver_sys::CUuuid { bytes: [0; 16] }; unsafe { - cuDeviceGetUuid(&mut cu_uuid, self.device).to_result()?; + driver_sys::cuDeviceGetUuid(&mut cu_uuid, self.device).to_result()?; } let uuid: [u8; 16] = cu_uuid.bytes.map(|byte| byte as u8); Ok(uuid) @@ -368,10 +374,10 @@ impl Device { pub fn get_attribute(self, attr: DeviceAttribute) -> CudaResult { unsafe { let mut val = 0i32; - cuDeviceGetAttribute( + driver_sys::cuDeviceGetAttribute( &mut val as *mut i32, // This should be safe, as the repr and values of DeviceAttribute should match. 
- ::std::mem::transmute::(attr), + ::std::mem::transmute::(attr), self.device, ) .to_result()?; @@ -381,7 +387,7 @@ impl Device { /// Returns a raw handle to this device, not handing over ownership, meaning that dropping /// this device will try to drop the underlying device. - pub fn as_raw(&self) -> CUdevice { + pub fn as_raw(&self) -> driver_sys::CUdevice { self.device } } diff --git a/crates/cust/src/error.rs b/crates/cust/src/error.rs index 3b5b3849..5b8a881c 100644 --- a/crates/cust/src/error.rs +++ b/crates/cust/src/error.rs @@ -7,7 +7,6 @@ //! cust) can fail. Even those functions which have no normal failure conditions can return //! errors related to previous asynchronous launches. -use crate::sys::{self as cuda, cudaError_enum}; use std::error::Error; use std::ffi::CStr; use std::fmt; @@ -16,6 +15,9 @@ use std::os::raw::c_char; use std::ptr; use std::result::Result; +use cust_raw::driver_sys; +use cust_raw::driver_sys::cudaError_enum; + /// Error enum which represents all the potential errors returned by the CUDA driver API. #[repr(u32)] #[allow(missing_docs)] @@ -96,8 +98,8 @@ impl fmt::Display for CudaError { let value = other as u32; let mut ptr: *const c_char = ptr::null(); unsafe { - cuda::cuGetErrorString( - mem::transmute::(value), + driver_sys::cuGetErrorString( + mem::transmute::(value), &mut ptr as *mut *const c_char, ) .to_result() diff --git a/crates/cust/src/event.rs b/crates/cust/src/event.rs index 28739491..55ed8195 100644 --- a/crates/cust/src/event.rs +++ b/crates/cust/src/event.rs @@ -13,16 +13,17 @@ // TODO: I'm not sure that these events are/can be safe by Rust's model of safety; they inherently // create state which can be mutated even while an immutable borrow is held. 
-use crate::error::{CudaError, CudaResult, DropResult, ToResult}; -use crate::stream::Stream; -use crate::sys::{ +use std::mem; +use std::ptr; +use std::time::Duration; + +use cust_raw::driver_sys::{ cuEventCreate, cuEventDestroy_v2, cuEventElapsedTime, cuEventQuery, cuEventRecord, cuEventSynchronize, CUevent, }; -use std::mem; -use std::ptr; -use std::time::Duration; +use crate::error::{CudaError, CudaResult, DropResult, ToResult}; +use crate::stream::Stream; bitflags::bitflags! { /// Bit flags for configuring a CUDA Event. diff --git a/crates/cust/src/external.rs b/crates/cust/src/external.rs index c735842a..a634645f 100644 --- a/crates/cust/src/external.rs +++ b/crates/cust/src/external.rs @@ -1,28 +1,28 @@ //! External memory and synchronization resources +use cust_raw::driver_sys; + use crate::error::{CudaResult, ToResult}; use crate::memory::{DeviceCopy, DevicePointer}; -use cust_raw as sys; - #[repr(transparent)] -pub struct ExternalMemory(sys::CUexternalMemory); +pub struct ExternalMemory(driver_sys::CUexternalMemory); impl ExternalMemory { // Import an external memory referenced by `fd` with `size` #[allow(clippy::missing_safety_doc)] pub unsafe fn import(fd: i32, size: usize) -> CudaResult { - let desc = sys::CUDA_EXTERNAL_MEMORY_HANDLE_DESC { - type_: sys::CUexternalMemoryHandleType_enum::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, - handle: sys::CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1 { fd }, + let desc = driver_sys::CUDA_EXTERNAL_MEMORY_HANDLE_DESC { + type_: driver_sys::CUexternalMemoryHandleType_enum::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, + handle: driver_sys::CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1 { fd }, size: size as u64, flags: 0, reserved: Default::default(), }; - let mut memory: sys::CUexternalMemory = std::ptr::null_mut(); + let mut memory: driver_sys::CUexternalMemory = std::ptr::null_mut(); - sys::cuImportExternalMemory(&mut memory, &desc) + driver_sys::cuImportExternalMemory(&mut memory, &desc) .to_result() .map(|_| 
ExternalMemory(memory)) } @@ -41,7 +41,7 @@ impl ExternalMemory { size_in_bytes: usize, offset_in_bytes: usize, ) -> CudaResult> { - let buffer_desc = sys::CUDA_EXTERNAL_MEMORY_BUFFER_DESC { + let buffer_desc = driver_sys::CUDA_EXTERNAL_MEMORY_BUFFER_DESC { flags: 0, size: size_in_bytes as u64, offset: offset_in_bytes as u64, @@ -50,7 +50,7 @@ impl ExternalMemory { let mut dptr = 0; unsafe { - sys::cuExternalMemoryGetMappedBuffer(&mut dptr, self.0, &buffer_desc) + driver_sys::cuExternalMemoryGetMappedBuffer(&mut dptr, self.0, &buffer_desc) .to_result() .map(|_| DevicePointer::from_raw(dptr)) } @@ -60,7 +60,9 @@ impl ExternalMemory { impl Drop for ExternalMemory { fn drop(&mut self) { unsafe { - sys::cuDestroyExternalMemory(self.0).to_result().unwrap(); + driver_sys::cuDestroyExternalMemory(self.0) + .to_result() + .unwrap(); } } } diff --git a/crates/cust/src/function.rs b/crates/cust/src/function.rs index 266018d4..0bff08f4 100644 --- a/crates/cust/src/function.rs +++ b/crates/cust/src/function.rs @@ -1,11 +1,14 @@ //! Functions and types for working with CUDA kernels. +use std::marker::PhantomData; +use std::mem::{transmute, MaybeUninit}; + +use cust_raw::driver_sys; +use cust_raw::driver_sys::CUfunction; + use crate::context::{CacheConfig, SharedMemoryConfig}; use crate::error::{CudaResult, ToResult}; use crate::module::Module; -use crate::sys::{self as cuda, CUfunction}; -use std::marker::PhantomData; -use std::mem::{transmute, MaybeUninit}; /// Dimensions of a grid, or the number of thread blocks in a kernel launch. /// @@ -240,10 +243,10 @@ impl Function<'_> { pub fn get_attribute(&self, attr: FunctionAttribute) -> CudaResult { unsafe { let mut val = 0i32; - cuda::cuFuncGetAttribute( + driver_sys::cuFuncGetAttribute( &mut val as *mut i32, // This should be safe, as the repr and values of FunctionAttribute should match. 
- ::std::mem::transmute::( + ::std::mem::transmute::( attr, ), self.inner, @@ -283,9 +286,9 @@ impl Function<'_> { /// ``` pub fn set_cache_config(&mut self, config: CacheConfig) -> CudaResult<()> { unsafe { - cuda::cuFuncSetCacheConfig( + driver_sys::cuFuncSetCacheConfig( self.inner, - transmute::(config), + transmute::(config), ) .to_result() } @@ -316,9 +319,9 @@ impl Function<'_> { /// ``` pub fn set_shared_memory_config(&mut self, cfg: SharedMemoryConfig) -> CudaResult<()> { unsafe { - cuda::cuFuncSetSharedMemConfig( + driver_sys::cuFuncSetSharedMemConfig( self.inner, - transmute::(cfg), + transmute::(cfg), ) .to_result() } @@ -343,7 +346,7 @@ impl Function<'_> { let mut result = MaybeUninit::uninit(); unsafe { - cuda::cuOccupancyAvailableDynamicSMemPerBlock( + driver_sys::cuOccupancyAvailableDynamicSMemPerBlock( result.as_mut_ptr(), self.to_raw(), num_blocks as i32, @@ -365,7 +368,7 @@ impl Function<'_> { let mut num_blocks = MaybeUninit::uninit(); unsafe { - cuda::cuOccupancyMaxActiveBlocksPerMultiprocessor( + driver_sys::cuOccupancyMaxActiveBlocksPerMultiprocessor( num_blocks.as_mut_ptr(), self.to_raw(), total_block_size as i32, @@ -402,7 +405,7 @@ impl Function<'_> { let total_block_size_limit = block_size_limit.x * block_size_limit.y * block_size_limit.z; unsafe { - cuda::cuOccupancyMaxPotentialBlockSize( + driver_sys::cuOccupancyMaxPotentialBlockSize( min_grid_size.as_mut_ptr(), block_size.as_mut_ptr(), self.to_raw(), diff --git a/crates/cust/src/graph.rs b/crates/cust/src/graph.rs index e4b8bc9e..914f42cf 100644 --- a/crates/cust/src/graph.rs +++ b/crates/cust/src/graph.rs @@ -8,10 +8,11 @@ use std::{ ptr, }; +use cust_raw::driver_sys; + use crate::{ error::{CudaResult, ToResult}, function::{BlockSize, GridSize}, - sys as cuda, }; /// Creates a kernel invocation using the same syntax as [`launch`] to be used to insert kernel launches inside graphs. 
@@ -56,7 +57,7 @@ pub struct KernelInvocation { pub block_dim: BlockSize, pub grid_dim: GridSize, pub shared_mem_bytes: u32, - func: cuda::CUfunction, + func: driver_sys::CUfunction, params: Box<*mut c_void>, params_len: Option, } @@ -67,7 +68,7 @@ impl KernelInvocation { block_dim: BlockSize, grid_dim: GridSize, shared_mem_bytes: u32, - func: cuda::CUfunction, + func: driver_sys::CUfunction, params: Box<*mut c_void>, params_len: usize, ) -> Self { @@ -81,8 +82,8 @@ impl KernelInvocation { } } - pub fn to_raw(self) -> cuda::CUDA_KERNEL_NODE_PARAMS { - cuda::CUDA_KERNEL_NODE_PARAMS { + pub fn to_raw(self) -> driver_sys::CUDA_KERNEL_NODE_PARAMS { + driver_sys::CUDA_KERNEL_NODE_PARAMS { func: self.func, gridDimX: self.grid_dim.x, gridDimY: self.grid_dim.y, @@ -93,6 +94,8 @@ impl KernelInvocation { kernelParams: Box::into_raw(self.params), sharedMemBytes: self.shared_mem_bytes, extra: ptr::null_mut(), + kern: ptr::null_mut(), + ctx: ptr::null_mut(), } } @@ -103,7 +106,7 @@ impl KernelInvocation { /// The function pointer must be a valid CUfunction pointer and /// params' "ownership" must be able to be transferred to the invocation /// (it will be turned into a Box). - pub unsafe fn from_raw(raw: cuda::CUDA_KERNEL_NODE_PARAMS) -> Self { + pub unsafe fn from_raw(raw: driver_sys::CUDA_KERNEL_NODE_PARAMS) -> Self { Self { func: raw.func, grid_dim: GridSize::xyz(raw.gridDimX, raw.gridDimY, raw.gridDimZ), @@ -120,7 +123,7 @@ impl KernelInvocation { #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[repr(transparent)] pub struct GraphNode { - raw: cuda::CUgraphNode, + raw: driver_sys::CUgraphNode, } unsafe impl Send for GraphNode {} @@ -129,12 +132,12 @@ unsafe impl Sync for GraphNode {} impl GraphNode { /// Creates a new node from a raw handle. This is safe because node checks /// happen on the graph when functions are called. 
- pub fn from_raw(raw: cuda::CUgraphNode) -> Self { + pub fn from_raw(raw: driver_sys::CUgraphNode) -> Self { Self { raw } } /// Converts this node into a raw handle. - pub fn to_raw(self) -> cuda::CUgraphNode { + pub fn to_raw(self) -> driver_sys::CUgraphNode { self.raw } } @@ -167,46 +170,55 @@ pub enum GraphNodeType { MemoryAllocation, /// Frees some memory. MemoryFree, + /// Batch memory operation. + BatchMemoryOperation, + /// Conditional node. + #[cfg(conditional_node)] + Conditional, } impl GraphNodeType { /// Converts a raw type to a [`GraphNodeType`]. - pub fn from_raw(raw: cuda::CUgraphNodeType) -> Self { + pub fn from_raw(raw: driver_sys::CUgraphNodeType) -> Self { + use driver_sys::CUgraphNodeType::*; match raw { - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_KERNEL => GraphNodeType::KernelInvocation, - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_MEMCPY => GraphNodeType::Memcpy, - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_MEMSET => GraphNodeType::Memset, - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_HOST => GraphNodeType::HostExecute, - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_GRAPH => GraphNodeType::ChildGraph, - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_EMPTY => GraphNodeType::Empty, - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_WAIT_EVENT => GraphNodeType::WaitEvent, - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_EVENT_RECORD => GraphNodeType::EventRecord, - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL => { - GraphNodeType::SemaphoreSignal - } - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT => { - GraphNodeType::SemaphoreWait - } - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_MEM_ALLOC => GraphNodeType::MemoryAllocation, - cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_MEM_FREE => GraphNodeType::MemoryFree, + CU_GRAPH_NODE_TYPE_KERNEL => GraphNodeType::KernelInvocation, + CU_GRAPH_NODE_TYPE_MEMCPY => GraphNodeType::Memcpy, + CU_GRAPH_NODE_TYPE_MEMSET => GraphNodeType::Memset, + CU_GRAPH_NODE_TYPE_HOST => GraphNodeType::HostExecute, + 
CU_GRAPH_NODE_TYPE_GRAPH => GraphNodeType::ChildGraph, + CU_GRAPH_NODE_TYPE_EMPTY => GraphNodeType::Empty, + CU_GRAPH_NODE_TYPE_WAIT_EVENT => GraphNodeType::WaitEvent, + CU_GRAPH_NODE_TYPE_EVENT_RECORD => GraphNodeType::EventRecord, + CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL => GraphNodeType::SemaphoreSignal, + CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT => GraphNodeType::SemaphoreWait, + CU_GRAPH_NODE_TYPE_MEM_ALLOC => GraphNodeType::MemoryAllocation, + CU_GRAPH_NODE_TYPE_MEM_FREE => GraphNodeType::MemoryFree, + CU_GRAPH_NODE_TYPE_BATCH_MEM_OP => GraphNodeType::BatchMemoryOperation, + #[cfg(conditional_node)] + CU_GRAPH_NODE_TYPE_CONDITIONAL => GraphNodeType::Conditional, } } /// Converts this type to its raw counterpart. - pub fn to_raw(self) -> cuda::CUgraphNodeType { + pub fn to_raw(self) -> driver_sys::CUgraphNodeType { + use driver_sys::CUgraphNodeType::*; match self { - Self::KernelInvocation => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_KERNEL, - Self::Memcpy => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_MEMCPY, - Self::Memset => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_MEMSET, - Self::HostExecute => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_HOST, - Self::ChildGraph => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_GRAPH, - Self::Empty => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_EMPTY, - Self::WaitEvent => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_WAIT_EVENT, - Self::EventRecord => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_EVENT_RECORD, - Self::SemaphoreSignal => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL, - Self::SemaphoreWait => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT, - Self::MemoryAllocation => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_MEM_ALLOC, - Self::MemoryFree => cuda::CUgraphNodeType::CU_GRAPH_NODE_TYPE_MEM_FREE, + Self::KernelInvocation => CU_GRAPH_NODE_TYPE_KERNEL, + Self::Memcpy => CU_GRAPH_NODE_TYPE_MEMCPY, + Self::Memset => CU_GRAPH_NODE_TYPE_MEMSET, + Self::HostExecute => CU_GRAPH_NODE_TYPE_HOST, + Self::ChildGraph => 
CU_GRAPH_NODE_TYPE_GRAPH, + Self::Empty => CU_GRAPH_NODE_TYPE_EMPTY, + Self::WaitEvent => CU_GRAPH_NODE_TYPE_WAIT_EVENT, + Self::EventRecord => CU_GRAPH_NODE_TYPE_EVENT_RECORD, + Self::SemaphoreSignal => CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL, + Self::SemaphoreWait => CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT, + Self::MemoryAllocation => CU_GRAPH_NODE_TYPE_MEM_ALLOC, + Self::MemoryFree => CU_GRAPH_NODE_TYPE_MEM_FREE, + Self::BatchMemoryOperation => CU_GRAPH_NODE_TYPE_BATCH_MEM_OP, + #[cfg(conditional_node)] + Self::Conditional => CU_GRAPH_NODE_TYPE_CONDITIONAL, } } } @@ -238,7 +250,7 @@ impl GraphNodeType { /// send graphs between threads. #[derive(Debug)] pub struct Graph { - raw: cuda::CUgraph, + raw: driver_sys::CUgraph, // a cache of nodes, this cache is None when the node cache is out of date, // it will get refreshed when get_nodes is called. node_cache: Option>, @@ -291,7 +303,7 @@ impl Graph { pub fn num_nodes(&mut self) -> CudaResult { unsafe { let mut len = MaybeUninit::uninit(); - cuda::cuGraphGetNodes(self.raw, ptr::null_mut(), len.as_mut_ptr()).to_result()?; + driver_sys::cuGraphGetNodes(self.raw, ptr::null_mut(), len.as_mut_ptr()).to_result()?; Ok(len.assume_init()) } } @@ -302,9 +314,9 @@ impl Graph { unsafe { let mut len = self.num_nodes()?; let mut vec = Vec::with_capacity(len); - cuda::cuGraphGetNodes( + driver_sys::cuGraphGetNodes( self.raw, - vec.as_mut_ptr() as *mut cuda::CUgraphNode, + vec.as_mut_ptr() as *mut driver_sys::CUgraphNode, &mut len as *mut usize, ) .to_result()?; @@ -320,7 +332,7 @@ impl Graph { let mut raw = MaybeUninit::uninit(); unsafe { - cuda::cuGraphCreate(raw.as_mut_ptr(), flags.bits()).to_result()?; + driver_sys::cuGraphCreate(raw.as_mut_ptr(), flags.bits()).to_result()?; Ok(Self { raw: raw.assume_init(), @@ -333,15 +345,6 @@ impl Graph { /// This dotfile can be turned into an image with graphviz. 
#[cfg(any(windows, unix))] pub fn dump_debug_dotfile>(&mut self, path: P) -> CudaResult<()> { - // not currently present in cuda-driver-sys for some reason - extern "C" { - fn cuGraphDebugDotPrint( - hGraph: cuda::CUgraph, - path: *const c_char, - flags: c_uint, - ) -> cuda::CUresult; - } - let path = path.as_ref(); let mut buf = Vec::new(); #[cfg(unix)] @@ -366,7 +369,14 @@ impl Graph { ); } - unsafe { cuGraphDebugDotPrint(self.raw, "./out.dot\0".as_ptr().cast(), 1 << 0).to_result() } + unsafe { + driver_sys::cuGraphDebugDotPrint( + self.raw, + c"./out.dot".as_ptr(), + driver_sys::CUgraphDebugDot_flags::CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE as u32, + ) + .to_result() + } } /// Adds a kernel invocation node to this graph, [`KernelInvocation`] can be created using @@ -385,7 +395,7 @@ impl Graph { let deps_ptr = deps.as_ptr().cast(); let mut node = MaybeUninit::::uninit(); let params = invocation.to_raw(); - cuda::cuGraphAddKernelNode( + driver_sys::cuGraphAddKernelNode_v2( node.as_mut_ptr().cast(), self.raw, deps_ptr, @@ -401,7 +411,7 @@ impl Graph { pub fn num_edges(&mut self) -> CudaResult { unsafe { let mut size = MaybeUninit::uninit(); - cuda::cuGraphGetEdges( + driver_sys::cuGraphGetEdges( self.raw, ptr::null_mut(), ptr::null_mut(), @@ -425,7 +435,7 @@ impl Graph { let mut from = vec![ptr::null_mut(); num_edges].into_boxed_slice(); let mut to = vec![ptr::null_mut(); num_edges].into_boxed_slice(); - cuda::cuGraphGetEdges( + driver_sys::cuGraphGetEdges( self.raw, from.as_mut_ptr(), to.as_mut_ptr(), @@ -446,7 +456,7 @@ impl Graph { self.check_deps_are_valid("node_type", &[node])?; unsafe { let mut ty = MaybeUninit::uninit(); - cuda::cuGraphNodeGetType(node.to_raw(), ty.as_mut_ptr()).to_result()?; + driver_sys::cuGraphNodeGetType(node.to_raw(), ty.as_mut_ptr()).to_result()?; let raw = ty.assume_init(); Ok(GraphNodeType::from_raw(raw)) } @@ -466,7 +476,7 @@ impl Graph { ); unsafe { let mut params = MaybeUninit::uninit(); - cuda::cuGraphKernelNodeGetParams(node.to_raw(), 
params.as_mut_ptr()); + driver_sys::cuGraphKernelNodeGetParams_v2(node.to_raw(), params.as_mut_ptr()); Ok(KernelInvocation::from_raw(params.assume_init())) } } @@ -479,7 +489,7 @@ impl Graph { /// - This handle is exclusive, nothing else can use it in any way, including trying to drop it. /// - It must be a valid handle. This invariant must be upheld, the library is allowed to rely on /// the fact that the handle is valid in terms of safety, therefore failure to uphold this invariant is UB. - pub unsafe fn from_raw(raw: cuda::CUgraph) -> Self { + pub unsafe fn from_raw(raw: driver_sys::CUgraph) -> Self { Self { raw, node_cache: None, @@ -488,7 +498,7 @@ impl Graph { /// Consumes this [`Graph`], turning it into a raw handle. The handle will not be dropped, /// it is up to the caller to ensure the graph is destroyed. - pub fn into_raw(self) -> cuda::CUgraph { + pub fn into_raw(self) -> driver_sys::CUgraph { let me = ManuallyDrop::new(self); me.raw } @@ -497,7 +507,7 @@ impl Graph { impl Drop for Graph { fn drop(&mut self) { unsafe { - cuda::cuGraphDestroy(self.raw); + driver_sys::cuGraphDestroy(self.raw); } } } diff --git a/crates/cust/src/lib.rs b/crates/cust/src/lib.rs index 82bbbd61..638b523d 100644 --- a/crates/cust/src/lib.rs +++ b/crates/cust/src/lib.rs @@ -3,9 +3,8 @@ //! # Low level CUDA interop //! //! Because additions to CUDA and libraries that use CUDA are everchanging, this library -//! provides unsafe functions for retrieving and setting handles to raw cuda_sys objects. -//! This allows advanced users to embed libraries that rely on CUDA, such as OptiX. We -//! also re-export cuda_sys as a [`sys`] module for convenience. +//! provides unsafe functions for retrieving and setting handles to raw CUDA objects. +//! This allows advanced users to embed libraries that rely on CUDA, such as OptiX. //! //! # CUDA Terminology: //! 
@@ -76,13 +75,12 @@ mod texture; pub mod util; pub use cust_derive::DeviceCopy; -pub use cust_raw as sys; use crate::context::{Context, ContextFlags}; use crate::device::Device; use crate::error::{CudaResult, ToResult}; use bitflags::bitflags; -use sys::{cuDriverGetVersion, cuInit}; +use cust_raw::driver_sys::{cuDriverGetVersion, cuInit}; bitflags! { /// Bit flags for initializing the CUDA driver. Currently, no flags are defined, diff --git a/crates/cust/src/link.rs b/crates/cust/src/link.rs index fc231d9d..26bf7202 100644 --- a/crates/cust/src/link.rs +++ b/crates/cust/src/link.rs @@ -3,7 +3,7 @@ use std::mem::MaybeUninit; use std::ptr::null_mut; -use crate::sys as cuda; +use cust_raw::driver_sys; use crate::error::{CudaResult, ToResult}; @@ -12,7 +12,7 @@ static UNNAMED: &str = "\0"; /// A linker used to link together PTX files into a single module. #[derive(Debug)] pub struct Linker { - raw: cuda::CUlinkState, + raw: driver_sys::CUlinkState, } unsafe impl Send for Linker {} @@ -27,7 +27,7 @@ impl Linker { unsafe { let mut raw = MaybeUninit::uninit(); - cuda::cuLinkCreate_v2(0, null_mut(), null_mut(), raw.as_mut_ptr()).to_result()?; + driver_sys::cuLinkCreate_v2(0, null_mut(), null_mut(), raw.as_mut_ptr()).to_result()?; Ok(Self { raw: raw.assume_init(), }) @@ -48,9 +48,9 @@ impl Linker { let ptx = ptx.as_ref(); unsafe { - cuda::cuLinkAddData_v2( + driver_sys::cuLinkAddData_v2( self.raw, - cuda::CUjitInputType::CU_JIT_INPUT_PTX, + driver_sys::CUjitInputType::CU_JIT_INPUT_PTX, // cuda_sys wants *mut but from the API docs we know we retain ownership so // this cast is sound. ptx.as_ptr() as *mut _, @@ -73,9 +73,9 @@ impl Linker { let cubin = cubin.as_ref(); unsafe { - cuda::cuLinkAddData_v2( + driver_sys::cuLinkAddData_v2( self.raw, - cuda::CUjitInputType::CU_JIT_INPUT_CUBIN, + driver_sys::CUjitInputType::CU_JIT_INPUT_CUBIN, // cuda_sys wants *mut but from the API docs we know we retain ownership so // this cast is sound. 
cubin.as_ptr() as *mut _, @@ -98,9 +98,9 @@ impl Linker { let fatbin = fatbin.as_ref(); unsafe { - cuda::cuLinkAddData_v2( + driver_sys::cuLinkAddData_v2( self.raw, - cuda::CUjitInputType::CU_JIT_INPUT_FATBINARY, + driver_sys::CUjitInputType::CU_JIT_INPUT_FATBINARY, // cuda_sys wants *mut but from the API docs we know we retain ownership so // this cast is sound. fatbin.as_ptr() as *mut _, @@ -121,7 +121,8 @@ impl Linker { let mut size = MaybeUninit::uninit(); unsafe { - cuda::cuLinkComplete(self.raw, cubin.as_mut_ptr(), size.as_mut_ptr()).to_result()?; + driver_sys::cuLinkComplete(self.raw, cubin.as_mut_ptr(), size.as_mut_ptr()) + .to_result()?; // docs say that CULinkState owns the data, so clone it out before we destroy ourselves. let cubin = cubin.assume_init() as *const u8; let size = size.assume_init(); @@ -136,6 +137,6 @@ impl Linker { impl Drop for Linker { fn drop(&mut self) { - unsafe { cuda::cuLinkDestroy(self.raw) }; + unsafe { driver_sys::cuLinkDestroy(self.raw) }; } } diff --git a/crates/cust/src/memory/array.rs b/crates/cust/src/memory/array.rs index 8f3fa12c..7d543e0c 100644 --- a/crates/cust/src/memory/array.rs +++ b/crates/cust/src/memory/array.rs @@ -3,23 +3,27 @@ //! Detailed documentation about allocating CUDA Arrays can be found in the //! 
[CUDA Driver API](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gc2322c70b38c2984536c90ed118bb1d7) -use crate::context::CurrentContext; -use crate::device::DeviceAttribute; -use crate::error::*; -use crate::sys::cuMemcpy2D_v2; -use crate::sys::cuMemcpyAtoH_v2; -use crate::sys::cuMemcpyHtoA_v2; -use crate::sys::CUDA_MEMCPY2D; -use crate::sys::{self as cuda, CUarray, CUarray_format, CUarray_format_enum}; use std::ffi::c_void; use std::mem; use std::mem::zeroed; use std::mem::ManuallyDrop; use std::mem::MaybeUninit; use std::os::raw::c_uint; +use std::panic; use std::ptr::null; use std::ptr::null_mut; +use cust_raw::driver_sys; +use cust_raw::driver_sys::cuMemcpy2D_v2; +use cust_raw::driver_sys::cuMemcpyAtoH_v2; +use cust_raw::driver_sys::cuMemcpyHtoA_v2; +use cust_raw::driver_sys::CUDA_MEMCPY2D; +use cust_raw::driver_sys::{CUarray, CUarray_format, CUarray_format_enum}; + +use crate::context::CurrentContext; +use crate::device::DeviceAttribute; +use crate::error::*; + /// Describes the format used for a CUDA Array. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ArrayFormat { @@ -135,6 +139,7 @@ impl ArrayFormat { // there are literally no docs on what nv12 is??? // it seems to be something with multiplanar arrays, needs some investigation CUarray_format_enum::CU_AD_FORMAT_NV12 => panic!("nv12 is not supported yet"), + _ => panic!("Unsupported array format: {:?}", raw), } } @@ -159,19 +164,19 @@ bitflags::bitflags! { pub struct ArrayObjectFlags: c_uint { /// Enables creation of layered CUDA arrays. When this flag is set, depth specifies the /// number of layers, not the depth of a 3D array. - const LAYERED = cuda::CUDA_ARRAY3D_LAYERED; + const LAYERED = driver_sys::CUDA_ARRAY3D_LAYERED; /// Enables surface references to be bound to the CUDA array. - const SURFACE_LDST = cuda::CUDA_ARRAY3D_SURFACE_LDST; + const SURFACE_LDST = driver_sys::CUDA_ARRAY3D_SURFACE_LDST; /// Enables creation of cubemaps. 
If this flag is set, Width must be equal to Height, and /// Depth must be six. If the `LAYERED` flag is also set, then Depth must be a multiple of /// six. - const CUBEMAP = cuda::CUDA_ARRAY3D_CUBEMAP; + const CUBEMAP = driver_sys::CUDA_ARRAY3D_CUBEMAP; /// Indicates that the CUDA array will be used for texture gather. Texture gather can only /// be performed on 2D CUDA arrays. - const TEXTURE_GATHER = cuda::CUDA_ARRAY3D_TEXTURE_GATHER; + const TEXTURE_GATHER = driver_sys::CUDA_ARRAY3D_TEXTURE_GATHER; } } @@ -185,12 +190,12 @@ impl ArrayObjectFlags { /// Describes a CUDA Array #[derive(Clone, Copy, Debug)] pub struct ArrayDescriptor { - desc: cuda::CUDA_ARRAY3D_DESCRIPTOR, + desc: driver_sys::CUDA_ARRAY3D_DESCRIPTOR, } impl ArrayDescriptor { /// Constructs an ArrayDescriptor from a CUDA Driver API Array Descriptor. - pub fn from_raw(desc: cuda::CUDA_ARRAY3D_DESCRIPTOR) -> Self { + pub fn from_raw(desc: driver_sys::CUDA_ARRAY3D_DESCRIPTOR) -> Self { Self { desc } } @@ -202,7 +207,7 @@ impl ArrayDescriptor { flags: ArrayObjectFlags, ) -> Self { Self { - desc: cuda::CUDA_ARRAY3D_DESCRIPTOR { + desc: driver_sys::CUDA_ARRAY3D_DESCRIPTOR { Width: dims[0], Height: dims[1], Depth: dims[2], @@ -216,7 +221,7 @@ impl ArrayDescriptor { /// Creates a new ArrayDescriptor from a set of dimensions and format. 
pub fn from_dims_format(dims: [usize; 3], format: ArrayFormat) -> Self { Self { - desc: cuda::CUDA_ARRAY3D_DESCRIPTOR { + desc: driver_sys::CUDA_ARRAY3D_DESCRIPTOR { Width: dims[0], Height: dims[1], Depth: dims[2], @@ -474,7 +479,8 @@ impl ArrayObject { } let mut handle = MaybeUninit::uninit(); - unsafe { cuda::cuArray3DCreate_v2(handle.as_mut_ptr(), &descriptor.desc) }.to_result()?; + unsafe { driver_sys::cuArray3DCreate_v2(handle.as_mut_ptr(), &descriptor.desc) } + .to_result()?; Ok(Self { handle: unsafe { handle.assume_init() }, }) @@ -725,7 +731,7 @@ impl ArrayObject { pub fn descriptor(&self) -> CudaResult { // Use "zeroed" incase CUDA_ARRAY3D_DESCRIPTOR has uninitialized padding let mut raw_descriptor = MaybeUninit::zeroed(); - unsafe { cuda::cuArray3DGetDescriptor_v2(raw_descriptor.as_mut_ptr(), self.handle) } + unsafe { driver_sys::cuArray3DGetDescriptor_v2(raw_descriptor.as_mut_ptr(), self.handle) } .to_result()?; Ok(ArrayDescriptor::from_raw(unsafe { @@ -736,7 +742,7 @@ impl ArrayObject { /// Try to destroy an `ArrayObject`. 
Can fail - if it does, returns the CUDA error and the /// un-destroyed array object pub fn drop(array: ArrayObject) -> DropResult { - match unsafe { cuda::cuArrayDestroy(array.handle) }.to_result() { + match unsafe { driver_sys::cuArrayDestroy(array.handle) }.to_result() { Ok(()) => Ok(()), Err(e) => Err((e, array)), } @@ -769,14 +775,14 @@ impl ArrayObject { dstArray: self.handle, dstDevice: 0, dstHost: null_mut(), - dstMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_ARRAY, + dstMemoryType: driver_sys::CUmemorytype_enum::CU_MEMORYTYPE_ARRAY, dstPitch: 0, dstXInBytes: 0, dstY: 0, srcArray: null_mut(), srcDevice: 0, srcHost: val.as_ptr() as *const c_void, - srcMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_HOST, + srcMemoryType: driver_sys::CUmemorytype_enum::CU_MEMORYTYPE_HOST, srcPitch: 0, srcXInBytes: 0, srcY: 0, @@ -814,14 +820,14 @@ impl ArrayObject { dstArray: null_mut(), dstDevice: 0, dstHost: val.as_mut_ptr() as *mut c_void, - dstMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_HOST, + dstMemoryType: driver_sys::CUmemorytype_enum::CU_MEMORYTYPE_HOST, dstPitch: 0, dstXInBytes: 0, dstY: 0, srcArray: self.handle, srcDevice: 0, srcHost: null(), - srcMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_ARRAY, + srcMemoryType: driver_sys::CUmemorytype_enum::CU_MEMORYTYPE_ARRAY, srcPitch: 0, srcXInBytes: 0, srcY: 0, @@ -863,7 +869,7 @@ impl std::fmt::Debug for ArrayObject { impl Drop for ArrayObject { fn drop(&mut self) { - unsafe { cuda::cuArrayDestroy(self.handle) }; + unsafe { driver_sys::cuArrayDestroy(self.handle) }; } } diff --git a/crates/cust/src/memory/device/device_box.rs b/crates/cust/src/memory/device/device_box.rs index cdba519d..acb0d040 100644 --- a/crates/cust/src/memory/device/device_box.rs +++ b/crates/cust/src/memory/device/device_box.rs @@ -1,3 +1,9 @@ +use std::fmt::{self, Pointer}; +use std::mem::{self, ManuallyDrop, MaybeUninit}; +use std::os::raw::c_void; + +use cust_raw::driver_sys; + use crate::error::{CudaResult, DropResult, 
ToResult}; use crate::memory::device::AsyncCopyDestination; use crate::memory::device::CopyDestination; @@ -5,11 +11,6 @@ use crate::memory::malloc::{cuda_free, cuda_malloc}; use crate::memory::DevicePointer; use crate::memory::{cuda_free_async, cuda_malloc_async, DeviceCopy}; use crate::stream::Stream; -use crate::sys as cuda; -use std::fmt::{self, Pointer}; -use std::mem::{self, ManuallyDrop, MaybeUninit}; - -use std::os::raw::c_void; /// A pointer type for heap-allocation in CUDA device memory. /// @@ -163,7 +164,7 @@ impl DeviceBox { unsafe { let new_box = DeviceBox::uninitialized()?; if mem::size_of::() != 0 { - cuda::cuMemsetD8_v2(new_box.as_device_ptr().as_raw(), 0, mem::size_of::()) + driver_sys::cuMemsetD8_v2(new_box.as_device_ptr().as_raw(), 0, mem::size_of::()) .to_result()?; } Ok(new_box) @@ -205,7 +206,7 @@ impl DeviceBox { pub unsafe fn zeroed_async(stream: &Stream) -> CudaResult { let new_box = DeviceBox::uninitialized_async(stream)?; if mem::size_of::() != 0 { - cuda::cuMemsetD8Async( + driver_sys::cuMemsetD8Async( new_box.as_device_ptr().as_raw(), 0, mem::size_of::(), @@ -292,7 +293,7 @@ impl DeviceBox { /// let ptr = DeviceBox::into_device(x).as_raw_mut(); /// let x = unsafe { DeviceBox::from_raw(ptr) }; /// ``` - pub unsafe fn from_raw(ptr: cust_raw::CUdeviceptr) -> Self { + pub unsafe fn from_raw(ptr: driver_sys::CUdeviceptr) -> Self { DeviceBox { ptr: DevicePointer::from_raw(ptr), } @@ -429,8 +430,12 @@ impl CopyDestination for DeviceBox { let size = mem::size_of::(); if size != 0 { unsafe { - cuda::cuMemcpyHtoD_v2(self.ptr.as_raw(), val as *const T as *const c_void, size) - .to_result()? + driver_sys::cuMemcpyHtoD_v2( + self.ptr.as_raw(), + val as *const T as *const c_void, + size, + ) + .to_result()? 
} } Ok(()) @@ -440,7 +445,7 @@ impl CopyDestination for DeviceBox { let size = mem::size_of::(); if size != 0 { unsafe { - cuda::cuMemcpyDtoH_v2(val as *const T as *mut c_void, self.ptr.as_raw(), size) + driver_sys::cuMemcpyDtoH_v2(val as *const T as *mut c_void, self.ptr.as_raw(), size) .to_result()? } } @@ -451,7 +456,10 @@ impl CopyDestination> for DeviceBox { fn copy_from(&mut self, val: &DeviceBox) -> CudaResult<()> { let size = mem::size_of::(); if size != 0 { - unsafe { cuda::cuMemcpyDtoD_v2(self.ptr.as_raw(), val.ptr.as_raw(), size).to_result()? } + unsafe { + driver_sys::cuMemcpyDtoD_v2(self.ptr.as_raw(), val.ptr.as_raw(), size) + .to_result()? + } } Ok(()) } @@ -459,7 +467,10 @@ impl CopyDestination> for DeviceBox { fn copy_to(&self, val: &mut DeviceBox) -> CudaResult<()> { let size = mem::size_of::(); if size != 0 { - unsafe { cuda::cuMemcpyDtoD_v2(val.ptr.as_raw(), self.ptr.as_raw(), size).to_result()? } + unsafe { + driver_sys::cuMemcpyDtoD_v2(val.ptr.as_raw(), self.ptr.as_raw(), size) + .to_result()? + } } Ok(()) } @@ -468,7 +479,7 @@ impl AsyncCopyDestination for DeviceBox { unsafe fn async_copy_from(&mut self, val: &T, stream: &Stream) -> CudaResult<()> { let size = mem::size_of::(); if size != 0 { - cuda::cuMemcpyHtoDAsync_v2( + driver_sys::cuMemcpyHtoDAsync_v2( self.ptr.as_raw(), val as *const _ as *const c_void, size, @@ -482,7 +493,7 @@ impl AsyncCopyDestination for DeviceBox { unsafe fn async_copy_to(&self, val: &mut T, stream: &Stream) -> CudaResult<()> { let size = mem::size_of::(); if size != 0 { - cuda::cuMemcpyDtoHAsync_v2( + driver_sys::cuMemcpyDtoHAsync_v2( val as *mut _ as *mut c_void, self.ptr.as_raw(), size, @@ -497,8 +508,13 @@ impl AsyncCopyDestination> for DeviceBox { unsafe fn async_copy_from(&mut self, val: &DeviceBox, stream: &Stream) -> CudaResult<()> { let size = mem::size_of::(); if size != 0 { - cuda::cuMemcpyDtoDAsync_v2(self.ptr.as_raw(), val.ptr.as_raw(), size, stream.as_inner()) - .to_result()? 
+ driver_sys::cuMemcpyDtoDAsync_v2( + self.ptr.as_raw(), + val.ptr.as_raw(), + size, + stream.as_inner(), + ) + .to_result()? } Ok(()) } @@ -506,8 +522,13 @@ impl AsyncCopyDestination> for DeviceBox { unsafe fn async_copy_to(&self, val: &mut DeviceBox, stream: &Stream) -> CudaResult<()> { let size = mem::size_of::(); if size != 0 { - cuda::cuMemcpyDtoDAsync_v2(val.ptr.as_raw(), self.ptr.as_raw(), size, stream.as_inner()) - .to_result()? + driver_sys::cuMemcpyDtoDAsync_v2( + val.ptr.as_raw(), + self.ptr.as_raw(), + size, + stream.as_inner(), + ) + .to_result()? } Ok(()) } diff --git a/crates/cust/src/memory/device/device_buffer.rs b/crates/cust/src/memory/device/device_buffer.rs index 4de5e72b..6fc5dde6 100644 --- a/crates/cust/src/memory/device/device_buffer.rs +++ b/crates/cust/src/memory/device/device_buffer.rs @@ -1,16 +1,18 @@ +use std::mem::{self, align_of, size_of, transmute, ManuallyDrop}; +use std::ops::{Deref, DerefMut}; + +#[cfg(feature = "bytemuck")] +pub use bytemuck; +#[cfg(feature = "bytemuck")] +use bytemuck::{Pod, PodCastError, Zeroable}; +use cust_raw::driver_sys; + use crate::error::{CudaResult, DropResult, ToResult}; use crate::memory::device::{AsyncCopyDestination, CopyDestination, DeviceSlice}; use crate::memory::malloc::{cuda_free, cuda_malloc}; use crate::memory::{cuda_free_async, DevicePointer}; use crate::memory::{cuda_malloc_async, DeviceCopy}; use crate::stream::Stream; -use crate::sys as cuda; -#[cfg(feature = "bytemuck")] -pub use bytemuck; -#[cfg(feature = "bytemuck")] -use bytemuck::{Pod, PodCastError, Zeroable}; -use std::mem::{self, align_of, size_of, transmute, ManuallyDrop}; -use std::ops::{Deref, DerefMut}; /// Fixed-size device-side buffer. Provides basic access to device memory. 
#[derive(Debug)] @@ -230,8 +232,12 @@ impl DeviceBuffer { unsafe { let new_buf = DeviceBuffer::uninitialized(size)?; if size_of::() != 0 { - cuda::cuMemsetD8_v2(new_buf.as_device_ptr().as_raw(), 0, size_of::() * size) - .to_result()?; + driver_sys::cuMemsetD8_v2( + new_buf.as_device_ptr().as_raw(), + 0, + size_of::() * size, + ) + .to_result()?; } Ok(new_buf) } @@ -272,7 +278,7 @@ impl DeviceBuffer { pub unsafe fn zeroed_async(size: usize, stream: &Stream) -> CudaResult { let new_buf = DeviceBuffer::uninitialized_async(size, stream)?; if size_of::() != 0 { - cuda::cuMemsetD8Async( + driver_sys::cuMemsetD8Async( new_buf.as_device_ptr().as_raw(), 0, size_of::() * size, @@ -437,6 +443,7 @@ mod test_device_buffer { use super::*; use crate::stream::{Stream, StreamFlags}; + #[expect(dead_code)] #[derive(Clone, Copy, Debug)] struct ZeroSizedType; unsafe impl DeviceCopy for ZeroSizedType {} diff --git a/crates/cust/src/memory/device/device_slice.rs b/crates/cust/src/memory/device/device_slice.rs index 42b07ca6..702b9d04 100644 --- a/crates/cust/src/memory/device/device_slice.rs +++ b/crates/cust/src/memory/device/device_slice.rs @@ -1,12 +1,3 @@ -use crate::error::{CudaResult, ToResult}; -use crate::memory::device::AsyncCopyDestination; -use crate::memory::device::{CopyDestination, DeviceBuffer}; -use crate::memory::DevicePointer; -use crate::memory::{DeviceCopy, DeviceMemory}; -use crate::stream::Stream; -use crate::sys as cuda; -#[cfg(feature = "bytemuck")] -use bytemuck::{Pod, Zeroable}; use std::fmt::{self, Debug, Formatter}; use std::marker::PhantomData; use std::ops::{ @@ -16,6 +7,17 @@ use std::os::raw::c_void; use std::ptr::{slice_from_raw_parts, slice_from_raw_parts_mut}; use std::slice; +#[cfg(feature = "bytemuck")] +use bytemuck::{Pod, Zeroable}; +use cust_raw::driver_sys; + +use crate::error::{CudaResult, ToResult}; +use crate::memory::device::AsyncCopyDestination; +use crate::memory::device::{CopyDestination, DeviceBuffer}; +use crate::memory::DevicePointer; 
+use crate::memory::{DeviceCopy, DeviceMemory}; +use crate::stream::Stream; + /// Fixed-size device-side slice. #[repr(transparent)] pub struct DeviceSlice { @@ -247,7 +249,9 @@ impl DeviceSlice { // SAFETY: We know T can hold any value because it is `Pod`, and // sub-byte alignment isn't a thing so we know the alignment is right. - unsafe { cuda::cuMemsetD8_v2(self.as_raw_ptr(), value, self.size_in_bytes()).to_result() } + unsafe { + driver_sys::cuMemsetD8_v2(self.as_raw_ptr(), value, self.size_in_bytes()).to_result() + } } /// Sets the memory range of this buffer to contiguous `8-bit` values of `value` asynchronously. @@ -264,7 +268,7 @@ impl DeviceSlice { return Ok(()); } - cuda::cuMemsetD8Async( + driver_sys::cuMemsetD8Async( self.as_raw_ptr(), value, self.size_in_bytes(), @@ -296,7 +300,7 @@ impl DeviceSlice { 0, "Buffer pointer is not aligned to at least 2 bytes!" ); - unsafe { cuda::cuMemsetD16_v2(self.as_raw_ptr(), value, data_len / 2).to_result() } + unsafe { driver_sys::cuMemsetD16_v2(self.as_raw_ptr(), value, data_len / 2).to_result() } } /// Sets the memory range of this buffer to contiguous `16-bit` values of `value` asynchronously. @@ -327,7 +331,7 @@ impl DeviceSlice { 0, "Buffer pointer is not aligned to at least 2 bytes!" ); - cuda::cuMemsetD16Async(self.as_raw_ptr(), value, data_len / 2, stream.as_inner()) + driver_sys::cuMemsetD16Async(self.as_raw_ptr(), value, data_len / 2, stream.as_inner()) .to_result() } @@ -354,7 +358,7 @@ impl DeviceSlice { 0, "Buffer pointer is not aligned to at least 4 bytes!" ); - unsafe { cuda::cuMemsetD32_v2(self.as_raw_ptr(), value, data_len / 4).to_result() } + unsafe { driver_sys::cuMemsetD32_v2(self.as_raw_ptr(), value, data_len / 4).to_result() } } /// Sets the memory range of this buffer to contiguous `32-bit` values of `value` asynchronously. @@ -385,7 +389,7 @@ impl DeviceSlice { 0, "Buffer pointer is not aligned to at least 4 bytes!" 
); - cuda::cuMemsetD32Async(self.as_raw_ptr(), value, data_len / 4, stream.as_inner()) + driver_sys::cuMemsetD32Async(self.as_raw_ptr(), value, data_len / 4, stream.as_inner()) .to_result() } } @@ -647,7 +651,7 @@ impl + AsMut<[T]> + ?Sized> CopyDestination for let size = self.size_in_bytes(); if size != 0 { unsafe { - cuda::cuMemcpyHtoD_v2(self.as_raw_ptr(), val.as_ptr() as *const c_void, size) + driver_sys::cuMemcpyHtoD_v2(self.as_raw_ptr(), val.as_ptr() as *const c_void, size) .to_result()? } } @@ -663,8 +667,12 @@ impl + AsMut<[T]> + ?Sized> CopyDestination for let size = self.size_in_bytes(); if size != 0 { unsafe { - cuda::cuMemcpyDtoH_v2(val.as_mut_ptr() as *mut c_void, self.as_raw_ptr(), size) - .to_result()? + driver_sys::cuMemcpyDtoH_v2( + val.as_mut_ptr() as *mut c_void, + self.as_raw_ptr(), + size, + ) + .to_result()? } } Ok(()) @@ -678,7 +686,10 @@ impl CopyDestination> for DeviceSlice { ); let size = self.size_in_bytes(); if size != 0 { - unsafe { cuda::cuMemcpyDtoD_v2(self.as_raw_ptr(), val.as_raw_ptr(), size).to_result()? } + unsafe { + driver_sys::cuMemcpyDtoD_v2(self.as_raw_ptr(), val.as_raw_ptr(), size) + .to_result()? + } } Ok(()) } @@ -690,7 +701,10 @@ impl CopyDestination> for DeviceSlice { ); let size = self.size_in_bytes(); if size != 0 { - unsafe { cuda::cuMemcpyDtoD_v2(val.as_raw_ptr(), self.as_raw_ptr(), size).to_result()? } + unsafe { + driver_sys::cuMemcpyDtoD_v2(val.as_raw_ptr(), self.as_raw_ptr(), size) + .to_result()? 
+ } } Ok(()) } @@ -715,7 +729,7 @@ impl + AsMut<[T]> + ?Sized> AsyncCopyDestination ); let size = self.size_in_bytes(); if size != 0 { - cuda::cuMemcpyHtoDAsync_v2( + driver_sys::cuMemcpyHtoDAsync_v2( self.as_raw_ptr(), val.as_ptr() as *const c_void, size, @@ -734,7 +748,7 @@ impl + AsMut<[T]> + ?Sized> AsyncCopyDestination ); let size = self.size_in_bytes(); if size != 0 { - cuda::cuMemcpyDtoHAsync_v2( + driver_sys::cuMemcpyDtoHAsync_v2( val.as_mut_ptr() as *mut c_void, self.as_raw_ptr(), size, @@ -753,8 +767,13 @@ impl AsyncCopyDestination> for DeviceSlice { ); let size = self.size_in_bytes(); if size != 0 { - cuda::cuMemcpyDtoDAsync_v2(self.as_raw_ptr(), val.as_raw_ptr(), size, stream.as_inner()) - .to_result()? + driver_sys::cuMemcpyDtoDAsync_v2( + self.as_raw_ptr(), + val.as_raw_ptr(), + size, + stream.as_inner(), + ) + .to_result()? } Ok(()) } @@ -766,8 +785,13 @@ impl AsyncCopyDestination> for DeviceSlice { ); let size = self.size_in_bytes(); if size != 0 { - cuda::cuMemcpyDtoDAsync_v2(val.as_raw_ptr(), self.as_raw_ptr(), size, stream.as_inner()) - .to_result()? + driver_sys::cuMemcpyDtoDAsync_v2( + val.as_raw_ptr(), + self.as_raw_ptr(), + size, + stream.as_inner(), + ) + .to_result()? } Ok(()) } diff --git a/crates/cust/src/memory/malloc.rs b/crates/cust/src/memory/malloc.rs index c0965c33..78f1f356 100644 --- a/crates/cust/src/memory/malloc.rs +++ b/crates/cust/src/memory/malloc.rs @@ -1,12 +1,14 @@ +use std::mem; +use std::os::raw::c_void; +use std::ptr; + +use cust_raw::driver_sys; + use super::DeviceCopy; use crate::error::*; use crate::memory::DevicePointer; use crate::memory::UnifiedPointer; use crate::prelude::Stream; -use crate::sys as cuda; -use std::mem; -use std::os::raw::c_void; -use std::ptr; /// Unsafe wrapper around the `cuMemAlloc` function, which allocates some device memory and /// returns a [`DevicePointer`](struct.DevicePointer.html) pointing to it. The memory is not cleared. 
@@ -46,7 +48,7 @@ pub unsafe fn cuda_malloc(count: usize) -> CudaResult( } let mut ptr: *mut c_void = ptr::null_mut(); - cuda::cuMemAllocAsync( + driver_sys::cuMemAllocAsync( &mut ptr as *mut *mut c_void as *mut u64, size, stream.as_inner(), ) .to_result()?; let ptr = ptr as *mut T; - Ok(DevicePointer::from_raw(ptr as cuda::CUdeviceptr)) + Ok(DevicePointer::from_raw(ptr as driver_sys::CUdeviceptr)) } /// Unsafe wrapper around `cuMemFreeAsync` which queues a memory allocation free operation on a stream. @@ -95,7 +97,7 @@ pub unsafe fn cuda_free_async( return Err(CudaError::InvalidMemoryAllocation); } - cuda::cuMemFreeAsync(p.as_raw(), stream.as_inner()).to_result() + driver_sys::cuMemFreeAsync(p.as_raw(), stream.as_inner()).to_result() } /// Unsafe wrapper around the `cuMemAllocManaged` function, which allocates some unified memory and @@ -138,10 +140,10 @@ pub unsafe fn cuda_malloc_unified(count: usize) -> CudaResult( let mut ptr = 0; let mut pitch = 0; - cuda::cuMemAllocPitch_v2(&mut ptr, &mut pitch, width_bytes, height, element_size) + driver_sys::cuMemAllocPitch_v2(&mut ptr, &mut pitch, width_bytes, height, element_size) .to_result()?; Ok((DevicePointer::from_raw(ptr), pitch)) } @@ -234,7 +236,7 @@ pub unsafe fn cuda_free(ptr: DevicePointer) -> CudaResult<()> return Err(CudaError::InvalidMemoryAllocation); } - cuda::cuMemFree_v2(ptr.as_raw()).to_result()?; + driver_sys::cuMemFree_v2(ptr.as_raw()).to_result()?; Ok(()) } @@ -267,7 +269,7 @@ pub unsafe fn cuda_free_unified(mut p: UnifiedPointer) -> Cuda return Err(CudaError::InvalidMemoryAllocation); } - cuda::cuMemFree_v2(ptr as u64).to_result()?; + driver_sys::cuMemFree_v2(ptr as u64).to_result()?; Ok(()) } @@ -309,7 +311,7 @@ pub unsafe fn cuda_malloc_locked(count: usize) -> CudaResult<*mut T> { } let mut ptr: *mut c_void = ptr::null_mut(); - cuda::cuMemAllocHost_v2(&mut ptr as *mut *mut c_void, size).to_result()?; + driver_sys::cuMemAllocHost_v2(&mut ptr as *mut *mut c_void, size).to_result()?; let ptr = ptr as 
*mut T; Ok(ptr) } @@ -342,7 +344,7 @@ pub unsafe fn cuda_free_locked(ptr: *mut T) -> CudaResult<()> { return Err(CudaError::InvalidMemoryAllocation); } - cuda::cuMemFreeHost(ptr as *mut c_void).to_result()?; + driver_sys::cuMemFreeHost(ptr as *mut c_void).to_result()?; Ok(()) } diff --git a/crates/cust/src/memory/mod.rs b/crates/cust/src/memory/mod.rs index 152f1cfe..d9fd4838 100644 --- a/crates/cust/src/memory/mod.rs +++ b/crates/cust/src/memory/mod.rs @@ -97,6 +97,8 @@ pub use cust_core::_hidden::DeviceCopy; use std::ffi::c_void; +use cust_raw::driver_sys; + /// A trait describing a generic buffer that can be accessed from the GPU. This could be either a [`UnifiedBuffer`] /// or a regular [`DeviceBuffer`]. #[allow(clippy::len_without_is_empty)] @@ -147,14 +149,14 @@ impl GpuBox for UnifiedBox { /// a size, used to be generic over DeviceBox, DeviceBuffer, DeviceVariable etc. pub trait DeviceMemory { /// Get the raw cuda device pointer - fn as_raw_ptr(&self) -> cust_raw::CUdeviceptr; + fn as_raw_ptr(&self) -> driver_sys::CUdeviceptr; /// Get the size of the memory region in bytes fn size_in_bytes(&self) -> usize; } impl DeviceMemory for DeviceBox { - fn as_raw_ptr(&self) -> cust_raw::CUdeviceptr { + fn as_raw_ptr(&self) -> driver_sys::CUdeviceptr { self.as_device_ptr().as_raw() } @@ -164,7 +166,7 @@ impl DeviceMemory for DeviceBox { } impl DeviceMemory for DeviceVariable { - fn as_raw_ptr(&self) -> cust_raw::CUdeviceptr { + fn as_raw_ptr(&self) -> driver_sys::CUdeviceptr { self.as_device_ptr().as_raw() } @@ -174,7 +176,7 @@ impl DeviceMemory for DeviceVariable { } impl DeviceMemory for DeviceBuffer { - fn as_raw_ptr(&self) -> cust_raw::CUdeviceptr { + fn as_raw_ptr(&self) -> driver_sys::CUdeviceptr { self.as_device_ptr().as_raw() } @@ -184,7 +186,7 @@ impl DeviceMemory for DeviceBuffer { } impl DeviceMemory for DeviceSlice { - fn as_raw_ptr(&self) -> cust_raw::CUdeviceptr { + fn as_raw_ptr(&self) -> driver_sys::CUdeviceptr { self.as_device_ptr().as_raw() } @@ 
-206,11 +208,11 @@ mod private { /// Simple wrapper over cuMemcpyHtoD_v2 #[allow(clippy::missing_safety_doc)] pub unsafe fn memcpy_htod( - d_ptr: cust_raw::CUdeviceptr, + d_ptr: driver_sys::CUdeviceptr, src_ptr: *const c_void, size: usize, ) -> CudaResult<()> { - crate::sys::cuMemcpyHtoD_v2(d_ptr, src_ptr, size).to_result()?; + driver_sys::cuMemcpyHtoD_v2(d_ptr, src_ptr, size).to_result()?; Ok(()) } @@ -218,10 +220,10 @@ pub unsafe fn memcpy_htod( #[allow(clippy::missing_safety_doc)] pub unsafe fn memcpy_dtoh( d_ptr: *mut c_void, - src_ptr: cust_raw::CUdeviceptr, + src_ptr: driver_sys::CUdeviceptr, size: usize, ) -> CudaResult<()> { - crate::sys::cuMemcpyDtoH_v2(d_ptr, src_ptr, size).to_result()?; + driver_sys::cuMemcpyDtoH_v2(d_ptr, src_ptr, size).to_result()?; Ok(()) } @@ -282,32 +284,32 @@ pub unsafe fn memcpy_2d_htod( width: usize, height: usize, ) -> CudaResult<()> { - use cust_raw::CUmemorytype; + use cust_raw::driver_sys::CUmemorytype; let width_in_bytes = width .checked_mul(std::mem::size_of::()) .ok_or(CudaError::InvalidMemoryAllocation)?; - let pcopy = cust_raw::CUDA_MEMCPY2D_st { + let pcopy = driver_sys::CUDA_MEMCPY2D_st { srcXInBytes: 0, srcY: 0, srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST, srcHost: src as *const c_void, - srcDevice: 0, // Ignored - srcArray: std::ptr::null_mut::(), // Ignored + srcDevice: 0, // Ignored + srcArray: std::ptr::null_mut::(), // Ignored srcPitch: spitch, dstXInBytes: 0, dstY: 0, dstMemoryType: CUmemorytype::CU_MEMORYTYPE_DEVICE, dstHost: std::ptr::null_mut::(), // Ignored dstDevice: dst.as_raw(), - dstArray: std::ptr::null_mut::(), // Ignored + dstArray: std::ptr::null_mut::(), // Ignored dstPitch: dpitch, WidthInBytes: width_in_bytes, Height: height, }; - crate::sys::cuMemcpy2D_v2(&pcopy).to_result()?; + driver_sys::cuMemcpy2D_v2(&pcopy).to_result()?; Ok(()) } @@ -368,32 +370,32 @@ pub unsafe fn memcpy_2d_dtoh( width: usize, height: usize, ) -> CudaResult<()> { - use cust_raw::CUmemorytype; + use 
cust_raw::driver_sys::CUmemorytype; let width_in_bytes = width .checked_mul(std::mem::size_of::()) .ok_or(CudaError::InvalidMemoryAllocation)?; - let pcopy = cust_raw::CUDA_MEMCPY2D_st { + let pcopy = driver_sys::CUDA_MEMCPY2D_st { srcXInBytes: 0, srcY: 0, srcMemoryType: CUmemorytype::CU_MEMORYTYPE_DEVICE, srcHost: std::ptr::null_mut::(), // Ignored srcDevice: src.as_raw(), - srcArray: std::ptr::null_mut::(), // Ignored + srcArray: std::ptr::null_mut::(), // Ignored srcPitch: spitch, dstXInBytes: 0, dstY: 0, dstMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST, dstHost: dst as *mut c_void, - dstDevice: 0, // Ignored - dstArray: std::ptr::null_mut::(), // Ignored + dstDevice: 0, // Ignored + dstArray: std::ptr::null_mut::(), // Ignored dstPitch: dpitch, WidthInBytes: width_in_bytes, Height: height, }; - crate::sys::cuMemcpy2D_v2(&pcopy).to_result()?; + driver_sys::cuMemcpy2D_v2(&pcopy).to_result()?; Ok(()) } @@ -407,7 +409,7 @@ pub fn mem_get_info() -> CudaResult<(usize, usize)> { let mut mem_free = 0; let mut mem_total = 0; unsafe { - crate::sys::cuMemGetInfo_v2(&mut mem_free, &mut mem_total).to_result()?; + driver_sys::cuMemGetInfo_v2(&mut mem_free, &mut mem_total).to_result()?; } Ok((mem_free, mem_total)) } diff --git a/crates/cust/src/memory/pointer.rs b/crates/cust/src/memory/pointer.rs index 2b211bb8..baafc417 100644 --- a/crates/cust/src/memory/pointer.rs +++ b/crates/cust/src/memory/pointer.rs @@ -1,6 +1,3 @@ -use crate::memory::DeviceCopy; -use cust_raw::CUdeviceptr; - use core::{ fmt::{self, Debug, Pointer}, hash::Hash, @@ -10,6 +7,9 @@ use std::ffi::c_void; use std::marker::PhantomData; use std::mem::size_of; +use cust_raw::driver_sys::CUdeviceptr; + +use crate::memory::DeviceCopy; /// A pointer to device memory. 
/// /// `DevicePointer` cannot be dereferenced by the CPU, as it is a pointer to a memory allocation in diff --git a/crates/cust/src/memory/unified.rs b/crates/cust/src/memory/unified.rs index 311fc7ac..d67693e5 100644 --- a/crates/cust/src/memory/unified.rs +++ b/crates/cust/src/memory/unified.rs @@ -1,12 +1,3 @@ -use super::DeviceCopy; -use crate::device::Device; -#[allow(unused_imports)] -use crate::device::DeviceAttribute; -use crate::error::*; -use crate::memory::malloc::{cuda_free_unified, cuda_malloc_unified}; -use crate::memory::UnifiedPointer; -use crate::prelude::Stream; -use crate::sys as cuda; use std::borrow::{Borrow, BorrowMut}; use std::cmp::Ordering; use std::convert::{AsMut, AsRef}; @@ -17,6 +8,17 @@ use std::ops::{Deref, DerefMut}; use std::ptr; use std::slice; +use cust_raw::driver_sys; + +use super::DeviceCopy; +use crate::device::Device; +#[allow(unused_imports)] +use crate::device::DeviceAttribute; +use crate::error::*; +use crate::memory::malloc::{cuda_free_unified, cuda_malloc_unified}; +use crate::memory::UnifiedPointer; +use crate::prelude::Stream; + /// A pointer type for heap-allocation in CUDA unified memory. 
/// /// See the [`module-level documentation`](../memory/index.html) for more information on unified @@ -638,8 +640,8 @@ pub trait MemoryAdvise: private::Sealed { let mem_size = std::mem::size_of_val(slice); unsafe { - cuda::cuMemPrefetchAsync( - slice.as_ptr() as cuda::CUdeviceptr, + driver_sys::cuMemPrefetchAsync( + slice.as_ptr() as driver_sys::CUdeviceptr, mem_size, -1, // CU_DEVICE_CPU #define stream.as_inner(), @@ -675,8 +677,8 @@ pub trait MemoryAdvise: private::Sealed { let mem_size = std::mem::size_of_val(slice); unsafe { - cuda::cuMemPrefetchAsync( - slice.as_ptr() as cuda::CUdeviceptr, + driver_sys::cuMemPrefetchAsync( + slice.as_ptr() as driver_sys::CUdeviceptr, mem_size, device.as_raw(), stream.as_inner(), @@ -701,14 +703,19 @@ pub trait MemoryAdvise: private::Sealed { let mem_size = std::mem::size_of_val(slice); let advice = if read_mostly { - cuda::CUmem_advise::CU_MEM_ADVISE_SET_READ_MOSTLY + driver_sys::CUmem_advise::CU_MEM_ADVISE_SET_READ_MOSTLY } else { - cuda::CUmem_advise::CU_MEM_ADVISE_UNSET_READ_MOSTLY + driver_sys::CUmem_advise::CU_MEM_ADVISE_UNSET_READ_MOSTLY }; unsafe { - cuda::cuMemAdvise(slice.as_ptr() as cuda::CUdeviceptr, mem_size, advice, 0) - .to_result()?; + driver_sys::cuMemAdvise( + slice.as_ptr() as driver_sys::CUdeviceptr, + mem_size, + advice, + 0, + ) + .to_result()?; } Ok(()) } @@ -737,10 +744,10 @@ pub trait MemoryAdvise: private::Sealed { let mem_size = std::mem::size_of_val(slice); unsafe { - cuda::cuMemAdvise( - slice.as_ptr() as cuda::CUdeviceptr, + driver_sys::cuMemAdvise( + slice.as_ptr() as driver_sys::CUdeviceptr, mem_size, - cuda::CUmem_advise::CU_MEM_ADVISE_SET_PREFERRED_LOCATION, + driver_sys::CUmem_advise::CU_MEM_ADVISE_SET_PREFERRED_LOCATION, preferred_location.map(|d| d.as_raw()).unwrap_or(-1), ) .to_result()?; @@ -754,10 +761,10 @@ pub trait MemoryAdvise: private::Sealed { let mem_size = std::mem::size_of_val(slice); unsafe { - cuda::cuMemAdvise( - slice.as_ptr() as cuda::CUdeviceptr, + 
driver_sys::cuMemAdvise( + slice.as_ptr() as driver_sys::CUdeviceptr, mem_size, - cuda::CUmem_advise::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, + driver_sys::CUmem_advise::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, 0, ) .to_result()?; diff --git a/crates/cust/src/module.rs b/crates/cust/src/module.rs index 7ad0e0ea..815cbd5c 100644 --- a/crates/cust/src/module.rs +++ b/crates/cust/src/module.rs @@ -1,9 +1,5 @@ //! Functions and types for working with CUDA modules. -use crate::error::{CudaResult, DropResult, ToResult}; -use crate::function::Function; -use crate::memory::{CopyDestination, DeviceCopy, DevicePointer}; -use crate::sys as cuda; use std::ffi::{c_void, CStr, CString}; use std::fmt; use std::marker::PhantomData; @@ -12,10 +8,16 @@ use std::os::raw::c_uint; use std::path::Path; use std::ptr; +use cust_raw::driver_sys; + +use crate::error::{CudaResult, DropResult, ToResult}; +use crate::function::Function; +use crate::memory::{CopyDestination, DeviceCopy, DevicePointer}; + /// A compiled CUDA module, loaded into a context. #[derive(Debug)] pub struct Module { - inner: cuda::CUmodule, + inner: driver_sys::CUmodule, } unsafe impl Send for Module {} @@ -91,7 +93,7 @@ pub enum ModuleJitOption { } impl ModuleJitOption { - pub fn into_raw(opts: &[Self]) -> (Vec, Vec<*mut c_void>) { + pub fn into_raw(opts: &[Self]) -> (Vec, Vec<*mut c_void>) { // And here we stumble across one of the most horrific things i have ever seen in my entire // journey of working with many parts of CUDA. As a background, CUDA usually wants an array // of pointers to values when it takes void**, after all, this is what is expected by anyone. 
@@ -107,30 +109,30 @@ impl ModuleJitOption { for opt in opts { match opt { Self::MaxRegisters(regs) => { - raw_opts.push(cuda::CUjit_option::CU_JIT_MAX_REGISTERS); + raw_opts.push(driver_sys::CUjit_option::CU_JIT_MAX_REGISTERS); raw_vals.push(*regs as usize as *mut c_void); } Self::OptLevel(level) => { - raw_opts.push(cuda::CUjit_option::CU_JIT_OPTIMIZATION_LEVEL); + raw_opts.push(driver_sys::CUjit_option::CU_JIT_OPTIMIZATION_LEVEL); raw_vals.push(*level as usize as *mut c_void); } Self::DetermineTargetFromContext => { - raw_opts.push(cuda::CUjit_option::CU_JIT_TARGET_FROM_CUCONTEXT); + raw_opts.push(driver_sys::CUjit_option::CU_JIT_TARGET_FROM_CUCONTEXT); } Self::Target(target) => { - raw_opts.push(cuda::CUjit_option::CU_JIT_TARGET); + raw_opts.push(driver_sys::CUjit_option::CU_JIT_TARGET); raw_vals.push(*target as usize as *mut c_void); } Self::Fallback(fallback) => { - raw_opts.push(cuda::CUjit_option::CU_JIT_FALLBACK_STRATEGY); + raw_opts.push(driver_sys::CUjit_option::CU_JIT_FALLBACK_STRATEGY); raw_vals.push(*fallback as usize as *mut c_void); } Self::GenenerateDebugInfo(gen) => { - raw_opts.push(cuda::CUjit_option::CU_JIT_GENERATE_DEBUG_INFO); + raw_opts.push(driver_sys::CUjit_option::CU_JIT_GENERATE_DEBUG_INFO); raw_vals.push(*gen as usize as *mut c_void); } Self::GenerateLineInfo(gen) => { - raw_opts.push(cuda::CUjit_option::CU_JIT_GENERATE_LINE_INFO); + raw_opts.push(driver_sys::CUjit_option::CU_JIT_GENERATE_LINE_INFO); raw_vals.push(*gen as usize as *mut c_void) } } @@ -179,8 +181,8 @@ impl Module { let mut module = Module { inner: ptr::null_mut(), }; - cuda::cuModuleLoad( - &mut module.inner as *mut cuda::CUmodule, + driver_sys::cuModuleLoad( + &mut module.inner as *mut driver_sys::CUmodule, bytes.as_ptr() as *const _, ) .to_result()?; @@ -253,8 +255,8 @@ impl Module { inner: ptr::null_mut(), }; let (mut options, mut option_values) = ModuleJitOption::into_raw(options); - cuda::cuModuleLoadDataEx( - &mut module.inner as *mut cuda::CUmodule, + 
driver_sys::cuModuleLoadDataEx( + &mut module.inner as *mut driver_sys::CUmodule, image, options.len() as c_uint, options.as_mut_ptr(), @@ -347,8 +349,8 @@ impl Module { let mut module = Module { inner: ptr::null_mut(), }; - cuda::cuModuleLoadData( - &mut module.inner as *mut cuda::CUmodule, + driver_sys::cuModuleLoadData( + &mut module.inner as *mut driver_sys::CUmodule, image.as_ptr() as *const c_void, ) .to_result()?; @@ -388,8 +390,8 @@ impl Module { let mut ptr: DevicePointer = DevicePointer::null(); let mut size: usize = 0; - cuda::cuModuleGetGlobal_v2( - &mut ptr as *mut DevicePointer as *mut cuda::CUdeviceptr, + driver_sys::cuModuleGetGlobal_v2( + &mut ptr as *mut DevicePointer as *mut driver_sys::CUdeviceptr, &mut size as *mut usize, self.inner, name.as_ptr(), @@ -425,10 +427,10 @@ impl Module { unsafe { let name = name.as_ref(); let cstr = CString::new(name).expect("Argument to get_function had a nul"); - let mut func: cuda::CUfunction = ptr::null_mut(); + let mut func: driver_sys::CUfunction = ptr::null_mut(); - cuda::cuModuleGetFunction( - &mut func as *mut cuda::CUfunction, + driver_sys::cuModuleGetFunction( + &mut func as *mut driver_sys::CUfunction, self.inner, cstr.as_ptr(), ) @@ -471,7 +473,7 @@ impl Module { unsafe { let inner = mem::replace(&mut module.inner, ptr::null_mut()); - match cuda::cuModuleUnload(inner).to_result() { + match driver_sys::cuModuleUnload(inner).to_result() { Ok(()) => { mem::forget(module); Ok(()) @@ -489,7 +491,7 @@ impl Drop for Module { unsafe { // No choice but to panic if this fails... let module = mem::replace(&mut self.inner, ptr::null_mut()); - cuda::cuModuleUnload(module); + driver_sys::cuModuleUnload(module); } } } @@ -511,8 +513,12 @@ impl CopyDestination for Symbol<'_, T> { let size = mem::size_of::(); if size != 0 { unsafe { - cuda::cuMemcpyHtoD_v2(self.ptr.as_raw(), val as *const T as *const c_void, size) - .to_result()? 
+ driver_sys::cuMemcpyHtoD_v2( + self.ptr.as_raw(), + val as *const T as *const c_void, + size, + ) + .to_result()? } } Ok(()) @@ -522,7 +528,7 @@ impl CopyDestination for Symbol<'_, T> { let size = mem::size_of::(); if size != 0 { unsafe { - cuda::cuMemcpyDtoH_v2(val as *const T as *mut c_void, self.ptr.as_raw(), size) + driver_sys::cuMemcpyDtoH_v2(val as *const T as *mut c_void, self.ptr.as_raw(), size) .to_result()? } } diff --git a/crates/cust/src/stream.rs b/crates/cust/src/stream.rs index 0542908b..dc67119d 100644 --- a/crates/cust/src/stream.rs +++ b/crates/cust/src/stream.rs @@ -10,15 +10,18 @@ //! are not currently supported by cust. Finally, the host can wait for all work scheduled in //! a stream to be completed. -use crate::error::{CudaResult, DropResult, ToResult}; -use crate::event::Event; -use crate::function::{BlockSize, Function, GridSize}; -use crate::sys::{self as cuda, cudaError_enum, CUstream}; use std::ffi::c_void; use std::mem; use std::panic; use std::ptr; +use cust_raw::driver_sys; +use cust_raw::driver_sys::{cudaError_enum, CUstream}; + +use crate::error::{CudaResult, DropResult, ToResult}; +use crate::event::Event; +use crate::function::{BlockSize, Function, GridSize}; + bitflags::bitflags! { /// Bit flags for configuring a CUDA Stream. 
pub struct StreamFlags: u32 { @@ -96,7 +99,7 @@ impl Stream { let mut stream = Stream { inner: ptr::null_mut(), }; - cuda::cuStreamCreateWithPriority( + driver_sys::cuStreamCreateWithPriority( &mut stream.inner as *mut CUstream, flags.bits(), priority.unwrap_or(0), @@ -110,7 +113,7 @@ impl Stream { pub fn get_flags(&self) -> CudaResult { unsafe { let mut bits = 0u32; - cuda::cuStreamGetFlags(self.inner, &mut bits as *mut u32).to_result()?; + driver_sys::cuStreamGetFlags(self.inner, &mut bits as *mut u32).to_result()?; Ok(StreamFlags::from_bits_truncate(bits)) } } @@ -138,7 +141,7 @@ impl Stream { pub fn get_priority(&self) -> CudaResult { unsafe { let mut priority = 0i32; - cuda::cuStreamGetPriority(self.inner, &mut priority as *mut i32).to_result()?; + driver_sys::cuStreamGetPriority(self.inner, &mut priority as *mut i32).to_result()?; Ok(priority) } } @@ -179,7 +182,7 @@ impl Stream { T: FnOnce(CudaResult<()>) + Send, { unsafe { - cuda::cuStreamAddCallback( + driver_sys::cuStreamAddCallback( self.inner, Some(callback_wrapper::), Box::into_raw(callback) as *mut c_void, @@ -212,7 +215,7 @@ impl Stream { /// # } /// ``` pub fn synchronize(&self) -> CudaResult<()> { - unsafe { cuda::cuStreamSynchronize(self.inner).to_result() } + unsafe { driver_sys::cuStreamSynchronize(self.inner).to_result() } } /// Make the stream wait on an event. @@ -246,7 +249,9 @@ impl Stream { /// } /// ``` pub fn wait_event(&self, event: &Event, flags: StreamWaitEventFlags) -> CudaResult<()> { - unsafe { cuda::cuStreamWaitEvent(self.inner, event.as_inner(), flags.bits()).to_result() } + unsafe { + driver_sys::cuStreamWaitEvent(self.inner, event.as_inner(), flags.bits()).to_result() + } } // Hidden implementation detail function. Highly unsafe. Use the `launch!` macro instead. 
@@ -266,7 +271,7 @@ impl Stream { let grid_size: GridSize = grid_size.into(); let block_size: BlockSize = block_size.into(); - cuda::cuLaunchKernel( + driver_sys::cuLaunchKernel( func.to_raw(), grid_size.x, grid_size.y, @@ -320,7 +325,7 @@ impl Stream { unsafe { let inner = mem::replace(&mut stream.inner, ptr::null_mut()); - match cuda::cuStreamDestroy_v2(inner).to_result() { + match driver_sys::cuStreamDestroy_v2(inner).to_result() { Ok(()) => { mem::forget(stream); Ok(()) @@ -339,7 +344,7 @@ impl Drop for Stream { unsafe { let inner = mem::replace(&mut self.inner, ptr::null_mut()); - cuda::cuStreamDestroy_v2(inner); + driver_sys::cuStreamDestroy_v2(inner); } } } diff --git a/crates/cust/src/surface.rs b/crates/cust/src/surface.rs index 7d02abb9..593b91a5 100644 --- a/crates/cust/src/surface.rs +++ b/crates/cust/src/surface.rs @@ -3,7 +3,7 @@ use std::{ os::raw::c_ulonglong, }; -use crate::sys::{ +use cust_raw::driver_sys::{ cuSurfObjectCreate, cuSurfObjectDestroy, cuSurfObjectGetResourceDesc, CUsurfObject, CUDA_RESOURCE_DESC, }; diff --git a/crates/cust/src/texture.rs b/crates/cust/src/texture.rs index b5804a5f..d7b0ea6a 100644 --- a/crates/cust/src/texture.rs +++ b/crates/cust/src/texture.rs @@ -1,15 +1,3 @@ -use crate::error::CudaResult; -use crate::error::ToResult; -use crate::memory::array::ArrayDescriptor; -use crate::memory::array::ArrayFormat; -use crate::memory::array::ArrayObject; -use crate::sys::cuTexObjectCreate; -use crate::sys::cuTexObjectGetResourceDesc; -use crate::sys::{ - self as cuda, cuTexObjectDestroy, CUDA_RESOURCE_DESC_st__bindgen_ty_1, - CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1, CUresourcetype, CUtexObject, - CUDA_RESOURCE_DESC, CUDA_RESOURCE_VIEW_DESC, CUDA_TEXTURE_DESC, -}; use std::mem::transmute; use std::mem::ManuallyDrop; use std::mem::MaybeUninit; @@ -17,6 +5,19 @@ use std::os::raw::c_ulonglong; use std::os::raw::{c_float, c_uint}; use std::ptr; +use cust_raw::driver_sys; +use cust_raw::driver_sys::{ + cuTexObjectCreate, 
cuTexObjectDestroy, cuTexObjectGetResourceDesc, + CUDA_RESOURCE_DESC_st__bindgen_ty_1, CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1, + CUresourcetype, CUtexObject, CUDA_RESOURCE_DESC, CUDA_RESOURCE_VIEW_DESC, CUDA_TEXTURE_DESC, +}; + +use crate::error::CudaResult; +use crate::error::ToResult; +use crate::memory::array::ArrayDescriptor; +use crate::memory::array::ArrayFormat; +use crate::memory::array::ArrayObject; + /// How a texture should behave if it's adressed with out of bounds indices. #[repr(u32)] #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -45,11 +46,11 @@ bitflags::bitflags! { pub struct TextureDescriptorFlags: c_uint { /// Suppresses the default behavior of having the texture promote data to floating point data in the range /// of [0, 1]. This flag does nothing if the texture is a texture of `u32`s. - const READ_AS_INTEGER = cuda::CU_TRSF_READ_AS_INTEGER; + const READ_AS_INTEGER = driver_sys::CU_TRSF_READ_AS_INTEGER; /// Suppresses the default behavior of having the texture coordinates range from [0, Dim], where Dim is the /// width or height of the CUDA array. Instead, the texture coordinates [0, 1] reference the entire array. /// This flag must be set if a mipmapped array is being used. - const NORMALIZED_COORDINATES = cuda::CU_TRSF_NORMALIZED_COORDINATES; + const NORMALIZED_COORDINATES = driver_sys::CU_TRSF_NORMALIZED_COORDINATES; /// Disables any trilinear filtering optimizations. Trilinear optimizations improve texture filtering performance /// by allowing bilinear filtering on textures in scenarios where it can closely approximate the expected results. const DISABLE_TRILINEAR_OPTIMIZATION = 0x20; // cuda-sys doesnt have this for some reason? 
@@ -110,17 +111,17 @@ impl TextureDescriptor { } = self; CUDA_TEXTURE_DESC { addressMode: unsafe { - transmute::<[TextureAdressingMode; 3], [cust_raw::CUaddress_mode_enum; 3]>( + transmute::<[TextureAdressingMode; 3], [driver_sys::CUaddress_mode_enum; 3]>( adress_modes, ) }, filterMode: unsafe { - transmute::(filter_mode) + transmute::(filter_mode) }, flags: flags.bits(), maxAnisotropy: max_anisotropy, mipmapFilterMode: unsafe { - transmute::(mipmap_filter_mode) + transmute::(mipmap_filter_mode) }, mipmapLevelBias: mipmap_level_bias, minMipmapLevelClamp: min_mipmap_level_clamp, @@ -300,7 +301,7 @@ impl ResourceViewDescriptor { CUDA_RESOURCE_VIEW_DESC { format: unsafe { - transmute::(format) + transmute::(format) }, width, height, @@ -381,7 +382,7 @@ impl ResourceDescriptor { // TODO: evaluate if its possible to cause UB by making a raw descriptor with an invalid array handle. pub(crate) fn from_raw(raw: CUDA_RESOURCE_DESC) -> Self { match raw.resType { - cuda::CUresourcetype_enum::CU_RESOURCE_TYPE_ARRAY => Self { + driver_sys::CUresourcetype_enum::CU_RESOURCE_TYPE_ARRAY => Self { flags: ResourceDescriptorFlags::from_bits(raw.flags) .expect("invalid resource descriptor flags"), ty: ResourceType::Array { diff --git a/crates/cust_raw/Cargo.toml b/crates/cust_raw/Cargo.toml index 02655b24..94c91911 100644 --- a/crates/cust_raw/Cargo.toml +++ b/crates/cust_raw/Cargo.toml @@ -6,6 +6,31 @@ license = "MIT OR Apache-2.0" description = "Low level bindings to the CUDA Driver API" repository = "https://github.com/Rust-GPU/Rust-CUDA" readme = "../../README.md" +links = "cuda" +build = "build/main.rs" [build-dependencies] -find_cuda_helper = { path = "../find_cuda_helper", version = "0.2" } +bindgen = "0.71.1" + +[package.metadata.docs.rs] +features = [ + "driver", + "runtime", + "cublas", + "cublaslt", + "cublasxt", + "cudnn", + "nvptx-compiler", + "nvvm", +] + +[features] +default = ["driver"] +driver = [] +runtime = [] +cublas = [] +cublaslt = [] +cublasxt = [] +cudnn = [] 
+nvptx-compiler = [] +nvvm = [] diff --git a/crates/cust_raw/README.md b/crates/cust_raw/README.md deleted file mode 100644 index 73d6c1f1..00000000 --- a/crates/cust_raw/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# cust_raw - -Raw bindings to the CUDA Driver API used by cust. These bindings are actively updated for new versions. -We use our own bindings so we have more control over when they get updated and what they contain. - -Current version is based on CUDA 11.2 diff --git a/crates/cust_raw/bindgen.sh b/crates/cust_raw/bindgen.sh deleted file mode 100644 index 83c8b7bc..00000000 --- a/crates/cust_raw/bindgen.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -set -exu - -bindgen \ - --whitelist-type="^CU.*" \ - --whitelist-type="^cuuint(32|64)_t" \ - --whitelist-type="^cudaError_enum" \ - --whitelist-type="^cu.*Complex$" \ - --whitelist-type="^cuda.*" \ - --whitelist-type="^libraryPropertyType.*" \ - --whitelist-var="^CU.*" \ - --whitelist-function="^cu.*" \ - --default-enum-style=rust \ - --no-doc-comments \ - --with-derive-default \ - --with-derive-eq \ - --with-derive-hash \ - --with-derive-ord \ - --size_t-is-usize \ - wrapper.h -- -I/opt/cuda/include \ - > src/cuda.rs \ No newline at end of file diff --git a/crates/cust_raw/build.rs b/crates/cust_raw/build.rs deleted file mode 100644 index a6c47b89..00000000 --- a/crates/cust_raw/build.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - find_cuda_helper::include_cuda(); -} diff --git a/crates/cust_raw/build/cublas_wrapper.h b/crates/cust_raw/build/cublas_wrapper.h new file mode 100644 index 00000000..b457c695 --- /dev/null +++ b/crates/cust_raw/build/cublas_wrapper.h @@ -0,0 +1 @@ +#include "cublas_v2.h" \ No newline at end of file diff --git a/crates/cust_raw/build/cublaslt_wrapper.h b/crates/cust_raw/build/cublaslt_wrapper.h new file mode 100644 index 00000000..4a7f2564 --- /dev/null +++ b/crates/cust_raw/build/cublaslt_wrapper.h @@ -0,0 +1 @@ +#include "cublasLt.h" \ No newline at end of file diff --git 
a/crates/cust_raw/build/cublasxt_wrapper.h b/crates/cust_raw/build/cublasxt_wrapper.h new file mode 100644 index 00000000..a3398141 --- /dev/null +++ b/crates/cust_raw/build/cublasxt_wrapper.h @@ -0,0 +1 @@ +#include "cublasXt.h" \ No newline at end of file diff --git a/crates/cust_raw/build/cuda_sdk.rs b/crates/cust_raw/build/cuda_sdk.rs new file mode 100644 index 00000000..d3760027 --- /dev/null +++ b/crates/cust_raw/build/cuda_sdk.rs @@ -0,0 +1,342 @@ +use std::env; +use std::error; +use std::ffi; +use std::fs; +use std::iter; +use std::path; + +const CUDA_ROOT_ENVS: &[&str] = &["CUDA_PATH", "CUDA_ROOT", "CUDA_TOOLKIT_ROOT_DIR"]; +const CUDA_LIBRARY_PATH_ENV: &str = "CUDA_LIBRARY_PATH"; + +/// Represents the CUDA SDK installation. +#[derive(Debug, Clone)] +pub struct CudaSdk { + /// The root directory of the CUDA SDK installation, related paths + /// and versions. + cuda_root: path::PathBuf, + cuda_include_paths: Vec, + cuda_library_paths: Vec, + driver_version: u32, + runtime_version: u32, + /// libNVVM related paths. + nvvm_include_paths: Vec, + nvvm_library_paths: Vec, + libdevice_bitcode_path: path::PathBuf, +} + +impl CudaSdk { + /// Creates a new `CudaSdk` instance by locating the CUDA SDK installation + /// and parsing versions from various header files. + /// + /// # Errors + /// Returns an error if the CUDA SDK cannot be found or if the versions cannot be parsed. + pub fn new() -> Result> { + let cuda_root = Self::find_cuda_root().ok_or("CUDA SDK cannot be found.")?; + // Retrieve the CUDA related versions. 
+ let header_path = cuda_root.join("include").join("cuda.h"); + let header_content = fs::read_to_string(header_path)?; + let driver_version = Self::parse_driver_version(header_content.as_str())?; + let header_path = cuda_root.join("include").join("cuda_runtime_api.h"); + let header_content = fs::read_to_string(header_path)?; + let runtime_version = Self::parse_runtime_version(header_content.as_str())?; + // Retrieve the CUDA include paths and library paths. + let cuda_include_paths = vec![cuda_root.join("include")]; + let cuda_library_paths = Self::find_cuda_library_dirs(cuda_root.as_path())?; + // Retrieve the NVVM related paths. + let nvvm_include_paths = Self::find_nvvm_include_dirs(cuda_root.as_path())?; + let nvvm_library_paths = Self::find_nvvm_library_dirs(cuda_root.as_path())?; + let libdevice_bitcode_path = cuda_root + .join("nvvm") + .join("libdevice") + .join("libdevice.10.bc"); + if !libdevice_bitcode_path.is_file() { + return Err(format!( + "libdevice bitcode file not found: {}.", + libdevice_bitcode_path.display() + ) + .into()); + } + + Ok(Self { + cuda_root, + cuda_include_paths, + cuda_library_paths, + driver_version, + runtime_version, + nvvm_include_paths, + nvvm_library_paths, + libdevice_bitcode_path, + }) + } + + /// Returns the root path of the CUDA SDK installation. + pub fn cuda_root(&self) -> &path::Path { + self.cuda_root.as_path() + } + + /// Returns the full version of the CUDA SDK as an integer. + /// For example, CUDA 11.8 is represented as 11080. + pub fn driver_version(&self) -> u32 { + self.driver_version + } + + /// Returns the major version of the CUDA SDK. + /// For example, for CUDA 11.8, this method returns 11. + pub fn driver_version_major(&self) -> u32 { + self.driver_version / 1000 + } + + /// Returns the minor version of the CUDA SDK. + /// For example, for CUDA 11.8, this method returns 8. 
+ pub fn driver_version_minor(&self) -> u32 { + self.driver_version / 10 % 100 + } + + /// Returns the CUDA runtime version which is defined in + /// `cuda_runtime_api.h` file as: `#define CUDART_VERSION 12080` + pub fn runtime_version(&self) -> u32 { + self.runtime_version + } + + pub fn cuda_include_paths(&self) -> &[path::PathBuf] { + &self.cuda_include_paths + } + + pub fn cuda_library_paths(&self) -> &[path::PathBuf] { + &self.cuda_library_paths + } + + pub fn nvvm_include_paths(&self) -> &[path::PathBuf] { + &self.nvvm_include_paths + } + + pub fn nvvm_library_paths(&self) -> &[path::PathBuf] { + &self.nvvm_library_paths + } + + pub fn libdevice_bitcode_path(&self) -> &path::Path { + self.libdevice_bitcode_path.as_path() + } + + pub fn related_cuda_envs(&self) -> Vec { + CUDA_ROOT_ENVS + .iter() + .map(|name| name.to_string()) + .chain(iter::once(CUDA_LIBRARY_PATH_ENV.to_string())) + .collect::>() + } + + /// Attempts to locate the root directory of the CUDA SDK installation. + /// + /// Searches common environment variables and default installation paths. + /// Returns `None` if no valid CUDA SDK installation is found. + fn find_cuda_root() -> Option { + // Search through the common environment variables first. + let p = CUDA_ROOT_ENVS + .iter() + .filter_map(|name| env::var(name).ok()) + .find(|s| Self::is_cuda_root_path(s.as_str())) + .map(path::PathBuf::from); + if p.is_some() { + return p; + } + // Then default installation paths. 
+ if cfg!(target_os = "windows") { + const CUDA_DEFAULT_PATHS: &[&str] = &[ + "C:/CUDA", + "C:/Program Files/NVIDIA", + "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA", + ]; + CUDA_DEFAULT_PATHS + .iter() + .flat_map(Self::subdirs) + .find(|p| Self::is_cuda_root_path(p)) + } else { + const CUDA_DEFAULT_PATHS: &[&str] = &["/usr/lib/cuda", "/usr/local/cuda", "/opt/cuda"]; + CUDA_DEFAULT_PATHS + .iter() + .find(|s| Self::is_cuda_root_path(s)) + .map(path::PathBuf::from) + } + } + + fn find_cuda_library_dirs( + cuda_root: &path::Path, + ) -> Result, Box> { + let (target, triple) = Self::parse_target_triple()?; + assert!(triple.len() >= 3, "Invalid target triple: {:?}", triple); + + let search_dirs = match [triple[0].as_str(), triple[1].as_str(), triple[2].as_str()] { + ["x86_64", "pc", "windows"] => { + vec![cuda_root.join("lib").join("x64")] + } + [_, _, "windows"] => { + panic!( + "Cannot support Windows architecture other than \ + x86_64-pc-windows-*. target: {target}" + ); + } + [_, _, "linux"] => { + vec![ + cuda_root.join("lib"), + cuda_root.join("lib").join("stubs"), + cuda_root.join("lib64"), + cuda_root.join("lib64").join("stubs"), + cuda_root.join("targets").join("x86_64-linux").join("lib"), + ] + } + [_, _, _] => { + panic!("Unsupported target triple: {target}"); + } + }; + let library_dirs = [Self::parse_cuda_library_path_env(), search_dirs].concat(); + let library_dirs = Self::normalize_dirpaths(library_dirs); + Ok(library_dirs) + } + + fn find_nvvm_include_dirs( + cuda_root: &path::Path, + ) -> Result, Box> { + let search_dirs = vec![cuda_root.join("nvvm").join("include")]; + let include_dirs = Self::normalize_dirpaths(search_dirs); + Ok(include_dirs) + } + + fn find_nvvm_library_dirs( + cuda_root: &path::Path, + ) -> Result, Box> { + // The bin paths are required to find the cicc compiler. 
+ let search_dirs = if cfg!(target_os = "windows") { + vec![ + cuda_root.join("nvvm").join("bin"), + cuda_root.join("nvvm").join("lib").join("x64"), + ] + } else { + vec![ + cuda_root.join("nvvm").join("bin"), + cuda_root.join("nvvm").join("lib64"), + ] + }; + let library_dirs = Self::normalize_dirpaths(search_dirs); + Ok(library_dirs) + } + + fn parse_cuda_library_path_env() -> Vec { + // The location of the libcuda, libcudart, libcublas, etc. can be hardcoded with the + // CUDA_LIBRARY_PATH environment variable. + match env::var_os(CUDA_LIBRARY_PATH_ENV) { + Some(v) => env::split_paths(v.as_os_str()).collect::>(), + None => vec![], + } + } + + /// Checks if the given path is a valid CUDA SDK installation by verifying + /// the existence of the `cuda.h` header file in the `include` directory. + fn is_cuda_root_path>(path: P) -> bool { + path.as_ref().join("include").join("cuda.h").is_file() + } + + /// Parses the content of the `cuda.h` header file to extract the driver version. + /// + /// # Errors + /// Returns an error if the `CUDA_VERSION` definition cannot be found or parsed. + fn parse_driver_version(header_content: &str) -> Result> { + let version = header_content + .lines() + .find(|line| line.contains("#define CUDA_VERSION")) + .and_then(|line| line.split_whitespace().last()) + .ok_or("Cannot find CUDA_VERSION from CUDA header file.")?; + let version = version + .parse::() + .map_err(|_| format!("Cannot parse CUDA_VERSION as u32: '{}'", version))?; + Ok(version) + } + + /// Parses the content of the `cuda_runtime_api.h` header file to extract the runtime version. + /// + /// # Errors + /// Returns an error if the `CUDART_VERSION` definition cannot be found or parsed.
+ fn parse_runtime_version(header_content: &str) -> Result> { + let version = header_content + .lines() + .find(|line| line.contains("#define CUDART_VERSION")) + .and_then(|line| line.split_whitespace().last()) + .ok_or("Cannot find CUDART_VERSION from cuda_runtime header file.")?; + let version = version + .parse::() + .map_err(|_| format!("Cannot parse CUDART_VERSION as u32: '{}'", version))?; + Ok(version) + } + + fn parse_target_triple() -> Result<(String, Vec), Box> { + let target = env::var("TARGET") + .map_err(|_| "cargo did not set the TARGET environment variable as required.")?; + + // Targets use '-' separators. e.g. x86_64-pc-windows-msvc, x86_64-unknown-linux-gnu, etc. + let triple = target + .as_str() + .split('-') + .map(|s| s.to_string()) + .collect::>(); + Ok((target, triple)) + } + + fn follow_symlink(p: &path::Path) -> Result> { + let mut p = p.to_path_buf(); + while p.is_symlink() { + p = p.read_link()?; + } + Ok(p) + } + + fn path_dedup(paths: Vec) -> Vec { + let mut seen = std::collections::HashSet::new(); + paths + .into_iter() + .filter(|p| seen.insert(p.clone())) + .collect() + } + + fn normalize_dirpaths(dirs: Vec) -> Vec { + let dirs = dirs + .into_iter() + .filter(|d| d.exists()) + .filter_map(|d| Self::follow_symlink(d.as_path()).ok()) + .collect::>(); + let dirs = Self::path_dedup(dirs); + dirs.into_iter().filter(|d| d.is_dir()).collect() + } + + fn subdirs

(p: P) -> Vec + where + P: AsRef, + { + let p = p.as_ref(); + if !p.exists() || !p.is_dir() { + return vec![]; + } + + let mut ret = Vec::new(); + let read_dir = match fs::read_dir(p) { + Ok(d) => d, + Err(_) => return vec![], + }; + for entry in read_dir { + let entry = match entry { + Ok(e) => e, + Err(_) => continue, + }; + let subpath = entry.path(); + // Skip current and parent directories + if subpath.file_name() == Some(ffi::OsStr::new(".")) + || subpath.file_name() == Some(ffi::OsStr::new("..")) + { + continue; + } + if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) { + ret.push(subpath); + } + } + ret + } +} diff --git a/crates/cust_raw/wrapper.h b/crates/cust_raw/build/driver_wrapper.h similarity index 77% rename from crates/cust_raw/wrapper.h rename to crates/cust_raw/build/driver_wrapper.h index 2afd950a..624c3397 100644 --- a/crates/cust_raw/wrapper.h +++ b/crates/cust_raw/build/driver_wrapper.h @@ -1,5 +1,4 @@ #include "cuComplex.h" #include "cuda.h" #include "cudaProfiler.h" -#include "library_types.h" #include "vector_types.h" \ No newline at end of file diff --git a/crates/cust_raw/build/main.rs b/crates/cust_raw/build/main.rs new file mode 100644 index 00000000..7137b40d --- /dev/null +++ b/crates/cust_raw/build/main.rs @@ -0,0 +1,247 @@ +use std::env; +use std::fs; +use std::path; + +pub mod cuda_sdk; + +fn main() { + let outdir = path::PathBuf::from( + env::var("OUT_DIR").expect("OUT_DIR environment variable should be set by cargo."), + ); + + let sdk = cuda_sdk::CudaSdk::new().expect("Cannot create CUDA SDK instance."); + // Emit metadata for the build scripts of dependent crates.
+ println!("cargo::metadata=root={}", sdk.cuda_root().display()); + println!("cargo::metadata=driver_version={}", sdk.driver_version()); + println!( + "cargo::metadata=driver_version_major={}", + sdk.driver_version_major() + ); + println!( + "cargo::metadata=driver_version_minor={}", + sdk.driver_version_minor() + ); + println!("cargo::metadata=runtime_version={}", sdk.runtime_version()); + let metadata_cuda_include = env::join_paths(sdk.cuda_include_paths()) + .map(|s| s.to_string_lossy().to_string()) + .expect("Failed to build metadata for cuda_include."); + let metadata_nvvm_include = env::join_paths(sdk.nvvm_include_paths()) + .map(|s| s.to_string_lossy().to_string()) + .expect("Failed to build metadata for nvvm_include."); + println!("cargo::metadata=cuda_include={}", metadata_cuda_include); + println!("cargo::metadata=nvvm_include={}", metadata_nvvm_include); + // Re-run build script conditions. + println!("cargo::rerun-if-changed=build"); + for e in sdk.related_cuda_envs() { + println!("cargo::rerun-if-env-changed={}", e); + } + + create_cuda_driver_bindings(&sdk, outdir.as_path()); + create_cuda_runtime_bindings(&sdk, outdir.as_path()); + create_cublas_bindings(&sdk, outdir.as_path()); + create_nptx_compiler_bindings(&sdk, outdir.as_path()); + create_nvvm_bindings(&sdk, outdir.as_path()); + + if cfg!(any( + feature = "driver", + feature = "runtime", + feature = "cublas", + feature = "cublaslt", + feature = "cublasxt" + )) { + for libdir in sdk.cuda_library_paths() { + println!("cargo::rustc-link-search=native={}", libdir.display()); + } + println!("cargo::rustc-link-lib=dylib=cuda"); + } + if cfg!(feature = "runtime") { + println!("cargo::rustc-link-lib=dylib=cudart"); + } + if cfg!(feature = "cublas") || cfg!(feature = "cublasxt") { + println!("cargo::rustc-link-lib=dylib=cublas"); + } + if cfg!(feature = "cublaslt") { + println!("cargo::rustc-link-lib=dylib=cublaslt"); + } + if cfg!(feature = "nvvm") { + for libdir in sdk.nvvm_library_paths() { + 
println!("cargo::rustc-link-search=native={}", libdir.display()); + } + println!("cargo::rustc-link-lib=dylib=nvvm"); + // Handle libdevice support. + fs::copy(sdk.libdevice_bitcode_path(), outdir.join("libdevice.bc")) + .expect("Cannot copy libdevice bitcode file."); + } +} + +fn create_cuda_driver_bindings(sdk: &cuda_sdk::CudaSdk, outdir: &path::Path) { + if !cfg!(feature = "driver") { + return; + } + let bindgen_path = path::PathBuf::from(format!("{}/driver_sys.rs", outdir.display())); + let bindings = bindgen::Builder::default() + .header("build/driver_wrapper.h") + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .clang_args( + sdk.cuda_include_paths() + .iter() + .map(|p| format!("-I{}", p.display())), + ) + .allowlist_type("^CU.*") + .allowlist_type("^cuuint(32|64)_t") + .allowlist_type("^cudaError_enum") + .allowlist_type("^cu.*Complex$") + .allowlist_type("^cuda.*") + .allowlist_var("^CU.*") + .allowlist_function("^cu.*") + .default_enum_style(bindgen::EnumVariation::Rust { + non_exhaustive: false, + }) + .derive_default(true) + .derive_eq(true) + .derive_hash(true) + .derive_ord(true) + .size_t_is_usize(true) + .layout_tests(true) + .generate() + .expect("Unable to generate CUDA driver bindings."); + bindings + .write_to_file(bindgen_path.as_path()) + .expect("Cannot write CUDA driver bindgen output to file."); +} + +fn create_cuda_runtime_bindings(sdk: &cuda_sdk::CudaSdk, outdir: &path::Path) { + if !cfg!(feature = "runtime") { + return; + } + let bindgen_path = path::PathBuf::from(format!("{}/runtime_sys.rs", outdir.display())); + let bindings = bindgen::Builder::default() + .header("build/runtime_wrapper.h") + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .clang_args( + sdk.cuda_include_paths() + .iter() + .map(|p| format!("-I{}", p.display())), + ) + .allowlist_type("^CU.*") + .allowlist_type("^cuda.*") + .allowlist_type("^libraryPropertyType.*") + .allowlist_var("^CU.*") + .allowlist_function("^cu.*") + 
.default_enum_style(bindgen::EnumVariation::Rust { + non_exhaustive: false, + }) + .derive_default(true) + .derive_eq(true) + .derive_hash(true) + .derive_ord(true) + .size_t_is_usize(true) + .layout_tests(true) + .generate() + .expect("Unable to generate CUDA runtime bindings."); + bindings + .write_to_file(bindgen_path.as_path()) + .expect("Cannot write CUDA runtime bindgen output to file."); +} + +fn create_cublas_bindings(sdk: &cuda_sdk::CudaSdk, outdir: &path::Path) { + #[rustfmt::skip] + let params = &[ + (cfg!(feature = "cublas"), "cublas", "^cublas.*", "^CUBLAS.*"), + (cfg!(feature = "cublaslt"), "cublaslt", "^cublasLt.*", "^CUBLASLT.*"), + (cfg!(feature = "cublasxt"), "cublasxt", "^cublasXt.*", "^CUBLASXT.*"), + ]; + for (should_generate, pkg, tf, var) in params { + if !should_generate { + continue; + } + let bindgen_path = path::PathBuf::from(format!("{}/{pkg}_sys.rs", outdir.display())); + let bindings = bindgen::Builder::default() + .header(format!("build/{pkg}_wrapper.h")) + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .clang_args( + sdk.cuda_include_paths() + .iter() + .map(|p| format!("-I{}", p.display())), + ) + .allowlist_type(tf) + .allowlist_function(tf) + .allowlist_var(var) + .default_enum_style(bindgen::EnumVariation::Rust { + non_exhaustive: false, + }) + .derive_default(true) + .derive_eq(true) + .derive_hash(true) + .derive_ord(true) + .size_t_is_usize(true) + .layout_tests(true) + .generate() + .unwrap_or_else(|_| panic!("Unable to generate {pkg} bindings.")); + bindings + .write_to_file(bindgen_path.as_path()) + .unwrap_or_else(|_| panic!("Cannot write {pkg} bindgen output to file.")); + } +} + +fn create_nptx_compiler_bindings(sdk: &cuda_sdk::CudaSdk, outdir: &path::Path) { + if !cfg!(feature = "nvptx-compiler") { + return; + } + let bindgen_path = path::PathBuf::from(format!("{}/nvptx_compiler_sys.rs", outdir.display())); + let bindings = bindgen::Builder::default() + .header("build/nvptx_compiler_wrapper.h") + 
.parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .clang_args( + sdk.cuda_include_paths() + .iter() + .map(|p| format!("-I{}", p.display())), + ) + .allowlist_function("^nvPTX.*") + .allowlist_type("^nvPTX.*") + .allowlist_var("^NVPTX.*") + .default_enum_style(bindgen::EnumVariation::Rust { + non_exhaustive: false, + }) + .derive_default(true) + .derive_eq(true) + .derive_hash(true) + .derive_ord(true) + .size_t_is_usize(true) + .layout_tests(true) + .generate() + .expect("Unable to generate nvptx-compiler bindings."); + bindings + .write_to_file(bindgen_path.as_path()) + .expect("Cannot write nvptx-compiler bindgen output to file."); +} + +fn create_nvvm_bindings(sdk: &cuda_sdk::CudaSdk, outdir: &path::Path) { + if !cfg!(feature = "nvvm") { + return; + } + let bindgen_path = path::PathBuf::from(format!("{}/nvvm_sys.rs", outdir.display())); + let bindings = bindgen::Builder::default() + .header("build/nvvm_wrapper.h") + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .clang_args( + sdk.nvvm_include_paths() + .iter() + .map(|p| format!("-I{}", p.display())), + ) + .allowlist_function("^nvvm.*") + .default_enum_style(bindgen::EnumVariation::Rust { + non_exhaustive: false, + }) + .derive_default(true) + .derive_eq(true) + .derive_hash(true) + .derive_ord(true) + .size_t_is_usize(true) + .layout_tests(true) + .generate() + .expect("Unable to generate libNVVM bindings."); + bindings + .write_to_file(bindgen_path.as_path()) + .expect("Cannot write libNVVM bindgen output to file."); +} diff --git a/crates/cust_raw/build/nvptx_compiler_wrapper.h b/crates/cust_raw/build/nvptx_compiler_wrapper.h new file mode 100644 index 00000000..58a3cc5c --- /dev/null +++ b/crates/cust_raw/build/nvptx_compiler_wrapper.h @@ -0,0 +1 @@ +#include "nvPTXCompiler.h" \ No newline at end of file diff --git a/crates/cust_raw/build/nvvm_wrapper.h b/crates/cust_raw/build/nvvm_wrapper.h new file mode 100644 index 00000000..71bf8788 --- /dev/null +++ 
b/crates/cust_raw/build/nvvm_wrapper.h @@ -0,0 +1 @@ +#include "nvvm.h" \ No newline at end of file diff --git a/crates/cust_raw/build/runtime_wrapper.h b/crates/cust_raw/build/runtime_wrapper.h new file mode 100644 index 00000000..a2f4379f --- /dev/null +++ b/crates/cust_raw/build/runtime_wrapper.h @@ -0,0 +1,3 @@ +#include "cuda_runtime.h" +#include "cuda_runtime_api.h" +#include "cuda_profiler_api.h" \ No newline at end of file diff --git a/crates/cust_raw/src/cublas_sys.rs b/crates/cust_raw/src/cublas_sys.rs new file mode 100644 index 00000000..712b9b77 --- /dev/null +++ b/crates/cust_raw/src/cublas_sys.rs @@ -0,0 +1,5 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +include!(concat!(env!("OUT_DIR"), "/cublas_sys.rs")); diff --git a/crates/cust_raw/src/cublaslt_sys.rs b/crates/cust_raw/src/cublaslt_sys.rs new file mode 100644 index 00000000..6347ffbd --- /dev/null +++ b/crates/cust_raw/src/cublaslt_sys.rs @@ -0,0 +1,5 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +include!(concat!(env!("OUT_DIR"), "/cublaslt_sys.rs")); diff --git a/crates/cust_raw/src/cublasxt_sys.rs b/crates/cust_raw/src/cublasxt_sys.rs new file mode 100644 index 00000000..db2b81c6 --- /dev/null +++ b/crates/cust_raw/src/cublasxt_sys.rs @@ -0,0 +1,5 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +include!(concat!(env!("OUT_DIR"), "/cublasxt_sys.rs")); diff --git a/crates/cust_raw/src/cuda.rs b/crates/cust_raw/src/cuda.rs deleted file mode 100644 index 3cd0571c..00000000 --- a/crates/cust_raw/src/cuda.rs +++ /dev/null @@ -1,8989 +0,0 @@ -/* automatically generated by rust-bindgen 0.58.1 */ - -pub const CUDA_VERSION: u32 = 11040; -pub const CU_IPC_HANDLE_SIZE: u32 = 64; -pub const CU_MEMHOSTALLOC_PORTABLE: u32 = 1; -pub const CU_MEMHOSTALLOC_DEVICEMAP: u32 = 2; -pub const CU_MEMHOSTALLOC_WRITECOMBINED: u32 = 4; -pub const 
CU_MEMHOSTREGISTER_PORTABLE: u32 = 1; -pub const CU_MEMHOSTREGISTER_DEVICEMAP: u32 = 2; -pub const CU_MEMHOSTREGISTER_IOMEMORY: u32 = 4; -pub const CU_MEMHOSTREGISTER_READ_ONLY: u32 = 8; -pub const CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL: u32 = 1; -pub const CUDA_EXTERNAL_MEMORY_DEDICATED: u32 = 1; -pub const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC: u32 = 1; -pub const CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC: u32 = 2; -pub const CUDA_NVSCISYNC_ATTR_SIGNAL: u32 = 1; -pub const CUDA_NVSCISYNC_ATTR_WAIT: u32 = 2; -pub const CU_MEM_CREATE_USAGE_TILE_POOL: u32 = 1; -pub const CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC: u32 = 1; -pub const CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC: u32 = 2; -pub const CUDA_ARRAY3D_LAYERED: u32 = 1; -pub const CUDA_ARRAY3D_2DARRAY: u32 = 1; -pub const CUDA_ARRAY3D_SURFACE_LDST: u32 = 2; -pub const CUDA_ARRAY3D_CUBEMAP: u32 = 4; -pub const CUDA_ARRAY3D_TEXTURE_GATHER: u32 = 8; -pub const CUDA_ARRAY3D_DEPTH_TEXTURE: u32 = 16; -pub const CUDA_ARRAY3D_COLOR_ATTACHMENT: u32 = 32; -pub const CUDA_ARRAY3D_SPARSE: u32 = 64; -pub const CU_TRSA_OVERRIDE_FORMAT: u32 = 1; -pub const CU_TRSF_READ_AS_INTEGER: u32 = 1; -pub const CU_TRSF_NORMALIZED_COORDINATES: u32 = 2; -pub const CU_TRSF_SRGB: u32 = 16; -pub const CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION: u32 = 32; -pub const CU_PARAM_TR_DEFAULT: i32 = -1; -#[repr(C)] -#[repr(align(8))] -#[derive(Debug, Default, Copy, Clone, PartialOrd, PartialEq)] -pub struct float2 { - pub x: f32, - pub y: f32, -} -#[test] -fn bindgen_test_layout_float2() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!("Size of: ", stringify!(float2)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(float2)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).x as *const _ as usize }, - 0usize, - concat!("Offset of field: ", stringify!(float2), "::", stringify!(x)) - ); - assert_eq!( - unsafe { 
&(*(::std::ptr::null::())).y as *const _ as usize }, - 4usize, - concat!("Offset of field: ", stringify!(float2), "::", stringify!(y)) - ); -} -#[repr(C)] -#[repr(align(16))] -#[derive(Debug, Default, Copy, Clone, PartialOrd, PartialEq)] -pub struct double2 { - pub x: f64, - pub y: f64, -} -#[test] -fn bindgen_test_layout_double2() { - assert_eq!( - ::std::mem::size_of::(), - 16usize, - concat!("Size of: ", stringify!(double2)) - ); - assert_eq!( - ::std::mem::align_of::(), - 16usize, - concat!("Alignment of ", stringify!(double2)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).x as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(double2), - "::", - stringify!(x) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).y as *const _ as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(double2), - "::", - stringify!(y) - ) - ); -} -pub type cuFloatComplex = float2; -pub type cuDoubleComplex = double2; -pub type cuComplex = cuFloatComplex; -pub type cuuint32_t = ::std::os::raw::c_uint; -pub type cuuint64_t = ::std::os::raw::c_ulonglong; -pub type CUdeviceptr_v2 = ::std::os::raw::c_ulonglong; -pub type CUdeviceptr = CUdeviceptr_v2; -pub type CUdevice_v1 = ::std::os::raw::c_int; -pub type CUdevice = CUdevice_v1; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUctx_st { - _unused: [u8; 0], -} -pub type CUcontext = *mut CUctx_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUmod_st { - _unused: [u8; 0], -} -pub type CUmodule = *mut CUmod_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUfunc_st { - _unused: [u8; 0], -} -pub type CUfunction = *mut CUfunc_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUarray_st { - _unused: [u8; 0], -} -pub type CUarray = *mut CUarray_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUmipmappedArray_st { - _unused: [u8; 0], -} -pub type CUmipmappedArray = *mut CUmipmappedArray_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub 
struct CUtexref_st { - _unused: [u8; 0], -} -pub type CUtexref = *mut CUtexref_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUsurfref_st { - _unused: [u8; 0], -} -pub type CUsurfref = *mut CUsurfref_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUevent_st { - _unused: [u8; 0], -} -pub type CUevent = *mut CUevent_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUstream_st { - _unused: [u8; 0], -} -pub type CUstream = *mut CUstream_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUgraphicsResource_st { - _unused: [u8; 0], -} -pub type CUgraphicsResource = *mut CUgraphicsResource_st; -pub type CUtexObject_v1 = ::std::os::raw::c_ulonglong; -pub type CUtexObject = CUtexObject_v1; -pub type CUsurfObject_v1 = ::std::os::raw::c_ulonglong; -pub type CUsurfObject = CUsurfObject_v1; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUextMemory_st { - _unused: [u8; 0], -} -pub type CUexternalMemory = *mut CUextMemory_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUextSemaphore_st { - _unused: [u8; 0], -} -pub type CUexternalSemaphore = *mut CUextSemaphore_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUgraph_st { - _unused: [u8; 0], -} -pub type CUgraph = *mut CUgraph_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUgraphNode_st { - _unused: [u8; 0], -} -pub type CUgraphNode = *mut CUgraphNode_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUgraphExec_st { - _unused: [u8; 0], -} -pub type CUgraphExec = *mut CUgraphExec_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUmemPoolHandle_st { - _unused: [u8; 0], -} -pub type CUmemoryPool = *mut CUmemPoolHandle_st; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUuserObject_st { - _unused: [u8; 0], -} -pub type CUuserObject = *mut CUuserObject_st; -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUuuid_st { - pub bytes: [::std::os::raw::c_char; 16usize], -} 
-#[test] -fn bindgen_test_layout_CUuuid_st() { - assert_eq!( - ::std::mem::size_of::(), - 16usize, - concat!("Size of: ", stringify!(CUuuid_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 1usize, - concat!("Alignment of ", stringify!(CUuuid_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).bytes as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUuuid_st), - "::", - stringify!(bytes) - ) - ); -} -pub type CUuuid = CUuuid_st; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUipcEventHandle_st { - pub reserved: [::std::os::raw::c_char; 64usize], -} -#[test] -fn bindgen_test_layout_CUipcEventHandle_st() { - assert_eq!( - ::std::mem::size_of::(), - 64usize, - concat!("Size of: ", stringify!(CUipcEventHandle_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 1usize, - concat!("Alignment of ", stringify!(CUipcEventHandle_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).reserved as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUipcEventHandle_st), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUipcEventHandle_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUipcEventHandle_v1 = CUipcEventHandle_st; -pub type CUipcEventHandle = CUipcEventHandle_v1; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUipcMemHandle_st { - pub reserved: [::std::os::raw::c_char; 64usize], -} -#[test] -fn bindgen_test_layout_CUipcMemHandle_st() { - assert_eq!( - ::std::mem::size_of::(), - 64usize, - concat!("Size of: ", stringify!(CUipcMemHandle_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 1usize, - concat!("Alignment of ", stringify!(CUipcMemHandle_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).reserved as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUipcMemHandle_st), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUipcMemHandle_st { - fn default() -> Self { - unsafe { 
::std::mem::zeroed() } - } -} -pub type CUipcMemHandle_v1 = CUipcMemHandle_st; -pub type CUipcMemHandle = CUipcMemHandle_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUipcMem_flags_enum { - CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 1, -} -pub use self::CUipcMem_flags_enum as CUipcMem_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmemAttach_flags_enum { - CU_MEM_ATTACH_GLOBAL = 1, - CU_MEM_ATTACH_HOST = 2, - CU_MEM_ATTACH_SINGLE = 4, -} -pub use self::CUmemAttach_flags_enum as CUmemAttach_flags; -impl CUctx_flags_enum { - pub const CU_CTX_BLOCKING_SYNC: CUctx_flags_enum = CUctx_flags_enum::CU_CTX_SCHED_BLOCKING_SYNC; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUctx_flags_enum { - CU_CTX_SCHED_AUTO = 0, - CU_CTX_SCHED_SPIN = 1, - CU_CTX_SCHED_YIELD = 2, - CU_CTX_SCHED_BLOCKING_SYNC = 4, - CU_CTX_SCHED_MASK = 7, - CU_CTX_MAP_HOST = 8, - CU_CTX_LMEM_RESIZE_TO_MAX = 16, - CU_CTX_FLAGS_MASK = 31, -} -pub use self::CUctx_flags_enum as CUctx_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUstream_flags_enum { - CU_STREAM_DEFAULT = 0, - CU_STREAM_NON_BLOCKING = 1, -} -pub use self::CUstream_flags_enum as CUstream_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUevent_flags_enum { - CU_EVENT_DEFAULT = 0, - CU_EVENT_BLOCKING_SYNC = 1, - CU_EVENT_DISABLE_TIMING = 2, - CU_EVENT_INTERPROCESS = 4, -} -pub use self::CUevent_flags_enum as CUevent_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUevent_record_flags_enum { - CU_EVENT_RECORD_DEFAULT = 0, - CU_EVENT_RECORD_EXTERNAL = 1, -} -pub use self::CUevent_record_flags_enum as CUevent_record_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUevent_wait_flags_enum { - 
CU_EVENT_WAIT_DEFAULT = 0, - CU_EVENT_WAIT_EXTERNAL = 1, -} -pub use self::CUevent_wait_flags_enum as CUevent_wait_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUstreamWaitValue_flags_enum { - CU_STREAM_WAIT_VALUE_GEQ = 0, - CU_STREAM_WAIT_VALUE_EQ = 1, - CU_STREAM_WAIT_VALUE_AND = 2, - CU_STREAM_WAIT_VALUE_NOR = 3, - CU_STREAM_WAIT_VALUE_FLUSH = 1073741824, -} -pub use self::CUstreamWaitValue_flags_enum as CUstreamWaitValue_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUstreamWriteValue_flags_enum { - CU_STREAM_WRITE_VALUE_DEFAULT = 0, - CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 1, -} -pub use self::CUstreamWriteValue_flags_enum as CUstreamWriteValue_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUstreamBatchMemOpType_enum { - CU_STREAM_MEM_OP_WAIT_VALUE_32 = 1, - CU_STREAM_MEM_OP_WRITE_VALUE_32 = 2, - CU_STREAM_MEM_OP_WAIT_VALUE_64 = 4, - CU_STREAM_MEM_OP_WRITE_VALUE_64 = 5, - CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3, -} -pub use self::CUstreamBatchMemOpType_enum as CUstreamBatchMemOpType; -#[repr(C)] -#[derive(Copy, Clone)] -pub union CUstreamBatchMemOpParams_union { - pub operation: CUstreamBatchMemOpType, - pub waitValue: CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st, - pub writeValue: CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st, - pub flushRemoteWrites: CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st, - pub pad: [cuuint64_t; 6usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st { - pub operation: CUstreamBatchMemOpType, - pub address: CUdeviceptr, - pub __bindgen_anon_1: - CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1, - pub flags: ::std::os::raw::c_uint, - pub alias: CUdeviceptr, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union 
CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1 { - pub value: cuuint32_t, - pub value64: cuuint64_t, -} -#[test] -fn bindgen_test_layout_CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1( -) { - assert_eq!( - ::std::mem::size_of::< - CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1, - >(), - 8usize, - concat!( - "Size of: ", - stringify!( - CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1 - ) - ) - ); - assert_eq!( - ::std::mem::align_of::< - CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1, - >(), - 8usize, - concat!( - "Alignment of ", - stringify!( - CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1 - ) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1, - >())) - .value as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!( - CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1 - ), - "::", - stringify!(value) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1, - >())) - .value64 as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!( - CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1 - ), - "::", - stringify!(value64) - ) - ); -} -impl Default for CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st() { - assert_eq!( - ::std::mem::size_of::(), - 40usize, - concat!( - "Size of: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - 
"Alignment of ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::( - ))) - .operation as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st), - "::", - stringify!(operation) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::( - ))) - .address as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st), - "::", - stringify!(address) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::( - ))) - .flags as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st), - "::", - stringify!(flags) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::( - ))) - .alias as *const _ as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st), - "::", - stringify!(alias) - ) - ); -} -impl Default for CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st { - pub operation: CUstreamBatchMemOpType, - pub address: CUdeviceptr, - pub __bindgen_anon_1: - CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1, - pub flags: ::std::os::raw::c_uint, - pub alias: CUdeviceptr, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1 { - pub value: cuuint32_t, - pub value64: cuuint64_t, -} -#[test] -fn bindgen_test_layout_CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1( -) { - assert_eq!( - ::std::mem::size_of::< - 
CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1, - >(), - 8usize, - concat!( - "Size of: ", - stringify!( - CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1 - ) - ) - ); - assert_eq!( - ::std::mem::align_of::< - CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1, - >(), - 8usize, - concat!( - "Alignment of ", - stringify!( - CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1 - ) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1, - >())) - .value as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!( - CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1 - ), - "::", - stringify!(value) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1, - >())) - .value64 as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!( - CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1 - ), - "::", - stringify!(value64) - ) - ); -} -impl Default for CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st() { - assert_eq!( - ::std::mem::size_of::(), - 40usize, - concat!( - "Size of: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::( - ))) - .operation as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - 
stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st), - "::", - stringify!(operation) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::( - ))) - .address as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st), - "::", - stringify!(address) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::( - ))) - .flags as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st), - "::", - stringify!(flags) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::( - ))) - .alias as *const _ as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st), - "::", - stringify!(alias) - ) - ); -} -impl Default for CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st { - pub operation: CUstreamBatchMemOpType, - pub flags: ::std::os::raw::c_uint, -} -#[test] -fn bindgen_test_layout_CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st() { - assert_eq!( - ::std::mem::size_of::( - ), - 8usize, - concat!( - "Size of: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st) - ) - ); - assert_eq!( - ::std::mem::align_of::< - CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st, - >(), - 4usize, - concat!( - "Alignment of ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st, - >())) - .operation as *const _ as usize - }, - 0usize, 
- concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st), - "::", - stringify!(operation) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st, - >())) - .flags as *const _ as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st), - "::", - stringify!(flags) - ) - ); -} -impl Default for CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUstreamBatchMemOpParams_union() { - assert_eq!( - ::std::mem::size_of::(), - 48usize, - concat!("Size of: ", stringify!(CUstreamBatchMemOpParams_union)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUstreamBatchMemOpParams_union)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).operation as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union), - "::", - stringify!(operation) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).waitValue as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union), - "::", - stringify!(waitValue) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).writeValue as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union), - "::", - stringify!(writeValue) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).flushRemoteWrites as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union), - "::", - stringify!(flushRemoteWrites) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).pad as *const _ as usize - }, - 0usize, - concat!( - 
"Offset of field: ", - stringify!(CUstreamBatchMemOpParams_union), - "::", - stringify!(pad) - ) - ); -} -impl Default for CUstreamBatchMemOpParams_union { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUstreamBatchMemOpParams_v1 = CUstreamBatchMemOpParams_union; -pub type CUstreamBatchMemOpParams = CUstreamBatchMemOpParams_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUoccupancy_flags_enum { - CU_OCCUPANCY_DEFAULT = 0, - CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 1, -} -pub use self::CUoccupancy_flags_enum as CUoccupancy_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUstreamUpdateCaptureDependencies_flags_enum { - CU_STREAM_ADD_CAPTURE_DEPENDENCIES = 0, - CU_STREAM_SET_CAPTURE_DEPENDENCIES = 1, -} -pub use self::CUstreamUpdateCaptureDependencies_flags_enum as CUstreamUpdateCaptureDependencies_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUarray_format_enum { - CU_AD_FORMAT_UNSIGNED_INT8 = 1, - CU_AD_FORMAT_UNSIGNED_INT16 = 2, - CU_AD_FORMAT_UNSIGNED_INT32 = 3, - CU_AD_FORMAT_SIGNED_INT8 = 8, - CU_AD_FORMAT_SIGNED_INT16 = 9, - CU_AD_FORMAT_SIGNED_INT32 = 10, - CU_AD_FORMAT_HALF = 16, - CU_AD_FORMAT_FLOAT = 32, - CU_AD_FORMAT_NV12 = 176, -} -pub use self::CUarray_format_enum as CUarray_format; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUaddress_mode_enum { - CU_TR_ADDRESS_MODE_WRAP = 0, - CU_TR_ADDRESS_MODE_CLAMP = 1, - CU_TR_ADDRESS_MODE_MIRROR = 2, - CU_TR_ADDRESS_MODE_BORDER = 3, -} -pub use self::CUaddress_mode_enum as CUaddress_mode; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUfilter_mode_enum { - CU_TR_FILTER_MODE_POINT = 0, - CU_TR_FILTER_MODE_LINEAR = 1, -} -pub use self::CUfilter_mode_enum as CUfilter_mode; -impl CUdevice_attribute_enum { - pub const 
CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK: CUdevice_attribute_enum = - CUdevice_attribute_enum::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK; -} -impl CUdevice_attribute_enum { - pub const CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK: CUdevice_attribute_enum = - CUdevice_attribute_enum::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK; -} -impl CUdevice_attribute_enum { - pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH: CUdevice_attribute_enum = - CUdevice_attribute_enum::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH; -} -impl CUdevice_attribute_enum { - pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT: CUdevice_attribute_enum = - CUdevice_attribute_enum::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT; -} -impl CUdevice_attribute_enum { - pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES: CUdevice_attribute_enum = - CUdevice_attribute_enum::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS; -} -impl CUdevice_attribute_enum { - pub const CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED: CUdevice_attribute_enum = - CUdevice_attribute_enum::CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUdevice_attribute_enum { - CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, - CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, - CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, - CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, - CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, - CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, - CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, - CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, - CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, - CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, - CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, - CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, - CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, - CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, - CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, - CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, 
- CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, - CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, - CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, - CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29, - CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, - CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, - CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, - CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, - CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, - CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35, - CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36, - CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37, - CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38, - CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39, - CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40, - CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43, - CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49, - CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50, - CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, - 
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74, - CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75, - CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77, - CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78, - CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79, - CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80, - CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81, - CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82, - CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83, - CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84, - CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85, - CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86, - CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87, - CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88, - CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89, - 
CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90, - CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91, - CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92, - CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93, - CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94, - CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95, - CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96, - CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97, - CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98, - CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99, - CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100, - CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101, - CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102, - CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103, - CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104, - CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105, - CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106, - CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107, - CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108, - CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109, - CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110, - CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111, - CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112, - CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113, - CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114, - CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115, - CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116, - CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117, - CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118, - CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119, - CU_DEVICE_ATTRIBUTE_MAX = 120, -} -pub use self::CUdevice_attribute_enum as CUdevice_attribute; -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, 
PartialEq, Eq)] -pub struct CUdevprop_st { - pub maxThreadsPerBlock: ::std::os::raw::c_int, - pub maxThreadsDim: [::std::os::raw::c_int; 3usize], - pub maxGridSize: [::std::os::raw::c_int; 3usize], - pub sharedMemPerBlock: ::std::os::raw::c_int, - pub totalConstantMemory: ::std::os::raw::c_int, - pub SIMDWidth: ::std::os::raw::c_int, - pub memPitch: ::std::os::raw::c_int, - pub regsPerBlock: ::std::os::raw::c_int, - pub clockRate: ::std::os::raw::c_int, - pub textureAlign: ::std::os::raw::c_int, -} -#[test] -fn bindgen_test_layout_CUdevprop_st() { - assert_eq!( - ::std::mem::size_of::(), - 56usize, - concat!("Size of: ", stringify!(CUdevprop_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!("Alignment of ", stringify!(CUdevprop_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).maxThreadsPerBlock as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUdevprop_st), - "::", - stringify!(maxThreadsPerBlock) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).maxThreadsDim as *const _ as usize }, - 4usize, - concat!( - "Offset of field: ", - stringify!(CUdevprop_st), - "::", - stringify!(maxThreadsDim) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).maxGridSize as *const _ as usize }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUdevprop_st), - "::", - stringify!(maxGridSize) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).sharedMemPerBlock as *const _ as usize }, - 28usize, - concat!( - "Offset of field: ", - stringify!(CUdevprop_st), - "::", - stringify!(sharedMemPerBlock) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).totalConstantMemory as *const _ as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUdevprop_st), - "::", - stringify!(totalConstantMemory) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).SIMDWidth as *const _ as usize }, - 36usize, - concat!( - "Offset of field: ", - stringify!(CUdevprop_st), - 
"::", - stringify!(SIMDWidth) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).memPitch as *const _ as usize }, - 40usize, - concat!( - "Offset of field: ", - stringify!(CUdevprop_st), - "::", - stringify!(memPitch) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).regsPerBlock as *const _ as usize }, - 44usize, - concat!( - "Offset of field: ", - stringify!(CUdevprop_st), - "::", - stringify!(regsPerBlock) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).clockRate as *const _ as usize }, - 48usize, - concat!( - "Offset of field: ", - stringify!(CUdevprop_st), - "::", - stringify!(clockRate) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).textureAlign as *const _ as usize }, - 52usize, - concat!( - "Offset of field: ", - stringify!(CUdevprop_st), - "::", - stringify!(textureAlign) - ) - ); -} -pub type CUdevprop_v1 = CUdevprop_st; -pub type CUdevprop = CUdevprop_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUpointer_attribute_enum { - CU_POINTER_ATTRIBUTE_CONTEXT = 1, - CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2, - CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3, - CU_POINTER_ATTRIBUTE_HOST_POINTER = 4, - CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5, - CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6, - CU_POINTER_ATTRIBUTE_BUFFER_ID = 7, - CU_POINTER_ATTRIBUTE_IS_MANAGED = 8, - CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9, - CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10, - CU_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11, - CU_POINTER_ATTRIBUTE_RANGE_SIZE = 12, - CU_POINTER_ATTRIBUTE_MAPPED = 13, - CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14, - CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15, - CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16, - CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = 17, -} -pub use self::CUpointer_attribute_enum as CUpointer_attribute; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUfunction_attribute_enum { - 
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, - CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1, - CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2, - CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3, - CU_FUNC_ATTRIBUTE_NUM_REGS = 4, - CU_FUNC_ATTRIBUTE_PTX_VERSION = 5, - CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6, - CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7, - CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8, - CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9, - CU_FUNC_ATTRIBUTE_MAX = 10, -} -pub use self::CUfunction_attribute_enum as CUfunction_attribute; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUfunc_cache_enum { - CU_FUNC_CACHE_PREFER_NONE = 0, - CU_FUNC_CACHE_PREFER_SHARED = 1, - CU_FUNC_CACHE_PREFER_L1 = 2, - CU_FUNC_CACHE_PREFER_EQUAL = 3, -} -pub use self::CUfunc_cache_enum as CUfunc_cache; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUsharedconfig_enum { - CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0, - CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 1, - CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 2, -} -pub use self::CUsharedconfig_enum as CUsharedconfig; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUshared_carveout_enum { - CU_SHAREDMEM_CARVEOUT_DEFAULT = -1, - CU_SHAREDMEM_CARVEOUT_MAX_SHARED = 100, - CU_SHAREDMEM_CARVEOUT_MAX_L1 = 0, -} -pub use self::CUshared_carveout_enum as CUshared_carveout; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmemorytype_enum { - CU_MEMORYTYPE_HOST = 1, - CU_MEMORYTYPE_DEVICE = 2, - CU_MEMORYTYPE_ARRAY = 3, - CU_MEMORYTYPE_UNIFIED = 4, -} -pub use self::CUmemorytype_enum as CUmemorytype; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUcomputemode_enum { - CU_COMPUTEMODE_DEFAULT = 0, - CU_COMPUTEMODE_PROHIBITED = 2, - CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3, -} -pub use self::CUcomputemode_enum as CUcomputemode; 
-#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmem_advise_enum { - CU_MEM_ADVISE_SET_READ_MOSTLY = 1, - CU_MEM_ADVISE_UNSET_READ_MOSTLY = 2, - CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 3, - CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4, - CU_MEM_ADVISE_SET_ACCESSED_BY = 5, - CU_MEM_ADVISE_UNSET_ACCESSED_BY = 6, -} -pub use self::CUmem_advise_enum as CUmem_advise; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmem_range_attribute_enum { - CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 1, - CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 2, - CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 3, - CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4, -} -pub use self::CUmem_range_attribute_enum as CUmem_range_attribute; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUjit_option_enum { - CU_JIT_MAX_REGISTERS = 0, - CU_JIT_THREADS_PER_BLOCK = 1, - CU_JIT_WALL_TIME = 2, - CU_JIT_INFO_LOG_BUFFER = 3, - CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4, - CU_JIT_ERROR_LOG_BUFFER = 5, - CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6, - CU_JIT_OPTIMIZATION_LEVEL = 7, - CU_JIT_TARGET_FROM_CUCONTEXT = 8, - CU_JIT_TARGET = 9, - CU_JIT_FALLBACK_STRATEGY = 10, - CU_JIT_GENERATE_DEBUG_INFO = 11, - CU_JIT_LOG_VERBOSE = 12, - CU_JIT_GENERATE_LINE_INFO = 13, - CU_JIT_CACHE_MODE = 14, - CU_JIT_NEW_SM3X_OPT = 15, - CU_JIT_FAST_COMPILE = 16, - CU_JIT_GLOBAL_SYMBOL_NAMES = 17, - CU_JIT_GLOBAL_SYMBOL_ADDRESSES = 18, - CU_JIT_GLOBAL_SYMBOL_COUNT = 19, - CU_JIT_LTO = 20, - CU_JIT_FTZ = 21, - CU_JIT_PREC_DIV = 22, - CU_JIT_PREC_SQRT = 23, - CU_JIT_FMA = 24, - CU_JIT_NUM_OPTIONS = 25, -} -pub use self::CUjit_option_enum as CUjit_option; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUjit_target_enum { - CU_TARGET_COMPUTE_20 = 20, - CU_TARGET_COMPUTE_21 = 21, - CU_TARGET_COMPUTE_30 = 30, - CU_TARGET_COMPUTE_32 = 32, - CU_TARGET_COMPUTE_35 = 35, - 
CU_TARGET_COMPUTE_37 = 37, - CU_TARGET_COMPUTE_50 = 50, - CU_TARGET_COMPUTE_52 = 52, - CU_TARGET_COMPUTE_53 = 53, - CU_TARGET_COMPUTE_60 = 60, - CU_TARGET_COMPUTE_61 = 61, - CU_TARGET_COMPUTE_62 = 62, - CU_TARGET_COMPUTE_70 = 70, - CU_TARGET_COMPUTE_72 = 72, - CU_TARGET_COMPUTE_75 = 75, - CU_TARGET_COMPUTE_80 = 80, - CU_TARGET_COMPUTE_86 = 86, -} -pub use self::CUjit_target_enum as CUjit_target; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUjit_fallback_enum { - CU_PREFER_PTX = 0, - CU_PREFER_BINARY = 1, -} -pub use self::CUjit_fallback_enum as CUjit_fallback; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUjit_cacheMode_enum { - CU_JIT_CACHE_OPTION_NONE = 0, - CU_JIT_CACHE_OPTION_CG = 1, - CU_JIT_CACHE_OPTION_CA = 2, -} -pub use self::CUjit_cacheMode_enum as CUjit_cacheMode; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUjitInputType_enum { - CU_JIT_INPUT_CUBIN = 0, - CU_JIT_INPUT_PTX = 1, - CU_JIT_INPUT_FATBINARY = 2, - CU_JIT_INPUT_OBJECT = 3, - CU_JIT_INPUT_LIBRARY = 4, - CU_JIT_INPUT_NVVM = 5, - CU_JIT_NUM_INPUT_TYPES = 6, -} -pub use self::CUjitInputType_enum as CUjitInputType; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct CUlinkState_st { - _unused: [u8; 0], -} -pub type CUlinkState = *mut CUlinkState_st; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUgraphicsRegisterFlags_enum { - CU_GRAPHICS_REGISTER_FLAGS_NONE = 0, - CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 1, - CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 2, - CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 4, - CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 8, -} -pub use self::CUgraphicsRegisterFlags_enum as CUgraphicsRegisterFlags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUgraphicsMapResourceFlags_enum { - CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0, - 
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 1, - CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 2, -} -pub use self::CUgraphicsMapResourceFlags_enum as CUgraphicsMapResourceFlags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUarray_cubemap_face_enum { - CU_CUBEMAP_FACE_POSITIVE_X = 0, - CU_CUBEMAP_FACE_NEGATIVE_X = 1, - CU_CUBEMAP_FACE_POSITIVE_Y = 2, - CU_CUBEMAP_FACE_NEGATIVE_Y = 3, - CU_CUBEMAP_FACE_POSITIVE_Z = 4, - CU_CUBEMAP_FACE_NEGATIVE_Z = 5, -} -pub use self::CUarray_cubemap_face_enum as CUarray_cubemap_face; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUlimit_enum { - CU_LIMIT_STACK_SIZE = 0, - CU_LIMIT_PRINTF_FIFO_SIZE = 1, - CU_LIMIT_MALLOC_HEAP_SIZE = 2, - CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 3, - CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 4, - CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 5, - CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 6, - CU_LIMIT_MAX = 7, -} -pub use self::CUlimit_enum as CUlimit; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUresourcetype_enum { - CU_RESOURCE_TYPE_ARRAY = 0, - CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 1, - CU_RESOURCE_TYPE_LINEAR = 2, - CU_RESOURCE_TYPE_PITCH2D = 3, -} -pub use self::CUresourcetype_enum as CUresourcetype; -pub type CUhostFn = - ::std::option::Option; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUaccessProperty_enum { - CU_ACCESS_PROPERTY_NORMAL = 0, - CU_ACCESS_PROPERTY_STREAMING = 1, - CU_ACCESS_PROPERTY_PERSISTING = 2, -} -pub use self::CUaccessProperty_enum as CUaccessProperty; -#[repr(C)] -#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)] -pub struct CUaccessPolicyWindow_st { - pub base_ptr: *mut ::std::os::raw::c_void, - pub num_bytes: usize, - pub hitRatio: f32, - pub hitProp: CUaccessProperty, - pub missProp: CUaccessProperty, -} -#[test] -fn bindgen_test_layout_CUaccessPolicyWindow_st() { - assert_eq!( - 
::std::mem::size_of::(), - 32usize, - concat!("Size of: ", stringify!(CUaccessPolicyWindow_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUaccessPolicyWindow_st)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).base_ptr as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUaccessPolicyWindow_st), - "::", - stringify!(base_ptr) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).num_bytes as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUaccessPolicyWindow_st), - "::", - stringify!(num_bytes) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).hitRatio as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUaccessPolicyWindow_st), - "::", - stringify!(hitRatio) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).hitProp as *const _ as usize }, - 20usize, - concat!( - "Offset of field: ", - stringify!(CUaccessPolicyWindow_st), - "::", - stringify!(hitProp) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).missProp as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUaccessPolicyWindow_st), - "::", - stringify!(missProp) - ) - ); -} -impl Default for CUaccessPolicyWindow_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUaccessPolicyWindow_v1 = CUaccessPolicyWindow_st; -pub type CUaccessPolicyWindow = CUaccessPolicyWindow_v1; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_KERNEL_NODE_PARAMS_st { - pub func: CUfunction, - pub gridDimX: ::std::os::raw::c_uint, - pub gridDimY: ::std::os::raw::c_uint, - pub gridDimZ: ::std::os::raw::c_uint, - pub blockDimX: ::std::os::raw::c_uint, - pub blockDimY: ::std::os::raw::c_uint, - pub blockDimZ: ::std::os::raw::c_uint, - pub sharedMemBytes: ::std::os::raw::c_uint, - pub kernelParams: *mut *mut 
::std::os::raw::c_void, - pub extra: *mut *mut ::std::os::raw::c_void, -} -#[test] -fn bindgen_test_layout_CUDA_KERNEL_NODE_PARAMS_st() { - assert_eq!( - ::std::mem::size_of::(), - 56usize, - concat!("Size of: ", stringify!(CUDA_KERNEL_NODE_PARAMS_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_KERNEL_NODE_PARAMS_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).func as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_KERNEL_NODE_PARAMS_st), - "::", - stringify!(func) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).gridDimX as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_KERNEL_NODE_PARAMS_st), - "::", - stringify!(gridDimX) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).gridDimY as *const _ as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(CUDA_KERNEL_NODE_PARAMS_st), - "::", - stringify!(gridDimY) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).gridDimZ as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_KERNEL_NODE_PARAMS_st), - "::", - stringify!(gridDimZ) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).blockDimX as *const _ as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(CUDA_KERNEL_NODE_PARAMS_st), - "::", - stringify!(blockDimX) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).blockDimY as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_KERNEL_NODE_PARAMS_st), - "::", - stringify!(blockDimY) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).blockDimZ as *const _ as usize - }, - 28usize, - concat!( - "Offset of field: ", - stringify!(CUDA_KERNEL_NODE_PARAMS_st), - "::", - stringify!(blockDimZ) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).sharedMemBytes as *const _ - as usize - }, - 32usize, - 
concat!( - "Offset of field: ", - stringify!(CUDA_KERNEL_NODE_PARAMS_st), - "::", - stringify!(sharedMemBytes) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).kernelParams as *const _ as usize - }, - 40usize, - concat!( - "Offset of field: ", - stringify!(CUDA_KERNEL_NODE_PARAMS_st), - "::", - stringify!(kernelParams) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).extra as *const _ as usize - }, - 48usize, - concat!( - "Offset of field: ", - stringify!(CUDA_KERNEL_NODE_PARAMS_st), - "::", - stringify!(extra) - ) - ); -} -impl Default for CUDA_KERNEL_NODE_PARAMS_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_KERNEL_NODE_PARAMS_v1 = CUDA_KERNEL_NODE_PARAMS_st; -pub type CUDA_KERNEL_NODE_PARAMS = CUDA_KERNEL_NODE_PARAMS_v1; -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_MEMSET_NODE_PARAMS_st { - pub dst: CUdeviceptr, - pub pitch: usize, - pub value: ::std::os::raw::c_uint, - pub elementSize: ::std::os::raw::c_uint, - pub width: usize, - pub height: usize, -} -#[test] -fn bindgen_test_layout_CUDA_MEMSET_NODE_PARAMS_st() { - assert_eq!( - ::std::mem::size_of::(), - 40usize, - concat!("Size of: ", stringify!(CUDA_MEMSET_NODE_PARAMS_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_MEMSET_NODE_PARAMS_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dst as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMSET_NODE_PARAMS_st), - "::", - stringify!(dst) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).pitch as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMSET_NODE_PARAMS_st), - "::", - stringify!(pitch) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).value as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMSET_NODE_PARAMS_st), - "::", 
- stringify!(value) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).elementSize as *const _ as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMSET_NODE_PARAMS_st), - "::", - stringify!(elementSize) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).width as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMSET_NODE_PARAMS_st), - "::", - stringify!(width) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).height as *const _ as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMSET_NODE_PARAMS_st), - "::", - stringify!(height) - ) - ); -} -pub type CUDA_MEMSET_NODE_PARAMS_v1 = CUDA_MEMSET_NODE_PARAMS_st; -pub type CUDA_MEMSET_NODE_PARAMS = CUDA_MEMSET_NODE_PARAMS_v1; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_HOST_NODE_PARAMS_st { - pub fn_: CUhostFn, - pub userData: *mut ::std::os::raw::c_void, -} -#[test] -fn bindgen_test_layout_CUDA_HOST_NODE_PARAMS_st() { - assert_eq!( - ::std::mem::size_of::(), - 16usize, - concat!("Size of: ", stringify!(CUDA_HOST_NODE_PARAMS_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_HOST_NODE_PARAMS_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).fn_ as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_HOST_NODE_PARAMS_st), - "::", - stringify!(fn_) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).userData as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_HOST_NODE_PARAMS_st), - "::", - stringify!(userData) - ) - ); -} -impl Default for CUDA_HOST_NODE_PARAMS_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_HOST_NODE_PARAMS_v1 = CUDA_HOST_NODE_PARAMS_st; -pub type CUDA_HOST_NODE_PARAMS = CUDA_HOST_NODE_PARAMS_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, 
PartialOrd, Ord, PartialEq, Eq)] -pub enum CUgraphNodeType_enum { - CU_GRAPH_NODE_TYPE_KERNEL = 0, - CU_GRAPH_NODE_TYPE_MEMCPY = 1, - CU_GRAPH_NODE_TYPE_MEMSET = 2, - CU_GRAPH_NODE_TYPE_HOST = 3, - CU_GRAPH_NODE_TYPE_GRAPH = 4, - CU_GRAPH_NODE_TYPE_EMPTY = 5, - CU_GRAPH_NODE_TYPE_WAIT_EVENT = 6, - CU_GRAPH_NODE_TYPE_EVENT_RECORD = 7, - CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8, - CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9, - CU_GRAPH_NODE_TYPE_MEM_ALLOC = 10, - CU_GRAPH_NODE_TYPE_MEM_FREE = 11, -} -pub use self::CUgraphNodeType_enum as CUgraphNodeType; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUsynchronizationPolicy_enum { - CU_SYNC_POLICY_AUTO = 1, - CU_SYNC_POLICY_SPIN = 2, - CU_SYNC_POLICY_YIELD = 3, - CU_SYNC_POLICY_BLOCKING_SYNC = 4, -} -pub use self::CUsynchronizationPolicy_enum as CUsynchronizationPolicy; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUkernelNodeAttrID_enum { - CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1, - CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE = 2, -} -pub use self::CUkernelNodeAttrID_enum as CUkernelNodeAttrID; -#[repr(C)] -#[derive(Copy, Clone)] -pub union CUkernelNodeAttrValue_union { - pub accessPolicyWindow: CUaccessPolicyWindow, - pub cooperative: ::std::os::raw::c_int, -} -#[test] -fn bindgen_test_layout_CUkernelNodeAttrValue_union() { - assert_eq!( - ::std::mem::size_of::(), - 32usize, - concat!("Size of: ", stringify!(CUkernelNodeAttrValue_union)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUkernelNodeAttrValue_union)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).accessPolicyWindow as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUkernelNodeAttrValue_union), - "::", - stringify!(accessPolicyWindow) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).cooperative as *const _ as usize - }, - 0usize, - concat!( - "Offset of 
field: ", - stringify!(CUkernelNodeAttrValue_union), - "::", - stringify!(cooperative) - ) - ); -} -impl Default for CUkernelNodeAttrValue_union { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUkernelNodeAttrValue_v1 = CUkernelNodeAttrValue_union; -pub type CUkernelNodeAttrValue = CUkernelNodeAttrValue_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUstreamCaptureStatus_enum { - CU_STREAM_CAPTURE_STATUS_NONE = 0, - CU_STREAM_CAPTURE_STATUS_ACTIVE = 1, - CU_STREAM_CAPTURE_STATUS_INVALIDATED = 2, -} -pub use self::CUstreamCaptureStatus_enum as CUstreamCaptureStatus; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUstreamCaptureMode_enum { - CU_STREAM_CAPTURE_MODE_GLOBAL = 0, - CU_STREAM_CAPTURE_MODE_THREAD_LOCAL = 1, - CU_STREAM_CAPTURE_MODE_RELAXED = 2, -} -pub use self::CUstreamCaptureMode_enum as CUstreamCaptureMode; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUstreamAttrID_enum { - CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1, - CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3, -} -pub use self::CUstreamAttrID_enum as CUstreamAttrID; -#[repr(C)] -#[derive(Copy, Clone)] -pub union CUstreamAttrValue_union { - pub accessPolicyWindow: CUaccessPolicyWindow, - pub syncPolicy: CUsynchronizationPolicy, -} -#[test] -fn bindgen_test_layout_CUstreamAttrValue_union() { - assert_eq!( - ::std::mem::size_of::(), - 32usize, - concat!("Size of: ", stringify!(CUstreamAttrValue_union)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUstreamAttrValue_union)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).accessPolicyWindow as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUstreamAttrValue_union), - "::", - stringify!(accessPolicyWindow) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).syncPolicy as 
*const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUstreamAttrValue_union), - "::", - stringify!(syncPolicy) - ) - ); -} -impl Default for CUstreamAttrValue_union { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUstreamAttrValue_v1 = CUstreamAttrValue_union; -pub type CUstreamAttrValue = CUstreamAttrValue_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUdriverProcAddress_flags_enum { - CU_GET_PROC_ADDRESS_DEFAULT = 0, - CU_GET_PROC_ADDRESS_LEGACY_STREAM = 1, - CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM = 2, -} -pub use self::CUdriverProcAddress_flags_enum as CUdriverProcAddress_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUexecAffinityType_enum { - CU_EXEC_AFFINITY_TYPE_SM_COUNT = 0, - CU_EXEC_AFFINITY_TYPE_MAX = 1, -} -pub use self::CUexecAffinityType_enum as CUexecAffinityType; -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUexecAffinitySmCount_st { - pub val: ::std::os::raw::c_uint, -} -#[test] -fn bindgen_test_layout_CUexecAffinitySmCount_st() { - assert_eq!( - ::std::mem::size_of::(), - 4usize, - concat!("Size of: ", stringify!(CUexecAffinitySmCount_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!("Alignment of ", stringify!(CUexecAffinitySmCount_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).val as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUexecAffinitySmCount_st), - "::", - stringify!(val) - ) - ); -} -pub type CUexecAffinitySmCount_v1 = CUexecAffinitySmCount_st; -pub type CUexecAffinitySmCount = CUexecAffinitySmCount_v1; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUexecAffinityParam_st { - pub type_: CUexecAffinityType, - pub param: CUexecAffinityParam_st__bindgen_ty_1, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union CUexecAffinityParam_st__bindgen_ty_1 { 
- pub smCount: CUexecAffinitySmCount, -} -#[test] -fn bindgen_test_layout_CUexecAffinityParam_st__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 4usize, - concat!( - "Size of: ", - stringify!(CUexecAffinityParam_st__bindgen_ty_1) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!( - "Alignment of ", - stringify!(CUexecAffinityParam_st__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).smCount as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUexecAffinityParam_st__bindgen_ty_1), - "::", - stringify!(smCount) - ) - ); -} -impl Default for CUexecAffinityParam_st__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUexecAffinityParam_st() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!("Size of: ", stringify!(CUexecAffinityParam_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!("Alignment of ", stringify!(CUexecAffinityParam_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).type_ as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUexecAffinityParam_st), - "::", - stringify!(type_) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).param as *const _ as usize }, - 4usize, - concat!( - "Offset of field: ", - stringify!(CUexecAffinityParam_st), - "::", - stringify!(param) - ) - ); -} -impl Default for CUexecAffinityParam_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUexecAffinityParam_v1 = CUexecAffinityParam_st; -pub type CUexecAffinityParam = CUexecAffinityParam_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum cudaError_enum { - CUDA_SUCCESS = 0, - CUDA_ERROR_INVALID_VALUE = 1, - CUDA_ERROR_OUT_OF_MEMORY = 2, - CUDA_ERROR_NOT_INITIALIZED = 3, - CUDA_ERROR_DEINITIALIZED = 4, - CUDA_ERROR_PROFILER_DISABLED = 5, - 
CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6, - CUDA_ERROR_PROFILER_ALREADY_STARTED = 7, - CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8, - CUDA_ERROR_STUB_LIBRARY = 34, - CUDA_ERROR_NO_DEVICE = 100, - CUDA_ERROR_INVALID_DEVICE = 101, - CUDA_ERROR_DEVICE_NOT_LICENSED = 102, - CUDA_ERROR_INVALID_IMAGE = 200, - CUDA_ERROR_INVALID_CONTEXT = 201, - CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, - CUDA_ERROR_MAP_FAILED = 205, - CUDA_ERROR_UNMAP_FAILED = 206, - CUDA_ERROR_ARRAY_IS_MAPPED = 207, - CUDA_ERROR_ALREADY_MAPPED = 208, - CUDA_ERROR_NO_BINARY_FOR_GPU = 209, - CUDA_ERROR_ALREADY_ACQUIRED = 210, - CUDA_ERROR_NOT_MAPPED = 211, - CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212, - CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213, - CUDA_ERROR_ECC_UNCORRECTABLE = 214, - CUDA_ERROR_UNSUPPORTED_LIMIT = 215, - CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216, - CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217, - CUDA_ERROR_INVALID_PTX = 218, - CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219, - CUDA_ERROR_NVLINK_UNCORRECTABLE = 220, - CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221, - CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222, - CUDA_ERROR_JIT_COMPILATION_DISABLED = 223, - CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224, - CUDA_ERROR_INVALID_SOURCE = 300, - CUDA_ERROR_FILE_NOT_FOUND = 301, - CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, - CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303, - CUDA_ERROR_OPERATING_SYSTEM = 304, - CUDA_ERROR_INVALID_HANDLE = 400, - CUDA_ERROR_ILLEGAL_STATE = 401, - CUDA_ERROR_NOT_FOUND = 500, - CUDA_ERROR_NOT_READY = 600, - CUDA_ERROR_ILLEGAL_ADDRESS = 700, - CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, - CUDA_ERROR_LAUNCH_TIMEOUT = 702, - CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, - CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704, - CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705, - CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708, - CUDA_ERROR_CONTEXT_IS_DESTROYED = 709, - CUDA_ERROR_ASSERT = 710, - CUDA_ERROR_TOO_MANY_PEERS = 711, - CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712, - CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713, - 
CUDA_ERROR_HARDWARE_STACK_ERROR = 714, - CUDA_ERROR_ILLEGAL_INSTRUCTION = 715, - CUDA_ERROR_MISALIGNED_ADDRESS = 716, - CUDA_ERROR_INVALID_ADDRESS_SPACE = 717, - CUDA_ERROR_INVALID_PC = 718, - CUDA_ERROR_LAUNCH_FAILED = 719, - CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720, - CUDA_ERROR_NOT_PERMITTED = 800, - CUDA_ERROR_NOT_SUPPORTED = 801, - CUDA_ERROR_SYSTEM_NOT_READY = 802, - CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803, - CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804, - CUDA_ERROR_MPS_CONNECTION_FAILED = 805, - CUDA_ERROR_MPS_RPC_FAILURE = 806, - CUDA_ERROR_MPS_SERVER_NOT_READY = 807, - CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808, - CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809, - CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900, - CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901, - CUDA_ERROR_STREAM_CAPTURE_MERGE = 902, - CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903, - CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904, - CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905, - CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906, - CUDA_ERROR_CAPTURED_EVENT = 907, - CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908, - CUDA_ERROR_TIMEOUT = 909, - CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910, - CUDA_ERROR_EXTERNAL_DEVICE = 911, - CUDA_ERROR_UNKNOWN = 999, -} -pub use self::cudaError_enum as CUresult; -impl CUdevice_P2PAttribute_enum { - pub const CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED: CUdevice_P2PAttribute_enum = - CUdevice_P2PAttribute_enum::CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUdevice_P2PAttribute_enum { - CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 1, - CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 2, - CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 3, - CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED = 4, -} -pub use self::CUdevice_P2PAttribute_enum as CUdevice_P2PAttribute; -pub type CUstreamCallback = ::std::option::Option< - unsafe extern "C" fn( - hStream: CUstream, - status: CUresult, - 
userData: *mut ::std::os::raw::c_void, - ), ->; -pub type CUoccupancyB2DSize = - ::std::option::Option usize>; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_MEMCPY2D_st { - pub srcXInBytes: usize, - pub srcY: usize, - pub srcMemoryType: CUmemorytype, - pub srcHost: *const ::std::os::raw::c_void, - pub srcDevice: CUdeviceptr, - pub srcArray: CUarray, - pub srcPitch: usize, - pub dstXInBytes: usize, - pub dstY: usize, - pub dstMemoryType: CUmemorytype, - pub dstHost: *mut ::std::os::raw::c_void, - pub dstDevice: CUdeviceptr, - pub dstArray: CUarray, - pub dstPitch: usize, - pub WidthInBytes: usize, - pub Height: usize, -} -#[test] -fn bindgen_test_layout_CUDA_MEMCPY2D_st() { - assert_eq!( - ::std::mem::size_of::(), - 128usize, - concat!("Size of: ", stringify!(CUDA_MEMCPY2D_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_MEMCPY2D_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcXInBytes as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(srcXInBytes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcY as *const _ as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(srcY) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcMemoryType as *const _ as usize }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(srcMemoryType) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcHost as *const _ as usize }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(srcHost) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcDevice as *const _ as usize }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(srcDevice) - ) - ); - assert_eq!( - unsafe { 
&(*(::std::ptr::null::())).srcArray as *const _ as usize }, - 40usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(srcArray) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcPitch as *const _ as usize }, - 48usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(srcPitch) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstXInBytes as *const _ as usize }, - 56usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(dstXInBytes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstY as *const _ as usize }, - 64usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(dstY) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstMemoryType as *const _ as usize }, - 72usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(dstMemoryType) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstHost as *const _ as usize }, - 80usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(dstHost) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstDevice as *const _ as usize }, - 88usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(dstDevice) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstArray as *const _ as usize }, - 96usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(dstArray) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstPitch as *const _ as usize }, - 104usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(dstPitch) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).WidthInBytes as *const _ as usize }, - 112usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - 
stringify!(WidthInBytes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).Height as *const _ as usize }, - 120usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY2D_st), - "::", - stringify!(Height) - ) - ); -} -impl Default for CUDA_MEMCPY2D_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_MEMCPY2D_v2 = CUDA_MEMCPY2D_st; -pub type CUDA_MEMCPY2D = CUDA_MEMCPY2D_v2; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_MEMCPY3D_st { - pub srcXInBytes: usize, - pub srcY: usize, - pub srcZ: usize, - pub srcLOD: usize, - pub srcMemoryType: CUmemorytype, - pub srcHost: *const ::std::os::raw::c_void, - pub srcDevice: CUdeviceptr, - pub srcArray: CUarray, - pub reserved0: *mut ::std::os::raw::c_void, - pub srcPitch: usize, - pub srcHeight: usize, - pub dstXInBytes: usize, - pub dstY: usize, - pub dstZ: usize, - pub dstLOD: usize, - pub dstMemoryType: CUmemorytype, - pub dstHost: *mut ::std::os::raw::c_void, - pub dstDevice: CUdeviceptr, - pub dstArray: CUarray, - pub reserved1: *mut ::std::os::raw::c_void, - pub dstPitch: usize, - pub dstHeight: usize, - pub WidthInBytes: usize, - pub Height: usize, - pub Depth: usize, -} -#[test] -fn bindgen_test_layout_CUDA_MEMCPY3D_st() { - assert_eq!( - ::std::mem::size_of::(), - 200usize, - concat!("Size of: ", stringify!(CUDA_MEMCPY3D_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_MEMCPY3D_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcXInBytes as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(srcXInBytes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcY as *const _ as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(srcY) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcZ as *const _ as usize }, 
- 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(srcZ) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcLOD as *const _ as usize }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(srcLOD) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcMemoryType as *const _ as usize }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(srcMemoryType) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcHost as *const _ as usize }, - 40usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(srcHost) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcDevice as *const _ as usize }, - 48usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(srcDevice) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcArray as *const _ as usize }, - 56usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(srcArray) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).reserved0 as *const _ as usize }, - 64usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(reserved0) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcPitch as *const _ as usize }, - 72usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(srcPitch) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcHeight as *const _ as usize }, - 80usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(srcHeight) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstXInBytes as *const _ as usize }, - 88usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(dstXInBytes) - ) - ); - assert_eq!( - unsafe { 
&(*(::std::ptr::null::())).dstY as *const _ as usize }, - 96usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(dstY) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstZ as *const _ as usize }, - 104usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(dstZ) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstLOD as *const _ as usize }, - 112usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(dstLOD) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstMemoryType as *const _ as usize }, - 120usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(dstMemoryType) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstHost as *const _ as usize }, - 128usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(dstHost) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstDevice as *const _ as usize }, - 136usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(dstDevice) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstArray as *const _ as usize }, - 144usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(dstArray) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).reserved1 as *const _ as usize }, - 152usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(reserved1) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstPitch as *const _ as usize }, - 160usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(dstPitch) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstHeight as *const _ as usize }, - 168usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(dstHeight) - ) - ); - 
assert_eq!( - unsafe { &(*(::std::ptr::null::())).WidthInBytes as *const _ as usize }, - 176usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(WidthInBytes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).Height as *const _ as usize }, - 184usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(Height) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).Depth as *const _ as usize }, - 192usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_st), - "::", - stringify!(Depth) - ) - ); -} -impl Default for CUDA_MEMCPY3D_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_MEMCPY3D_v2 = CUDA_MEMCPY3D_st; -pub type CUDA_MEMCPY3D = CUDA_MEMCPY3D_v2; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_MEMCPY3D_PEER_st { - pub srcXInBytes: usize, - pub srcY: usize, - pub srcZ: usize, - pub srcLOD: usize, - pub srcMemoryType: CUmemorytype, - pub srcHost: *const ::std::os::raw::c_void, - pub srcDevice: CUdeviceptr, - pub srcArray: CUarray, - pub srcContext: CUcontext, - pub srcPitch: usize, - pub srcHeight: usize, - pub dstXInBytes: usize, - pub dstY: usize, - pub dstZ: usize, - pub dstLOD: usize, - pub dstMemoryType: CUmemorytype, - pub dstHost: *mut ::std::os::raw::c_void, - pub dstDevice: CUdeviceptr, - pub dstArray: CUarray, - pub dstContext: CUcontext, - pub dstPitch: usize, - pub dstHeight: usize, - pub WidthInBytes: usize, - pub Height: usize, - pub Depth: usize, -} -#[test] -fn bindgen_test_layout_CUDA_MEMCPY3D_PEER_st() { - assert_eq!( - ::std::mem::size_of::(), - 200usize, - concat!("Size of: ", stringify!(CUDA_MEMCPY3D_PEER_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_MEMCPY3D_PEER_st)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).srcXInBytes as *const _ as usize - }, - 0usize, - concat!( - "Offset 
of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(srcXInBytes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcY as *const _ as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(srcY) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcZ as *const _ as usize }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(srcZ) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcLOD as *const _ as usize }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(srcLOD) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).srcMemoryType as *const _ as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(srcMemoryType) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcHost as *const _ as usize }, - 40usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(srcHost) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcDevice as *const _ as usize }, - 48usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(srcDevice) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcArray as *const _ as usize }, - 56usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(srcArray) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).srcContext as *const _ as usize - }, - 64usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(srcContext) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).srcPitch as *const _ as usize }, - 72usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(srcPitch) - ) - ); - assert_eq!( - unsafe { 
&(*(::std::ptr::null::())).srcHeight as *const _ as usize }, - 80usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(srcHeight) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).dstXInBytes as *const _ as usize - }, - 88usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(dstXInBytes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstY as *const _ as usize }, - 96usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(dstY) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstZ as *const _ as usize }, - 104usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(dstZ) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstLOD as *const _ as usize }, - 112usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(dstLOD) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).dstMemoryType as *const _ as usize - }, - 120usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(dstMemoryType) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstHost as *const _ as usize }, - 128usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(dstHost) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstDevice as *const _ as usize }, - 136usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(dstDevice) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstArray as *const _ as usize }, - 144usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(dstArray) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).dstContext as *const _ as usize - }, - 152usize, - concat!( - "Offset of field: ", - 
stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(dstContext) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstPitch as *const _ as usize }, - 160usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(dstPitch) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).dstHeight as *const _ as usize }, - 168usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(dstHeight) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).WidthInBytes as *const _ as usize - }, - 176usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(WidthInBytes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).Height as *const _ as usize }, - 184usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(Height) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).Depth as *const _ as usize }, - 192usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEMCPY3D_PEER_st), - "::", - stringify!(Depth) - ) - ); -} -impl Default for CUDA_MEMCPY3D_PEER_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_MEMCPY3D_PEER_v1 = CUDA_MEMCPY3D_PEER_st; -pub type CUDA_MEMCPY3D_PEER = CUDA_MEMCPY3D_PEER_v1; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_ARRAY_DESCRIPTOR_st { - pub Width: usize, - pub Height: usize, - pub Format: CUarray_format, - pub NumChannels: ::std::os::raw::c_uint, -} -#[test] -fn bindgen_test_layout_CUDA_ARRAY_DESCRIPTOR_st() { - assert_eq!( - ::std::mem::size_of::(), - 24usize, - concat!("Size of: ", stringify!(CUDA_ARRAY_DESCRIPTOR_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_ARRAY_DESCRIPTOR_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).Width as *const _ as usize }, - 0usize, - concat!( - "Offset of 
field: ", - stringify!(CUDA_ARRAY_DESCRIPTOR_st), - "::", - stringify!(Width) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).Height as *const _ as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY_DESCRIPTOR_st), - "::", - stringify!(Height) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).Format as *const _ as usize }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY_DESCRIPTOR_st), - "::", - stringify!(Format) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).NumChannels as *const _ as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY_DESCRIPTOR_st), - "::", - stringify!(NumChannels) - ) - ); -} -impl Default for CUDA_ARRAY_DESCRIPTOR_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_ARRAY_DESCRIPTOR_v2 = CUDA_ARRAY_DESCRIPTOR_st; -pub type CUDA_ARRAY_DESCRIPTOR = CUDA_ARRAY_DESCRIPTOR_v2; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_ARRAY3D_DESCRIPTOR_st { - pub Width: usize, - pub Height: usize, - pub Depth: usize, - pub Format: CUarray_format, - pub NumChannels: ::std::os::raw::c_uint, - pub Flags: ::std::os::raw::c_uint, -} -#[test] -fn bindgen_test_layout_CUDA_ARRAY3D_DESCRIPTOR_st() { - assert_eq!( - ::std::mem::size_of::(), - 40usize, - concat!("Size of: ", stringify!(CUDA_ARRAY3D_DESCRIPTOR_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_ARRAY3D_DESCRIPTOR_st)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).Width as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY3D_DESCRIPTOR_st), - "::", - stringify!(Width) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).Height as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY3D_DESCRIPTOR_st), - "::", - stringify!(Height) - ) - ); - assert_eq!( - 
unsafe { - &(*(::std::ptr::null::())).Depth as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY3D_DESCRIPTOR_st), - "::", - stringify!(Depth) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).Format as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY3D_DESCRIPTOR_st), - "::", - stringify!(Format) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).NumChannels as *const _ as usize - }, - 28usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY3D_DESCRIPTOR_st), - "::", - stringify!(NumChannels) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).Flags as *const _ as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY3D_DESCRIPTOR_st), - "::", - stringify!(Flags) - ) - ); -} -impl Default for CUDA_ARRAY3D_DESCRIPTOR_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_ARRAY3D_DESCRIPTOR_v2 = CUDA_ARRAY3D_DESCRIPTOR_st; -pub type CUDA_ARRAY3D_DESCRIPTOR = CUDA_ARRAY3D_DESCRIPTOR_v2; -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_ARRAY_SPARSE_PROPERTIES_st { - pub tileExtent: CUDA_ARRAY_SPARSE_PROPERTIES_st__bindgen_ty_1, - pub miptailFirstLevel: ::std::os::raw::c_uint, - pub miptailSize: ::std::os::raw::c_ulonglong, - pub flags: ::std::os::raw::c_uint, - pub reserved: [::std::os::raw::c_uint; 4usize], -} -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_ARRAY_SPARSE_PROPERTIES_st__bindgen_ty_1 { - pub width: ::std::os::raw::c_uint, - pub height: ::std::os::raw::c_uint, - pub depth: ::std::os::raw::c_uint, -} -#[test] -fn bindgen_test_layout_CUDA_ARRAY_SPARSE_PROPERTIES_st__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 12usize, - concat!( - "Size of: ", - stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st__bindgen_ty_1) - ) - ); - assert_eq!( - 
::std::mem::align_of::(), - 4usize, - concat!( - "Alignment of ", - stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).width - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st__bindgen_ty_1), - "::", - stringify!(width) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).height - as *const _ as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st__bindgen_ty_1), - "::", - stringify!(height) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).depth - as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st__bindgen_ty_1), - "::", - stringify!(depth) - ) - ); -} -#[test] -fn bindgen_test_layout_CUDA_ARRAY_SPARSE_PROPERTIES_st() { - assert_eq!( - ::std::mem::size_of::(), - 48usize, - concat!("Size of: ", stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).tileExtent as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st), - "::", - stringify!(tileExtent) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).miptailFirstLevel - as *const _ as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st), - "::", - stringify!(miptailFirstLevel) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).miptailSize as *const _ - as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st), - "::", - stringify!(miptailSize) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).flags as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - 
stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st), - "::", - stringify!(flags) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved as *const _ - as usize - }, - 28usize, - concat!( - "Offset of field: ", - stringify!(CUDA_ARRAY_SPARSE_PROPERTIES_st), - "::", - stringify!(reserved) - ) - ); -} -pub type CUDA_ARRAY_SPARSE_PROPERTIES_v1 = CUDA_ARRAY_SPARSE_PROPERTIES_st; -pub type CUDA_ARRAY_SPARSE_PROPERTIES = CUDA_ARRAY_SPARSE_PROPERTIES_v1; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUDA_RESOURCE_DESC_st { - pub resType: CUresourcetype, - pub res: CUDA_RESOURCE_DESC_st__bindgen_ty_1, - pub flags: ::std::os::raw::c_uint, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union CUDA_RESOURCE_DESC_st__bindgen_ty_1 { - pub array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1, - pub mipmap: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2, - pub linear: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3, - pub pitch2D: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4, - pub reserved: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_5, -} -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { - pub hArray: CUarray, -} -#[test] -fn bindgen_test_layout_CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!( - "Size of: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).hArray - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(hArray) - ) - ); -} -impl Default for CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } 
-} -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2 { - pub hMipmappedArray: CUmipmappedArray, -} -#[test] -fn bindgen_test_layout_CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!( - "Size of: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .hMipmappedArray as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(hMipmappedArray) - ) - ); -} -impl Default for CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3 { - pub devPtr: CUdeviceptr, - pub format: CUarray_format, - pub numChannels: ::std::os::raw::c_uint, - pub sizeInBytes: usize, -} -#[test] -fn bindgen_test_layout_CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3() { - assert_eq!( - ::std::mem::size_of::(), - 24usize, - concat!( - "Size of: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).devPtr - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(devPtr) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).format - as *const _ as usize - }, - 8usize, - concat!( - 
"Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(format) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .numChannels as *const _ as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(numChannels) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .sizeInBytes as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sizeInBytes) - ) - ); -} -impl Default for CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4 { - pub devPtr: CUdeviceptr, - pub format: CUarray_format, - pub numChannels: ::std::os::raw::c_uint, - pub width: usize, - pub height: usize, - pub pitchInBytes: usize, -} -#[test] -fn bindgen_test_layout_CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4() { - assert_eq!( - ::std::mem::size_of::(), - 40usize, - concat!( - "Size of: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).devPtr - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4), - "::", - stringify!(devPtr) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).format - as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4), - "::", - stringify!(format) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - 
.numChannels as *const _ as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4), - "::", - stringify!(numChannels) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).width - as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4), - "::", - stringify!(width) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).height - as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4), - "::", - stringify!(height) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .pitchInBytes as *const _ as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4), - "::", - stringify!(pitchInBytes) - ) - ); -} -impl Default for CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_5 { - pub reserved: [::std::os::raw::c_int; 32usize], -} -#[test] -fn bindgen_test_layout_CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_5() { - assert_eq!( - ::std::mem::size_of::(), - 128usize, - concat!( - "Size of: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_5) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!( - "Alignment of ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_5) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_5), - "::", - stringify!(reserved) - ) - ); -} -#[test] -fn bindgen_test_layout_CUDA_RESOURCE_DESC_st__bindgen_ty_1() { - assert_eq!( - 
::std::mem::size_of::(), - 128usize, - concat!("Size of: ", stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).array as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1), - "::", - stringify!(array) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).mipmap as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1), - "::", - stringify!(mipmap) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).linear as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1), - "::", - stringify!(linear) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).pitch2D as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1), - "::", - stringify!(pitch2D) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st__bindgen_ty_1), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUDA_RESOURCE_DESC_st__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUDA_RESOURCE_DESC_st() { - assert_eq!( - ::std::mem::size_of::(), - 144usize, - concat!("Size of: ", stringify!(CUDA_RESOURCE_DESC_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_RESOURCE_DESC_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).resType as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st), - "::", - stringify!(resType) - ) - ); - 
assert_eq!( - unsafe { &(*(::std::ptr::null::())).res as *const _ as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st), - "::", - stringify!(res) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).flags as *const _ as usize }, - 136usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_DESC_st), - "::", - stringify!(flags) - ) - ); -} -impl Default for CUDA_RESOURCE_DESC_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_RESOURCE_DESC_v1 = CUDA_RESOURCE_DESC_st; -pub type CUDA_RESOURCE_DESC = CUDA_RESOURCE_DESC_v1; -#[repr(C)] -#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)] -pub struct CUDA_TEXTURE_DESC_st { - pub addressMode: [CUaddress_mode; 3usize], - pub filterMode: CUfilter_mode, - pub flags: ::std::os::raw::c_uint, - pub maxAnisotropy: ::std::os::raw::c_uint, - pub mipmapFilterMode: CUfilter_mode, - pub mipmapLevelBias: f32, - pub minMipmapLevelClamp: f32, - pub maxMipmapLevelClamp: f32, - pub borderColor: [f32; 4usize], - pub reserved: [::std::os::raw::c_int; 12usize], -} -#[test] -fn bindgen_test_layout_CUDA_TEXTURE_DESC_st() { - assert_eq!( - ::std::mem::size_of::(), - 104usize, - concat!("Size of: ", stringify!(CUDA_TEXTURE_DESC_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!("Alignment of ", stringify!(CUDA_TEXTURE_DESC_st)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).addressMode as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_TEXTURE_DESC_st), - "::", - stringify!(addressMode) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).filterMode as *const _ as usize }, - 12usize, - concat!( - "Offset of field: ", - stringify!(CUDA_TEXTURE_DESC_st), - "::", - stringify!(filterMode) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).flags as *const _ as usize }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_TEXTURE_DESC_st), - "::", - 
stringify!(flags) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).maxAnisotropy as *const _ as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(CUDA_TEXTURE_DESC_st), - "::", - stringify!(maxAnisotropy) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).mipmapFilterMode as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_TEXTURE_DESC_st), - "::", - stringify!(mipmapFilterMode) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).mipmapLevelBias as *const _ as usize - }, - 28usize, - concat!( - "Offset of field: ", - stringify!(CUDA_TEXTURE_DESC_st), - "::", - stringify!(mipmapLevelBias) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).minMipmapLevelClamp as *const _ - as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUDA_TEXTURE_DESC_st), - "::", - stringify!(minMipmapLevelClamp) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).maxMipmapLevelClamp as *const _ - as usize - }, - 36usize, - concat!( - "Offset of field: ", - stringify!(CUDA_TEXTURE_DESC_st), - "::", - stringify!(maxMipmapLevelClamp) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).borderColor as *const _ as usize - }, - 40usize, - concat!( - "Offset of field: ", - stringify!(CUDA_TEXTURE_DESC_st), - "::", - stringify!(borderColor) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).reserved as *const _ as usize }, - 56usize, - concat!( - "Offset of field: ", - stringify!(CUDA_TEXTURE_DESC_st), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUDA_TEXTURE_DESC_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_TEXTURE_DESC_v1 = CUDA_TEXTURE_DESC_st; -pub type CUDA_TEXTURE_DESC = CUDA_TEXTURE_DESC_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUresourceViewFormat_enum { - CU_RES_VIEW_FORMAT_NONE = 0, - CU_RES_VIEW_FORMAT_UINT_1X8 = 1, - 
CU_RES_VIEW_FORMAT_UINT_2X8 = 2, - CU_RES_VIEW_FORMAT_UINT_4X8 = 3, - CU_RES_VIEW_FORMAT_SINT_1X8 = 4, - CU_RES_VIEW_FORMAT_SINT_2X8 = 5, - CU_RES_VIEW_FORMAT_SINT_4X8 = 6, - CU_RES_VIEW_FORMAT_UINT_1X16 = 7, - CU_RES_VIEW_FORMAT_UINT_2X16 = 8, - CU_RES_VIEW_FORMAT_UINT_4X16 = 9, - CU_RES_VIEW_FORMAT_SINT_1X16 = 10, - CU_RES_VIEW_FORMAT_SINT_2X16 = 11, - CU_RES_VIEW_FORMAT_SINT_4X16 = 12, - CU_RES_VIEW_FORMAT_UINT_1X32 = 13, - CU_RES_VIEW_FORMAT_UINT_2X32 = 14, - CU_RES_VIEW_FORMAT_UINT_4X32 = 15, - CU_RES_VIEW_FORMAT_SINT_1X32 = 16, - CU_RES_VIEW_FORMAT_SINT_2X32 = 17, - CU_RES_VIEW_FORMAT_SINT_4X32 = 18, - CU_RES_VIEW_FORMAT_FLOAT_1X16 = 19, - CU_RES_VIEW_FORMAT_FLOAT_2X16 = 20, - CU_RES_VIEW_FORMAT_FLOAT_4X16 = 21, - CU_RES_VIEW_FORMAT_FLOAT_1X32 = 22, - CU_RES_VIEW_FORMAT_FLOAT_2X32 = 23, - CU_RES_VIEW_FORMAT_FLOAT_4X32 = 24, - CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 25, - CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 26, - CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 27, - CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 28, - CU_RES_VIEW_FORMAT_SIGNED_BC4 = 29, - CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 30, - CU_RES_VIEW_FORMAT_SIGNED_BC5 = 31, - CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 32, - CU_RES_VIEW_FORMAT_SIGNED_BC6H = 33, - CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 34, -} -pub use self::CUresourceViewFormat_enum as CUresourceViewFormat; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_RESOURCE_VIEW_DESC_st { - pub format: CUresourceViewFormat, - pub width: usize, - pub height: usize, - pub depth: usize, - pub firstMipmapLevel: ::std::os::raw::c_uint, - pub lastMipmapLevel: ::std::os::raw::c_uint, - pub firstLayer: ::std::os::raw::c_uint, - pub lastLayer: ::std::os::raw::c_uint, - pub reserved: [::std::os::raw::c_uint; 16usize], -} -#[test] -fn bindgen_test_layout_CUDA_RESOURCE_VIEW_DESC_st() { - assert_eq!( - ::std::mem::size_of::(), - 112usize, - concat!("Size of: ", stringify!(CUDA_RESOURCE_VIEW_DESC_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - 
concat!("Alignment of ", stringify!(CUDA_RESOURCE_VIEW_DESC_st)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).format as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_VIEW_DESC_st), - "::", - stringify!(format) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).width as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_VIEW_DESC_st), - "::", - stringify!(width) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).height as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_VIEW_DESC_st), - "::", - stringify!(height) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).depth as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_VIEW_DESC_st), - "::", - stringify!(depth) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).firstMipmapLevel as *const _ - as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_VIEW_DESC_st), - "::", - stringify!(firstMipmapLevel) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).lastMipmapLevel as *const _ - as usize - }, - 36usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_VIEW_DESC_st), - "::", - stringify!(lastMipmapLevel) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).firstLayer as *const _ as usize - }, - 40usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_VIEW_DESC_st), - "::", - stringify!(firstLayer) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).lastLayer as *const _ as usize - }, - 44usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_VIEW_DESC_st), - "::", - stringify!(lastLayer) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved as *const _ as usize - }, - 48usize, - concat!( - "Offset of field: ", - stringify!(CUDA_RESOURCE_VIEW_DESC_st), - "::", 
- stringify!(reserved) - ) - ); -} -impl Default for CUDA_RESOURCE_VIEW_DESC_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_RESOURCE_VIEW_DESC_v1 = CUDA_RESOURCE_VIEW_DESC_st; -pub type CUDA_RESOURCE_VIEW_DESC = CUDA_RESOURCE_VIEW_DESC_v1; -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st { - pub p2pToken: ::std::os::raw::c_ulonglong, - pub vaSpaceToken: ::std::os::raw::c_uint, -} -#[test] -fn bindgen_test_layout_CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st() { - assert_eq!( - ::std::mem::size_of::(), - 16usize, - concat!( - "Size of: ", - stringify!(CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).p2pToken as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st), - "::", - stringify!(p2pToken) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).vaSpaceToken - as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st), - "::", - stringify!(vaSpaceToken) - ) - ); -} -pub type CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1 = CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st; -pub type CUDA_POINTER_ATTRIBUTE_P2P_TOKENS = CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum { - CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE = 0, - CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ = 1, - CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE = 3, -} -pub use self::CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum as CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_LAUNCH_PARAMS_st { - pub 
function: CUfunction, - pub gridDimX: ::std::os::raw::c_uint, - pub gridDimY: ::std::os::raw::c_uint, - pub gridDimZ: ::std::os::raw::c_uint, - pub blockDimX: ::std::os::raw::c_uint, - pub blockDimY: ::std::os::raw::c_uint, - pub blockDimZ: ::std::os::raw::c_uint, - pub sharedMemBytes: ::std::os::raw::c_uint, - pub hStream: CUstream, - pub kernelParams: *mut *mut ::std::os::raw::c_void, -} -#[test] -fn bindgen_test_layout_CUDA_LAUNCH_PARAMS_st() { - assert_eq!( - ::std::mem::size_of::(), - 56usize, - concat!("Size of: ", stringify!(CUDA_LAUNCH_PARAMS_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_LAUNCH_PARAMS_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).function as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_LAUNCH_PARAMS_st), - "::", - stringify!(function) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).gridDimX as *const _ as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_LAUNCH_PARAMS_st), - "::", - stringify!(gridDimX) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).gridDimY as *const _ as usize }, - 12usize, - concat!( - "Offset of field: ", - stringify!(CUDA_LAUNCH_PARAMS_st), - "::", - stringify!(gridDimY) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).gridDimZ as *const _ as usize }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_LAUNCH_PARAMS_st), - "::", - stringify!(gridDimZ) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).blockDimX as *const _ as usize }, - 20usize, - concat!( - "Offset of field: ", - stringify!(CUDA_LAUNCH_PARAMS_st), - "::", - stringify!(blockDimX) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).blockDimY as *const _ as usize }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_LAUNCH_PARAMS_st), - "::", - stringify!(blockDimY) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).blockDimZ as 
*const _ as usize }, - 28usize, - concat!( - "Offset of field: ", - stringify!(CUDA_LAUNCH_PARAMS_st), - "::", - stringify!(blockDimZ) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).sharedMemBytes as *const _ as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUDA_LAUNCH_PARAMS_st), - "::", - stringify!(sharedMemBytes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).hStream as *const _ as usize }, - 40usize, - concat!( - "Offset of field: ", - stringify!(CUDA_LAUNCH_PARAMS_st), - "::", - stringify!(hStream) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).kernelParams as *const _ as usize - }, - 48usize, - concat!( - "Offset of field: ", - stringify!(CUDA_LAUNCH_PARAMS_st), - "::", - stringify!(kernelParams) - ) - ); -} -impl Default for CUDA_LAUNCH_PARAMS_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_LAUNCH_PARAMS_v1 = CUDA_LAUNCH_PARAMS_st; -pub type CUDA_LAUNCH_PARAMS = CUDA_LAUNCH_PARAMS_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUexternalMemoryHandleType_enum { - CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1, - CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2, - CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3, - CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4, - CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5, - CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6, - CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7, - CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8, -} -pub use self::CUexternalMemoryHandleType_enum as CUexternalMemoryHandleType; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st { - pub type_: CUexternalMemoryHandleType, - pub handle: CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1, - pub size: ::std::os::raw::c_ulonglong, - pub flags: ::std::os::raw::c_uint, - pub reserved: [::std::os::raw::c_uint; 16usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub 
union CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1 { - pub fd: ::std::os::raw::c_int, - pub win32: CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1, - pub nvSciBufObject: *const ::std::os::raw::c_void, -} -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 { - pub handle: *mut ::std::os::raw::c_void, - pub name: *const ::std::os::raw::c_void, -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 16usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::( - ))) - .handle as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(handle) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::( - ))) - .name as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(name) - ) - ); -} -impl Default for CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 16usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - 
&(*(::std::ptr::null::())).fd - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1), - "::", - stringify!(fd) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).win32 - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1), - "::", - stringify!(win32) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .nvSciBufObject as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1), - "::", - stringify!(nvSciBufObject) - ) - ); -} -impl Default for CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st() { - assert_eq!( - ::std::mem::size_of::(), - 104usize, - concat!("Size of: ", stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).type_ as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st), - "::", - stringify!(type_) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).handle as *const _ - as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st), - "::", - stringify!(handle) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).size as *const _ - as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st), - "::", - stringify!(size) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).flags as *const _ - as usize - }, - 32usize, - concat!( - "Offset of field: ", - 
stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st), - "::", - stringify!(flags) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved as *const _ - as usize - }, - 36usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1 = CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st; -pub type CUDA_EXTERNAL_MEMORY_HANDLE_DESC = CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1; -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st { - pub offset: ::std::os::raw::c_ulonglong, - pub size: ::std::os::raw::c_ulonglong, - pub flags: ::std::os::raw::c_uint, - pub reserved: [::std::os::raw::c_uint; 16usize], -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st() { - assert_eq!( - ::std::mem::size_of::(), - 88usize, - concat!("Size of: ", stringify!(CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).offset as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st), - "::", - stringify!(offset) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).size as *const _ - as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st), - "::", - stringify!(size) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).flags as *const _ - as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st), - "::", - stringify!(flags) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved as *const _ 
- as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st), - "::", - stringify!(reserved) - ) - ); -} -pub type CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1 = CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st; -pub type CUDA_EXTERNAL_MEMORY_BUFFER_DESC = CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st { - pub offset: ::std::os::raw::c_ulonglong, - pub arrayDesc: CUDA_ARRAY3D_DESCRIPTOR, - pub numLevels: ::std::os::raw::c_uint, - pub reserved: [::std::os::raw::c_uint; 16usize], -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st() { - assert_eq!( - ::std::mem::size_of::(), - 120usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).offset - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st), - "::", - stringify!(offset) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).arrayDesc - as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st), - "::", - stringify!(arrayDesc) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).numLevels - as *const _ as usize - }, - 48usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st), - "::", - stringify!(numLevels) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved - as *const _ as usize - }, - 52usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st), - "::", - stringify!(reserved) - ) - ); -} -impl Default for 
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1 = - CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st; -pub type CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC = CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUexternalSemaphoreHandleType_enum { - CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = 1, - CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = 2, - CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3, - CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = 4, - CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = 5, - CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = 6, - CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = 7, - CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = 8, - CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = 9, - CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10, -} -pub use self::CUexternalSemaphoreHandleType_enum as CUexternalSemaphoreHandleType; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st { - pub type_: CUexternalSemaphoreHandleType, - pub handle: CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1, - pub flags: ::std::os::raw::c_uint, - pub reserved: [::std::os::raw::c_uint; 16usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1 { - pub fd: ::std::os::raw::c_int, - pub win32: CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1, - pub nvSciSyncObj: *const ::std::os::raw::c_void, -} -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 { - pub handle: *mut ::std::os::raw::c_void, - pub name: *const ::std::os::raw::c_void, -} -#[test] -fn 
bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 16usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1) - ) - ); - assert_eq!( - ::std::mem::align_of::( - ), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1, - >())) - .handle as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(handle) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1, - >())) - .name as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(name) - ) - ); -} -impl Default for CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 16usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).fd - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1), - "::", - stringify!(fd) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).win32 - as *const _ as usize - }, - 0usize, - concat!( - "Offset of 
field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1), - "::", - stringify!(win32) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .nvSciSyncObj as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1), - "::", - stringify!(nvSciSyncObj) - ) - ); -} -impl Default for CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st() { - assert_eq!( - ::std::mem::size_of::(), - 96usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).type_ as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st), - "::", - stringify!(type_) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).handle as *const _ - as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st), - "::", - stringify!(handle) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).flags as *const _ - as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st), - "::", - stringify!(flags) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved as *const _ - as usize - }, - 28usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1 = CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st; -pub type 
CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC = CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st { - pub params: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1, - pub flags: ::std::os::raw::c_uint, - pub reserved: [::std::os::raw::c_uint; 16usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1 { - pub fence: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1, - pub nvSciSync: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2, - pub keyedMutex: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3, - pub reserved: [::std::os::raw::c_uint; 12usize], -} -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1 { - pub value: ::std::os::raw::c_ulonglong, -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::( - ), - 8usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1) - ) - ); - assert_eq!( - ::std::mem::align_of::( - ), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1, - >())) - .value as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(value) - ) - ); -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2 { - pub fence: *mut ::std::os::raw::c_void, - pub reserved: ::std::os::raw::c_ulonglong, -} -#[test] -fn 
bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2() { - assert_eq!( - ::std::mem::size_of::( - ), - 8usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2) - ) - ); - assert_eq!( - ::std::mem::align_of::( - ), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2, - >())) - .fence as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(fence) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2, - >())) - .reserved as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3 { - pub key: ::std::os::raw::c_ulonglong, -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3() { - assert_eq!( - ::std::mem::size_of::( - ), - 8usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3) - ) - ); - assert_eq!( - ::std::mem::align_of::( - ), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - 
CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3, - >())) - .key as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(key) - ) - ); -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 72usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).fence - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1), - "::", - stringify!(fence) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .nvSciSync as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1), - "::", - stringify!(nvSciSync) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .keyedMutex as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1), - "::", - stringify!(keyedMutex) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .reserved as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st() { - assert_eq!( - ::std::mem::size_of::(), - 144usize, - concat!( - "Size of: ", - 
stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).params as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st), - "::", - stringify!(params) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).flags as *const _ - as usize - }, - 72usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st), - "::", - stringify!(flags) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved - as *const _ as usize - }, - 76usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1 = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st; -pub type CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st { - pub params: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1, - pub flags: ::std::os::raw::c_uint, - pub reserved: [::std::os::raw::c_uint; 16usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1 { - pub fence: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1, - pub nvSciSync: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2, - pub keyedMutex: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3, - pub reserved: [::std::os::raw::c_uint; 10usize], -} -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct 
CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1 { - pub value: ::std::os::raw::c_ulonglong, -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1) - ) - ); - assert_eq!( - ::std::mem::align_of::( - ), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1, - >())) - .value as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(value) - ) - ); -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2 { - pub fence: *mut ::std::os::raw::c_void, - pub reserved: ::std::os::raw::c_ulonglong, -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2) - ) - ); - assert_eq!( - ::std::mem::align_of::( - ), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2, - >())) - .fence as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(fence) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2, - >())) - 
.reserved as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3 { - pub key: ::std::os::raw::c_ulonglong, - pub timeoutMs: ::std::os::raw::c_uint, -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3() { - assert_eq!( - ::std::mem::size_of::(), - 16usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3) - ) - ); - assert_eq!( - ::std::mem::align_of::( - ), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3, - >())) - .key as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(key) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::< - CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3, - >())) - .timeoutMs as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(timeoutMs) - ) - ); -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 72usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1) - ) - ); - assert_eq!( - 
::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).fence - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1), - "::", - stringify!(fence) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .nvSciSync as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1), - "::", - stringify!(nvSciSync) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .keyedMutex as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1), - "::", - stringify!(keyedMutex) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())) - .reserved as *const _ as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st() { - assert_eq!( - ::std::mem::size_of::(), - 144usize, - concat!( - "Size of: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).params as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st), - "::", - stringify!(params) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).flags as *const _ - as usize - }, - 72usize, - concat!( - "Offset of field: ", - 
stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st), - "::", - stringify!(flags) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved as *const _ - as usize - }, - 76usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1 = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st; -pub type CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st { - pub extSemArray: *mut CUexternalSemaphore, - pub paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS, - pub numExtSems: ::std::os::raw::c_uint, -} -#[test] -fn bindgen_test_layout_CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st() { - assert_eq!( - ::std::mem::size_of::(), - 24usize, - concat!("Size of: ", stringify!(CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).extSemArray as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st), - "::", - stringify!(extSemArray) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).paramsArray as *const _ - as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st), - "::", - stringify!(paramsArray) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).numExtSems as *const _ - as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st), - "::", - stringify!(numExtSems) - ) - ); -} -impl Default for CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st 
{ - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1 = CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st; -pub type CUDA_EXT_SEM_SIGNAL_NODE_PARAMS = CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_st { - pub extSemArray: *mut CUexternalSemaphore, - pub paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS, - pub numExtSems: ::std::os::raw::c_uint, -} -#[test] -fn bindgen_test_layout_CUDA_EXT_SEM_WAIT_NODE_PARAMS_st() { - assert_eq!( - ::std::mem::size_of::(), - 24usize, - concat!("Size of: ", stringify!(CUDA_EXT_SEM_WAIT_NODE_PARAMS_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUDA_EXT_SEM_WAIT_NODE_PARAMS_st) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).extSemArray as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXT_SEM_WAIT_NODE_PARAMS_st), - "::", - stringify!(extSemArray) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).paramsArray as *const _ - as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXT_SEM_WAIT_NODE_PARAMS_st), - "::", - stringify!(paramsArray) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).numExtSems as *const _ - as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUDA_EXT_SEM_WAIT_NODE_PARAMS_st), - "::", - stringify!(numExtSems) - ) - ); -} -impl Default for CUDA_EXT_SEM_WAIT_NODE_PARAMS_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1 = CUDA_EXT_SEM_WAIT_NODE_PARAMS_st; -pub type CUDA_EXT_SEM_WAIT_NODE_PARAMS = CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1; -pub type CUmemGenericAllocationHandle_v1 = ::std::os::raw::c_ulonglong; -pub type CUmemGenericAllocationHandle = CUmemGenericAllocationHandle_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, 
Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmemAllocationHandleType_enum { - CU_MEM_HANDLE_TYPE_NONE = 0, - CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 1, - CU_MEM_HANDLE_TYPE_WIN32 = 2, - CU_MEM_HANDLE_TYPE_WIN32_KMT = 4, - CU_MEM_HANDLE_TYPE_MAX = 2147483647, -} -pub use self::CUmemAllocationHandleType_enum as CUmemAllocationHandleType; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmemAccess_flags_enum { - CU_MEM_ACCESS_FLAGS_PROT_NONE = 0, - CU_MEM_ACCESS_FLAGS_PROT_READ = 1, - CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 3, - CU_MEM_ACCESS_FLAGS_PROT_MAX = 2147483647, -} -pub use self::CUmemAccess_flags_enum as CUmemAccess_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmemLocationType_enum { - CU_MEM_LOCATION_TYPE_INVALID = 0, - CU_MEM_LOCATION_TYPE_DEVICE = 1, - CU_MEM_LOCATION_TYPE_MAX = 2147483647, -} -pub use self::CUmemLocationType_enum as CUmemLocationType; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmemAllocationType_enum { - CU_MEM_ALLOCATION_TYPE_INVALID = 0, - CU_MEM_ALLOCATION_TYPE_PINNED = 1, - CU_MEM_ALLOCATION_TYPE_MAX = 2147483647, -} -pub use self::CUmemAllocationType_enum as CUmemAllocationType; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmemAllocationGranularity_flags_enum { - CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0, - CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 1, -} -pub use self::CUmemAllocationGranularity_flags_enum as CUmemAllocationGranularity_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUarraySparseSubresourceType_enum { - CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0, - CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL = 1, -} -pub use self::CUarraySparseSubresourceType_enum as CUarraySparseSubresourceType; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub 
enum CUmemOperationType_enum { - CU_MEM_OPERATION_TYPE_MAP = 1, - CU_MEM_OPERATION_TYPE_UNMAP = 2, -} -pub use self::CUmemOperationType_enum as CUmemOperationType; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmemHandleType_enum { - CU_MEM_HANDLE_TYPE_GENERIC = 0, -} -pub use self::CUmemHandleType_enum as CUmemHandleType; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUarrayMapInfo_st { - pub resourceType: CUresourcetype, - pub resource: CUarrayMapInfo_st__bindgen_ty_1, - pub subresourceType: CUarraySparseSubresourceType, - pub subresource: CUarrayMapInfo_st__bindgen_ty_2, - pub memOperationType: CUmemOperationType, - pub memHandleType: CUmemHandleType, - pub memHandle: CUarrayMapInfo_st__bindgen_ty_3, - pub offset: ::std::os::raw::c_ulonglong, - pub deviceBitMask: ::std::os::raw::c_uint, - pub flags: ::std::os::raw::c_uint, - pub reserved: [::std::os::raw::c_uint; 2usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union CUarrayMapInfo_st__bindgen_ty_1 { - pub mipmap: CUmipmappedArray, - pub array: CUarray, -} -#[test] -fn bindgen_test_layout_CUarrayMapInfo_st__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!("Size of: ", stringify!(CUarrayMapInfo_st__bindgen_ty_1)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUarrayMapInfo_st__bindgen_ty_1)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).mipmap as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_1), - "::", - stringify!(mipmap) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).array as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_1), - "::", - stringify!(array) - ) - ); -} -impl Default for CUarrayMapInfo_st__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union 
CUarrayMapInfo_st__bindgen_ty_2 { - pub sparseLevel: CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1, - pub miptail: CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_2, -} -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1 { - pub level: ::std::os::raw::c_uint, - pub layer: ::std::os::raw::c_uint, - pub offsetX: ::std::os::raw::c_uint, - pub offsetY: ::std::os::raw::c_uint, - pub offsetZ: ::std::os::raw::c_uint, - pub extentWidth: ::std::os::raw::c_uint, - pub extentHeight: ::std::os::raw::c_uint, - pub extentDepth: ::std::os::raw::c_uint, -} -#[test] -fn bindgen_test_layout_CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 32usize, - concat!( - "Size of: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!( - "Alignment of ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).level - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1), - "::", - stringify!(level) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).layer - as *const _ as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1), - "::", - stringify!(layer) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).offsetX - as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1), - "::", - stringify!(offsetX) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).offsetY - as *const _ as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1), - "::", - stringify!(offsetY) - ) - ); - assert_eq!( - unsafe { - 
&(*(::std::ptr::null::())).offsetZ - as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1), - "::", - stringify!(offsetZ) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).extentWidth - as *const _ as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1), - "::", - stringify!(extentWidth) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).extentHeight - as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1), - "::", - stringify!(extentHeight) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).extentDepth - as *const _ as usize - }, - 28usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1), - "::", - stringify!(extentDepth) - ) - ); -} -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_2 { - pub layer: ::std::os::raw::c_uint, - pub offset: ::std::os::raw::c_ulonglong, - pub size: ::std::os::raw::c_ulonglong, -} -#[test] -fn bindgen_test_layout_CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_2() { - assert_eq!( - ::std::mem::size_of::(), - 24usize, - concat!( - "Size of: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_2) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_2) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).layer - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_2), - "::", - stringify!(layer) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).offset - as *const _ as usize - }, - 8usize, - concat!( - "Offset of field: ", - 
stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_2), - "::", - stringify!(offset) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).size - as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_2), - "::", - stringify!(size) - ) - ); -} -#[test] -fn bindgen_test_layout_CUarrayMapInfo_st__bindgen_ty_2() { - assert_eq!( - ::std::mem::size_of::(), - 32usize, - concat!("Size of: ", stringify!(CUarrayMapInfo_st__bindgen_ty_2)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUarrayMapInfo_st__bindgen_ty_2)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).sparseLevel as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2), - "::", - stringify!(sparseLevel) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).miptail as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_2), - "::", - stringify!(miptail) - ) - ); -} -impl Default for CUarrayMapInfo_st__bindgen_ty_2 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union CUarrayMapInfo_st__bindgen_ty_3 { - pub memHandle: CUmemGenericAllocationHandle, -} -#[test] -fn bindgen_test_layout_CUarrayMapInfo_st__bindgen_ty_3() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!("Size of: ", stringify!(CUarrayMapInfo_st__bindgen_ty_3)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUarrayMapInfo_st__bindgen_ty_3)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).memHandle as *const _ - as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st__bindgen_ty_3), - "::", - stringify!(memHandle) - ) - ); -} -impl Default for CUarrayMapInfo_st__bindgen_ty_3 { - fn default() -> Self { - unsafe { 
::std::mem::zeroed() } - } -} -#[test] -fn bindgen_test_layout_CUarrayMapInfo_st() { - assert_eq!( - ::std::mem::size_of::(), - 96usize, - concat!("Size of: ", stringify!(CUarrayMapInfo_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUarrayMapInfo_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).resourceType as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st), - "::", - stringify!(resourceType) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).resource as *const _ as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st), - "::", - stringify!(resource) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).subresourceType as *const _ as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st), - "::", - stringify!(subresourceType) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).subresource as *const _ as usize }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st), - "::", - stringify!(subresource) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).memOperationType as *const _ as usize - }, - 56usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st), - "::", - stringify!(memOperationType) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).memHandleType as *const _ as usize }, - 60usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st), - "::", - stringify!(memHandleType) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).memHandle as *const _ as usize }, - 64usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st), - "::", - stringify!(memHandle) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).offset as *const _ as usize }, - 72usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st), - "::", - stringify!(offset) - ) - ); - 
assert_eq!( - unsafe { &(*(::std::ptr::null::())).deviceBitMask as *const _ as usize }, - 80usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st), - "::", - stringify!(deviceBitMask) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).flags as *const _ as usize }, - 84usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st), - "::", - stringify!(flags) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).reserved as *const _ as usize }, - 88usize, - concat!( - "Offset of field: ", - stringify!(CUarrayMapInfo_st), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUarrayMapInfo_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUarrayMapInfo_v1 = CUarrayMapInfo_st; -pub type CUarrayMapInfo = CUarrayMapInfo_v1; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUmemLocation_st { - pub type_: CUmemLocationType, - pub id: ::std::os::raw::c_int, -} -#[test] -fn bindgen_test_layout_CUmemLocation_st() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!("Size of: ", stringify!(CUmemLocation_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!("Alignment of ", stringify!(CUmemLocation_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).type_ as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUmemLocation_st), - "::", - stringify!(type_) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).id as *const _ as usize }, - 4usize, - concat!( - "Offset of field: ", - stringify!(CUmemLocation_st), - "::", - stringify!(id) - ) - ); -} -impl Default for CUmemLocation_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUmemLocation_v1 = CUmemLocation_st; -pub type CUmemLocation = CUmemLocation_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmemAllocationCompType_enum { - 
CU_MEM_ALLOCATION_COMP_NONE = 0, - CU_MEM_ALLOCATION_COMP_GENERIC = 1, -} -pub use self::CUmemAllocationCompType_enum as CUmemAllocationCompType; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUmemAllocationProp_st { - pub type_: CUmemAllocationType, - pub requestedHandleTypes: CUmemAllocationHandleType, - pub location: CUmemLocation, - pub win32HandleMetaData: *mut ::std::os::raw::c_void, - pub allocFlags: CUmemAllocationProp_st__bindgen_ty_1, -} -#[repr(C)] -#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUmemAllocationProp_st__bindgen_ty_1 { - pub compressionType: ::std::os::raw::c_uchar, - pub gpuDirectRDMACapable: ::std::os::raw::c_uchar, - pub usage: ::std::os::raw::c_ushort, - pub reserved: [::std::os::raw::c_uchar; 4usize], -} -#[test] -fn bindgen_test_layout_CUmemAllocationProp_st__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!( - "Size of: ", - stringify!(CUmemAllocationProp_st__bindgen_ty_1) - ) - ); - assert_eq!( - ::std::mem::align_of::(), - 2usize, - concat!( - "Alignment of ", - stringify!(CUmemAllocationProp_st__bindgen_ty_1) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).compressionType - as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUmemAllocationProp_st__bindgen_ty_1), - "::", - stringify!(compressionType) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).gpuDirectRDMACapable - as *const _ as usize - }, - 1usize, - concat!( - "Offset of field: ", - stringify!(CUmemAllocationProp_st__bindgen_ty_1), - "::", - stringify!(gpuDirectRDMACapable) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).usage as *const _ - as usize - }, - 2usize, - concat!( - "Offset of field: ", - stringify!(CUmemAllocationProp_st__bindgen_ty_1), - "::", - stringify!(usage) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved as *const _ - as usize - }, - 
4usize, - concat!( - "Offset of field: ", - stringify!(CUmemAllocationProp_st__bindgen_ty_1), - "::", - stringify!(reserved) - ) - ); -} -#[test] -fn bindgen_test_layout_CUmemAllocationProp_st() { - assert_eq!( - ::std::mem::size_of::(), - 32usize, - concat!("Size of: ", stringify!(CUmemAllocationProp_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUmemAllocationProp_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).type_ as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUmemAllocationProp_st), - "::", - stringify!(type_) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).requestedHandleTypes as *const _ - as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(CUmemAllocationProp_st), - "::", - stringify!(requestedHandleTypes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).location as *const _ as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUmemAllocationProp_st), - "::", - stringify!(location) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).win32HandleMetaData as *const _ - as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUmemAllocationProp_st), - "::", - stringify!(win32HandleMetaData) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).allocFlags as *const _ as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUmemAllocationProp_st), - "::", - stringify!(allocFlags) - ) - ); -} -impl Default for CUmemAllocationProp_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUmemAllocationProp_v1 = CUmemAllocationProp_st; -pub type CUmemAllocationProp = CUmemAllocationProp_v1; -#[repr(C)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub struct CUmemAccessDesc_st { - pub location: CUmemLocation, - pub flags: CUmemAccess_flags, -} -#[test] -fn bindgen_test_layout_CUmemAccessDesc_st() { - 
assert_eq!( - ::std::mem::size_of::(), - 12usize, - concat!("Size of: ", stringify!(CUmemAccessDesc_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!("Alignment of ", stringify!(CUmemAccessDesc_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).location as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUmemAccessDesc_st), - "::", - stringify!(location) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).flags as *const _ as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUmemAccessDesc_st), - "::", - stringify!(flags) - ) - ); -} -impl Default for CUmemAccessDesc_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUmemAccessDesc_v1 = CUmemAccessDesc_st; -pub type CUmemAccessDesc = CUmemAccessDesc_v1; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUgraphExecUpdateResult_enum { - CU_GRAPH_EXEC_UPDATE_SUCCESS = 0, - CU_GRAPH_EXEC_UPDATE_ERROR = 1, - CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED = 2, - CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED = 3, - CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED = 4, - CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED = 5, - CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED = 6, - CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE = 7, -} -pub use self::CUgraphExecUpdateResult_enum as CUgraphExecUpdateResult; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUmemPool_attribute_enum { - CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = 1, - CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC = 2, - CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES = 3, - CU_MEMPOOL_ATTR_RELEASE_THRESHOLD = 4, - CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT = 5, - CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH = 6, - CU_MEMPOOL_ATTR_USED_MEM_CURRENT = 7, - CU_MEMPOOL_ATTR_USED_MEM_HIGH = 8, -} -pub use self::CUmemPool_attribute_enum as CUmemPool_attribute; -#[repr(C)] -#[derive(Copy, 
Clone)] -pub struct CUmemPoolProps_st { - pub allocType: CUmemAllocationType, - pub handleTypes: CUmemAllocationHandleType, - pub location: CUmemLocation, - pub win32SecurityAttributes: *mut ::std::os::raw::c_void, - pub reserved: [::std::os::raw::c_uchar; 64usize], -} -#[test] -fn bindgen_test_layout_CUmemPoolProps_st() { - assert_eq!( - ::std::mem::size_of::(), - 88usize, - concat!("Size of: ", stringify!(CUmemPoolProps_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUmemPoolProps_st)) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).allocType as *const _ as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUmemPoolProps_st), - "::", - stringify!(allocType) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).handleTypes as *const _ as usize }, - 4usize, - concat!( - "Offset of field: ", - stringify!(CUmemPoolProps_st), - "::", - stringify!(handleTypes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).location as *const _ as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(CUmemPoolProps_st), - "::", - stringify!(location) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).win32SecurityAttributes as *const _ - as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(CUmemPoolProps_st), - "::", - stringify!(win32SecurityAttributes) - ) - ); - assert_eq!( - unsafe { &(*(::std::ptr::null::())).reserved as *const _ as usize }, - 24usize, - concat!( - "Offset of field: ", - stringify!(CUmemPoolProps_st), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUmemPoolProps_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUmemPoolProps_v1 = CUmemPoolProps_st; -pub type CUmemPoolProps = CUmemPoolProps_v1; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUmemPoolPtrExportData_st { - pub reserved: [::std::os::raw::c_uchar; 64usize], -} -#[test] -fn 
bindgen_test_layout_CUmemPoolPtrExportData_st() { - assert_eq!( - ::std::mem::size_of::(), - 64usize, - concat!("Size of: ", stringify!(CUmemPoolPtrExportData_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 1usize, - concat!("Alignment of ", stringify!(CUmemPoolPtrExportData_st)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).reserved as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUmemPoolPtrExportData_st), - "::", - stringify!(reserved) - ) - ); -} -impl Default for CUmemPoolPtrExportData_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUmemPoolPtrExportData_v1 = CUmemPoolPtrExportData_st; -pub type CUmemPoolPtrExportData = CUmemPoolPtrExportData_v1; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct CUDA_MEM_ALLOC_NODE_PARAMS_st { - pub poolProps: CUmemPoolProps, - pub accessDescs: *const CUmemAccessDesc, - pub accessDescCount: usize, - pub bytesize: usize, - pub dptr: CUdeviceptr, -} -#[test] -fn bindgen_test_layout_CUDA_MEM_ALLOC_NODE_PARAMS_st() { - assert_eq!( - ::std::mem::size_of::(), - 120usize, - concat!("Size of: ", stringify!(CUDA_MEM_ALLOC_NODE_PARAMS_st)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(CUDA_MEM_ALLOC_NODE_PARAMS_st)) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).poolProps as *const _ as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEM_ALLOC_NODE_PARAMS_st), - "::", - stringify!(poolProps) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).accessDescs as *const _ - as usize - }, - 88usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEM_ALLOC_NODE_PARAMS_st), - "::", - stringify!(accessDescs) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).accessDescCount as *const _ - as usize - }, - 96usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEM_ALLOC_NODE_PARAMS_st), - "::", - stringify!(accessDescCount) - ) - ); - assert_eq!( 
- unsafe { - &(*(::std::ptr::null::())).bytesize as *const _ as usize - }, - 104usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEM_ALLOC_NODE_PARAMS_st), - "::", - stringify!(bytesize) - ) - ); - assert_eq!( - unsafe { - &(*(::std::ptr::null::())).dptr as *const _ as usize - }, - 112usize, - concat!( - "Offset of field: ", - stringify!(CUDA_MEM_ALLOC_NODE_PARAMS_st), - "::", - stringify!(dptr) - ) - ); -} -impl Default for CUDA_MEM_ALLOC_NODE_PARAMS_st { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type CUDA_MEM_ALLOC_NODE_PARAMS = CUDA_MEM_ALLOC_NODE_PARAMS_st; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUgraphMem_attribute_enum { - CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT = 0, - CU_GRAPH_MEM_ATTR_USED_MEM_HIGH = 1, - CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT = 2, - CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH = 3, -} -pub use self::CUgraphMem_attribute_enum as CUgraphMem_attribute; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUflushGPUDirectRDMAWritesOptions_enum { - CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST = 1, - CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS = 2, -} -pub use self::CUflushGPUDirectRDMAWritesOptions_enum as CUflushGPUDirectRDMAWritesOptions; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUGPUDirectRDMAWritesOrdering_enum { - CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE = 0, - CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER = 100, - CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES = 200, -} -pub use self::CUGPUDirectRDMAWritesOrdering_enum as CUGPUDirectRDMAWritesOrdering; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUflushGPUDirectRDMAWritesScope_enum { - CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER = 100, - CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES = 200, -} -pub use self::CUflushGPUDirectRDMAWritesScope_enum as CUflushGPUDirectRDMAWritesScope; 
-#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUflushGPUDirectRDMAWritesTarget_enum { - CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX = 0, -} -pub use self::CUflushGPUDirectRDMAWritesTarget_enum as CUflushGPUDirectRDMAWritesTarget; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUgraphDebugDot_flags_enum { - CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE = 1, - CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES = 2, - CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS = 4, - CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS = 8, - CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS = 16, - CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS = 32, - CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS = 64, - CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS = 128, - CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS = 256, - CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES = 512, - CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES = 1024, - CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS = 2048, - CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS = 4096, -} -pub use self::CUgraphDebugDot_flags_enum as CUgraphDebugDot_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUuserObject_flags_enum { - CU_USER_OBJECT_NO_DESTRUCTOR_SYNC = 1, -} -pub use self::CUuserObject_flags_enum as CUuserObject_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUuserObjectRetain_flags_enum { - CU_GRAPH_USER_OBJECT_MOVE = 1, -} -pub use self::CUuserObjectRetain_flags_enum as CUuserObjectRetain_flags; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUgraphInstantiate_flags_enum { - CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH = 1, -} -pub use self::CUgraphInstantiate_flags_enum as CUgraphInstantiate_flags; -extern "C" { - pub fn cuGetErrorString(error: CUresult, pStr: *mut *const ::std::os::raw::c_char) -> CUresult; -} -extern "C" 
{ - pub fn cuGetErrorName(error: CUresult, pStr: *mut *const ::std::os::raw::c_char) -> CUresult; -} -extern "C" { - pub fn cuInit(Flags: ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuDriverGetVersion(driverVersion: *mut ::std::os::raw::c_int) -> CUresult; -} -extern "C" { - pub fn cuDeviceGet(device: *mut CUdevice, ordinal: ::std::os::raw::c_int) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetCount(count: *mut ::std::os::raw::c_int) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetName( - name: *mut ::std::os::raw::c_char, - len: ::std::os::raw::c_int, - dev: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: CUdevice) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetUuid_v2(uuid: *mut CUuuid, dev: CUdevice) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetLuid( - luid: *mut ::std::os::raw::c_char, - deviceNodeMask: *mut ::std::os::raw::c_uint, - dev: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetTexture1DLinearMaxWidth( - maxWidthInElements: *mut usize, - format: CUarray_format, - numChannels: ::std::os::raw::c_uint, - dev: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetAttribute( - pi: *mut ::std::os::raw::c_int, - attrib: CUdevice_attribute, - dev: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetNvSciSyncAttributes( - nvSciSyncAttrList: *mut ::std::os::raw::c_void, - dev: CUdevice, - flags: ::std::os::raw::c_int, - ) -> CUresult; -} -extern "C" { - pub fn cuDeviceSetMemPool(dev: CUdevice, pool: CUmemoryPool) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetMemPool(pool: *mut CUmemoryPool, dev: CUdevice) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetDefaultMemPool(pool_out: *mut CUmemoryPool, dev: CUdevice) -> CUresult; -} -extern "C" { - pub fn cuFlushGPUDirectRDMAWrites( - target: CUflushGPUDirectRDMAWritesTarget, - scope: 
CUflushGPUDirectRDMAWritesScope, - ) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetProperties(prop: *mut CUdevprop, dev: CUdevice) -> CUresult; -} -extern "C" { - pub fn cuDeviceComputeCapability( - major: *mut ::std::os::raw::c_int, - minor: *mut ::std::os::raw::c_int, - dev: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuDevicePrimaryCtxRetain(pctx: *mut CUcontext, dev: CUdevice) -> CUresult; -} -extern "C" { - pub fn cuDevicePrimaryCtxRelease_v2(dev: CUdevice) -> CUresult; -} -extern "C" { - pub fn cuDevicePrimaryCtxSetFlags_v2(dev: CUdevice, flags: ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuDevicePrimaryCtxGetState( - dev: CUdevice, - flags: *mut ::std::os::raw::c_uint, - active: *mut ::std::os::raw::c_int, - ) -> CUresult; -} -extern "C" { - pub fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetExecAffinitySupport( - pi: *mut ::std::os::raw::c_int, - type_: CUexecAffinityType, - dev: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuCtxCreate_v2( - pctx: *mut CUcontext, - flags: ::std::os::raw::c_uint, - dev: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuCtxCreate_v3( - pctx: *mut CUcontext, - paramsArray: *mut CUexecAffinityParam, - numParams: ::std::os::raw::c_int, - flags: ::std::os::raw::c_uint, - dev: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult; -} -extern "C" { - pub fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult; -} -extern "C" { - pub fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult; -} -extern "C" { - pub fn cuCtxSetCurrent(ctx: CUcontext) -> CUresult; -} -extern "C" { - pub fn cuCtxGetCurrent(pctx: *mut CUcontext) -> CUresult; -} -extern "C" { - pub fn cuCtxGetDevice(device: *mut CUdevice) -> CUresult; -} -extern "C" { - pub fn cuCtxGetFlags(flags: *mut ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuCtxSynchronize() -> CUresult; -} -extern "C" { - pub fn cuCtxSetLimit(limit: 
CUlimit, value: usize) -> CUresult; -} -extern "C" { - pub fn cuCtxGetLimit(pvalue: *mut usize, limit: CUlimit) -> CUresult; -} -extern "C" { - pub fn cuCtxGetCacheConfig(pconfig: *mut CUfunc_cache) -> CUresult; -} -extern "C" { - pub fn cuCtxSetCacheConfig(config: CUfunc_cache) -> CUresult; -} -extern "C" { - pub fn cuCtxGetSharedMemConfig(pConfig: *mut CUsharedconfig) -> CUresult; -} -extern "C" { - pub fn cuCtxSetSharedMemConfig(config: CUsharedconfig) -> CUresult; -} -extern "C" { - pub fn cuCtxGetApiVersion(ctx: CUcontext, version: *mut ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuCtxGetStreamPriorityRange( - leastPriority: *mut ::std::os::raw::c_int, - greatestPriority: *mut ::std::os::raw::c_int, - ) -> CUresult; -} -extern "C" { - pub fn cuCtxResetPersistingL2Cache() -> CUresult; -} -extern "C" { - pub fn cuCtxGetExecAffinity( - pExecAffinity: *mut CUexecAffinityParam, - type_: CUexecAffinityType, - ) -> CUresult; -} -extern "C" { - pub fn cuCtxAttach(pctx: *mut CUcontext, flags: ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuCtxDetach(ctx: CUcontext) -> CUresult; -} -extern "C" { - pub fn cuModuleLoad(module: *mut CUmodule, fname: *const ::std::os::raw::c_char) -> CUresult; -} -extern "C" { - pub fn cuModuleLoadData( - module: *mut CUmodule, - image: *const ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuModuleLoadDataEx( - module: *mut CUmodule, - image: *const ::std::os::raw::c_void, - numOptions: ::std::os::raw::c_uint, - options: *mut CUjit_option, - optionValues: *mut *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuModuleLoadFatBinary( - module: *mut CUmodule, - fatCubin: *const ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuModuleUnload(hmod: CUmodule) -> CUresult; -} -extern "C" { - pub fn cuModuleGetFunction( - hfunc: *mut CUfunction, - hmod: CUmodule, - name: *const ::std::os::raw::c_char, - ) -> CUresult; -} -extern "C" { - pub fn 
cuModuleGetGlobal_v2( - dptr: *mut CUdeviceptr, - bytes: *mut usize, - hmod: CUmodule, - name: *const ::std::os::raw::c_char, - ) -> CUresult; -} -extern "C" { - pub fn cuModuleGetTexRef( - pTexRef: *mut CUtexref, - hmod: CUmodule, - name: *const ::std::os::raw::c_char, - ) -> CUresult; -} -extern "C" { - pub fn cuModuleGetSurfRef( - pSurfRef: *mut CUsurfref, - hmod: CUmodule, - name: *const ::std::os::raw::c_char, - ) -> CUresult; -} -extern "C" { - pub fn cuLinkCreate_v2( - numOptions: ::std::os::raw::c_uint, - options: *mut CUjit_option, - optionValues: *mut *mut ::std::os::raw::c_void, - stateOut: *mut CUlinkState, - ) -> CUresult; -} -extern "C" { - pub fn cuLinkAddData_v2( - state: CUlinkState, - type_: CUjitInputType, - data: *mut ::std::os::raw::c_void, - size: usize, - name: *const ::std::os::raw::c_char, - numOptions: ::std::os::raw::c_uint, - options: *mut CUjit_option, - optionValues: *mut *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuLinkAddFile_v2( - state: CUlinkState, - type_: CUjitInputType, - path: *const ::std::os::raw::c_char, - numOptions: ::std::os::raw::c_uint, - options: *mut CUjit_option, - optionValues: *mut *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuLinkComplete( - state: CUlinkState, - cubinOut: *mut *mut ::std::os::raw::c_void, - sizeOut: *mut usize, - ) -> CUresult; -} -extern "C" { - pub fn cuLinkDestroy(state: CUlinkState) -> CUresult; -} -extern "C" { - pub fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult; -} -extern "C" { - pub fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult; -} -extern "C" { - pub fn cuMemAllocPitch_v2( - dptr: *mut CUdeviceptr, - pPitch: *mut usize, - WidthInBytes: usize, - Height: usize, - ElementSizeBytes: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult; -} -extern "C" { - pub fn cuMemGetAddressRange_v2( - pbase: *mut CUdeviceptr, - psize: *mut usize, 
- dptr: CUdeviceptr, - ) -> CUresult; -} -extern "C" { - pub fn cuMemAllocHost_v2(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult; -} -extern "C" { - pub fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult; -} -extern "C" { - pub fn cuMemHostAlloc( - pp: *mut *mut ::std::os::raw::c_void, - bytesize: usize, - Flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuMemHostGetDevicePointer_v2( - pdptr: *mut CUdeviceptr, - p: *mut ::std::os::raw::c_void, - Flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuMemHostGetFlags( - pFlags: *mut ::std::os::raw::c_uint, - p: *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuMemAllocManaged( - dptr: *mut CUdeviceptr, - bytesize: usize, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetByPCIBusId( - dev: *mut CUdevice, - pciBusId: *const ::std::os::raw::c_char, - ) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetPCIBusId( - pciBusId: *mut ::std::os::raw::c_char, - len: ::std::os::raw::c_int, - dev: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuIpcGetEventHandle(pHandle: *mut CUipcEventHandle, event: CUevent) -> CUresult; -} -extern "C" { - pub fn cuIpcOpenEventHandle(phEvent: *mut CUevent, handle: CUipcEventHandle) -> CUresult; -} -extern "C" { - pub fn cuIpcGetMemHandle(pHandle: *mut CUipcMemHandle, dptr: CUdeviceptr) -> CUresult; -} -extern "C" { - pub fn cuIpcOpenMemHandle_v2( - pdptr: *mut CUdeviceptr, - handle: CUipcMemHandle, - Flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult; -} -extern "C" { - pub fn cuMemHostRegister_v2( - p: *mut ::std::os::raw::c_void, - bytesize: usize, - Flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuMemHostUnregister(p: *mut ::std::os::raw::c_void) -> CUresult; -} -extern "C" { - pub fn cuMemcpy(dst: CUdeviceptr, src: CUdeviceptr, ByteCount: usize) -> 
CUresult; -} -extern "C" { - pub fn cuMemcpyPeer( - dstDevice: CUdeviceptr, - dstContext: CUcontext, - srcDevice: CUdeviceptr, - srcContext: CUcontext, - ByteCount: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyHtoD_v2( - dstDevice: CUdeviceptr, - srcHost: *const ::std::os::raw::c_void, - ByteCount: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyDtoH_v2( - dstHost: *mut ::std::os::raw::c_void, - srcDevice: CUdeviceptr, - ByteCount: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyDtoD_v2( - dstDevice: CUdeviceptr, - srcDevice: CUdeviceptr, - ByteCount: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyDtoA_v2( - dstArray: CUarray, - dstOffset: usize, - srcDevice: CUdeviceptr, - ByteCount: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyAtoD_v2( - dstDevice: CUdeviceptr, - srcArray: CUarray, - srcOffset: usize, - ByteCount: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyHtoA_v2( - dstArray: CUarray, - dstOffset: usize, - srcHost: *const ::std::os::raw::c_void, - ByteCount: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyAtoH_v2( - dstHost: *mut ::std::os::raw::c_void, - srcArray: CUarray, - srcOffset: usize, - ByteCount: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyAtoA_v2( - dstArray: CUarray, - dstOffset: usize, - srcArray: CUarray, - srcOffset: usize, - ByteCount: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult; -} -extern "C" { - pub fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult; -} -extern "C" { - pub fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult; -} -extern "C" { - pub fn cuMemcpy3DPeer(pCopy: *const CUDA_MEMCPY3D_PEER) -> CUresult; -} -extern "C" { - pub fn cuMemcpyAsync( - dst: CUdeviceptr, - src: CUdeviceptr, - ByteCount: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyPeerAsync( - dstDevice: CUdeviceptr, - dstContext: CUcontext, - srcDevice: CUdeviceptr, 
- srcContext: CUcontext, - ByteCount: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyHtoDAsync_v2( - dstDevice: CUdeviceptr, - srcHost: *const ::std::os::raw::c_void, - ByteCount: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyDtoHAsync_v2( - dstHost: *mut ::std::os::raw::c_void, - srcDevice: CUdeviceptr, - ByteCount: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyDtoDAsync_v2( - dstDevice: CUdeviceptr, - srcDevice: CUdeviceptr, - ByteCount: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyHtoAAsync_v2( - dstArray: CUarray, - dstOffset: usize, - srcHost: *const ::std::os::raw::c_void, - ByteCount: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpyAtoHAsync_v2( - dstHost: *mut ::std::os::raw::c_void, - srcArray: CUarray, - srcOffset: usize, - ByteCount: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult; -} -extern "C" { - pub fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult; -} -extern "C" { - pub fn cuMemcpy3DPeerAsync(pCopy: *const CUDA_MEMCPY3D_PEER, hStream: CUstream) -> CUresult; -} -extern "C" { - pub fn cuMemsetD8_v2(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize) - -> CUresult; -} -extern "C" { - pub fn cuMemsetD16_v2( - dstDevice: CUdeviceptr, - us: ::std::os::raw::c_ushort, - N: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemsetD32_v2(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize) - -> CUresult; -} -extern "C" { - pub fn cuMemsetD2D8_v2( - dstDevice: CUdeviceptr, - dstPitch: usize, - uc: ::std::os::raw::c_uchar, - Width: usize, - Height: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemsetD2D16_v2( - dstDevice: CUdeviceptr, - dstPitch: usize, - us: ::std::os::raw::c_ushort, - Width: usize, - Height: usize, - ) -> CUresult; -} 
-extern "C" { - pub fn cuMemsetD2D32_v2( - dstDevice: CUdeviceptr, - dstPitch: usize, - ui: ::std::os::raw::c_uint, - Width: usize, - Height: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemsetD8Async( - dstDevice: CUdeviceptr, - uc: ::std::os::raw::c_uchar, - N: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemsetD16Async( - dstDevice: CUdeviceptr, - us: ::std::os::raw::c_ushort, - N: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemsetD32Async( - dstDevice: CUdeviceptr, - ui: ::std::os::raw::c_uint, - N: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemsetD2D8Async( - dstDevice: CUdeviceptr, - dstPitch: usize, - uc: ::std::os::raw::c_uchar, - Width: usize, - Height: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemsetD2D16Async( - dstDevice: CUdeviceptr, - dstPitch: usize, - us: ::std::os::raw::c_ushort, - Width: usize, - Height: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemsetD2D32Async( - dstDevice: CUdeviceptr, - dstPitch: usize, - ui: ::std::os::raw::c_uint, - Width: usize, - Height: usize, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuArrayCreate_v2( - pHandle: *mut CUarray, - pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR, - ) -> CUresult; -} -extern "C" { - pub fn cuArrayGetDescriptor_v2( - pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR, - hArray: CUarray, - ) -> CUresult; -} -extern "C" { - pub fn cuArrayGetSparseProperties( - sparseProperties: *mut CUDA_ARRAY_SPARSE_PROPERTIES, - array: CUarray, - ) -> CUresult; -} -extern "C" { - pub fn cuMipmappedArrayGetSparseProperties( - sparseProperties: *mut CUDA_ARRAY_SPARSE_PROPERTIES, - mipmap: CUmipmappedArray, - ) -> CUresult; -} -extern "C" { - pub fn cuArrayGetPlane( - pPlaneArray: *mut CUarray, - hArray: CUarray, - planeIdx: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuArrayDestroy(hArray: CUarray) -> CUresult; -} -extern 
"C" { - pub fn cuArray3DCreate_v2( - pHandle: *mut CUarray, - pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR, - ) -> CUresult; -} -extern "C" { - pub fn cuArray3DGetDescriptor_v2( - pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR, - hArray: CUarray, - ) -> CUresult; -} -extern "C" { - pub fn cuMipmappedArrayCreate( - pHandle: *mut CUmipmappedArray, - pMipmappedArrayDesc: *const CUDA_ARRAY3D_DESCRIPTOR, - numMipmapLevels: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuMipmappedArrayGetLevel( - pLevelArray: *mut CUarray, - hMipmappedArray: CUmipmappedArray, - level: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuMipmappedArrayDestroy(hMipmappedArray: CUmipmappedArray) -> CUresult; -} -extern "C" { - pub fn cuMemAddressReserve( - ptr: *mut CUdeviceptr, - size: usize, - alignment: usize, - addr: CUdeviceptr, - flags: ::std::os::raw::c_ulonglong, - ) -> CUresult; -} -extern "C" { - pub fn cuMemAddressFree(ptr: CUdeviceptr, size: usize) -> CUresult; -} -extern "C" { - pub fn cuMemCreate( - handle: *mut CUmemGenericAllocationHandle, - size: usize, - prop: *const CUmemAllocationProp, - flags: ::std::os::raw::c_ulonglong, - ) -> CUresult; -} -extern "C" { - pub fn cuMemRelease(handle: CUmemGenericAllocationHandle) -> CUresult; -} -extern "C" { - pub fn cuMemMap( - ptr: CUdeviceptr, - size: usize, - offset: usize, - handle: CUmemGenericAllocationHandle, - flags: ::std::os::raw::c_ulonglong, - ) -> CUresult; -} -extern "C" { - pub fn cuMemMapArrayAsync( - mapInfoList: *mut CUarrayMapInfo, - count: ::std::os::raw::c_uint, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemUnmap(ptr: CUdeviceptr, size: usize) -> CUresult; -} -extern "C" { - pub fn cuMemSetAccess( - ptr: CUdeviceptr, - size: usize, - desc: *const CUmemAccessDesc, - count: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemGetAccess( - flags: *mut ::std::os::raw::c_ulonglong, - location: *const CUmemLocation, - ptr: CUdeviceptr, - ) -> CUresult; -} 
-extern "C" { - pub fn cuMemExportToShareableHandle( - shareableHandle: *mut ::std::os::raw::c_void, - handle: CUmemGenericAllocationHandle, - handleType: CUmemAllocationHandleType, - flags: ::std::os::raw::c_ulonglong, - ) -> CUresult; -} -extern "C" { - pub fn cuMemImportFromShareableHandle( - handle: *mut CUmemGenericAllocationHandle, - osHandle: *mut ::std::os::raw::c_void, - shHandleType: CUmemAllocationHandleType, - ) -> CUresult; -} -extern "C" { - pub fn cuMemGetAllocationGranularity( - granularity: *mut usize, - prop: *const CUmemAllocationProp, - option: CUmemAllocationGranularity_flags, - ) -> CUresult; -} -extern "C" { - pub fn cuMemGetAllocationPropertiesFromHandle( - prop: *mut CUmemAllocationProp, - handle: CUmemGenericAllocationHandle, - ) -> CUresult; -} -extern "C" { - pub fn cuMemRetainAllocationHandle( - handle: *mut CUmemGenericAllocationHandle, - addr: *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuMemFreeAsync(dptr: CUdeviceptr, hStream: CUstream) -> CUresult; -} -extern "C" { - pub fn cuMemAllocAsync(dptr: *mut CUdeviceptr, bytesize: usize, hStream: CUstream) -> CUresult; -} -extern "C" { - pub fn cuMemPoolTrimTo(pool: CUmemoryPool, minBytesToKeep: usize) -> CUresult; -} -extern "C" { - pub fn cuMemPoolSetAttribute( - pool: CUmemoryPool, - attr: CUmemPool_attribute, - value: *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuMemPoolGetAttribute( - pool: CUmemoryPool, - attr: CUmemPool_attribute, - value: *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuMemPoolSetAccess( - pool: CUmemoryPool, - map: *const CUmemAccessDesc, - count: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemPoolGetAccess( - flags: *mut CUmemAccess_flags, - memPool: CUmemoryPool, - location: *mut CUmemLocation, - ) -> CUresult; -} -extern "C" { - pub fn cuMemPoolCreate(pool: *mut CUmemoryPool, poolProps: *const CUmemPoolProps) -> CUresult; -} -extern "C" { - pub fn cuMemPoolDestroy(pool: 
CUmemoryPool) -> CUresult; -} -extern "C" { - pub fn cuMemAllocFromPoolAsync( - dptr: *mut CUdeviceptr, - bytesize: usize, - pool: CUmemoryPool, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemPoolExportToShareableHandle( - handle_out: *mut ::std::os::raw::c_void, - pool: CUmemoryPool, - handleType: CUmemAllocationHandleType, - flags: ::std::os::raw::c_ulonglong, - ) -> CUresult; -} -extern "C" { - pub fn cuMemPoolImportFromShareableHandle( - pool_out: *mut CUmemoryPool, - handle: *mut ::std::os::raw::c_void, - handleType: CUmemAllocationHandleType, - flags: ::std::os::raw::c_ulonglong, - ) -> CUresult; -} -extern "C" { - pub fn cuMemPoolExportPointer( - shareData_out: *mut CUmemPoolPtrExportData, - ptr: CUdeviceptr, - ) -> CUresult; -} -extern "C" { - pub fn cuMemPoolImportPointer( - ptr_out: *mut CUdeviceptr, - pool: CUmemoryPool, - shareData: *mut CUmemPoolPtrExportData, - ) -> CUresult; -} -extern "C" { - pub fn cuPointerGetAttribute( - data: *mut ::std::os::raw::c_void, - attribute: CUpointer_attribute, - ptr: CUdeviceptr, - ) -> CUresult; -} -extern "C" { - pub fn cuMemPrefetchAsync( - devPtr: CUdeviceptr, - count: usize, - dstDevice: CUdevice, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuMemAdvise( - devPtr: CUdeviceptr, - count: usize, - advice: CUmem_advise, - device: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuMemRangeGetAttribute( - data: *mut ::std::os::raw::c_void, - dataSize: usize, - attribute: CUmem_range_attribute, - devPtr: CUdeviceptr, - count: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuMemRangeGetAttributes( - data: *mut *mut ::std::os::raw::c_void, - dataSizes: *mut usize, - attributes: *mut CUmem_range_attribute, - numAttributes: usize, - devPtr: CUdeviceptr, - count: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuPointerSetAttribute( - value: *const ::std::os::raw::c_void, - attribute: CUpointer_attribute, - ptr: CUdeviceptr, - ) -> CUresult; -} -extern "C" { - pub fn 
cuPointerGetAttributes( - numAttributes: ::std::os::raw::c_uint, - attributes: *mut CUpointer_attribute, - data: *mut *mut ::std::os::raw::c_void, - ptr: CUdeviceptr, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamCreate(phStream: *mut CUstream, Flags: ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuStreamCreateWithPriority( - phStream: *mut CUstream, - flags: ::std::os::raw::c_uint, - priority: ::std::os::raw::c_int, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamGetPriority(hStream: CUstream, priority: *mut ::std::os::raw::c_int) - -> CUresult; -} -extern "C" { - pub fn cuStreamGetFlags(hStream: CUstream, flags: *mut ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuStreamGetCtx(hStream: CUstream, pctx: *mut CUcontext) -> CUresult; -} -extern "C" { - pub fn cuStreamWaitEvent( - hStream: CUstream, - hEvent: CUevent, - Flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamAddCallback( - hStream: CUstream, - callback: CUstreamCallback, - userData: *mut ::std::os::raw::c_void, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamBeginCapture_v2(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult; -} -extern "C" { - pub fn cuThreadExchangeStreamCaptureMode(mode: *mut CUstreamCaptureMode) -> CUresult; -} -extern "C" { - pub fn cuStreamEndCapture(hStream: CUstream, phGraph: *mut CUgraph) -> CUresult; -} -extern "C" { - pub fn cuStreamIsCapturing( - hStream: CUstream, - captureStatus: *mut CUstreamCaptureStatus, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamGetCaptureInfo( - hStream: CUstream, - captureStatus_out: *mut CUstreamCaptureStatus, - id_out: *mut cuuint64_t, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamGetCaptureInfo_v2( - hStream: CUstream, - captureStatus_out: *mut CUstreamCaptureStatus, - id_out: *mut cuuint64_t, - graph_out: *mut CUgraph, - dependencies_out: *mut *const CUgraphNode, - numDependencies_out: *mut usize, - ) -> CUresult; -} 
-extern "C" { - pub fn cuStreamUpdateCaptureDependencies( - hStream: CUstream, - dependencies: *mut CUgraphNode, - numDependencies: usize, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamAttachMemAsync( - hStream: CUstream, - dptr: CUdeviceptr, - length: usize, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamQuery(hStream: CUstream) -> CUresult; -} -extern "C" { - pub fn cuStreamSynchronize(hStream: CUstream) -> CUresult; -} -extern "C" { - pub fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult; -} -extern "C" { - pub fn cuStreamCopyAttributes(dst: CUstream, src: CUstream) -> CUresult; -} -extern "C" { - pub fn cuStreamGetAttribute( - hStream: CUstream, - attr: CUstreamAttrID, - value_out: *mut CUstreamAttrValue, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamSetAttribute( - hStream: CUstream, - attr: CUstreamAttrID, - value: *const CUstreamAttrValue, - ) -> CUresult; -} -extern "C" { - pub fn cuEventCreate(phEvent: *mut CUevent, Flags: ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuEventRecord(hEvent: CUevent, hStream: CUstream) -> CUresult; -} -extern "C" { - pub fn cuEventRecordWithFlags( - hEvent: CUevent, - hStream: CUstream, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuEventQuery(hEvent: CUevent) -> CUresult; -} -extern "C" { - pub fn cuEventSynchronize(hEvent: CUevent) -> CUresult; -} -extern "C" { - pub fn cuEventDestroy_v2(hEvent: CUevent) -> CUresult; -} -extern "C" { - pub fn cuEventElapsedTime(pMilliseconds: *mut f32, hStart: CUevent, hEnd: CUevent) -> CUresult; -} -extern "C" { - pub fn cuImportExternalMemory( - extMem_out: *mut CUexternalMemory, - memHandleDesc: *const CUDA_EXTERNAL_MEMORY_HANDLE_DESC, - ) -> CUresult; -} -extern "C" { - pub fn cuExternalMemoryGetMappedBuffer( - devPtr: *mut CUdeviceptr, - extMem: CUexternalMemory, - bufferDesc: *const CUDA_EXTERNAL_MEMORY_BUFFER_DESC, - ) -> CUresult; -} -extern "C" { - pub 
fn cuExternalMemoryGetMappedMipmappedArray( - mipmap: *mut CUmipmappedArray, - extMem: CUexternalMemory, - mipmapDesc: *const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC, - ) -> CUresult; -} -extern "C" { - pub fn cuDestroyExternalMemory(extMem: CUexternalMemory) -> CUresult; -} -extern "C" { - pub fn cuImportExternalSemaphore( - extSem_out: *mut CUexternalSemaphore, - semHandleDesc: *const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC, - ) -> CUresult; -} -extern "C" { - pub fn cuSignalExternalSemaphoresAsync( - extSemArray: *const CUexternalSemaphore, - paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS, - numExtSems: ::std::os::raw::c_uint, - stream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuWaitExternalSemaphoresAsync( - extSemArray: *const CUexternalSemaphore, - paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS, - numExtSems: ::std::os::raw::c_uint, - stream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuDestroyExternalSemaphore(extSem: CUexternalSemaphore) -> CUresult; -} -extern "C" { - pub fn cuStreamWaitValue32( - stream: CUstream, - addr: CUdeviceptr, - value: cuuint32_t, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamWaitValue64( - stream: CUstream, - addr: CUdeviceptr, - value: cuuint64_t, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamWriteValue32( - stream: CUstream, - addr: CUdeviceptr, - value: cuuint32_t, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamWriteValue64( - stream: CUstream, - addr: CUdeviceptr, - value: cuuint64_t, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuStreamBatchMemOp( - stream: CUstream, - count: ::std::os::raw::c_uint, - paramArray: *mut CUstreamBatchMemOpParams, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuFuncGetAttribute( - pi: *mut ::std::os::raw::c_int, - attrib: CUfunction_attribute, - hfunc: CUfunction, - ) -> CUresult; -} 
-extern "C" { - pub fn cuFuncSetAttribute( - hfunc: CUfunction, - attrib: CUfunction_attribute, - value: ::std::os::raw::c_int, - ) -> CUresult; -} -extern "C" { - pub fn cuFuncSetCacheConfig(hfunc: CUfunction, config: CUfunc_cache) -> CUresult; -} -extern "C" { - pub fn cuFuncSetSharedMemConfig(hfunc: CUfunction, config: CUsharedconfig) -> CUresult; -} -extern "C" { - pub fn cuFuncGetModule(hmod: *mut CUmodule, hfunc: CUfunction) -> CUresult; -} -extern "C" { - pub fn cuLaunchKernel( - f: CUfunction, - gridDimX: ::std::os::raw::c_uint, - gridDimY: ::std::os::raw::c_uint, - gridDimZ: ::std::os::raw::c_uint, - blockDimX: ::std::os::raw::c_uint, - blockDimY: ::std::os::raw::c_uint, - blockDimZ: ::std::os::raw::c_uint, - sharedMemBytes: ::std::os::raw::c_uint, - hStream: CUstream, - kernelParams: *mut *mut ::std::os::raw::c_void, - extra: *mut *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuLaunchCooperativeKernel( - f: CUfunction, - gridDimX: ::std::os::raw::c_uint, - gridDimY: ::std::os::raw::c_uint, - gridDimZ: ::std::os::raw::c_uint, - blockDimX: ::std::os::raw::c_uint, - blockDimY: ::std::os::raw::c_uint, - blockDimZ: ::std::os::raw::c_uint, - sharedMemBytes: ::std::os::raw::c_uint, - hStream: CUstream, - kernelParams: *mut *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuLaunchCooperativeKernelMultiDevice( - launchParamsList: *mut CUDA_LAUNCH_PARAMS, - numDevices: ::std::os::raw::c_uint, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuLaunchHostFunc( - hStream: CUstream, - fn_: CUhostFn, - userData: *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuFuncSetBlockShape( - hfunc: CUfunction, - x: ::std::os::raw::c_int, - y: ::std::os::raw::c_int, - z: ::std::os::raw::c_int, - ) -> CUresult; -} -extern "C" { - pub fn cuFuncSetSharedSize(hfunc: CUfunction, bytes: ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuParamSetSize(hfunc: CUfunction, 
numbytes: ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuParamSeti( - hfunc: CUfunction, - offset: ::std::os::raw::c_int, - value: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuParamSetf(hfunc: CUfunction, offset: ::std::os::raw::c_int, value: f32) -> CUresult; -} -extern "C" { - pub fn cuParamSetv( - hfunc: CUfunction, - offset: ::std::os::raw::c_int, - ptr: *mut ::std::os::raw::c_void, - numbytes: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuLaunch(f: CUfunction) -> CUresult; -} -extern "C" { - pub fn cuLaunchGrid( - f: CUfunction, - grid_width: ::std::os::raw::c_int, - grid_height: ::std::os::raw::c_int, - ) -> CUresult; -} -extern "C" { - pub fn cuLaunchGridAsync( - f: CUfunction, - grid_width: ::std::os::raw::c_int, - grid_height: ::std::os::raw::c_int, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuParamSetTexRef( - hfunc: CUfunction, - texunit: ::std::os::raw::c_int, - hTexRef: CUtexref, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphCreate(phGraph: *mut CUgraph, flags: ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuGraphAddKernelNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - nodeParams: *const CUDA_KERNEL_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphKernelNodeGetParams( - hNode: CUgraphNode, - nodeParams: *mut CUDA_KERNEL_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphKernelNodeSetParams( - hNode: CUgraphNode, - nodeParams: *const CUDA_KERNEL_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphAddMemcpyNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - copyParams: *const CUDA_MEMCPY3D, - ctx: CUcontext, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphMemcpyNodeGetParams( - hNode: CUgraphNode, - nodeParams: *mut CUDA_MEMCPY3D, - ) -> CUresult; -} -extern "C" { - pub 
fn cuGraphMemcpyNodeSetParams( - hNode: CUgraphNode, - nodeParams: *const CUDA_MEMCPY3D, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphAddMemsetNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - memsetParams: *const CUDA_MEMSET_NODE_PARAMS, - ctx: CUcontext, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphMemsetNodeGetParams( - hNode: CUgraphNode, - nodeParams: *mut CUDA_MEMSET_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphMemsetNodeSetParams( - hNode: CUgraphNode, - nodeParams: *const CUDA_MEMSET_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphAddHostNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - nodeParams: *const CUDA_HOST_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphHostNodeGetParams( - hNode: CUgraphNode, - nodeParams: *mut CUDA_HOST_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphHostNodeSetParams( - hNode: CUgraphNode, - nodeParams: *const CUDA_HOST_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphAddChildGraphNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - childGraph: CUgraph, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphChildGraphNodeGetGraph(hNode: CUgraphNode, phGraph: *mut CUgraph) -> CUresult; -} -extern "C" { - pub fn cuGraphAddEmptyNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphAddEventRecordNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - event: CUevent, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphEventRecordNodeGetEvent(hNode: CUgraphNode, event_out: *mut CUevent) -> CUresult; -} -extern "C" { - pub fn cuGraphEventRecordNodeSetEvent(hNode: 
CUgraphNode, event: CUevent) -> CUresult; -} -extern "C" { - pub fn cuGraphAddEventWaitNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - event: CUevent, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphEventWaitNodeGetEvent(hNode: CUgraphNode, event_out: *mut CUevent) -> CUresult; -} -extern "C" { - pub fn cuGraphEventWaitNodeSetEvent(hNode: CUgraphNode, event: CUevent) -> CUresult; -} -extern "C" { - pub fn cuGraphAddExternalSemaphoresSignalNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - nodeParams: *const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExternalSemaphoresSignalNodeGetParams( - hNode: CUgraphNode, - params_out: *mut CUDA_EXT_SEM_SIGNAL_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExternalSemaphoresSignalNodeSetParams( - hNode: CUgraphNode, - nodeParams: *const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphAddExternalSemaphoresWaitNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - nodeParams: *const CUDA_EXT_SEM_WAIT_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExternalSemaphoresWaitNodeGetParams( - hNode: CUgraphNode, - params_out: *mut CUDA_EXT_SEM_WAIT_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExternalSemaphoresWaitNodeSetParams( - hNode: CUgraphNode, - nodeParams: *const CUDA_EXT_SEM_WAIT_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphAddMemAllocNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - nodeParams: *mut CUDA_MEM_ALLOC_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphMemAllocNodeGetParams( - hNode: CUgraphNode, - params_out: *mut CUDA_MEM_ALLOC_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub 
fn cuGraphAddMemFreeNode( - phGraphNode: *mut CUgraphNode, - hGraph: CUgraph, - dependencies: *const CUgraphNode, - numDependencies: usize, - dptr: CUdeviceptr, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphMemFreeNodeGetParams(hNode: CUgraphNode, dptr_out: *mut CUdeviceptr) -> CUresult; -} -extern "C" { - pub fn cuDeviceGraphMemTrim(device: CUdevice) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetGraphMemAttribute( - device: CUdevice, - attr: CUgraphMem_attribute, - value: *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuDeviceSetGraphMemAttribute( - device: CUdevice, - attr: CUgraphMem_attribute, - value: *mut ::std::os::raw::c_void, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphClone(phGraphClone: *mut CUgraph, originalGraph: CUgraph) -> CUresult; -} -extern "C" { - pub fn cuGraphNodeFindInClone( - phNode: *mut CUgraphNode, - hOriginalNode: CUgraphNode, - hClonedGraph: CUgraph, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphNodeGetType(hNode: CUgraphNode, type_: *mut CUgraphNodeType) -> CUresult; -} -extern "C" { - pub fn cuGraphGetNodes( - hGraph: CUgraph, - nodes: *mut CUgraphNode, - numNodes: *mut usize, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphGetRootNodes( - hGraph: CUgraph, - rootNodes: *mut CUgraphNode, - numRootNodes: *mut usize, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphGetEdges( - hGraph: CUgraph, - from: *mut CUgraphNode, - to: *mut CUgraphNode, - numEdges: *mut usize, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphNodeGetDependencies( - hNode: CUgraphNode, - dependencies: *mut CUgraphNode, - numDependencies: *mut usize, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphNodeGetDependentNodes( - hNode: CUgraphNode, - dependentNodes: *mut CUgraphNode, - numDependentNodes: *mut usize, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphAddDependencies( - hGraph: CUgraph, - from: *const CUgraphNode, - to: *const CUgraphNode, - numDependencies: usize, - ) -> CUresult; -} -extern "C" { - pub fn 
cuGraphRemoveDependencies( - hGraph: CUgraph, - from: *const CUgraphNode, - to: *const CUgraphNode, - numDependencies: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphDestroyNode(hNode: CUgraphNode) -> CUresult; -} -extern "C" { - pub fn cuGraphInstantiate_v2( - phGraphExec: *mut CUgraphExec, - hGraph: CUgraph, - phErrorNode: *mut CUgraphNode, - logBuffer: *mut ::std::os::raw::c_char, - bufferSize: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphInstantiateWithFlags( - phGraphExec: *mut CUgraphExec, - hGraph: CUgraph, - flags: ::std::os::raw::c_ulonglong, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExecKernelNodeSetParams( - hGraphExec: CUgraphExec, - hNode: CUgraphNode, - nodeParams: *const CUDA_KERNEL_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExecMemcpyNodeSetParams( - hGraphExec: CUgraphExec, - hNode: CUgraphNode, - copyParams: *const CUDA_MEMCPY3D, - ctx: CUcontext, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExecMemsetNodeSetParams( - hGraphExec: CUgraphExec, - hNode: CUgraphNode, - memsetParams: *const CUDA_MEMSET_NODE_PARAMS, - ctx: CUcontext, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExecHostNodeSetParams( - hGraphExec: CUgraphExec, - hNode: CUgraphNode, - nodeParams: *const CUDA_HOST_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExecChildGraphNodeSetParams( - hGraphExec: CUgraphExec, - hNode: CUgraphNode, - childGraph: CUgraph, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExecEventRecordNodeSetEvent( - hGraphExec: CUgraphExec, - hNode: CUgraphNode, - event: CUevent, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExecEventWaitNodeSetEvent( - hGraphExec: CUgraphExec, - hNode: CUgraphNode, - event: CUevent, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphExecExternalSemaphoresSignalNodeSetParams( - hGraphExec: CUgraphExec, - hNode: CUgraphNode, - nodeParams: *const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn 
cuGraphExecExternalSemaphoresWaitNodeSetParams( - hGraphExec: CUgraphExec, - hNode: CUgraphNode, - nodeParams: *const CUDA_EXT_SEM_WAIT_NODE_PARAMS, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphUpload(hGraphExec: CUgraphExec, hStream: CUstream) -> CUresult; -} -extern "C" { - pub fn cuGraphLaunch(hGraphExec: CUgraphExec, hStream: CUstream) -> CUresult; -} -extern "C" { - pub fn cuGraphExecDestroy(hGraphExec: CUgraphExec) -> CUresult; -} -extern "C" { - pub fn cuGraphDestroy(hGraph: CUgraph) -> CUresult; -} -extern "C" { - pub fn cuGraphExecUpdate( - hGraphExec: CUgraphExec, - hGraph: CUgraph, - hErrorNode_out: *mut CUgraphNode, - updateResult_out: *mut CUgraphExecUpdateResult, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphKernelNodeCopyAttributes(dst: CUgraphNode, src: CUgraphNode) -> CUresult; -} -extern "C" { - pub fn cuGraphKernelNodeGetAttribute( - hNode: CUgraphNode, - attr: CUkernelNodeAttrID, - value_out: *mut CUkernelNodeAttrValue, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphKernelNodeSetAttribute( - hNode: CUgraphNode, - attr: CUkernelNodeAttrID, - value: *const CUkernelNodeAttrValue, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphDebugDotPrint( - hGraph: CUgraph, - path: *const ::std::os::raw::c_char, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuUserObjectCreate( - object_out: *mut CUuserObject, - ptr: *mut ::std::os::raw::c_void, - destroy: CUhostFn, - initialRefcount: ::std::os::raw::c_uint, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuUserObjectRetain(object: CUuserObject, count: ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuUserObjectRelease(object: CUuserObject, count: ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuGraphRetainUserObject( - graph: CUgraph, - object: CUuserObject, - count: ::std::os::raw::c_uint, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphReleaseUserObject( - graph: CUgraph, - 
object: CUuserObject, - count: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuOccupancyMaxActiveBlocksPerMultiprocessor( - numBlocks: *mut ::std::os::raw::c_int, - func: CUfunction, - blockSize: ::std::os::raw::c_int, - dynamicSMemSize: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - numBlocks: *mut ::std::os::raw::c_int, - func: CUfunction, - blockSize: ::std::os::raw::c_int, - dynamicSMemSize: usize, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuOccupancyMaxPotentialBlockSize( - minGridSize: *mut ::std::os::raw::c_int, - blockSize: *mut ::std::os::raw::c_int, - func: CUfunction, - blockSizeToDynamicSMemSize: CUoccupancyB2DSize, - dynamicSMemSize: usize, - blockSizeLimit: ::std::os::raw::c_int, - ) -> CUresult; -} -extern "C" { - pub fn cuOccupancyMaxPotentialBlockSizeWithFlags( - minGridSize: *mut ::std::os::raw::c_int, - blockSize: *mut ::std::os::raw::c_int, - func: CUfunction, - blockSizeToDynamicSMemSize: CUoccupancyB2DSize, - dynamicSMemSize: usize, - blockSizeLimit: ::std::os::raw::c_int, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuOccupancyAvailableDynamicSMemPerBlock( - dynamicSmemSize: *mut usize, - func: CUfunction, - numBlocks: ::std::os::raw::c_int, - blockSize: ::std::os::raw::c_int, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetArray( - hTexRef: CUtexref, - hArray: CUarray, - Flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetMipmappedArray( - hTexRef: CUtexref, - hMipmappedArray: CUmipmappedArray, - Flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetAddress_v2( - ByteOffset: *mut usize, - hTexRef: CUtexref, - dptr: CUdeviceptr, - bytes: usize, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetAddress2D_v3( - hTexRef: CUtexref, - desc: *const CUDA_ARRAY_DESCRIPTOR, - dptr: CUdeviceptr, - Pitch: usize, - ) -> CUresult; -} 
-extern "C" { - pub fn cuTexRefSetFormat( - hTexRef: CUtexref, - fmt: CUarray_format, - NumPackedComponents: ::std::os::raw::c_int, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetAddressMode( - hTexRef: CUtexref, - dim: ::std::os::raw::c_int, - am: CUaddress_mode, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetMipmapFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetMipmapLevelBias(hTexRef: CUtexref, bias: f32) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetMipmapLevelClamp( - hTexRef: CUtexref, - minMipmapLevelClamp: f32, - maxMipmapLevelClamp: f32, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetMaxAnisotropy( - hTexRef: CUtexref, - maxAniso: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetBorderColor(hTexRef: CUtexref, pBorderColor: *mut f32) -> CUresult; -} -extern "C" { - pub fn cuTexRefSetFlags(hTexRef: CUtexref, Flags: ::std::os::raw::c_uint) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetAddress_v2(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetArray(phArray: *mut CUarray, hTexRef: CUtexref) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetMipmappedArray( - phMipmappedArray: *mut CUmipmappedArray, - hTexRef: CUtexref, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetAddressMode( - pam: *mut CUaddress_mode, - hTexRef: CUtexref, - dim: ::std::os::raw::c_int, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetFilterMode(pfm: *mut CUfilter_mode, hTexRef: CUtexref) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetFormat( - pFormat: *mut CUarray_format, - pNumChannels: *mut ::std::os::raw::c_int, - hTexRef: CUtexref, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetMipmapFilterMode(pfm: *mut CUfilter_mode, hTexRef: CUtexref) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetMipmapLevelBias(pbias: *mut 
f32, hTexRef: CUtexref) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetMipmapLevelClamp( - pminMipmapLevelClamp: *mut f32, - pmaxMipmapLevelClamp: *mut f32, - hTexRef: CUtexref, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetMaxAnisotropy( - pmaxAniso: *mut ::std::os::raw::c_int, - hTexRef: CUtexref, - ) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetBorderColor(pBorderColor: *mut f32, hTexRef: CUtexref) -> CUresult; -} -extern "C" { - pub fn cuTexRefGetFlags(pFlags: *mut ::std::os::raw::c_uint, hTexRef: CUtexref) -> CUresult; -} -extern "C" { - pub fn cuTexRefCreate(pTexRef: *mut CUtexref) -> CUresult; -} -extern "C" { - pub fn cuTexRefDestroy(hTexRef: CUtexref) -> CUresult; -} -extern "C" { - pub fn cuSurfRefSetArray( - hSurfRef: CUsurfref, - hArray: CUarray, - Flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuSurfRefGetArray(phArray: *mut CUarray, hSurfRef: CUsurfref) -> CUresult; -} -extern "C" { - pub fn cuTexObjectCreate( - pTexObject: *mut CUtexObject, - pResDesc: *const CUDA_RESOURCE_DESC, - pTexDesc: *const CUDA_TEXTURE_DESC, - pResViewDesc: *const CUDA_RESOURCE_VIEW_DESC, - ) -> CUresult; -} -extern "C" { - pub fn cuTexObjectDestroy(texObject: CUtexObject) -> CUresult; -} -extern "C" { - pub fn cuTexObjectGetResourceDesc( - pResDesc: *mut CUDA_RESOURCE_DESC, - texObject: CUtexObject, - ) -> CUresult; -} -extern "C" { - pub fn cuTexObjectGetTextureDesc( - pTexDesc: *mut CUDA_TEXTURE_DESC, - texObject: CUtexObject, - ) -> CUresult; -} -extern "C" { - pub fn cuTexObjectGetResourceViewDesc( - pResViewDesc: *mut CUDA_RESOURCE_VIEW_DESC, - texObject: CUtexObject, - ) -> CUresult; -} -extern "C" { - pub fn cuSurfObjectCreate( - pSurfObject: *mut CUsurfObject, - pResDesc: *const CUDA_RESOURCE_DESC, - ) -> CUresult; -} -extern "C" { - pub fn cuSurfObjectDestroy(surfObject: CUsurfObject) -> CUresult; -} -extern "C" { - pub fn cuSurfObjectGetResourceDesc( - pResDesc: *mut CUDA_RESOURCE_DESC, - surfObject: CUsurfObject, - ) -> 
CUresult; -} -extern "C" { - pub fn cuDeviceCanAccessPeer( - canAccessPeer: *mut ::std::os::raw::c_int, - dev: CUdevice, - peerDev: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuCtxEnablePeerAccess(peerContext: CUcontext, Flags: ::std::os::raw::c_uint) - -> CUresult; -} -extern "C" { - pub fn cuCtxDisablePeerAccess(peerContext: CUcontext) -> CUresult; -} -extern "C" { - pub fn cuDeviceGetP2PAttribute( - value: *mut ::std::os::raw::c_int, - attrib: CUdevice_P2PAttribute, - srcDevice: CUdevice, - dstDevice: CUdevice, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphicsUnregisterResource(resource: CUgraphicsResource) -> CUresult; -} -extern "C" { - pub fn cuGraphicsSubResourceGetMappedArray( - pArray: *mut CUarray, - resource: CUgraphicsResource, - arrayIndex: ::std::os::raw::c_uint, - mipLevel: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphicsResourceGetMappedMipmappedArray( - pMipmappedArray: *mut CUmipmappedArray, - resource: CUgraphicsResource, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphicsResourceGetMappedPointer_v2( - pDevPtr: *mut CUdeviceptr, - pSize: *mut usize, - resource: CUgraphicsResource, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphicsResourceSetMapFlags_v2( - resource: CUgraphicsResource, - flags: ::std::os::raw::c_uint, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphicsMapResources( - count: ::std::os::raw::c_uint, - resources: *mut CUgraphicsResource, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuGraphicsUnmapResources( - count: ::std::os::raw::c_uint, - resources: *mut CUgraphicsResource, - hStream: CUstream, - ) -> CUresult; -} -extern "C" { - pub fn cuGetProcAddress( - symbol: *const ::std::os::raw::c_char, - pfn: *mut *mut ::std::os::raw::c_void, - cudaVersion: ::std::os::raw::c_int, - flags: cuuint64_t, - ) -> CUresult; -} -extern "C" { - pub fn cuGetExportTable( - ppExportTable: *mut *const ::std::os::raw::c_void, - pExportTableId: *const CUuuid, - ) -> CUresult; -} 
-#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum CUoutput_mode_enum { - CU_OUT_KEY_VALUE_PAIR = 0, - CU_OUT_CSV = 1, -} -pub use self::CUoutput_mode_enum as CUoutput_mode; -extern "C" { - pub fn cuProfilerInitialize( - configFile: *const ::std::os::raw::c_char, - outputFile: *const ::std::os::raw::c_char, - outputMode: CUoutput_mode, - ) -> CUresult; -} -extern "C" { - pub fn cuProfilerStart() -> CUresult; -} -extern "C" { - pub fn cuProfilerStop() -> CUresult; -} -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum cudaDataType_t { - CUDA_R_16F = 2, - CUDA_C_16F = 6, - CUDA_R_16BF = 14, - CUDA_C_16BF = 15, - CUDA_R_32F = 0, - CUDA_C_32F = 4, - CUDA_R_64F = 1, - CUDA_C_64F = 5, - CUDA_R_4I = 16, - CUDA_C_4I = 17, - CUDA_R_4U = 18, - CUDA_C_4U = 19, - CUDA_R_8I = 3, - CUDA_C_8I = 7, - CUDA_R_8U = 8, - CUDA_C_8U = 9, - CUDA_R_16I = 20, - CUDA_C_16I = 21, - CUDA_R_16U = 22, - CUDA_C_16U = 23, - CUDA_R_32I = 10, - CUDA_C_32I = 11, - CUDA_R_32U = 12, - CUDA_C_32U = 13, - CUDA_R_64I = 24, - CUDA_C_64I = 25, - CUDA_R_64U = 26, - CUDA_C_64U = 27, -} -pub use self::cudaDataType_t as cudaDataType; -#[repr(i32)] -#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] -pub enum libraryPropertyType_t { - MAJOR_VERSION = 0, - MINOR_VERSION = 1, - PATCH_LEVEL = 2, -} -pub use self::libraryPropertyType_t as libraryPropertyType; diff --git a/crates/cust_raw/src/driver_sys.rs b/crates/cust_raw/src/driver_sys.rs new file mode 100644 index 00000000..841e3c72 --- /dev/null +++ b/crates/cust_raw/src/driver_sys.rs @@ -0,0 +1,5 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +include!(concat!(env!("OUT_DIR"), "/driver_sys.rs")); diff --git a/crates/cust_raw/src/lib.rs b/crates/cust_raw/src/lib.rs index d75537c1..62bd4b0c 100644 --- a/crates/cust_raw/src/lib.rs +++ b/crates/cust_raw/src/lib.rs @@ -1,4 +1,16 @@ -#![allow(warnings)] 
+#[cfg(feature = "driver")] +pub mod driver_sys; +#[cfg(feature = "runtime")] +pub mod runtime_sys; -mod cuda; -pub use cuda::*; +#[cfg(feature = "cublas")] +pub mod cublas_sys; +#[cfg(feature = "cublaslt")] +pub mod cublaslt_sys; +#[cfg(feature = "cublasxt")] +pub mod cublasxt_sys; + +#[cfg(feature = "nvptx-compiler")] +pub mod nvptx_compiler_sys; +#[cfg(feature = "nvvm")] +pub mod nvvm_sys; diff --git a/crates/cust_raw/src/nvptx_compiler_sys.rs b/crates/cust_raw/src/nvptx_compiler_sys.rs new file mode 100644 index 00000000..c3f1090c --- /dev/null +++ b/crates/cust_raw/src/nvptx_compiler_sys.rs @@ -0,0 +1,5 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +include!(concat!(env!("OUT_DIR"), "/nvptx_compiler_sys.rs")); diff --git a/crates/cust_raw/src/nvvm_sys.rs b/crates/cust_raw/src/nvvm_sys.rs new file mode 100644 index 00000000..6911dc49 --- /dev/null +++ b/crates/cust_raw/src/nvvm_sys.rs @@ -0,0 +1,7 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +pub const LIBDEVICE_BITCODE: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libdevice.bc")); + +include!(concat!(env!("OUT_DIR"), "/nvvm_sys.rs")); diff --git a/crates/cust_raw/src/runtime_sys.rs b/crates/cust_raw/src/runtime_sys.rs new file mode 100644 index 00000000..2ec7c5e6 --- /dev/null +++ b/crates/cust_raw/src/runtime_sys.rs @@ -0,0 +1,5 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +include!(concat!(env!("OUT_DIR"), "/runtime_sys.rs")); diff --git a/crates/find_cuda_helper/Cargo.toml b/crates/find_cuda_helper/Cargo.toml deleted file mode 100644 index 3816f2df..00000000 --- a/crates/find_cuda_helper/Cargo.toml +++ /dev/null @@ -1,11 +0,0 @@ -[package] -name = "find_cuda_helper" -version = "0.2.0" -edition = "2021" -license = "MIT OR Apache-2.0" -description = "Helper crate for searching for CUDA libraries" -repository = 
"https://github.com/Rust-GPU/Rust-CUDA" -readme = "../../README.md" - -[dependencies] -glob = "0.3.0" diff --git a/crates/find_cuda_helper/src/lib.rs b/crates/find_cuda_helper/src/lib.rs deleted file mode 100644 index ed3b2f46..00000000 --- a/crates/find_cuda_helper/src/lib.rs +++ /dev/null @@ -1,226 +0,0 @@ -//! Tiny crate for common logic for finding and including CUDA. - -use std::{ - env, - path::{Path, PathBuf}, -}; - -pub fn include_cuda() { - if env::var("DOCS_RS").is_err() && !cfg!(doc) { - let paths = find_cuda_lib_dirs(); - if paths.is_empty() { - panic!("Could not find a cuda installation"); - } - for path in paths { - println!("cargo:rustc-link-search=native={}", path.display()); - } - - println!("cargo:rustc-link-lib=dylib=cuda"); - println!("cargo:rerun-if-changed=build.rs"); - println!("cargo:rerun-if-env-changed=CUDA_LIBRARY_PATH"); - println!("cargo:rerun-if-env-changed=CUDA_ROOT"); - println!("cargo:rerun-if-env-changed=CUDA_PATH"); - println!("cargo:rerun-if-env-changed=CUDA_TOOLKIT_ROOT_DIR"); - } -} - -// Returns true if the given path is a valid cuda installation -fn is_cuda_root_path<P: AsRef<Path>>(path: P) -> bool { - path.as_ref().join("include").join("cuda.h").is_file() -} - -pub fn find_cuda_root() -> Option<PathBuf> { - // search through the common environment variables first - for path in ["CUDA_PATH", "CUDA_ROOT", "CUDA_TOOLKIT_ROOT_DIR"] - .iter() - .filter_map(|name| std::env::var(*name).ok()) - { - if is_cuda_root_path(&path) { - return Some(path.into()); - } - } - - // If it wasn't specified by env var, try the default installation paths - #[cfg(not(target_os = "windows"))] - let default_paths = ["/usr/lib/cuda", "/usr/local/cuda", "/opt/cuda"]; - #[cfg(target_os = "windows")] - let default_paths = ["C:/CUDA"]; // TODO (AL): what's the actual path here?
- for path in default_paths { - if is_cuda_root_path(path) { - return Some(path.into()); - } - } - - None -} - -#[cfg(target_os = "windows")] -pub fn find_cuda_lib_dirs() -> Vec<PathBuf> { - if let Some(root_path) = find_cuda_root() { - // To do this the right way, we check to see which target we're building for. - let target = env::var("TARGET") - .expect("cargo did not set the TARGET environment variable as required."); - - // Targets use '-' separators. e.g. x86_64-pc-windows-msvc - let target_components: Vec<_> = target.as_str().split('-').collect(); - - // We check that we're building for Windows. This code assumes that the layout in - // CUDA_PATH matches Windows. - if target_components[2] != "windows" { - panic!( - "The CUDA_PATH variable is only used by cuda-sys on Windows. Your target is {}.", - target - ); - } - - // Sanity check that the second component of 'target' is "pc" - debug_assert_eq!( - "pc", target_components[1], - "Expected a Windows target to have the second component be 'pc'. Target: {}", - target - ); - - // x86_64 should use the libs in the "lib/x64" directory. If we ever support i686 (which - // does not ship with cublas support), its libraries are in "lib/Win32". - let lib_path = match target_components[0] { - "x86_64" => "x64", - "i686" => { - // lib path would be "Win32" if we support i686. "cublas" is not present in the - // 32-bit install. - panic!("Rust cuda-sys does not currently support 32-bit Windows."); - } - _ => { - panic!("Rust cuda-sys only supports the x86_64 Windows architecture."); - } - }; - - let lib_dir = root_path.join("lib").join(lib_path); - - return if lib_dir.is_dir() { - vec![lib_dir] - } else { - vec![] - }; - } - - vec![] -} - -pub fn read_env() -> Vec<PathBuf> { - if let Ok(path) = env::var("CUDA_LIBRARY_PATH") { - // The location of the libcuda, libcudart, and libcublas can be hardcoded with the - // CUDA_LIBRARY_PATH environment variable.
- let split_char = if cfg!(target_os = "windows") { - ";" - } else { - ":" - }; - path.split(split_char).map(PathBuf::from).collect() - } else { - vec![] - } -} - -#[cfg(not(target_os = "windows"))] -pub fn find_cuda_lib_dirs() -> Vec<PathBuf> { - let mut candidates = read_env(); - candidates.push(PathBuf::from("/opt/cuda")); - candidates.push(PathBuf::from("/usr/local/cuda")); - for e in glob::glob("/usr/local/cuda-*").unwrap().flatten() { - candidates.push(e) - } - candidates.push(PathBuf::from("/usr/lib/cuda")); - candidates.push(detect_cuda_root_via_which_nvcc()); - - let mut valid_paths = vec![]; - for base in &candidates { - let lib = PathBuf::from(base).join("lib64"); - if lib.is_dir() { - valid_paths.push(lib.clone()); - valid_paths.push(lib.join("stubs")); - } - let base = base.join("targets/x86_64-linux"); - let header = base.join("include/cuda.h"); - if header.is_file() { - valid_paths.push(base.join("lib")); - valid_paths.push(base.join("lib/stubs")); - continue; - } - } - valid_paths -} - -#[cfg(not(target_os = "windows"))] -fn detect_cuda_root_via_which_nvcc() -> PathBuf { - use std::process::Command; - let output = Command::new("which") - .arg("nvcc") - .output() - .expect("Command `which` must be available on *nix like systems."); - - if !output.status.success() { - panic!("Couldn't find nvcc - `which nvcc` returned non-zero"); - } - - let path: PathBuf = String::from_utf8(output.stdout) - .expect("Result must be valid UTF-8") - .trim() - .to_string() - .into(); - - // The above finds `CUDASDK/bin/nvcc`, so we have to go 2 up for the SDK root. - path.parent().unwrap().parent().unwrap().to_path_buf() -} - -#[cfg(target_os = "windows")] -pub fn find_optix_root() -> Option<PathBuf> { - // the optix SDK installer sets OPTIX_ROOT_DIR whenever it installs. - // We also check OPTIX_ROOT first in case someone wants to override it without overriding - // the SDK-set variable.
- - env::var("OPTIX_ROOT") - .ok() - .or_else(|| env::var("OPTIX_ROOT_DIR").ok()) - .map(PathBuf::from) -} - -#[cfg(target_family = "unix")] -pub fn find_optix_root() -> Option { - env::var("OPTIX_ROOT") - .ok() - .or_else(|| env::var("OPTIX_ROOT_DIR").ok()) - .map(PathBuf::from) -} - -#[cfg(doc)] -pub fn find_libnvvm_bin_dir() -> String { - String::new() -} - -#[cfg(all(target_os = "windows", not(doc)))] -pub fn find_libnvvm_bin_dir() -> String { - if env::var("DOCS_RS").is_ok() { - return String::new(); - } - find_cuda_root() - .expect("Failed to find CUDA ROOT, make sure the CUDA SDK is installed and CUDA_PATH or CUDA_ROOT are set!") - .join("nvvm") - .join("lib") - .join("x64") - .to_string_lossy() - .into_owned() -} - -#[cfg(all(target_os = "linux", not(doc)))] -pub fn find_libnvvm_bin_dir() -> String { - if env::var("DOCS_RS").is_ok() { - return String::new(); - } - find_cuda_root() - .expect("Failed to find CUDA ROOT, make sure the CUDA SDK is installed and CUDA_PATH or CUDA_ROOT are set!") - .join("nvvm") - .join("lib64") - .to_string_lossy() - .into_owned() -} diff --git a/crates/nvvm/Cargo.toml b/crates/nvvm/Cargo.toml index 1178c468..f45fd131 100644 --- a/crates/nvvm/Cargo.toml +++ b/crates/nvvm/Cargo.toml @@ -8,5 +8,5 @@ description = "High level bindings to libnvvm" repository = "https://github.com/Rust-GPU/Rust-CUDA" readme = "../../README.md" -[build-dependencies] -find_cuda_helper = { version = "0.2", path = "../find_cuda_helper" } +[dependencies] +cust_raw = { path = "../cust_raw", default-features = false, features = ["nvvm"] } diff --git a/crates/nvvm/build.rs b/crates/nvvm/build.rs deleted file mode 100644 index 120f816b..00000000 --- a/crates/nvvm/build.rs +++ /dev/null @@ -1,6 +0,0 @@ -use find_cuda_helper::find_libnvvm_bin_dir; - -fn main() { - println!("cargo:rustc-link-search={}", find_libnvvm_bin_dir()); - println!("cargo:rustc-link-lib=dylib=nvvm"); -} diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index e8bae639..2b3962f4 
100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -8,8 +8,7 @@ use std::{ str::FromStr, }; -#[allow(warnings, clippy::warnings)] -pub mod sys; +use cust_raw::nvvm_sys; /// Get the major and minor NVVM IR version. pub fn ir_version() -> (i32, i32) { @@ -19,7 +18,7 @@ pub fn ir_version() -> (i32, i32) { let mut major_dbg = MaybeUninit::uninit(); let mut minor_dbg = MaybeUninit::uninit(); // according to the docs this cant fail - sys::nvvmIRVersion( + nvvm_sys::nvvmIRVersion( major_ir.as_mut_ptr(), minor_ir.as_mut_ptr(), major_dbg.as_mut_ptr(), @@ -37,7 +36,7 @@ pub fn dbg_version() -> (i32, i32) { let mut major_dbg = MaybeUninit::uninit(); let mut minor_dbg = MaybeUninit::uninit(); // according to the docs this cant fail - sys::nvvmIRVersion( + nvvm_sys::nvvmIRVersion( major_ir.as_mut_ptr(), minor_ir.as_mut_ptr(), major_dbg.as_mut_ptr(), @@ -53,7 +52,7 @@ pub fn nvvm_version() -> (i32, i32) { let mut major = MaybeUninit::uninit(); let mut minor = MaybeUninit::uninit(); // according to the docs this cant fail - sys::nvvmVersion(major.as_mut_ptr(), minor.as_mut_ptr()); + nvvm_sys::nvvmVersion(major.as_mut_ptr(), minor.as_mut_ptr()); (major.assume_init(), minor.assume_init()) } } @@ -83,40 +82,40 @@ pub enum NvvmError { impl Display for NvvmError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { unsafe { - let ptr = sys::nvvmGetErrorString(self.to_raw()); + let ptr = nvvm_sys::nvvmGetErrorString(self.to_raw()); f.write_str(&CStr::from_ptr(ptr).to_string_lossy()) } } } impl NvvmError { - fn to_raw(self) -> sys::nvvmResult { + fn to_raw(self) -> nvvm_sys::nvvmResult { match self { - NvvmError::CompilationError => sys::nvvmResult_NVVM_ERROR_COMPILATION, - NvvmError::OutOfMemory => sys::nvvmResult_NVVM_ERROR_OUT_OF_MEMORY, + NvvmError::CompilationError => nvvm_sys::nvvmResult::NVVM_ERROR_COMPILATION, + NvvmError::OutOfMemory => nvvm_sys::nvvmResult::NVVM_ERROR_OUT_OF_MEMORY, NvvmError::ProgramCreationFailure => { - 
sys::nvvmResult_NVVM_ERROR_PROGRAM_CREATION_FAILURE + nvvm_sys::nvvmResult::NVVM_ERROR_PROGRAM_CREATION_FAILURE } - NvvmError::IrVersionMismatch => sys::nvvmResult_NVVM_ERROR_IR_VERSION_MISMATCH, - NvvmError::InvalidOption => sys::nvvmResult_NVVM_ERROR_INVALID_OPTION, - NvvmError::InvalidInput => sys::nvvmResult_NVVM_ERROR_INVALID_INPUT, - NvvmError::InvalidIr => sys::nvvmResult_NVVM_ERROR_INVALID_IR, - NvvmError::NoModuleInProgram => sys::nvvmResult_NVVM_ERROR_NO_MODULE_IN_PROGRAM, + NvvmError::IrVersionMismatch => nvvm_sys::nvvmResult::NVVM_ERROR_IR_VERSION_MISMATCH, + NvvmError::InvalidOption => nvvm_sys::nvvmResult::NVVM_ERROR_INVALID_OPTION, + NvvmError::InvalidInput => nvvm_sys::nvvmResult::NVVM_ERROR_INVALID_INPUT, + NvvmError::InvalidIr => nvvm_sys::nvvmResult::NVVM_ERROR_INVALID_IR, + NvvmError::NoModuleInProgram => nvvm_sys::nvvmResult::NVVM_ERROR_NO_MODULE_IN_PROGRAM, } } - fn from_raw(result: sys::nvvmResult) -> Self { + fn from_raw(result: nvvm_sys::nvvmResult) -> Self { use NvvmError::*; match result { - sys::nvvmResult_NVVM_ERROR_COMPILATION => CompilationError, - sys::nvvmResult_NVVM_ERROR_OUT_OF_MEMORY => OutOfMemory, - sys::nvvmResult_NVVM_ERROR_PROGRAM_CREATION_FAILURE => ProgramCreationFailure, - sys::nvvmResult_NVVM_ERROR_IR_VERSION_MISMATCH => IrVersionMismatch, - sys::nvvmResult_NVVM_ERROR_INVALID_OPTION => InvalidOption, - sys::nvvmResult_NVVM_ERROR_INVALID_INPUT => InvalidInput, - sys::nvvmResult_NVVM_ERROR_INVALID_IR => InvalidIr, - sys::nvvmResult_NVVM_ERROR_NO_MODULE_IN_PROGRAM => NoModuleInProgram, - sys::nvvmResult_NVVM_SUCCESS => panic!(), + nvvm_sys::nvvmResult::NVVM_ERROR_COMPILATION => CompilationError, + nvvm_sys::nvvmResult::NVVM_ERROR_OUT_OF_MEMORY => OutOfMemory, + nvvm_sys::nvvmResult::NVVM_ERROR_PROGRAM_CREATION_FAILURE => ProgramCreationFailure, + nvvm_sys::nvvmResult::NVVM_ERROR_IR_VERSION_MISMATCH => IrVersionMismatch, + nvvm_sys::nvvmResult::NVVM_ERROR_INVALID_OPTION => InvalidOption, + 
nvvm_sys::nvvmResult::NVVM_ERROR_INVALID_INPUT => InvalidInput, + nvvm_sys::nvvmResult::NVVM_ERROR_INVALID_IR => InvalidIr, + nvvm_sys::nvvmResult::NVVM_ERROR_NO_MODULE_IN_PROGRAM => NoModuleInProgram, + nvvm_sys::nvvmResult::NVVM_SUCCESS => panic!(), _ => unreachable!(), } } @@ -126,10 +125,10 @@ trait ToNvvmResult { fn to_result(self) -> Result<(), NvvmError>; } -impl ToNvvmResult for sys::nvvmResult { +impl ToNvvmResult for nvvm_sys::nvvmResult { fn to_result(self) -> Result<(), NvvmError> { let err = match self { - sys::nvvmResult_NVVM_SUCCESS => return Ok(()), + nvvm_sys::nvvmResult::NVVM_SUCCESS => return Ok(()), _ => NvvmError::from_raw(self), }; Err(err) @@ -295,13 +294,13 @@ impl Default for NvvmArch { } pub struct NvvmProgram { - raw: sys::nvvmProgram, + raw: nvvm_sys::nvvmProgram, } impl Drop for NvvmProgram { fn drop(&mut self) { unsafe { - sys::nvvmDestroyProgram(&mut self.raw as *mut _) + nvvm_sys::nvvmDestroyProgram(&mut self.raw as *mut _) .to_result() .expect("failed to destroy nvvm program"); } @@ -313,7 +312,7 @@ impl NvvmProgram { pub fn new() -> Result { unsafe { let mut raw = MaybeUninit::uninit(); - sys::nvvmCreateProgram(raw.as_mut_ptr()).to_result()?; + nvvm_sys::nvvmCreateProgram(raw.as_mut_ptr()).to_result()?; Ok(Self { raw: raw.assume_init(), }) @@ -333,13 +332,13 @@ impl NvvmProgram { .map(|x| x.as_ptr().cast()) .collect::>(); - sys::nvvmCompileProgram(self.raw, options.len() as i32, options_ptr.as_mut_ptr()) + nvvm_sys::nvvmCompileProgram(self.raw, options.len() as i32, options_ptr.as_mut_ptr()) .to_result()?; let mut size = 0; - sys::nvvmGetCompiledResultSize(self.raw, &mut size as *mut usize as *mut _) + nvvm_sys::nvvmGetCompiledResultSize(self.raw, &mut size as *mut usize as *mut _) .to_result()?; let mut buf: Vec = Vec::with_capacity(size); - sys::nvvmGetCompiledResult(self.raw, buf.as_mut_ptr().cast()).to_result()?; + nvvm_sys::nvvmGetCompiledResult(self.raw, buf.as_mut_ptr().cast()).to_result()?; buf.set_len(size); // 𝖇𝖆𝖓𝖎𝖘𝖍 𝖙𝖍𝖞 
𝖓𝖚𝖑 buf.pop(); @@ -351,10 +350,10 @@ impl NvvmProgram { pub fn add_module(&self, bitcode: &[u8], name: String) -> Result<(), NvvmError> { unsafe { let cstring = CString::new(name).expect("module name with nul"); - sys::nvvmAddModuleToProgram( + nvvm_sys::nvvmAddModuleToProgram( self.raw, bitcode.as_ptr().cast(), - bitcode.len() as u64, + bitcode.len(), cstring.as_ptr(), ) .to_result() @@ -370,10 +369,10 @@ impl NvvmProgram { pub fn add_lazy_module(&self, bitcode: &[u8], name: String) -> Result<(), NvvmError> { unsafe { let cstring = CString::new(name).expect("module name with nul"); - sys::nvvmLazyAddModuleToProgram( + nvvm_sys::nvvmLazyAddModuleToProgram( self.raw, bitcode.as_ptr().cast(), - bitcode.len() as u64, + bitcode.len(), cstring.as_ptr(), ) .to_result() @@ -388,10 +387,10 @@ impl NvvmProgram { pub fn compiler_log(&self) -> Result, NvvmError> { unsafe { let mut size = MaybeUninit::uninit(); - sys::nvvmGetProgramLogSize(self.raw, size.as_mut_ptr()).to_result()?; - let size = size.assume_init() as usize; + nvvm_sys::nvvmGetProgramLogSize(self.raw, size.as_mut_ptr()).to_result()?; + let size = size.assume_init(); let mut buf: Vec = Vec::with_capacity(size); - sys::nvvmGetProgramLog(self.raw, buf.as_mut_ptr().cast()).to_result()?; + nvvm_sys::nvvmGetProgramLog(self.raw, buf.as_mut_ptr().cast()).to_result()?; buf.set_len(size); // 𝖇𝖆𝖓𝖎𝖘𝖍 𝖙𝖍𝖞 𝖓𝖚𝖑 buf.pop(); @@ -403,7 +402,7 @@ impl NvvmProgram { /// Verify the program without actually compiling it. In the case of invalid IR, you can find /// more detailed error info by calling [`compiler_log`](Self::compiler_log). pub fn verify(&self) -> Result<(), NvvmError> { - unsafe { sys::nvvmVerifyProgram(self.raw, 0, null_mut()).to_result() } + unsafe { nvvm_sys::nvvmVerifyProgram(self.raw, 0, null_mut()).to_result() } } } diff --git a/crates/nvvm/src/sys.rs b/crates/nvvm/src/sys.rs deleted file mode 100644 index 7c6aa396..00000000 --- a/crates/nvvm/src/sys.rs +++ /dev/null @@ -1,305 +0,0 @@ -//! Raw bindings to libnvvm. 
All APIs are exposed safely so this module should generally not be used. - -// generated by bindgen - -pub const nvvmResult_NVVM_SUCCESS: nvvmResult = 0; -pub const nvvmResult_NVVM_ERROR_OUT_OF_MEMORY: nvvmResult = 1; -pub const nvvmResult_NVVM_ERROR_PROGRAM_CREATION_FAILURE: nvvmResult = 2; -pub const nvvmResult_NVVM_ERROR_IR_VERSION_MISMATCH: nvvmResult = 3; -pub const nvvmResult_NVVM_ERROR_INVALID_INPUT: nvvmResult = 4; -pub const nvvmResult_NVVM_ERROR_INVALID_PROGRAM: nvvmResult = 5; -pub const nvvmResult_NVVM_ERROR_INVALID_IR: nvvmResult = 6; -pub const nvvmResult_NVVM_ERROR_INVALID_OPTION: nvvmResult = 7; -pub const nvvmResult_NVVM_ERROR_NO_MODULE_IN_PROGRAM: nvvmResult = 8; -pub const nvvmResult_NVVM_ERROR_COMPILATION: nvvmResult = 9; - -#[doc = " \\ingroup error"] -#[doc = " \\brief NVVM API call result code."] -pub type nvvmResult = ::std::os::raw::c_int; -extern "C" { - #[doc = " \\ingroup error"] - #[doc = " \\brief Get the message string for the given #nvvmResult code."] - #[doc = ""] - #[doc = " \\param [in] result NVVM API result code."] - #[doc = " \\return Message string for the given #nvvmResult code."] - pub fn nvvmGetErrorString(result: nvvmResult) -> *const ::std::os::raw::c_char; -} -extern "C" { - #[doc = " \\ingroup query"] - #[doc = " \\brief Get the NVVM version."] - #[doc = ""] - #[doc = " \\param [out] major NVVM major version number."] - #[doc = " \\param [out] minor NVVM minor version number."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = ""] - pub fn nvvmVersion( - major: *mut ::std::os::raw::c_int, - minor: *mut ::std::os::raw::c_int, - ) -> nvvmResult; -} -extern "C" { - #[doc = " \\ingroup query"] - #[doc = " \\brief Get the NVVM IR version."] - #[doc = ""] - #[doc = " \\param [out] majorIR NVVM IR major version number."] - #[doc = " \\param [out] minorIR NVVM IR minor version number."] - #[doc = " \\param [out] majorDbg NVVM IR debug metadata major version number."] - #[doc = " \\param 
[out] minorDbg NVVM IR debug metadata minor version number."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = ""] - pub fn nvvmIRVersion( - majorIR: *mut ::std::os::raw::c_int, - minorIR: *mut ::std::os::raw::c_int, - majorDbg: *mut ::std::os::raw::c_int, - minorDbg: *mut ::std::os::raw::c_int, - ) -> nvvmResult; -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct _nvvmProgram { - _unused: [u8; 0], -} -#[doc = " \\ingroup compilation"] -#[doc = " \\brief NVVM Program"] -#[doc = ""] -#[doc = " An opaque handle for a program"] -pub type nvvmProgram = *mut _nvvmProgram; -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = " \\brief Create a program, and set the value of its handle to *prog."] - #[doc = ""] - #[doc = " \\param [in] prog NVVM program."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_OUT_OF_MEMORY \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_PROGRAM \\endlink"] - #[doc = ""] - #[doc = " \\see nvvmDestroyProgram()"] - pub fn nvvmCreateProgram(prog: *mut nvvmProgram) -> nvvmResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = " \\brief Destroy a program."] - #[doc = ""] - #[doc = " \\param [in] prog NVVM program."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_PROGRAM \\endlink"] - #[doc = ""] - #[doc = " \\see nvvmCreateProgram()"] - pub fn nvvmDestroyProgram(prog: *mut nvvmProgram) -> nvvmResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = " \\brief Add a module level NVVM IR to a program."] - #[doc = ""] - #[doc = " The buffer should contain an NVVM IR module."] - #[doc = " The module should have NVVM IR version 1.6 either in the LLVM 7.0.1 bitcode"] - #[doc = " representation or in the LLVM 7.0.1 text representation. 
Support for reading"] - #[doc = " the text representation of NVVM IR is deprecated and may be removed in a"] - #[doc = " later version."] - #[doc = ""] - #[doc = " \\param [in] prog NVVM program."] - #[doc = " \\param [in] buffer NVVM IR module in the bitcode or text"] - #[doc = " representation."] - #[doc = " \\param [in] size Size of the NVVM IR module."] - #[doc = " \\param [in] name Name of the NVVM IR module."] - #[doc = " If NULL, \"\" is used as the name."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_OUT_OF_MEMORY \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_INPUT \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_PROGRAM \\endlink"] - pub fn nvvmAddModuleToProgram( - prog: nvvmProgram, - buffer: *const ::std::os::raw::c_char, - size: size_t, - name: *const ::std::os::raw::c_char, - ) -> nvvmResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = " \\brief Add a module level NVVM IR to a program."] - #[doc = ""] - #[doc = " The buffer should contain an NVVM IR module. The module should have NVVM IR"] - #[doc = " version 1.6 in LLVM 7.0.1 bitcode representation."] - #[doc = ""] - #[doc = " A module added using this API is lazily loaded - the only symbols loaded"] - #[doc = " are those that are required by module(s) loaded using"] - #[doc = " nvvmAddModuleToProgram. It is an error for a program to have"] - #[doc = " all modules loaded using this API. Compiler may also optimize entities"] - #[doc = " in this module by making them internal to the linked NVVM IR module,"] - #[doc = " making them eligible for other optimizations. 
Due to these"] - #[doc = " optimizations, this API to load a module is more efficient and should"] - #[doc = " be used where possible."] - #[doc = ""] - #[doc = " \\param [in] prog NVVM program."] - #[doc = " \\param [in] buffer NVVM IR module in the bitcode representation."] - #[doc = " \\param [in] size Size of the NVVM IR module."] - #[doc = " \\param [in] name Name of the NVVM IR module."] - #[doc = " If NULL, \"\" is used as the name."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_OUT_OF_MEMORY \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_INPUT \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_PROGRAM \\endlink"] - pub fn nvvmLazyAddModuleToProgram( - prog: nvvmProgram, - buffer: *const ::std::os::raw::c_char, - size: size_t, - name: *const ::std::os::raw::c_char, - ) -> nvvmResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = " \\brief Compile the NVVM program."] - #[doc = ""] - #[doc = " The NVVM IR modules in the program will be linked at the IR level."] - #[doc = " The linked IR program is compiled to PTX."] - #[doc = ""] - #[doc = " The target datalayout in the linked IR program is used to"] - #[doc = " determine the address size (32bit vs 64bit)."] - #[doc = ""] - #[doc = " The valid compiler options are:"] - #[doc = ""] - #[doc = " - -g (enable generation of debugging information, valid only with -opt=0)"] - #[doc = " - -generate-line-info (generate line number information)"] - #[doc = " - -opt="] - #[doc = " - 0 (disable optimizations)"] - #[doc = " - 3 (default, enable optimizations)"] - #[doc = " - -arch="] - #[doc = " - compute_35"] - #[doc = " - compute_37"] - #[doc = " - compute_50"] - #[doc = " - compute_52 (default)"] - #[doc = " - compute_53"] - #[doc = " - compute_60"] - #[doc = " - compute_61"] - #[doc = " - compute_62"] - #[doc = " - compute_70"] - #[doc = " - compute_72"] - #[doc = " - compute_75"] - #[doc = 
" - compute_80"] - #[doc = " - -ftz="] - #[doc = " - 0 (default, preserve denormal values, when performing"] - #[doc = " single-precision floating-point operations)"] - #[doc = " - 1 (flush denormal values to zero, when performing"] - #[doc = " single-precision floating-point operations)"] - #[doc = " - -prec-sqrt="] - #[doc = " - 0 (use a faster approximation for single-precision"] - #[doc = " floating-point square root)"] - #[doc = " - 1 (default, use IEEE round-to-nearest mode for"] - #[doc = " single-precision floating-point square root)"] - #[doc = " - -prec-div="] - #[doc = " - 0 (use a faster approximation for single-precision"] - #[doc = " floating-point division and reciprocals)"] - #[doc = " - 1 (default, use IEEE round-to-nearest mode for"] - #[doc = " single-precision floating-point division and reciprocals)"] - #[doc = " - -fma="] - #[doc = " - 0 (disable FMA contraction)"] - #[doc = " - 1 (default, enable FMA contraction)"] - #[doc = ""] - #[doc = " \\param [in] prog NVVM program."] - #[doc = " \\param [in] numOptions Number of compiler options passed."] - #[doc = " \\param [in] options Compiler options in the form of C string array."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_OUT_OF_MEMORY \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_IR_VERSION_MISMATCH \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_PROGRAM \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_OPTION \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_NO_MODULE_IN_PROGRAM \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_COMPILATION \\endlink"] - pub fn nvvmCompileProgram( - prog: nvvmProgram, - numOptions: ::std::os::raw::c_int, - options: *mut *const ::std::os::raw::c_char, - ) -> nvvmResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = " \\brief Verify the NVVM program."] - #[doc = ""] - #[doc = " The valid compiler options 
are:"] - #[doc = ""] - #[doc = " Same as for nvvmCompileProgram()."] - #[doc = ""] - #[doc = " \\param [in] prog NVVM program."] - #[doc = " \\param [in] numOptions Number of compiler options passed."] - #[doc = " \\param [in] options Compiler options in the form of C string array."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_OUT_OF_MEMORY \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_IR_VERSION_MISMATCH \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_PROGRAM \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_IR \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_OPTION \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_NO_MODULE_IN_PROGRAM \\endlink"] - #[doc = ""] - #[doc = " \\see nvvmCompileProgram()"] - pub fn nvvmVerifyProgram( - prog: nvvmProgram, - numOptions: ::std::os::raw::c_int, - options: *mut *const ::std::os::raw::c_char, - ) -> nvvmResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = " \\brief Get the size of the compiled result."] - #[doc = ""] - #[doc = " \\param [in] prog NVVM program."] - #[doc = " \\param [out] bufferSizeRet Size of the compiled result (including the"] - #[doc = " trailing NULL)."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_PROGRAM \\endlink"] - pub fn nvvmGetCompiledResultSize(prog: nvvmProgram, bufferSizeRet: *mut size_t) -> nvvmResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = " \\brief Get the compiled result."] - #[doc = ""] - #[doc = " The result is stored in the memory pointed by 'buffer'."] - #[doc = ""] - #[doc = " \\param [in] prog NVVM program."] - #[doc = " \\param [out] buffer Compiled result."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_PROGRAM 
\\endlink"] - pub fn nvvmGetCompiledResult( - prog: nvvmProgram, - buffer: *mut ::std::os::raw::c_char, - ) -> nvvmResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = " \\brief Get the Size of Compiler/Verifier Message."] - #[doc = ""] - #[doc = " The size of the message string (including the trailing NULL) is stored into"] - #[doc = " 'buffer_size_ret' when the return value is NVVM_SUCCESS."] - #[doc = ""] - #[doc = " \\param [in] prog NVVM program."] - #[doc = " \\param [out] bufferSizeRet Size of the compilation/verification log"] - #[doc = "(including the trailing NULL)."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_PROGRAM \\endlink"] - pub fn nvvmGetProgramLogSize(prog: nvvmProgram, bufferSizeRet: *mut size_t) -> nvvmResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = " \\brief Get the Compiler/Verifier Message"] - #[doc = ""] - #[doc = " The NULL terminated message string is stored in the memory pointed by"] - #[doc = " 'buffer' when the return value is NVVM_SUCCESS."] - #[doc = ""] - #[doc = " \\param [in] prog NVVM program program."] - #[doc = " \\param [out] buffer Compilation/Verification log."] - #[doc = " \\return"] - #[doc = " - \\link ::nvvmResult NVVM_SUCCESS \\endlink"] - #[doc = " - \\link ::nvvmResult NVVM_ERROR_INVALID_PROGRAM \\endlink"] - pub fn nvvmGetProgramLog(prog: nvvmProgram, buffer: *mut ::std::os::raw::c_char) -> nvvmResult; -} -pub type size_t = ::std::os::raw::c_ulonglong; diff --git a/crates/optix-sys/Cargo.toml b/crates/optix-sys/Cargo.toml new file mode 100644 index 00000000..494b09ed --- /dev/null +++ b/crates/optix-sys/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "optix-sys" +version = "0.1.0" +edition = "2024" +license = "MIT OR Apache-2.0" +repository = "https://github.com/Rust-GPU/Rust-CUDA" +readme = "../../README.md" +links = "optix" +build = "build/main.rs" + +[dependencies] +cust_raw = { path 
= "../cust_raw", default-features = false, features = ["driver"] } + +[build-dependencies] +bindgen = "0.71.1" +cc = "1.0.71" diff --git a/crates/optix-sys/build/main.rs b/crates/optix-sys/build/main.rs new file mode 100644 index 00000000..078618f6 --- /dev/null +++ b/crates/optix-sys/build/main.rs @@ -0,0 +1,112 @@ +use std::env; +use std::path; + +pub mod optix_sdk; + +// OptiX is a bit exotic in how it provides its functions. It uses a function table +// approach, a function table struct holds function pointers to every optix function. Then +// the Optix driver dll is loaded at runtime and the function table is loaded from that. +// OptiX provides this logic inside optix_stubs.h in the include dir, so we need to compile that +// to a lib and link it in so that we have the initialization and C function logic. +fn main() { + let sdk = optix_sdk::OptiXSdk::new().expect("Cannot create OptiX SDK instance."); + let cuda_include_paths = env::var_os("DEP_CUDA_CUDA_INCLUDE") + .map(|s| env::split_paths(s.as_os_str()).collect::>()) + .expect("Cannot find transitive metadata 'cuda_include' from cust_raw package."); + + println!("cargo::rerun-if-changed=build"); + // Emit metadata for the build script. + println!("cargo::metadata=root={}", sdk.optix_root().display()); + println!("cargo::metadata=version={}", sdk.optix_version()); + println!( + "cargo::metadata=version_major={}", + sdk.optix_version_major(), + ); + println!( + "cargo::metadata=version_minor={}", + sdk.optix_version_minor(), + ); + println!( + "cargo::metadata=version_micro={}", + sdk.optix_version_micro(), + ); + let metadata_optix_include = env::join_paths(sdk.optix_include_paths()) + .map(|s| s.to_string_lossy().to_string()) + .expect("Failed to build metadata for include."); + println!("cargo::metadata=include_dir={}", metadata_optix_include); + + // Generate optix bindings. 
+ create_optix_bindings(&sdk, &cuda_include_paths); + cc::Build::new() + .file("build/optix_stubs.c") + .includes(sdk.optix_include_paths()) + .includes(&cuda_include_paths) + .cpp(false) + .compile("optix_stubs"); +} + +fn create_optix_bindings(sdk: &optix_sdk::OptiXSdk, cuda_include_paths: &[path::PathBuf]) { + let outdir = path::PathBuf::from( + env::var("OUT_DIR").expect("OUT_DIR environment variable should be set by cargo."), + ); + + let bindgen_path = path::PathBuf::from(format!("{}/optix_sys.rs", outdir.display())); + let bindings = bindgen::Builder::default() + .header("build/wrapper.h") + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .raw_line("use cust_raw::driver_sys::*;") + .clang_args( + sdk.optix_include_paths() + .iter() + .map(|p| format!("-I{}", p.display())), + ) + .clang_args( + cuda_include_paths + .iter() + .map(|p| format!("-I{}", p.display())), + ) + .allowlist_recursively(false) + .allowlist_type("Optix.*") + .allowlist_type("RaygenRecord") + .allowlist_type("MissRecord") + .allowlist_type("HitgroupRecord") + .allowlist_function("optix.*") + .allowlist_var("OPTIX_VERSION") + .allowlist_var("OptixSbtRecordHeaderSize") + .allowlist_var("OptixSbtRecordAlignment") + .allowlist_var("OptixAccelBufferByteAlignment") + .allowlist_var("OptixInstanceByteAlignment") + .allowlist_var("OptixAabbBufferByteAlignment") + .allowlist_var("OptixGeometryTransformByteAlignment") + .allowlist_var("OptixTransformByteAlignment") + .allowlist_var("OptixVersion") + .allowlist_var("OptixBuildInputSize") + .allowlist_var("OptixShaderBindingTableSize") + .newtype_enum("OptixResult") + .constified_enum_module("OptixCompileOptimizationLevel") + .constified_enum_module("OptixCompileDebugLevel") + .constified_enum_module("OptixTraversableGraphFlags") + .constified_enum_module("OptixExceptionFlags") + .constified_enum_module("OptixProgramGroupKind") + .constified_enum_module("OptixDeviceProperty") + .constified_enum_module("OptixPixelFormat") + 
.constified_enum_module("OptixDenoiserModelKind") + .rustified_enum("GeometryFlags") + .rustified_enum("OptixGeometryFlags") + .constified_enum("OptixVertexFormat") + .constified_enum("OptixIndicesFormat") + .default_enum_style(bindgen::EnumVariation::Rust { + non_exhaustive: true, + }) + .derive_default(true) + .derive_eq(true) + .derive_hash(true) + .derive_ord(true) + .size_t_is_usize(true) + .layout_tests(true) + .generate() + .expect("Unable to generate OptiX bindings"); + bindings + .write_to_file(bindgen_path.as_path()) + .expect("Cannot write OptiX bindgen output to file."); +} diff --git a/crates/optix-sys/build/optix_sdk.rs b/crates/optix-sys/build/optix_sdk.rs new file mode 100644 index 00000000..bc0cf736 --- /dev/null +++ b/crates/optix-sys/build/optix_sdk.rs @@ -0,0 +1,88 @@ +use std::env; +use std::error; +use std::fs; +use std::path; + +/// Represents the OptiX SDK installation. +#[derive(Debug, Clone)] +pub struct OptiXSdk { + /// The root directory of the OptiX SDK installation. + optix_root: path::PathBuf, + optix_include_paths: Vec, + /// The version of the OptiX SDK, represented as an integer (e.g., 90000 for OptiX 9.0.0). + optix_version: u32, +} + +impl OptiXSdk { + /// Creates a new `OptiXSdk` instance by locating the OptiX SDK installation + /// and parsing its version from the `optix.h` header file. + /// + /// # Errors + /// Returns an error if the OptiX SDK cannot be found, if the version cannot be parsed + /// or cust_raw package does not provide metadata information. + pub fn new() -> Result> { + let optix_root = Self::find_optix_root().ok_or("OptiX SDK cannot be found.")?; + // Retrieve the OptiX VERSION. + let header_path = optix_root.join("include").join("optix.h"); + let header_content = fs::read_to_string(header_path)?; + let optix_version = Self::parse_optix_version(header_content.as_str())?; + // Retrieve the OptiX include paths. 
+ let optix_include_paths = vec![optix_root.join("include")]; + + Ok(Self { + optix_root, + optix_include_paths, + optix_version, + }) + } + + pub fn optix_root(&self) -> &path::Path { + &self.optix_root + } + + pub fn optix_include_paths(&self) -> &[path::PathBuf] { + &self.optix_include_paths + } + + pub fn optix_version(&self) -> u32 { + self.optix_version + } + + pub fn optix_version_major(&self) -> u32 { + self.optix_version / 10000 + } + + pub fn optix_version_minor(&self) -> u32 { + (self.optix_version % 10000) / 100 + } + + pub fn optix_version_micro(&self) -> u32 { + self.optix_version % 100 + } + + fn find_optix_root() -> Option { + // the optix SDK installer sets OPTIX_ROOT_DIR whenever it installs. + // We also check OPTIX_ROOT first in case someone wants to override it without overriding + // the SDK-set variable. + env::var("OPTIX_ROOT") + .ok() + .or_else(|| env::var("OPTIX_ROOT_DIR").ok()) + .map(path::PathBuf::from) + } + + /// Parses the content of the `optix.h` header file to extract the OptiX version. + /// + /// # Errors + /// Returns an error if the `OPTIX_VERSION` definition cannot be found or parsed. 
+ fn parse_optix_version(header_content: &str) -> Result> { + let version = header_content + .lines() + .find(|line| line.contains("#define OPTIX_VERSION")) + .and_then(|line| line.split_whitespace().last()) + .ok_or("Cannot find OPTIX_VERSION from OptiX header file.")?; + let version = version + .parse::() + .map_err(|_| format!("Cannot parse OPTIX_VERSION as u32: '{}'", version))?; + Ok(version) + } +} diff --git a/crates/optix/optix_stubs.c b/crates/optix-sys/build/optix_stubs.c similarity index 100% rename from crates/optix/optix_stubs.c rename to crates/optix-sys/build/optix_stubs.c diff --git a/crates/optix/src/optix_wrapper.h b/crates/optix-sys/build/wrapper.h similarity index 98% rename from crates/optix/src/optix_wrapper.h rename to crates/optix-sys/build/wrapper.h index bd9a4d7a..a55ff0a8 100644 --- a/crates/optix/src/optix_wrapper.h +++ b/crates/optix-sys/build/wrapper.h @@ -21,7 +21,8 @@ static const size_t OptixShaderBindingTableSize = sizeof(OptixShaderBindingTable /** *

*/ -enum GeometryFlags { +enum GeometryFlags +{ None = OPTIX_GEOMETRY_FLAG_NONE, DisableAnyHit = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT, RequireSingleAnyHitCall = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL diff --git a/crates/optix-sys/src/lib.rs b/crates/optix-sys/src/lib.rs new file mode 100644 index 00000000..ba504734 --- /dev/null +++ b/crates/optix-sys/src/lib.rs @@ -0,0 +1,10 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +#![allow(unsafe_op_in_unsafe_fn)] + +mod optix_sys; +mod stub; + +pub use crate::optix_sys::*; +pub use crate::stub::*; diff --git a/crates/optix-sys/src/optix_sys.rs b/crates/optix-sys/src/optix_sys.rs new file mode 100644 index 00000000..9cbf5a7e --- /dev/null +++ b/crates/optix-sys/src/optix_sys.rs @@ -0,0 +1,5 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +include!(concat!(env!("OUT_DIR"), "/optix_sys.rs")); diff --git a/crates/optix-sys/src/stub.rs b/crates/optix-sys/src/stub.rs new file mode 100644 index 00000000..a353d4fe --- /dev/null +++ b/crates/optix-sys/src/stub.rs @@ -0,0 +1,5 @@ +use crate::optix_sys::OptixResult; + +unsafe extern "C" { + pub fn optixInit() -> OptixResult; +} diff --git a/crates/optix/Cargo.toml b/crates/optix/Cargo.toml index 92f04a47..639c58f1 100644 --- a/crates/optix/Cargo.toml +++ b/crates/optix/Cargo.toml @@ -7,17 +7,9 @@ repository = "https://github.com/Rust-GPU/Rust-CUDA" readme = "../../README.md" authors = ["Anders Langlands ", "Riccardo D'Ambrosio "] -[features] -optix71 = [] -optix72 = [] -optix73 = [] -default=["optix73", "impl_glam"] -impl_glam=["cust/impl_glam", "glam"] -impl_half=["cust/impl_half", "half"] - [dependencies] cust = { version = "0.3", path = "../cust", features=["impl_mint"] } -cust_raw = { version = "0.11.2", path = "../cust_raw" } +cust_raw = { path = "../cust_raw", features=["driver"] } cfg-if = "1.0.0" bitflags = "2.9.0" glam = { version = "0.29", features=["cuda", "libm"], 
default-features=false, optional=true } @@ -25,11 +17,12 @@ half = { version = "2.4.1", optional = true } memoffset = "0.9.1" mint = "0.5.9" embed-doc-image = {version = "0.1.4"} +optix-sys = { path = "../optix-sys", default-features = false } -[build-dependencies] -bindgen = "0.71.1" -cc = "1.0.71" -find_cuda_helper = { version = "0.2", path = "../find_cuda_helper" } +[features] +default=["impl_glam"] +impl_glam=["cust/impl_glam", "glam"] +impl_half=["cust/impl_half", "half"] [package.metadata.docs.rs] rustdoc-args = [ "--html-in-header", "katex-header.html" ] diff --git a/crates/optix/build.rs b/crates/optix/build.rs index feb65716..9a77d82b 100644 --- a/crates/optix/build.rs +++ b/crates/optix/build.rs @@ -1,83 +1,24 @@ -use find_cuda_helper::{find_cuda_root, find_optix_root}; -use std::path::{Path, PathBuf}; +use std::env; -// OptiX is a bit exotic in how it provides its functions. It uses a function table -// approach, a function table struct holds function pointers to every optix function. Then -// the Optix driver dll is loaded at runtime and the function table is loaded from that. -// OptiX provides this logic inside optix_stubs.h in the include dir, so we need to compile that -// to a lib and link it in so that we have the initialization and C function logic. 
fn main() { - let mut optix_include = find_optix_root().expect( - "Unable to find the OptiX SDK, make sure you installed it and - that OPTIX_ROOT or OPTIX_ROOT_DIR are set", - ); - optix_include = optix_include.join("include"); + let optix_version = env::var("DEP_OPTIX_VERSION") + .expect("Cannot find transitive metadata 'version' from optix-sys package.") + .parse::() + .expect("Failed to parse OptiX version"); - let mut cuda_include = find_cuda_root().expect( - "Unable to find the CUDA Toolkit, make sure you installed it and - that CUDA_ROOT, CUDA_PATH or CUDA_TOOLKIT_ROOT_DIR are set", - ); - cuda_include = cuda_include.join("include"); + println!("cargo::rustc-check-cfg=cfg(optix_build_input_instance_array_aabbs)"); + println!("cargo::rustc-check-cfg=cfg(optix_module_compile_options_bound_values)"); + println!("cargo::rustc-check-cfg=cfg(optix_pipeline_compile_options_reserved)"); + println!("cargo::rustc-check-cfg=cfg(optix_program_group_options_reserved)"); - bindgen_optix(&optix_include, &cuda_include); - - println!("cargo:rerun-if-changed=optix_stubs.c"); - cc::Build::new() - .file("./optix_stubs.c") - .include(optix_include) - .include(cuda_include) - .cpp(false) - .compile("optix_stubs"); -} - -fn bindgen_optix(optix_include: &Path, cuda_include: &Path) { - let out_path = PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("optix_wrapper.rs"); - - let bindings = bindgen::Builder::default() - .header("src/optix_wrapper.h") - .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) - .clang_arg(format!("-I{}", optix_include.display())) - .clang_arg(format!("-I{}", cuda_include.display())) - .allowlist_recursively(false) - .allowlist_type("Optix.*") - .allowlist_type("RaygenRecord") - .allowlist_type("MissRecord") - .allowlist_type("HitgroupRecord") - .blocklist_type("OptixBuildInput") - .allowlist_function("optix.*") - .allowlist_var("OptixSbtRecordHeaderSize") - .allowlist_var("OptixSbtRecordAlignment") - .allowlist_var("OptixAccelBufferByteAlignment") 
- .allowlist_var("OptixInstanceByteAlignment") - .allowlist_var("OptixAabbBufferByteAlignment") - .allowlist_var("OptixGeometryTransformByteAlignment") - .allowlist_var("OptixTransformByteAlignment") - .allowlist_var("OptixVersion") - .allowlist_var("OptixBuildInputSize") - .allowlist_var("OptixShaderBindingTableSize") - .layout_tests(false) - .generate_comments(false) - .newtype_enum("OptixResult") - .constified_enum_module("OptixCompileOptimizationLevel") - .constified_enum_module("OptixCompileDebugLevel") - .constified_enum_module("OptixTraversableGraphFlags") - .constified_enum_module("OptixExceptionFlags") - .constified_enum_module("OptixProgramGroupKind") - .constified_enum_module("OptixDeviceProperty") - .constified_enum_module("OptixPixelFormat") - .constified_enum_module("OptixDenoiserModelKind") - .rustified_enum("GeometryFlags") - .rustified_enum("OptixGeometryFlags") - .constified_enum("OptixVertexFormat") - .constified_enum("OptixIndicesFormat") - .rust_target(bindgen::RustTarget::nightly()) - .derive_default(true) - .derive_partialeq(true) - .formatter(bindgen::Formatter::Rustfmt) - .generate() - .expect("Unable to generate optix bindings"); - - bindings - .write_to_file(out_path) - .expect("Couldn't write bindings!"); + if optix_version < 70200 { + println!("cargo::rustc-cfg=optix_build_input_instance_array_aabbs"); + } + if optix_version >= 70200 { + println!("cargo::rustc-cfg=optix_module_compile_options_bound_values"); + } + if optix_version >= 70300 { + println!("cargo::rustc-cfg=optix_pipeline_compile_options_reserved"); + println!("cargo::rustc-cfg=optix_program_group_options_reserved"); + } } diff --git a/crates/optix/examples/ex02_pipeline/Cargo.toml b/crates/optix/examples/ex02_pipeline/Cargo.toml index a468f0d3..c5ff38a7 100644 --- a/crates/optix/examples/ex02_pipeline/Cargo.toml +++ b/crates/optix/examples/ex02_pipeline/Cargo.toml @@ -6,11 +6,11 @@ edition = "2021" # See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -optix = {path = "../../"} -cust = {path = "../../../cust"} +optix = { path = "../../" } +optix-sys = { path = "../../../optix-sys" } +cust = { path = "../../../cust" } anyhow = "1.0.44" device = { path = "./device" } [build-dependencies] -find_cuda_helper = { version = "0.2", path = "../../../find_cuda_helper" } cuda_builder = { version = "0.3", path = "../../../cuda_builder" } diff --git a/crates/optix/examples/ex02_pipeline/build.rs b/crates/optix/examples/ex02_pipeline/build.rs index b7274457..4e82edf0 100644 --- a/crates/optix/examples/ex02_pipeline/build.rs +++ b/crates/optix/examples/ex02_pipeline/build.rs @@ -1,20 +1,21 @@ +use std::env; +use std::iter; + use cuda_builder::CudaBuilder; -use find_cuda_helper::find_optix_root; fn main() { - let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); - - let mut optix_include = find_optix_root().expect( - "Unable to find the OptiX SDK, make sure you installed it and - that OPTIX_ROOT or OPTIX_ROOT_DIR are set", - ); - optix_include = optix_include.join("include"); - - let args = vec![ - format!("-I{}", optix_include.display()), - format!("-I{}/../common/gdt", manifest_dir), - ]; - + println!("cargo::rerun-if-changed=build.rs"); + + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); + let optix_include_paths = env::var_os("DEP_OPTIX_OPTIX_INCLUDE") + .map(|s| env::split_paths(s.as_os_str()).collect::>()) + .expect("Cannot find transitive metadata 'optix_include' from optix-sys package."); + + let args = optix_include_paths + .iter() + .map(|p| format!("-I{}", p.display())) + .chain(iter::once(format!("-I{}/../common/gdt", manifest_dir))) + .collect::>(); compile_to_ptx("src/ex02_pipeline.cu", &args); let ptx_path = std::path::PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("device.ptx"); @@ -28,7 +29,7 @@ fn main() { } fn compile_to_ptx(cu_path: &str, args: &[String]) { - println!("cargo:rerun-if-changed={}", cu_path); + 
println!("cargo::rerun-if-changed={}", cu_path); let full_path = std::path::PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()).join(cu_path); diff --git a/crates/optix/examples/ex03_window/Cargo.toml b/crates/optix/examples/ex03_window/Cargo.toml index c4d1fdf7..089ef9e7 100644 --- a/crates/optix/examples/ex03_window/Cargo.toml +++ b/crates/optix/examples/ex03_window/Cargo.toml @@ -6,12 +6,10 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -optix = {path = "../../"} -cust = {path = "../../../cust"} +optix = { path = "../../" } +optix-sys = { path = "../../../optix-sys" } +cust = { path = "../../../cust" } anyhow = "1.0.44" glfw = "0.42.0" gl = "0.14.0" num-traits = "0.2.14" - -[build-dependencies] -find_cuda_helper = { version = "0.2", path = "../../../find_cuda_helper" } diff --git a/crates/optix/examples/ex03_window/build.rs b/crates/optix/examples/ex03_window/build.rs index 7a7bdade..06122b04 100644 --- a/crates/optix/examples/ex03_window/build.rs +++ b/crates/optix/examples/ex03_window/build.rs @@ -1,19 +1,19 @@ -use find_cuda_helper::find_optix_root; +use std::env; +use std::iter; fn main() { - let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); - - let mut optix_include = find_optix_root().expect( - "Unable to find the OptiX SDK, make sure you installed it and - that OPTIX_ROOT or OPTIX_ROOT_DIR are set", - ); - optix_include = optix_include.join("include"); - - let args = vec![ - format!("-I{}", optix_include.display()), - format!("-I{}/../common/gdt", manifest_dir), - ]; - + println!("cargo::rerun-if-changed=build.rs"); + + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); + let optix_include_paths = env::var_os("DEP_OPTIX_OPTIX_INCLUDE") + .map(|s| env::split_paths(s.as_os_str()).collect::>()) + .expect("Cannot find transitive metadata 'optix_include' from optix-sys package."); + + let args = optix_include_paths + .iter() + .map(|p| 
format!("-I{}", p.display())) + .chain(iter::once(format!("-I{}/../common/gdt", manifest_dir))) + .collect::>(); compile_to_ptx("src/ex03_window.cu", &args); } diff --git a/crates/optix/examples/ex04_mesh/Cargo.toml b/crates/optix/examples/ex04_mesh/Cargo.toml index 190a06b6..a660b198 100644 --- a/crates/optix/examples/ex04_mesh/Cargo.toml +++ b/crates/optix/examples/ex04_mesh/Cargo.toml @@ -15,5 +15,4 @@ num-traits = "0.2.14" glam = { version = "0.29.2", features=["cuda"] } [build-dependencies] -find_cuda_helper = { version = "0.2", path = "../../../find_cuda_helper" } cuda_builder = { version = "0.3", path = "../../../cuda_builder" } diff --git a/crates/optix/src/acceleration.rs b/crates/optix/src/acceleration.rs index 18788e1b..6291bab1 100644 --- a/crates/optix/src/acceleration.rs +++ b/crates/optix/src/acceleration.rs @@ -1,6 +1,6 @@ #![allow(clippy::missing_safety_doc)] -use crate::{const_assert, const_assert_eq, context::DeviceContext, error::Error, optix_call, sys}; +use crate::{const_assert, const_assert_eq, context::DeviceContext, error::Error, optix_call}; use cust::memory::{ CopyDestination, DeviceBox, DeviceBuffer, DeviceCopy, DevicePointer, DeviceSlice, }; @@ -16,18 +16,11 @@ use std::{ marker::PhantomData, }; -use cust_raw::CUdeviceptr; +use cust_raw::driver_sys::CUdeviceptr; use mint::{RowMatrix3x4, Vector3}; -// Kinda nasty hack to work around the fact taht bindgen generates an i32 for enums on windows, -// but a u32 on linux -#[cfg(windows)] -type OptixEnumBaseType = i32; -#[cfg(unix)] -type OptixEnumBaseType = u32; - pub trait BuildInput: std::hash::Hash { - fn to_sys(&self) -> sys::OptixBuildInput; + fn to_sys(&self) -> optix_sys::OptixBuildInput; } pub trait Traversable { @@ -228,7 +221,7 @@ impl Accel { /// If this acceleration structure is copied multiple times, the same /// [`AccelRelocationInfo`] can also be used on all copies. 
pub fn get_relocation_info(&self, ctx: &DeviceContext) -> Result { - let mut inner = sys::OptixAccelRelocationInfo::default(); + let mut inner = optix_sys::OptixAccelRelocationInfo::default(); unsafe { Ok(optix_call!(optixAccelGetRelocationInfo( ctx.raw, @@ -563,7 +556,7 @@ pub unsafe fn accel_build( emitted_properties: &mut [AccelEmitDesc], ) -> Result { let mut traversable_handle = TraversableHandle { inner: 0 }; - let properties: Vec = + let properties: Vec = emitted_properties.iter_mut().map(|p| p.into()).collect(); let build_sys: Vec<_> = build_inputs.iter().map(|b| b.to_sys()).collect(); @@ -689,24 +682,23 @@ bitflags::bitflags! { /// /// Note that `PREFER_FAST_TRACE` and `PREFER_FAST_BUILD` are mutually exclusive. #[derive(Default, Clone, Copy, PartialEq, Eq, Debug)] - pub struct BuildFlags: OptixEnumBaseType { - const NONE = sys::OptixBuildFlags_OPTIX_BUILD_FLAG_NONE; - const ALLOW_UPDATE = sys::OptixBuildFlags_OPTIX_BUILD_FLAG_ALLOW_UPDATE; - const ALLOW_COMPACTION = sys::OptixBuildFlags_OPTIX_BUILD_FLAG_ALLOW_COMPACTION; - const PREFER_FAST_TRACE = sys::OptixBuildFlags_OPTIX_BUILD_FLAG_PREFER_FAST_TRACE; - const PREFER_FAST_BUILD = sys::OptixBuildFlags_OPTIX_BUILD_FLAG_PREFER_FAST_BUILD; - const ALLOW_RANDOM_VERTEX_ACCESS = sys::OptixBuildFlags_OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS; + pub struct BuildFlags: i32 { + const NONE = optix_sys::OptixBuildFlags::OPTIX_BUILD_FLAG_NONE as i32; + const ALLOW_UPDATE = optix_sys::OptixBuildFlags::OPTIX_BUILD_FLAG_ALLOW_UPDATE as i32; + const ALLOW_COMPACTION = optix_sys::OptixBuildFlags::OPTIX_BUILD_FLAG_ALLOW_COMPACTION as i32; + const PREFER_FAST_TRACE = optix_sys::OptixBuildFlags::OPTIX_BUILD_FLAG_PREFER_FAST_TRACE as i32; + const PREFER_FAST_BUILD = optix_sys::OptixBuildFlags::OPTIX_BUILD_FLAG_PREFER_FAST_BUILD as i32; + const ALLOW_RANDOM_VERTEX_ACCESS = optix_sys::OptixBuildFlags::OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS as i32; } } /// Select which operation to perform with [`accel_build()`]. 
-#[cfg_attr(windows, repr(i32))] -#[cfg_attr(unix, repr(u32))] +#[repr(i32)] #[derive(Debug, Copy, Clone, PartialEq, Default)] pub enum BuildOperation { #[default] - Build = sys::OptixBuildOperation_OPTIX_BUILD_OPERATION_BUILD, - Update = sys::OptixBuildOperation_OPTIX_BUILD_OPERATION_UPDATE, + Build = optix_sys::OptixBuildOperation::OPTIX_BUILD_OPERATION_BUILD as i32, + Update = optix_sys::OptixBuildOperation::OPTIX_BUILD_OPERATION_UPDATE as i32, } /// Configure how to handle ray times that are outside of the provided motion keys. @@ -723,9 +715,9 @@ pub struct MotionFlags(u16); bitflags::bitflags! { impl MotionFlags: u16 { - const NONE = sys::OptixMotionFlags_OPTIX_MOTION_FLAG_NONE as u16; - const START_VANISH = sys::OptixMotionFlags_OPTIX_MOTION_FLAG_START_VANISH as u16; - const END_VANISH = sys::OptixMotionFlags_OPTIX_MOTION_FLAG_END_VANISH as u16; + const NONE = optix_sys::OptixMotionFlags::OPTIX_MOTION_FLAG_NONE as u16; + const START_VANISH = optix_sys::OptixMotionFlags::OPTIX_MOTION_FLAG_START_VANISH as u16; + const END_VANISH = optix_sys::OptixMotionFlags::OPTIX_MOTION_FLAG_END_VANISH as u16; } } @@ -765,7 +757,7 @@ impl Default for MotionOptions { const_assert_eq!( std::mem::size_of::(), - std::mem::size_of::(), + std::mem::size_of::(), ); /// Options to configure the [`accel_build()`] @@ -836,7 +828,7 @@ impl AccelBuildOptions { #[repr(transparent)] pub struct AccelRelocationInfo { #[allow(dead_code)] - inner: sys::OptixAccelRelocationInfo, + inner: optix_sys::OptixAccelRelocationInfo, } /// Struct used for OptiX to communicate the necessary buffer sizes for accel @@ -889,16 +881,16 @@ impl Aabb { } } -impl From<&mut AccelEmitDesc> for sys::OptixAccelEmitDesc { +impl From<&mut AccelEmitDesc> for optix_sys::OptixAccelEmitDesc { fn from(aed: &mut AccelEmitDesc) -> Self { match aed { AccelEmitDesc::CompactedSize(p) => Self { result: p.as_raw(), - type_: sys::OptixAccelPropertyType_OPTIX_PROPERTY_TYPE_COMPACTED_SIZE, + type_: 
optix_sys::OptixAccelPropertyType::OPTIX_PROPERTY_TYPE_COMPACTED_SIZE, }, AccelEmitDesc::Aabbs(p) => Self { result: p.as_raw(), - type_: sys::OptixAccelPropertyType_OPTIX_PROPERTY_TYPE_AABBS, + type_: optix_sys::OptixAccelPropertyType::OPTIX_PROPERTY_TYPE_AABBS, }, } } @@ -924,18 +916,18 @@ impl From<&mut AccelEmitDesc> for sys::OptixAccelEmitDesc { #[repr(u32)] #[derive(Copy, Clone, PartialEq, Hash)] pub enum GeometryFlags { - None = sys::OptixGeometryFlags::None as u32, - DisableAnyHit = sys::OptixGeometryFlags::DisableAnyHit as u32, - RequireSingleAnyHitCall = sys::OptixGeometryFlags::RequireSingleAnyHitCall as u32, + None = optix_sys::OptixGeometryFlags::None as u32, + DisableAnyHit = optix_sys::OptixGeometryFlags::DisableAnyHit as u32, + RequireSingleAnyHitCall = optix_sys::OptixGeometryFlags::RequireSingleAnyHitCall as u32, } -impl From for sys::OptixGeometryFlags { +impl From for optix_sys::OptixGeometryFlags { fn from(f: GeometryFlags) -> Self { match f { - GeometryFlags::None => sys::OptixGeometryFlags::None, - GeometryFlags::DisableAnyHit => sys::OptixGeometryFlags::DisableAnyHit, + GeometryFlags::None => optix_sys::OptixGeometryFlags::None, + GeometryFlags::DisableAnyHit => optix_sys::OptixGeometryFlags::DisableAnyHit, GeometryFlags::RequireSingleAnyHitCall => { - sys::OptixGeometryFlags::RequireSingleAnyHitCall + optix_sys::OptixGeometryFlags::RequireSingleAnyHitCall } } } @@ -944,10 +936,10 @@ impl From for sys::OptixGeometryFlags { impl From for u32 { fn from(f: GeometryFlags) -> Self { match f { - GeometryFlags::None => sys::OptixGeometryFlags::None as u32, - GeometryFlags::DisableAnyHit => sys::OptixGeometryFlags::DisableAnyHit as u32, + GeometryFlags::None => optix_sys::OptixGeometryFlags::None as u32, + GeometryFlags::DisableAnyHit => optix_sys::OptixGeometryFlags::DisableAnyHit as u32, GeometryFlags::RequireSingleAnyHitCall => { - sys::OptixGeometryFlags::RequireSingleAnyHitCall as u32 + optix_sys::OptixGeometryFlags::RequireSingleAnyHitCall as 
u32 } } } @@ -1084,27 +1076,29 @@ impl<'v, 'w, 'i> CurveArray<'v, 'w, 'i> { } impl BuildInput for CurveArray<'_, '_, '_> { - fn to_sys(&self) -> sys::OptixBuildInput { - sys::OptixBuildInput { - type_: sys::OptixBuildInputType_OPTIX_BUILD_INPUT_TYPE_CURVES, - input: sys::OptixBuildInputUnion { - curve_array: std::mem::ManuallyDrop::new(sys::OptixBuildInputCurveArray { - curveType: self.curve_type.into(), - numPrimitives: self.num_primitives, - vertexBuffers: self.d_vertex_buffers.as_ptr() as *const CUdeviceptr, - numVertices: self.num_vertices, - vertexStrideInBytes: self.vertex_stride_in_bytes, - widthBuffers: self.d_width_buffers.as_ptr() as *const CUdeviceptr, - widthStrideInBytes: self.width_stride_in_bytes, - normalBuffers: std::ptr::null(), - normalStrideInBytes: 0, - indexBuffer: self.index_buffer.as_device_ptr().as_raw(), - indexStrideInBytes: self.index_stride_in_bytes, - flag: self.flags as u32, - primitiveIndexOffset: self.primitive_index_offset, - }), - }, - } + fn to_sys(&self) -> optix_sys::OptixBuildInput { + let mut v = optix_sys::OptixBuildInput { + type_: optix_sys::OptixBuildInputType::OPTIX_BUILD_INPUT_TYPE_CURVES, + ..Default::default() + }; + unsafe { + *v.__bindgen_anon_1.curveArray.as_mut() = optix_sys::OptixBuildInputCurveArray { + curveType: self.curve_type.into(), + numPrimitives: self.num_primitives, + vertexBuffers: self.d_vertex_buffers.as_ptr() as *const CUdeviceptr, + numVertices: self.num_vertices, + vertexStrideInBytes: self.vertex_stride_in_bytes, + widthBuffers: self.d_width_buffers.as_ptr() as *const CUdeviceptr, + widthStrideInBytes: self.width_stride_in_bytes, + normalBuffers: std::ptr::null(), + normalStrideInBytes: 0, + indexBuffer: self.index_buffer.as_device_ptr().as_raw(), + indexStrideInBytes: self.index_stride_in_bytes, + flag: self.flags as u32, + primitiveIndexOffset: self.primitive_index_offset, + }; + }; + v } } @@ -1116,51 +1110,50 @@ pub enum CurveType { RoundCubicBSpline, } -impl From for sys::OptixPrimitiveType 
{ +impl From for optix_sys::OptixPrimitiveType { fn from(c: CurveType) -> Self { match c { - CurveType::RoundLinear => sys::OptixPrimitiveType_OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, + CurveType::RoundLinear => { + optix_sys::OptixPrimitiveType::OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR + } CurveType::RoundQuadraticBSpline => { - sys::OptixPrimitiveType_OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE + optix_sys::OptixPrimitiveType::OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE } CurveType::RoundCubicBSpline => { - sys::OptixPrimitiveType_OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE + optix_sys::OptixPrimitiveType::OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE } } } } /// Specifies the type of vertex data -#[cfg_attr(windows, repr(i32))] -#[cfg_attr(unix, repr(u32))] +#[repr(i32)] #[derive(Copy, Clone, PartialEq)] pub enum VertexFormat { - None = sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_NONE, - Float3 = sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_FLOAT3, - Float2 = sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_FLOAT2, - Half3 = sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_HALF3, - Half2 = sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_HALF2, - SNorm16 = sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_SNORM16_3, - SNorm32 = sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_SNORM16_2, + None = optix_sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_NONE as i32, + Float3 = optix_sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_FLOAT3 as i32, + Float2 = optix_sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_FLOAT2 as i32, + Half3 = optix_sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_HALF3 as i32, + Half2 = optix_sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_HALF2 as i32, + SNorm16 = optix_sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_SNORM16_3 as i32, + SNorm32 = optix_sys::OptixVertexFormat_OPTIX_VERTEX_FORMAT_SNORM16_2 as i32, } /// Specifies the type of index data -#[cfg_attr(windows, repr(i32))] -#[cfg_attr(unix, repr(u32))] +#[repr(i32)] #[derive(Copy, Clone, PartialEq)] pub enum IndicesFormat { - None = 
sys::OptixIndicesFormat_OPTIX_INDICES_FORMAT_NONE, - Short3 = sys::OptixIndicesFormat_OPTIX_INDICES_FORMAT_UNSIGNED_SHORT3, - Int3 = sys::OptixIndicesFormat_OPTIX_INDICES_FORMAT_UNSIGNED_INT3, + None = optix_sys::OptixIndicesFormat_OPTIX_INDICES_FORMAT_NONE as i32, + Short3 = optix_sys::OptixIndicesFormat_OPTIX_INDICES_FORMAT_UNSIGNED_SHORT3 as i32, + Int3 = optix_sys::OptixIndicesFormat_OPTIX_INDICES_FORMAT_UNSIGNED_INT3 as i32, } /// Specifies the format of transform data -#[cfg_attr(windows, repr(i32))] -#[cfg_attr(unix, repr(u32))] +#[repr(i32)] #[derive(Copy, Clone, PartialEq)] pub enum TransformFormat { - None = sys::OptixTransformFormat_OPTIX_TRANSFORM_FORMAT_NONE, - MatrixFloat12 = sys::OptixTransformFormat_OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12, + None = optix_sys::OptixTransformFormat::OPTIX_TRANSFORM_FORMAT_NONE as i32, + MatrixFloat12 = optix_sys::OptixTransformFormat::OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12 as i32, } /// Trait allowing the triangle builds to be generic over the input vertex data. 
@@ -1304,38 +1297,40 @@ impl Hash for TriangleArray<'_, '_, V> { } impl BuildInput for TriangleArray<'_, '_, V> { - fn to_sys(&self) -> sys::OptixBuildInput { - sys::OptixBuildInput { - type_: sys::OptixBuildInputType_OPTIX_BUILD_INPUT_TYPE_TRIANGLES, - input: sys::OptixBuildInputUnion { - triangle_array: std::mem::ManuallyDrop::new(sys::OptixBuildInputTriangleArray { - vertexBuffers: self.d_vertex_buffers.as_ptr() as *const u64, - numVertices: self.num_vertices, - vertexFormat: V::FORMAT as _, - vertexStrideInBytes: V::STRIDE, - indexBuffer: 0, - numIndexTriplets: 0, - indexFormat: 0, - indexStrideInBytes: 0, - flags: self.geometry_flags.as_ptr() as *const _, - numSbtRecords: 1, - sbtIndexOffsetBuffer: 0, - sbtIndexOffsetSizeInBytes: 0, - sbtIndexOffsetStrideInBytes: 0, - primitiveIndexOffset: 0, - preTransform: if let Some(t) = self.pre_transform { - t.as_raw() - } else { - 0 - }, - transformFormat: if self.pre_transform.is_some() { - sys::OptixTransformFormat_OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12 - } else { - sys::OptixTransformFormat_OPTIX_TRANSFORM_FORMAT_NONE - }, - }), - }, - } + fn to_sys(&self) -> optix_sys::OptixBuildInput { + let mut v = optix_sys::OptixBuildInput { + type_: optix_sys::OptixBuildInputType::OPTIX_BUILD_INPUT_TYPE_TRIANGLES, + ..Default::default() + }; + unsafe { + *v.__bindgen_anon_1.triangleArray.as_mut() = optix_sys::OptixBuildInputTriangleArray { + vertexBuffers: self.d_vertex_buffers.as_ptr(), + numVertices: self.num_vertices, + vertexFormat: V::FORMAT as _, + vertexStrideInBytes: V::STRIDE, + indexBuffer: 0, + numIndexTriplets: 0, + indexFormat: 0, + indexStrideInBytes: 0, + flags: self.geometry_flags.as_ptr() as *const _, + numSbtRecords: 1, + sbtIndexOffsetBuffer: 0, + sbtIndexOffsetSizeInBytes: 0, + sbtIndexOffsetStrideInBytes: 0, + primitiveIndexOffset: 0, + preTransform: if let Some(t) = self.pre_transform { + t.as_raw() + } else { + 0 + }, + transformFormat: if self.pre_transform.is_some() { + 
optix_sys::OptixTransformFormat::OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12 + } else { + optix_sys::OptixTransformFormat::OPTIX_TRANSFORM_FORMAT_NONE + }, + }; + }; + v } } @@ -1388,38 +1383,40 @@ impl Hash for IndexedTriangleArray<'_, '_, V, I> { } impl BuildInput for IndexedTriangleArray<'_, '_, V, I> { - fn to_sys(&self) -> sys::OptixBuildInput { - sys::OptixBuildInput { - type_: sys::OptixBuildInputType_OPTIX_BUILD_INPUT_TYPE_TRIANGLES, - input: sys::OptixBuildInputUnion { - triangle_array: std::mem::ManuallyDrop::new(sys::OptixBuildInputTriangleArray { - vertexBuffers: self.d_vertex_buffers.as_ptr() as *const u64, - numVertices: self.num_vertices, - vertexFormat: V::FORMAT as _, - vertexStrideInBytes: V::STRIDE, - indexBuffer: self.index_buffer.as_device_ptr().as_raw(), - numIndexTriplets: self.index_buffer.len() as u32, - indexFormat: I::FORMAT as _, - indexStrideInBytes: I::STRIDE, - flags: self.geometry_flags.as_ptr() as *const _, - numSbtRecords: 1, - sbtIndexOffsetBuffer: 0, - sbtIndexOffsetSizeInBytes: 0, - sbtIndexOffsetStrideInBytes: 0, - primitiveIndexOffset: 0, - preTransform: if let Some(t) = self.pre_transform { - t.as_raw() - } else { - 0 - }, - transformFormat: if self.pre_transform.is_some() { - sys::OptixTransformFormat_OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12 - } else { - sys::OptixTransformFormat_OPTIX_TRANSFORM_FORMAT_NONE - }, - }), - }, - } + fn to_sys(&self) -> optix_sys::OptixBuildInput { + let mut v = optix_sys::OptixBuildInput { + type_: optix_sys::OptixBuildInputType::OPTIX_BUILD_INPUT_TYPE_TRIANGLES, + ..Default::default() + }; + unsafe { + *v.__bindgen_anon_1.triangleArray.as_mut() = optix_sys::OptixBuildInputTriangleArray { + vertexBuffers: self.d_vertex_buffers.as_ptr(), + numVertices: self.num_vertices, + vertexFormat: V::FORMAT as _, + vertexStrideInBytes: V::STRIDE, + indexBuffer: self.index_buffer.as_device_ptr().as_raw(), + numIndexTriplets: self.index_buffer.len() as u32, + indexFormat: I::FORMAT as _, + indexStrideInBytes: 
I::STRIDE, + flags: self.geometry_flags.as_ptr() as *const _, + numSbtRecords: 1, + sbtIndexOffsetBuffer: 0, + sbtIndexOffsetSizeInBytes: 0, + sbtIndexOffsetStrideInBytes: 0, + primitiveIndexOffset: 0, + preTransform: if let Some(t) = self.pre_transform { + t.as_raw() + } else { + 0 + }, + transformFormat: if self.pre_transform.is_some() { + optix_sys::OptixTransformFormat::OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12 + } else { + optix_sys::OptixTransformFormat::OPTIX_TRANSFORM_FORMAT_NONE + }, + }; + }; + v } } @@ -1506,31 +1503,32 @@ impl<'a, 's> CustomPrimitiveArray<'a, 's> { } impl BuildInput for CustomPrimitiveArray<'_, '_> { - fn to_sys(&self) -> sys::OptixBuildInput { - sys::OptixBuildInput { - type_: sys::OptixBuildInputType_OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES, - input: sys::OptixBuildInputUnion { - custom_primitive_array: std::mem::ManuallyDrop::new( - sys::OptixBuildInputCustomPrimitiveArray { - aabbBuffers: self.aabb_buffers.as_ptr(), - numPrimitives: self.num_primitives, - strideInBytes: self.stride_in_bytes, - flags: self.flags.as_ptr() as *const u32, - numSbtRecords: self.num_sbt_records, - sbtIndexOffsetBuffer: if let Some(sbt_index_offset_buffer) = - self.sbt_index_offset_buffer - { - sbt_index_offset_buffer.as_device_ptr().as_raw() - } else { - 0 - }, - sbtIndexOffsetSizeInBytes: 4, - sbtIndexOffsetStrideInBytes: self.sbt_index_offset_stride_in_bytes, - primitiveIndexOffset: self.primitive_index_offset, + fn to_sys(&self) -> optix_sys::OptixBuildInput { + let mut v = optix_sys::OptixBuildInput { + type_: optix_sys::OptixBuildInputType::OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES, + ..Default::default() + }; + unsafe { + *v.__bindgen_anon_1.customPrimitiveArray.as_mut() = + optix_sys::OptixBuildInputCustomPrimitiveArray { + aabbBuffers: self.aabb_buffers.as_ptr(), + numPrimitives: self.num_primitives, + strideInBytes: self.stride_in_bytes, + flags: self.flags.as_ptr() as *const u32, + numSbtRecords: self.num_sbt_records, + sbtIndexOffsetBuffer: if let 
Some(sbt_index_offset_buffer) = + self.sbt_index_offset_buffer + { + sbt_index_offset_buffer.as_device_ptr().as_raw() + } else { + 0 }, - ), - }, - } + sbtIndexOffsetSizeInBytes: 4, + sbtIndexOffsetStrideInBytes: self.sbt_index_offset_stride_in_bytes, + primitiveIndexOffset: self.primitive_index_offset, + }; + }; + v } } @@ -1549,23 +1547,23 @@ pub struct Instance<'a> { const_assert_eq!( std::mem::align_of::(), - sys::OptixInstanceByteAlignment + optix_sys::OptixInstanceByteAlignment ); const_assert_eq!( std::mem::size_of::(), - std::mem::size_of::() + std::mem::size_of::() ); #[derive(DeviceCopy, Clone, Copy, PartialEq, Eq, Debug)] -pub struct InstanceFlags(OptixEnumBaseType); +pub struct InstanceFlags(i32); bitflags::bitflags! { - impl InstanceFlags: OptixEnumBaseType { - const NONE = sys::OptixInstanceFlags_OPTIX_INSTANCE_FLAG_NONE; - const DISABLE_TRIANGLE_FACE_CULLING = sys::OptixInstanceFlags_OPTIX_INSTANCE_FLAG_DISABLE_TRIANGLE_FACE_CULLING; - const FLIP_TRIANGLE_FACING = sys::OptixInstanceFlags_OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING; - const DISABLE_ANYHIT = sys::OptixInstanceFlags_OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT; - const ENFORCE_ANYHIT = sys::OptixInstanceFlags_OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT; - const DISABLE_TRANSFORM = sys::OptixInstanceFlags_OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM; + impl InstanceFlags: i32 { + const NONE = optix_sys::OptixInstanceFlags::OPTIX_INSTANCE_FLAG_NONE as i32; + const DISABLE_TRIANGLE_FACE_CULLING = optix_sys::OptixInstanceFlags::OPTIX_INSTANCE_FLAG_DISABLE_TRIANGLE_FACE_CULLING as i32; + const FLIP_TRIANGLE_FACING = optix_sys::OptixInstanceFlags::OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING as i32; + const DISABLE_ANYHIT = optix_sys::OptixInstanceFlags::OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT as i32; + const ENFORCE_ANYHIT = optix_sys::OptixInstanceFlags::OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT as i32; + const DISABLE_TRANSFORM = optix_sys::OptixInstanceFlags::OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM as i32; } } @@ -1649,32 +1647,22 @@ 
impl Hash for InstanceArray<'_, '_> { } impl BuildInput for InstanceArray<'_, '_> { - fn to_sys(&self) -> sys::OptixBuildInput { - cfg_if::cfg_if! { - if #[cfg(any(feature="optix72", feature="optix73"))] { - sys::OptixBuildInput { - type_: sys::OptixBuildInputType_OPTIX_BUILD_INPUT_TYPE_INSTANCES, - input: sys::OptixBuildInputUnion { - instance_array: std::mem::ManuallyDrop::new(sys::OptixBuildInputInstanceArray { - instances: self.instances.as_device_ptr().as_raw(), - numInstances: self.instances.len() as u32, - }) - } - } - } else { - sys::OptixBuildInput { - type_: sys::OptixBuildInputType_OPTIX_BUILD_INPUT_TYPE_INSTANCES, - input: sys::OptixBuildInputUnion { - instance_array: std::mem::ManuallyDrop::new(sys::OptixBuildInputInstanceArray { - instances: self.instances.as_device_ptr(), - numInstances: self.instances.len() as u32, - aabbs: 0, - numAabbs: 0, - }) - } - } - } - } + fn to_sys(&self) -> optix_sys::OptixBuildInput { + let mut v = optix_sys::OptixBuildInput { + type_: optix_sys::OptixBuildInputType::OPTIX_BUILD_INPUT_TYPE_INSTANCES, + ..Default::default() + }; + unsafe { + *v.__bindgen_anon_1.instanceArray.as_mut() = optix_sys::OptixBuildInputInstanceArray { + instances: self.instances.as_device_ptr().as_raw(), + numInstances: self.instances.len() as u32, + #[cfg(optix_build_input_instance_array_aabbs)] + aabbs: 0, + #[cfg(optix_build_input_instance_array_aabbs)] + numAabbs: 0, + }; + }; + v } } @@ -1695,32 +1683,22 @@ impl Hash for InstancePointerArray<'_> { } impl BuildInput for InstancePointerArray<'_> { - fn to_sys(&self) -> sys::OptixBuildInput { - cfg_if::cfg_if! 
{ - if #[cfg(any(feature="optix72", feature="optix73"))] { - sys::OptixBuildInput { - type_: sys::OptixBuildInputType_OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS, - input: sys::OptixBuildInputUnion { - instance_array: std::mem::ManuallyDrop::new(sys::OptixBuildInputInstanceArray { - instances: self.instances.as_device_ptr().as_raw(), - numInstances: self.instances.len() as u32, - }) - } - } - } else { - sys::OptixBuildInput { - type_: sys::OptixBuildInputType_OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS, - input: sys::OptixBuildInputUnion { - instance_array: std::mem::ManuallyDrop::new(sys::OptixBuildInputInstanceArray { - instances: self.instances.as_device_ptr(), - numInstances: self.instances.len() as u32, - aabbs: 0, - numAabbs: 0, - }) - } - } - } - } + fn to_sys(&self) -> optix_sys::OptixBuildInput { + let mut v = optix_sys::OptixBuildInput { + type_: optix_sys::OptixBuildInputType::OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS, + ..Default::default() + }; + unsafe { + *v.__bindgen_anon_1.instanceArray.as_mut() = optix_sys::OptixBuildInputInstanceArray { + instances: self.instances.as_device_ptr().as_raw(), + numInstances: self.instances.len() as u32, + #[cfg(optix_build_input_instance_array_aabbs)] + aabbs: 0, + #[cfg(optix_build_input_instance_array_aabbs)] + numAabbs: 0, + }; + }; + v } } @@ -1728,13 +1706,13 @@ impl BuildInput for InstancePointerArray<'_> { /// ray traversal. 
#[repr(C)] #[derive(Debug, Copy, Clone)] -pub struct StaticTransformWrapper(sys::OptixStaticTransform); +pub struct StaticTransformWrapper(optix_sys::OptixStaticTransform); unsafe impl DeviceCopy for StaticTransformWrapper {} const_assert_eq!( std::mem::size_of::(), - std::mem::size_of::(), + std::mem::size_of::(), ); /// Stores the device memory and the [`TraversableHandle`] for a [`StaticTransform`] @@ -1755,7 +1733,7 @@ impl StaticTransform { ) -> Result { let transform = (*transform).clone().into(); let inv_transform = (*inv_transform).clone().into(); - let buf = DeviceBox::new(&StaticTransformWrapper(sys::OptixStaticTransform { + let buf = DeviceBox::new(&StaticTransformWrapper(optix_sys::OptixStaticTransform { child: child.handle().inner, transform: transform.into(), invTransform: inv_transform.into(), @@ -1791,7 +1769,7 @@ impl Traversable for StaticTransform { /// A scene graph node holding a child node with a motion transform to be applied /// during ray traversal, represented as SRT Data. 
/// -/// Stores the device memory and the [`TraversableHandle`] for a [`sys::OptixMatrixMotionTransform`] +/// Stores the device memory and the [`TraversableHandle`] for a [`optix_sys::OptixMatrixMotionTransform`] /// and an arbitrary number of motion keys pub struct MatrixMotionTransform { #[allow(dead_code)] @@ -1823,9 +1801,9 @@ impl MatrixMotionTransform { return Err(Error::TooFewMotionKeys(num_keys)); } - let mmt = sys::OptixMatrixMotionTransform { + let mmt = optix_sys::OptixMatrixMotionTransform { child: child.handle().inner, - motionOptions: sys::OptixMotionOptions { + motionOptions: optix_sys::OptixMotionOptions { numKeys: num_keys as u16, timeBegin: time_begin, timeEnd: time_end, @@ -1834,8 +1812,8 @@ impl MatrixMotionTransform { ..Default::default() }; - let size = - size_of::() + size_of::() * 12 * (num_keys - 2); + let size = size_of::() + + size_of::() * 12 * (num_keys - 2); // copy the transform data unsafe { @@ -1845,7 +1823,7 @@ impl MatrixMotionTransform { // get the offset of the matrix data from the base of the struct let transform_ptr = buf .as_device_ptr() - .add(offset_of!(sys::OptixMatrixMotionTransform, transform)); + .add(offset_of!(optix_sys::OptixMatrixMotionTransform, transform)); // copy the transform data. 
// Note we're writing 24 bytes of data for the transform field that @@ -1854,7 +1832,7 @@ impl MatrixMotionTransform { cust::memory::memcpy_htod( buf.as_device_ptr().as_raw(), &mmt as *const _ as *const c_void, - size_of::(), + size_of::(), )?; // copy the matrix data @@ -1924,12 +1902,12 @@ impl Traversable for MatrixMotionTransform { /// #[repr(transparent)] #[derive(Copy, Clone, Debug)] -pub struct SrtData(sys::OptixSRTData); +pub struct SrtData(optix_sys::OptixSRTData); unsafe impl DeviceCopy for SrtData {} impl Deref for SrtData { - type Target = sys::OptixSRTData; + type Target = optix_sys::OptixSRTData; fn deref(&self) -> &Self::Target { &self.0 @@ -1939,7 +1917,7 @@ impl Deref for SrtData { /// A scene graph node holding a child node with a motion transform to be applied /// during ray traversal, represented as SRT Data. /// -/// Stores the device memory and the [`TraversableHandle`] for a [`sys::OptixSRTMotionTransform`] +/// Stores the device memory and the [`TraversableHandle`] for a [`optix_sys::OptixSRTMotionTransform`] /// and an arbitrary number of motion keys pub struct SrtMotionTransform { // TODO(RDambrosio016): ask al what this is for :p @@ -1972,9 +1950,9 @@ impl SrtMotionTransform { return Err(Error::TooFewMotionKeys(num_keys)); } - let mmt = sys::OptixSRTMotionTransform { + let mmt = optix_sys::OptixSRTMotionTransform { child: child.handle().inner, - motionOptions: sys::OptixMotionOptions { + motionOptions: optix_sys::OptixMotionOptions { numKeys: num_keys as u16, timeBegin: time_begin, timeEnd: time_end, @@ -1983,7 +1961,7 @@ impl SrtMotionTransform { ..Default::default() }; - let size = size_of::() + let size = size_of::() + size_of::() * size_of::() * (num_keys - 2); // copy the transform data @@ -1994,7 +1972,7 @@ impl SrtMotionTransform { // get the offset of the matrix data from the base of the struct let transform_ptr = buf .as_device_ptr() - .add(offset_of!(sys::OptixSRTMotionTransform, srtData)); + 
.add(offset_of!(optix_sys::OptixSRTMotionTransform, srtData)); // copy the transform data. // Note we're writing 24 bytes of data for the transform field that @@ -2003,7 +1981,7 @@ impl SrtMotionTransform { cust::memory::memcpy_htod( buf.as_device_ptr().as_raw(), &mmt as *const _ as *const c_void, - size_of::(), + size_of::(), )?; // copy the matrix data @@ -2043,17 +2021,17 @@ pub enum TraversableType { SrtMotionTransform, } -impl From for sys::OptixTraversableType { +impl From for optix_sys::OptixTraversableType { fn from(t: TraversableType) -> Self { match t { TraversableType::StaticTransform => { - sys::OptixTraversableType_OPTIX_TRAVERSABLE_TYPE_STATIC_TRANSFORM + optix_sys::OptixTraversableType::OPTIX_TRAVERSABLE_TYPE_STATIC_TRANSFORM } TraversableType::MatrixMotionTransform => { - sys::OptixTraversableType_OPTIX_TRAVERSABLE_TYPE_MATRIX_MOTION_TRANSFORM + optix_sys::OptixTraversableType::OPTIX_TRAVERSABLE_TYPE_MATRIX_MOTION_TRANSFORM } TraversableType::SrtMotionTransform => { - sys::OptixTraversableType_OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM + optix_sys::OptixTraversableType::OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM } } } diff --git a/crates/optix/src/context.rs b/crates/optix/src/context.rs index 486cfff1..7ee1d2a9 100644 --- a/crates/optix/src/context.rs +++ b/crates/optix/src/context.rs @@ -6,7 +6,7 @@ use std::{ use cust::context::ContextHandle; -use crate::{error::Error, optix_call, sys}; +use crate::{error::Error, optix_call}; type Result = std::result::Result; /// A certain property belonging to an OptiX device. @@ -37,18 +37,18 @@ pub enum DeviceProperty { impl DeviceProperty { // we could repr this the same as the sys version, but for better compatability // and safety in the future, we just match. 
- pub fn to_raw(self) -> sys::OptixDeviceProperty::Type { + pub fn to_raw(self) -> optix_sys::OptixDeviceProperty::Type { use DeviceProperty::*; match self { - MaxTraceDepth => sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRACE_DEPTH, - MaxTraversableGraphDepth => sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRAVERSABLE_GRAPH_DEPTH, - MaxPrimitivesPerGas => sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_PRIMITIVES_PER_GAS, - MaxInstancesPerIas => sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCES_PER_IAS, - RtCoreVersion => sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_RTCORE_VERSION, - MaxInstanceId => sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID, - NumBitsInstanceVisibilityMask => sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_NUM_BITS_INSTANCE_VISIBILITY_MASK, - MaxSbtRecordsPerGas => sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_RECORDS_PER_GAS, - MaxSbtOffset => sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_OFFSET, + MaxTraceDepth => optix_sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRACE_DEPTH, + MaxTraversableGraphDepth => optix_sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRAVERSABLE_GRAPH_DEPTH, + MaxPrimitivesPerGas => optix_sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_PRIMITIVES_PER_GAS, + MaxInstancesPerIas => optix_sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCES_PER_IAS, + RtCoreVersion => optix_sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_RTCORE_VERSION, + MaxInstanceId => optix_sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID, + NumBitsInstanceVisibilityMask => optix_sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_NUM_BITS_INSTANCE_VISIBILITY_MASK, + MaxSbtRecordsPerGas => optix_sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_RECORDS_PER_GAS, + MaxSbtOffset => 
optix_sys::OptixDeviceProperty::OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_OFFSET, } } } @@ -56,13 +56,13 @@ impl DeviceProperty { #[derive(Debug)] #[repr(transparent)] pub struct DeviceContext { - pub(crate) raw: sys::OptixDeviceContext, + pub(crate) raw: optix_sys::OptixDeviceContext, } impl Drop for DeviceContext { fn drop(&mut self) { unsafe { - sys::optixDeviceContextDestroy(self.raw); + optix_sys::optixDeviceContextDestroy(self.raw); } } } @@ -79,10 +79,10 @@ impl DeviceContext { pub fn new(cuda_ctx: &impl ContextHandle, enable_validation: bool) -> Result { let mut raw = MaybeUninit::uninit(); - let mut opt = sys::OptixDeviceContextOptions::default(); + let mut opt = optix_sys::OptixDeviceContextOptions::default(); if enable_validation { opt.validationMode = - sys::OptixDeviceContextValidationMode_OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL; + optix_sys::OptixDeviceContextValidationMode::OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL; } unsafe { @@ -258,7 +258,7 @@ impl DeviceContext { } /// Get the FFI context representation - pub fn as_raw(&self) -> sys::OptixDeviceContext { + pub fn as_raw(&self) -> optix_sys::OptixDeviceContext { self.raw } } diff --git a/crates/optix/src/denoiser.rs b/crates/optix/src/denoiser.rs index 3d71d6ff..70246a5e 100644 --- a/crates/optix/src/denoiser.rs +++ b/crates/optix/src/denoiser.rs @@ -12,18 +12,18 @@ use cust::{ prelude::Stream, }; -use crate::{context::DeviceContext, error::Error, optix_call, sys}; +use crate::{context::DeviceContext, error::Error, optix_call}; type Result = std::result::Result; // can't zero initialize, OptixPixelFormat is not zero-initializable. 
-fn null_optix_image() -> sys::OptixImage2D { - sys::OptixImage2D { +fn null_optix_image() -> optix_sys::OptixImage2D { + optix_sys::OptixImage2D { data: 0, width: 0, height: 0, pixelStrideInBytes: 0, rowStrideInBytes: 0, - format: sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_FLOAT2, + format: optix_sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_FLOAT2, } } @@ -43,12 +43,12 @@ pub enum DenoiserModelKind { impl DenoiserModelKind { /// Converts this model kind to its raw counterpart. - pub fn to_raw(self) -> sys::OptixDenoiserModelKind::Type { + pub fn to_raw(self) -> optix_sys::OptixDenoiserModelKind::Type { match self { - Self::Ldr => sys::OptixDenoiserModelKind::OPTIX_DENOISER_MODEL_KIND_LDR, - Self::Hdr => sys::OptixDenoiserModelKind::OPTIX_DENOISER_MODEL_KIND_HDR, - Self::Aov => sys::OptixDenoiserModelKind::OPTIX_DENOISER_MODEL_KIND_AOV, - Self::Temporal => sys::OptixDenoiserModelKind::OPTIX_DENOISER_MODEL_KIND_TEMPORAL, + Self::Ldr => optix_sys::OptixDenoiserModelKind::OPTIX_DENOISER_MODEL_KIND_LDR, + Self::Hdr => optix_sys::OptixDenoiserModelKind::OPTIX_DENOISER_MODEL_KIND_HDR, + Self::Aov => optix_sys::OptixDenoiserModelKind::OPTIX_DENOISER_MODEL_KIND_AOV, + Self::Temporal => optix_sys::OptixDenoiserModelKind::OPTIX_DENOISER_MODEL_KIND_TEMPORAL, } } } @@ -69,8 +69,8 @@ pub struct DenoiserOptions { } impl DenoiserOptions { - pub fn to_raw(self) -> sys::OptixDenoiserOptions { - sys::OptixDenoiserOptions { + pub fn to_raw(self) -> optix_sys::OptixDenoiserOptions { + optix_sys::OptixDenoiserOptions { guideAlbedo: self.guide_albedo as u32, guideNormal: self.guide_normal as u32, } @@ -86,7 +86,7 @@ pub struct DenoiserSizes { } impl DenoiserSizes { - pub fn from_raw(raw: sys::OptixDenoiserSizes) -> Self { + pub fn from_raw(raw: optix_sys::OptixDenoiserSizes) -> Self { Self { state_size_in_bytes: raw.stateSizeInBytes, scratch_size_in_bytes_with_overlap: raw.withOverlapScratchSizeInBytes, @@ -111,7 +111,7 @@ struct InternalDenoiserState { /// High level wrapper for OptiX's 
GPU-accelerated AI image denoiser. #[derive(Debug)] pub struct Denoiser { - raw: sys::OptixDenoiser, + raw: optix_sys::OptixDenoiser, // retain the options and model kind for sanity-checks when invoking // the denoiser. options: DenoiserOptions, @@ -122,7 +122,7 @@ pub struct Denoiser { impl Drop for Denoiser { fn drop(&mut self) { unsafe { - sys::optixDenoiserDestroy(self.raw); + optix_sys::optixDenoiserDestroy(self.raw); } } } @@ -375,7 +375,7 @@ impl Denoiser { let mut out = input_image.to_raw(); out.data = out_buffer.as_device_ptr().as_raw(); - let layer = sys::OptixDenoiserLayer { + let layer = optix_sys::OptixDenoiserLayer { input: input_image.to_raw(), previousOutput: null_optix_image(), output: out, @@ -421,8 +421,8 @@ pub struct DenoiserParams<'a> { } impl DenoiserParams<'_> { - pub fn to_raw(self) -> sys::OptixDenoiserParams { - sys::OptixDenoiserParams { + pub fn to_raw(self) -> optix_sys::OptixDenoiserParams { + optix_sys::OptixDenoiserParams { denoiseAlpha: self.denoise_alpha as u32, hdrIntensity: self .hdr_intensity @@ -457,8 +457,8 @@ pub struct DenoiserGuideImages<'a> { } impl DenoiserGuideImages<'_> { - pub fn to_raw(self) -> sys::OptixDenoiserGuideLayer { - sys::OptixDenoiserGuideLayer { + pub fn to_raw(self) -> optix_sys::OptixDenoiserGuideLayer { + optix_sys::OptixDenoiserGuideLayer { albedo: self .albedo .map(|i| i.to_raw()) @@ -500,18 +500,18 @@ pub enum ImageFormat { } impl ImageFormat { - pub fn to_raw(self) -> sys::OptixPixelFormat::Type { + pub fn to_raw(self) -> optix_sys::OptixPixelFormat::Type { use ImageFormat::*; match self { - Half2 => sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_HALF2, - Half3 => sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_HALF3, - Half4 => sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_HALF4, - Float2 => sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_FLOAT2, - Float3 => sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_FLOAT3, - Float4 => sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_FLOAT4, - // Uchar3 => 
sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_UCHAR3, - // Uchar4 => sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_UCHAR4, + Half2 => optix_sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_HALF2, + Half3 => optix_sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_HALF3, + Half4 => optix_sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_HALF4, + Float2 => optix_sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_FLOAT2, + Float3 => optix_sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_FLOAT3, + Float4 => optix_sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_FLOAT4, + // Uchar3 => optix_sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_UCHAR3, + // Uchar4 => optix_sys::OptixPixelFormat::OPTIX_PIXEL_FORMAT_UCHAR4, } } @@ -623,8 +623,8 @@ impl<'a> Image<'a> { self.format.byte_size() } - pub fn to_raw(&self) -> sys::OptixImage2D { - sys::OptixImage2D { + pub fn to_raw(&self) -> optix_sys::OptixImage2D { + optix_sys::OptixImage2D { width: self.width, height: self.height, rowStrideInBytes: self.row_stride_in_bytes(), diff --git a/crates/optix/src/error.rs b/crates/optix/src/error.rs index 96121a33..4ff50bc9 100644 --- a/crates/optix/src/error.rs +++ b/crates/optix/src/error.rs @@ -5,8 +5,6 @@ use std::{ use cust::error::CudaError; -use crate::sys; - /// Any error which may occur when executing an OptiX function. 
#[non_exhaustive] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -53,51 +51,60 @@ pub enum OptixError { } impl OptixError { - pub fn to_raw(self) -> sys::OptixResult { + pub fn to_raw(self) -> optix_sys::OptixResult { use OptixError::*; + match self { - InvalidValue => sys::OptixResult::OPTIX_ERROR_INVALID_VALUE, - HostOutOfMemory => sys::OptixResult::OPTIX_ERROR_HOST_OUT_OF_MEMORY, - InvalidOperation => sys::OptixResult::OPTIX_ERROR_INVALID_OPERATION, - FileIoError => sys::OptixResult::OPTIX_ERROR_FILE_IO_ERROR, - InvalidFileFormat => sys::OptixResult::OPTIX_ERROR_INVALID_FILE_FORMAT, - DiskCacheInvalidPath => sys::OptixResult::OPTIX_ERROR_DISK_CACHE_INVALID_PATH, - DiskCachePermissionError => sys::OptixResult::OPTIX_ERROR_DISK_CACHE_PERMISSION_ERROR, - DiskCacheDatabaseError => sys::OptixResult::OPTIX_ERROR_DISK_CACHE_DATABASE_ERROR, - DiskCacheInvalidData => sys::OptixResult::OPTIX_ERROR_DISK_CACHE_INVALID_DATA, - LaunchFailure => sys::OptixResult::OPTIX_ERROR_LAUNCH_FAILURE, - InvalidDeviceContext => sys::OptixResult::OPTIX_ERROR_INVALID_DEVICE_CONTEXT, - CudaNotInitialized => sys::OptixResult::OPTIX_ERROR_CUDA_NOT_INITIALIZED, - ValidationFailure => sys::OptixResult::OPTIX_ERROR_VALIDATION_FAILURE, - InvalidPtx => sys::OptixResult::OPTIX_ERROR_INVALID_PTX, - InvalidLaunchParameter => sys::OptixResult::OPTIX_ERROR_INVALID_LAUNCH_PARAMETER, - InvalidPayloadAccess => sys::OptixResult::OPTIX_ERROR_INVALID_PAYLOAD_ACCESS, - InvalidAttributeAccess => sys::OptixResult::OPTIX_ERROR_INVALID_ATTRIBUTE_ACCESS, - InvalidFunctionUse => sys::OptixResult::OPTIX_ERROR_INVALID_FUNCTION_USE, - InvalidFunctionArguments => sys::OptixResult::OPTIX_ERROR_INVALID_FUNCTION_ARGUMENTS, + InvalidValue => optix_sys::OptixResult::OPTIX_ERROR_INVALID_VALUE, + HostOutOfMemory => optix_sys::OptixResult::OPTIX_ERROR_HOST_OUT_OF_MEMORY, + InvalidOperation => optix_sys::OptixResult::OPTIX_ERROR_INVALID_OPERATION, + FileIoError => optix_sys::OptixResult::OPTIX_ERROR_FILE_IO_ERROR, + 
InvalidFileFormat => optix_sys::OptixResult::OPTIX_ERROR_INVALID_FILE_FORMAT, + DiskCacheInvalidPath => optix_sys::OptixResult::OPTIX_ERROR_DISK_CACHE_INVALID_PATH, + DiskCachePermissionError => { + optix_sys::OptixResult::OPTIX_ERROR_DISK_CACHE_PERMISSION_ERROR + } + DiskCacheDatabaseError => optix_sys::OptixResult::OPTIX_ERROR_DISK_CACHE_DATABASE_ERROR, + DiskCacheInvalidData => optix_sys::OptixResult::OPTIX_ERROR_DISK_CACHE_INVALID_DATA, + LaunchFailure => optix_sys::OptixResult::OPTIX_ERROR_LAUNCH_FAILURE, + InvalidDeviceContext => optix_sys::OptixResult::OPTIX_ERROR_INVALID_DEVICE_CONTEXT, + CudaNotInitialized => optix_sys::OptixResult::OPTIX_ERROR_CUDA_NOT_INITIALIZED, + ValidationFailure => optix_sys::OptixResult::OPTIX_ERROR_VALIDATION_FAILURE, + InvalidPtx => optix_sys::OptixResult::OPTIX_ERROR_INVALID_PTX, + InvalidLaunchParameter => optix_sys::OptixResult::OPTIX_ERROR_INVALID_LAUNCH_PARAMETER, + InvalidPayloadAccess => optix_sys::OptixResult::OPTIX_ERROR_INVALID_PAYLOAD_ACCESS, + InvalidAttributeAccess => optix_sys::OptixResult::OPTIX_ERROR_INVALID_ATTRIBUTE_ACCESS, + InvalidFunctionUse => optix_sys::OptixResult::OPTIX_ERROR_INVALID_FUNCTION_USE, + InvalidFunctionArguments => { + optix_sys::OptixResult::OPTIX_ERROR_INVALID_FUNCTION_ARGUMENTS + } PipelineOutOfConstantMemory => { - sys::OptixResult::OPTIX_ERROR_PIPELINE_OUT_OF_CONSTANT_MEMORY + optix_sys::OptixResult::OPTIX_ERROR_PIPELINE_OUT_OF_CONSTANT_MEMORY + } + PipelineLinkError => optix_sys::OptixResult::OPTIX_ERROR_PIPELINE_LINK_ERROR, + IllegalDuringTaskExecute => { + optix_sys::OptixResult::OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE + } + InternalCompilerError => optix_sys::OptixResult::OPTIX_ERROR_INTERNAL_COMPILER_ERROR, + DenoiserModelNotSet => optix_sys::OptixResult::OPTIX_ERROR_DENOISER_MODEL_NOT_SET, + DenoiserNotInitialized => optix_sys::OptixResult::OPTIX_ERROR_DENOISER_NOT_INITIALIZED, + AccelNotCompatible => optix_sys::OptixResult::OPTIX_ERROR_ACCEL_NOT_COMPATIBLE, + NotSupported => 
optix_sys::OptixResult::OPTIX_ERROR_NOT_SUPPORTED, + UnsupportedAbiVersion => optix_sys::OptixResult::OPTIX_ERROR_UNSUPPORTED_ABI_VERSION, + FunctionTableSizeMismatch => { + optix_sys::OptixResult::OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH } - PipelineLinkError => sys::OptixResult::OPTIX_ERROR_PIPELINE_LINK_ERROR, - IllegalDuringTaskExecute => sys::OptixResult::OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE, - InternalCompilerError => sys::OptixResult::OPTIX_ERROR_INTERNAL_COMPILER_ERROR, - DenoiserModelNotSet => sys::OptixResult::OPTIX_ERROR_DENOISER_MODEL_NOT_SET, - DenoiserNotInitialized => sys::OptixResult::OPTIX_ERROR_DENOISER_NOT_INITIALIZED, - AccelNotCompatible => sys::OptixResult::OPTIX_ERROR_ACCEL_NOT_COMPATIBLE, - NotSupported => sys::OptixResult::OPTIX_ERROR_NOT_SUPPORTED, - UnsupportedAbiVersion => sys::OptixResult::OPTIX_ERROR_UNSUPPORTED_ABI_VERSION, - FunctionTableSizeMismatch => sys::OptixResult::OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH, InvalidEntryFunctionOptions => { - sys::OptixResult::OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS + optix_sys::OptixResult::OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS } - LibraryNotFound => sys::OptixResult::OPTIX_ERROR_LIBRARY_NOT_FOUND, - EntrySymbolNotFound => sys::OptixResult::OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND, - LibraryUnloadFailure => sys::OptixResult::OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE, - CudaError => sys::OptixResult::OPTIX_ERROR_CUDA_ERROR, - InternalError => sys::OptixResult::OPTIX_ERROR_INTERNAL_ERROR, - Unknown => sys::OptixResult::OPTIX_ERROR_UNKNOWN, + LibraryNotFound => optix_sys::OptixResult::OPTIX_ERROR_LIBRARY_NOT_FOUND, + EntrySymbolNotFound => optix_sys::OptixResult::OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND, + LibraryUnloadFailure => optix_sys::OptixResult::OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE, + CudaError => optix_sys::OptixResult::OPTIX_ERROR_CUDA_ERROR, + InternalError => optix_sys::OptixResult::OPTIX_ERROR_INTERNAL_ERROR, + Unknown => optix_sys::OptixResult::OPTIX_ERROR_UNKNOWN, // close enough - 
OptixNotInitialized => sys::OptixResult::OPTIX_ERROR_CUDA_NOT_INITIALIZED, + OptixNotInitialized => optix_sys::OptixResult::OPTIX_ERROR_CUDA_NOT_INITIALIZED, } } } @@ -122,7 +129,7 @@ impl Display for OptixError { } // optix_stubs special cases this function if optix is not initialized so we dont need to // optix_call this. - let ptr = sys::optixGetErrorString(self.to_raw()); + let ptr = optix_sys::optixGetErrorString(self.to_raw()); let cow = CStr::from_ptr(ptr).to_string_lossy(); f.write_str(cow.as_ref()) } @@ -137,52 +144,60 @@ pub trait ToResult { fn to_result(self) -> Result<(), OptixError>; } -impl ToResult for sys::OptixResult { +impl ToResult for optix_sys::OptixResult { fn to_result(self) -> Result<(), OptixError> { use OptixError::*; Err(match self { - sys::OptixResult::OPTIX_SUCCESS => return Ok(()), - sys::OptixResult::OPTIX_ERROR_INVALID_VALUE => InvalidValue, - sys::OptixResult::OPTIX_ERROR_HOST_OUT_OF_MEMORY => HostOutOfMemory, - sys::OptixResult::OPTIX_ERROR_INVALID_OPERATION => InvalidOperation, - sys::OptixResult::OPTIX_ERROR_FILE_IO_ERROR => FileIoError, - sys::OptixResult::OPTIX_ERROR_INVALID_FILE_FORMAT => InvalidFileFormat, - sys::OptixResult::OPTIX_ERROR_DISK_CACHE_INVALID_PATH => DiskCacheInvalidPath, - sys::OptixResult::OPTIX_ERROR_DISK_CACHE_PERMISSION_ERROR => DiskCachePermissionError, - sys::OptixResult::OPTIX_ERROR_DISK_CACHE_DATABASE_ERROR => DiskCacheDatabaseError, - sys::OptixResult::OPTIX_ERROR_DISK_CACHE_INVALID_DATA => DiskCacheInvalidData, - sys::OptixResult::OPTIX_ERROR_LAUNCH_FAILURE => LaunchFailure, - sys::OptixResult::OPTIX_ERROR_INVALID_DEVICE_CONTEXT => InvalidDeviceContext, - sys::OptixResult::OPTIX_ERROR_CUDA_NOT_INITIALIZED => CudaNotInitialized, - sys::OptixResult::OPTIX_ERROR_VALIDATION_FAILURE => ValidationFailure, - sys::OptixResult::OPTIX_ERROR_INVALID_PTX => InvalidPtx, - sys::OptixResult::OPTIX_ERROR_INVALID_LAUNCH_PARAMETER => InvalidLaunchParameter, - sys::OptixResult::OPTIX_ERROR_INVALID_PAYLOAD_ACCESS => 
InvalidPayloadAccess, - sys::OptixResult::OPTIX_ERROR_INVALID_ATTRIBUTE_ACCESS => InvalidAttributeAccess, - sys::OptixResult::OPTIX_ERROR_INVALID_FUNCTION_USE => InvalidFunctionUse, - sys::OptixResult::OPTIX_ERROR_INVALID_FUNCTION_ARGUMENTS => InvalidFunctionArguments, - sys::OptixResult::OPTIX_ERROR_PIPELINE_OUT_OF_CONSTANT_MEMORY => { + optix_sys::OptixResult::OPTIX_SUCCESS => return Ok(()), + optix_sys::OptixResult::OPTIX_ERROR_INVALID_VALUE => InvalidValue, + optix_sys::OptixResult::OPTIX_ERROR_HOST_OUT_OF_MEMORY => HostOutOfMemory, + optix_sys::OptixResult::OPTIX_ERROR_INVALID_OPERATION => InvalidOperation, + optix_sys::OptixResult::OPTIX_ERROR_FILE_IO_ERROR => FileIoError, + optix_sys::OptixResult::OPTIX_ERROR_INVALID_FILE_FORMAT => InvalidFileFormat, + optix_sys::OptixResult::OPTIX_ERROR_DISK_CACHE_INVALID_PATH => DiskCacheInvalidPath, + optix_sys::OptixResult::OPTIX_ERROR_DISK_CACHE_PERMISSION_ERROR => { + DiskCachePermissionError + } + optix_sys::OptixResult::OPTIX_ERROR_DISK_CACHE_DATABASE_ERROR => DiskCacheDatabaseError, + optix_sys::OptixResult::OPTIX_ERROR_DISK_CACHE_INVALID_DATA => DiskCacheInvalidData, + optix_sys::OptixResult::OPTIX_ERROR_LAUNCH_FAILURE => LaunchFailure, + optix_sys::OptixResult::OPTIX_ERROR_INVALID_DEVICE_CONTEXT => InvalidDeviceContext, + optix_sys::OptixResult::OPTIX_ERROR_CUDA_NOT_INITIALIZED => CudaNotInitialized, + optix_sys::OptixResult::OPTIX_ERROR_VALIDATION_FAILURE => ValidationFailure, + optix_sys::OptixResult::OPTIX_ERROR_INVALID_PTX => InvalidPtx, + optix_sys::OptixResult::OPTIX_ERROR_INVALID_LAUNCH_PARAMETER => InvalidLaunchParameter, + optix_sys::OptixResult::OPTIX_ERROR_INVALID_PAYLOAD_ACCESS => InvalidPayloadAccess, + optix_sys::OptixResult::OPTIX_ERROR_INVALID_ATTRIBUTE_ACCESS => InvalidAttributeAccess, + optix_sys::OptixResult::OPTIX_ERROR_INVALID_FUNCTION_USE => InvalidFunctionUse, + optix_sys::OptixResult::OPTIX_ERROR_INVALID_FUNCTION_ARGUMENTS => { + InvalidFunctionArguments + } + 
optix_sys::OptixResult::OPTIX_ERROR_PIPELINE_OUT_OF_CONSTANT_MEMORY => { PipelineOutOfConstantMemory } - sys::OptixResult::OPTIX_ERROR_PIPELINE_LINK_ERROR => PipelineLinkError, - sys::OptixResult::OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE => IllegalDuringTaskExecute, - sys::OptixResult::OPTIX_ERROR_INTERNAL_COMPILER_ERROR => InternalCompilerError, - sys::OptixResult::OPTIX_ERROR_DENOISER_MODEL_NOT_SET => DenoiserModelNotSet, - sys::OptixResult::OPTIX_ERROR_DENOISER_NOT_INITIALIZED => DenoiserNotInitialized, - sys::OptixResult::OPTIX_ERROR_ACCEL_NOT_COMPATIBLE => AccelNotCompatible, - sys::OptixResult::OPTIX_ERROR_NOT_SUPPORTED => NotSupported, - sys::OptixResult::OPTIX_ERROR_UNSUPPORTED_ABI_VERSION => UnsupportedAbiVersion, - sys::OptixResult::OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH => FunctionTableSizeMismatch, - sys::OptixResult::OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS => { + optix_sys::OptixResult::OPTIX_ERROR_PIPELINE_LINK_ERROR => PipelineLinkError, + optix_sys::OptixResult::OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE => { + IllegalDuringTaskExecute + } + optix_sys::OptixResult::OPTIX_ERROR_INTERNAL_COMPILER_ERROR => InternalCompilerError, + optix_sys::OptixResult::OPTIX_ERROR_DENOISER_MODEL_NOT_SET => DenoiserModelNotSet, + optix_sys::OptixResult::OPTIX_ERROR_DENOISER_NOT_INITIALIZED => DenoiserNotInitialized, + optix_sys::OptixResult::OPTIX_ERROR_ACCEL_NOT_COMPATIBLE => AccelNotCompatible, + optix_sys::OptixResult::OPTIX_ERROR_NOT_SUPPORTED => NotSupported, + optix_sys::OptixResult::OPTIX_ERROR_UNSUPPORTED_ABI_VERSION => UnsupportedAbiVersion, + optix_sys::OptixResult::OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH => { + FunctionTableSizeMismatch + } + optix_sys::OptixResult::OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS => { InvalidEntryFunctionOptions } - sys::OptixResult::OPTIX_ERROR_LIBRARY_NOT_FOUND => LibraryNotFound, - sys::OptixResult::OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND => EntrySymbolNotFound, - sys::OptixResult::OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE => 
LibraryUnloadFailure, - sys::OptixResult::OPTIX_ERROR_CUDA_ERROR => CudaError, - sys::OptixResult::OPTIX_ERROR_INTERNAL_ERROR => InternalError, - sys::OptixResult::OPTIX_ERROR_UNKNOWN => Unknown, + optix_sys::OptixResult::OPTIX_ERROR_LIBRARY_NOT_FOUND => LibraryNotFound, + optix_sys::OptixResult::OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND => EntrySymbolNotFound, + optix_sys::OptixResult::OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE => LibraryUnloadFailure, + optix_sys::OptixResult::OPTIX_ERROR_CUDA_ERROR => CudaError, + optix_sys::OptixResult::OPTIX_ERROR_INTERNAL_ERROR => InternalError, + optix_sys::OptixResult::OPTIX_ERROR_UNKNOWN => Unknown, value => panic!("Unhandled OptixResult value {:?}", value), }) } diff --git a/crates/optix/src/lib.rs b/crates/optix/src/lib.rs index 9bd5e525..dbeed0fd 100644 --- a/crates/optix/src/lib.rs +++ b/crates/optix/src/lib.rs @@ -65,8 +65,6 @@ pub mod prelude; pub mod shader_binding_table; use shader_binding_table::ShaderBindingTable; -pub mod sys; - pub use cust; use cust::memory::DeviceMemory; use error::{Error, ToResult}; @@ -86,7 +84,7 @@ pub fn init() -> Result<()> { #[cold] #[inline(never)] fn init_cold() -> Result<()> { - unsafe { Ok(sys::optixInit().to_result()?) } + unsafe { Ok(optix_sys::optixInit().to_result()?) } } /// Whether OptiX is initialized. If you are calling raw [`sys`] functions you must make sure @@ -98,11 +96,11 @@ pub fn optix_is_initialized() -> bool { // Option for each field, and None is explicitly defined to be represented as a nullptr for Option, // so its default should be the same as the zero-initialized global. // And, while we do not currently expose it, optix library unloading zero initializes the global. 
- unsafe { g_optixFunctionTable != sys::OptixFunctionTable::default() } + unsafe { g_optixFunctionTable != optix_sys::OptixFunctionTable::default() } } extern "C" { - pub(crate) static g_optixFunctionTable: sys::OptixFunctionTable; + pub(crate) static g_optixFunctionTable: optix_sys::OptixFunctionTable; } /// Call a raw OptiX sys function, making sure that OptiX is initialized. Returning @@ -114,7 +112,7 @@ macro_rules! optix_call { if !$crate::optix_is_initialized() { Err($crate::error::OptixError::OptixNotInitialized) } else { - <$crate::sys::OptixResult as $crate::error::ToResult>::to_result($crate::sys::$name($($param),*)) + ::to_result(optix_sys::$name($($param),*)) } }}; } diff --git a/crates/optix/src/pipeline.rs b/crates/optix/src/pipeline.rs index 35f49f5d..21c0fcbf 100644 --- a/crates/optix/src/pipeline.rs +++ b/crates/optix/src/pipeline.rs @@ -1,19 +1,12 @@ -use crate::{context::DeviceContext, error::Error, optix_call, sys}; +use crate::{context::DeviceContext, error::Error, optix_call}; type Result = std::result::Result; use std::cmp::min; use std::ffi::{CStr, CString}; -// Kinda nasty hack to work around the fact taht bindgen generates an i32 for enums on windows, -// but a u32 on linux -#[cfg(windows)] -type OptixEnumBaseType = i32; -#[cfg(unix)] -type OptixEnumBaseType = u32; - #[repr(transparent)] pub struct Pipeline { - pub(crate) raw: sys::OptixPipeline, + pub(crate) raw: optix_sys::OptixPipeline, } #[repr(C)] @@ -23,9 +16,9 @@ pub struct PipelineLinkOptions { pub debug_level: CompileDebugLevel, } -impl From for sys::OptixPipelineLinkOptions { +impl From for optix_sys::OptixPipelineLinkOptions { fn from(o: PipelineLinkOptions) -> Self { - sys::OptixPipelineLinkOptions { + optix_sys::OptixPipelineLinkOptions { maxTraceDepth: o.max_trace_depth, debugLevel: o.debug_level as _, } @@ -42,12 +35,12 @@ impl Pipeline { ) -> Result<(Pipeline, String)> { let popt = pipeline_compile_options.build(); - let link_options: sys::OptixPipelineLinkOptions = 
link_options.into(); + let link_options: optix_sys::OptixPipelineLinkOptions = link_options.into(); let mut log = [0u8; 4096]; let mut log_len = log.len(); - let mut raw: sys::OptixPipeline = std::ptr::null_mut(); + let mut raw: optix_sys::OptixPipeline = std::ptr::null_mut(); let res = unsafe { optix_call!(optixPipelineCreate( @@ -77,7 +70,7 @@ impl Pipeline { impl Drop for Pipeline { fn drop(&mut self) { unsafe { - sys::optixPipelineDestroy(self.raw); + optix_sys::optixPipelineDestroy(self.raw); } } } @@ -129,69 +122,55 @@ impl Pipeline { #[repr(transparent)] pub struct Module { - pub(crate) raw: sys::OptixModule, + pub(crate) raw: optix_sys::OptixModule, } /// Module compilation optimization level -#[cfg_attr(windows, repr(i32))] -#[cfg_attr(unix, repr(u32))] +#[repr(i32)] #[derive(Debug, Hash, PartialEq, Copy, Clone, Default)] pub enum CompileOptimizationLevel { #[default] - Default = sys::OptixCompileOptimizationLevel::OPTIX_COMPILE_OPTIMIZATION_DEFAULT, - Level0 = sys::OptixCompileOptimizationLevel::OPTIX_COMPILE_OPTIMIZATION_LEVEL_0, - Level1 = sys::OptixCompileOptimizationLevel::OPTIX_COMPILE_OPTIMIZATION_LEVEL_1, - Level2 = sys::OptixCompileOptimizationLevel::OPTIX_COMPILE_OPTIMIZATION_LEVEL_2, - Level3 = sys::OptixCompileOptimizationLevel::OPTIX_COMPILE_OPTIMIZATION_LEVEL_3, + Default = optix_sys::OptixCompileOptimizationLevel::OPTIX_COMPILE_OPTIMIZATION_DEFAULT as i32, + Level0 = optix_sys::OptixCompileOptimizationLevel::OPTIX_COMPILE_OPTIMIZATION_LEVEL_0 as i32, + Level1 = optix_sys::OptixCompileOptimizationLevel::OPTIX_COMPILE_OPTIMIZATION_LEVEL_1 as i32, + Level2 = optix_sys::OptixCompileOptimizationLevel::OPTIX_COMPILE_OPTIMIZATION_LEVEL_2 as i32, + Level3 = optix_sys::OptixCompileOptimizationLevel::OPTIX_COMPILE_OPTIMIZATION_LEVEL_3 as i32, } /// Module compilation debug level -#[cfg_attr(windows, repr(i32))] -#[cfg_attr(unix, repr(u32))] +#[repr(i32)] #[derive(Debug, Hash, PartialEq, Copy, Clone, Default)] pub enum CompileDebugLevel { #[default] - 
None = sys::OptixCompileDebugLevel::OPTIX_COMPILE_DEBUG_LEVEL_NONE, - LineInfo = sys::OptixCompileDebugLevel::OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO, - Full = sys::OptixCompileDebugLevel::OPTIX_COMPILE_DEBUG_LEVEL_FULL, + None = optix_sys::OptixCompileDebugLevel::OPTIX_COMPILE_DEBUG_LEVEL_NONE as i32, + LineInfo = optix_sys::OptixCompileDebugLevel::OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO as i32, + Full = optix_sys::OptixCompileDebugLevel::OPTIX_COMPILE_DEBUG_LEVEL_FULL as i32, } -cfg_if::cfg_if! { - if #[cfg(any(feature="optix72", feature="optix73"))] { - #[repr(C)] - #[derive(Debug, Hash, PartialEq, Copy, Clone)] - pub struct ModuleCompileOptions { - pub max_register_count: i32, - pub opt_level: CompileOptimizationLevel, - pub debug_level: CompileDebugLevel, - } +#[repr(C)] +#[derive(Debug, Hash, PartialEq, Copy, Clone)] +pub struct ModuleCompileOptions { + pub max_register_count: i32, + pub opt_level: CompileOptimizationLevel, + pub debug_level: CompileDebugLevel, +} - impl From<&ModuleCompileOptions> for sys::OptixModuleCompileOptions { - fn from(o: &ModuleCompileOptions) -> sys::OptixModuleCompileOptions { - sys::OptixModuleCompileOptions { +impl From<&ModuleCompileOptions> for optix_sys::OptixModuleCompileOptions { + fn from(o: &ModuleCompileOptions) -> optix_sys::OptixModuleCompileOptions { + cfg_if::cfg_if! 
{ + if #[cfg(optix_module_compile_options_bound_values)] { + optix_sys::OptixModuleCompileOptions { maxRegisterCount: o.max_register_count, optLevel: o.opt_level as _, debugLevel: o.debug_level as _, boundValues: std::ptr::null(), numBoundValues: 0, } - } - } - } else { - #[repr(C)] - #[derive(Debug, Hash, PartialEq, Copy, Clone)] - pub struct ModuleCompileOptions { - pub max_register_count: i32, - pub opt_level: CompileOptimizationLevel, - pub debug_level: CompileDebugLevel, - } - - impl From<&ModuleCompileOptions> for sys::OptixModuleCompileOptions { - fn from(o: &ModuleCompileOptions) -> sys::OptixModuleCompileOptions { - sys::OptixModuleCompileOptions { + } else { + optix_sys::OptixModuleCompileOptions { maxRegisterCount: o.max_register_count, - optLevel: o.opt_level as u32, - debugLevel: o.debug_level as u32, + optLevel: o.opt_level as _, + debugLevel: o.debug_level as _, } } } @@ -200,21 +179,21 @@ cfg_if::cfg_if! { bitflags::bitflags! { #[derive(Default, Hash, Clone, Copy, PartialEq, Eq, Debug)] - pub struct TraversableGraphFlags: OptixEnumBaseType { - const ALLOW_ANY = sys::OptixTraversableGraphFlags::OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY; - const ALLOW_SINGLE_GAS = sys::OptixTraversableGraphFlags::OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS; - const ALLOW_SINGLE_LEVEL_INSTANCING = sys::OptixTraversableGraphFlags::OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING; + pub struct TraversableGraphFlags: i32 { + const ALLOW_ANY = optix_sys::OptixTraversableGraphFlags::OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY as i32; + const ALLOW_SINGLE_GAS = optix_sys::OptixTraversableGraphFlags::OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS as i32; + const ALLOW_SINGLE_LEVEL_INSTANCING = optix_sys::OptixTraversableGraphFlags::OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING as i32; } } bitflags::bitflags! 
{ #[derive(Default, Hash, Clone, Copy, PartialEq, Eq, Debug)] - pub struct ExceptionFlags: OptixEnumBaseType { - const NONE = sys::OptixExceptionFlags::OPTIX_EXCEPTION_FLAG_NONE; - const STACK_OVERFLOW = sys::OptixExceptionFlags::OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW; - const TRACE_DEPTH = sys::OptixExceptionFlags::OPTIX_EXCEPTION_FLAG_TRACE_DEPTH; - const USER = sys::OptixExceptionFlags::OPTIX_EXCEPTION_FLAG_USER; - const DEBUG = sys::OptixExceptionFlags::OPTIX_EXCEPTION_FLAG_DEBUG; + pub struct ExceptionFlags: i32 { + const NONE = optix_sys::OptixExceptionFlags::OPTIX_EXCEPTION_FLAG_NONE as i32; + const STACK_OVERFLOW = optix_sys::OptixExceptionFlags::OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW as i32; + const TRACE_DEPTH = optix_sys::OptixExceptionFlags::OPTIX_EXCEPTION_FLAG_TRACE_DEPTH as i32; + const USER = optix_sys::OptixExceptionFlags::OPTIX_EXCEPTION_FLAG_USER as i32; + const DEBUG = optix_sys::OptixExceptionFlags::OPTIX_EXCEPTION_FLAG_DEBUG as i32; } } @@ -222,22 +201,24 @@ bitflags::bitflags! 
{ #[derive(Default, Hash, Clone, Copy, PartialEq, Eq, Debug)] pub struct PrimitiveTypeFlags: i32 { const DEFAULT = 0; - const CUSTOM = sys::OptixPrimitiveTypeFlags_OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; - const ROUND_QUADRATIC_BSPLINE = sys::OptixPrimitiveTypeFlags_OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE; - const ROUND_CUBIC_BSPLINE = sys::OptixPrimitiveTypeFlags_OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE; - const ROUND_LINEAR = sys::OptixPrimitiveTypeFlags_OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR; - const TRIANGLE = sys::OptixPrimitiveTypeFlags_OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE; + const CUSTOM = optix_sys::OptixPrimitiveTypeFlags::OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM as i32; + const ROUND_QUADRATIC_BSPLINE = optix_sys::OptixPrimitiveTypeFlags::OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE as i32; + const ROUND_CUBIC_BSPLINE = optix_sys::OptixPrimitiveTypeFlags::OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE as i32; + const ROUND_LINEAR = optix_sys::OptixPrimitiveTypeFlags::OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR as i32; + const TRIANGLE = optix_sys::OptixPrimitiveTypeFlags::OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE as i32; } } -#[repr(u32)] +#[repr(i32)] pub enum PrimitiveType { RoundQuadraticBspline = - sys::OptixPrimitiveTypeFlags_OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE as u32, + optix_sys::OptixPrimitiveTypeFlags::OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE + as i32, RoundCubicBspline = - sys::OptixPrimitiveTypeFlags_OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE as u32, - RoundLinear = sys::OptixPrimitiveTypeFlags_OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR as u32, - Triangle = sys::OptixPrimitiveTypeFlags_OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE as u32, + optix_sys::OptixPrimitiveTypeFlags::OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE as i32, + RoundLinear = + optix_sys::OptixPrimitiveTypeFlags::OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR as i32, + Triangle = optix_sys::OptixPrimitiveTypeFlags::OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE as i32, } 
#[derive(Debug, Hash, PartialEq, Clone, Default)] @@ -264,41 +245,25 @@ impl PipelineCompileOptions { } } - pub fn build(&self) -> sys::OptixPipelineCompileOptions { - cfg_if::cfg_if! { - if #[cfg(feature="optix73")] { - sys::OptixPipelineCompileOptions { - usesMotionBlur: if self.uses_motion_blur { 1 } else { 0 }, - traversableGraphFlags: self.traversable_graph_flags.bits() as _, - numPayloadValues: self.num_payload_values, - numAttributeValues: self.num_attribute_values, - exceptionFlags: self.exception_flags.bits() as _, - pipelineLaunchParamsVariableName: if let Some(ref name) = self - .pipeline_launch_params_variable_name { - name.as_ptr() - } else { - std::ptr::null() - }, - usesPrimitiveTypeFlags: self.primitive_type_flags.bits() as u32, - reserved: 0, - reserved2: 0, - } + pub fn build(&self) -> optix_sys::OptixPipelineCompileOptions { + optix_sys::OptixPipelineCompileOptions { + usesMotionBlur: if self.uses_motion_blur { 1 } else { 0 }, + traversableGraphFlags: self.traversable_graph_flags.bits() as _, + numPayloadValues: self.num_payload_values, + numAttributeValues: self.num_attribute_values, + exceptionFlags: self.exception_flags.bits() as _, + pipelineLaunchParamsVariableName: if let Some(ref name) = + self.pipeline_launch_params_variable_name + { + name.as_ptr() } else { - sys::OptixPipelineCompileOptions { - usesMotionBlur: if self.uses_motion_blur { 1 } else { 0 }, - traversableGraphFlags: self.traversable_graph_flags.bits(), - numPayloadValues: self.num_payload_values, - numAttributeValues: self.num_attribute_values, - exceptionFlags: self.exception_flags.bits(), - pipelineLaunchParamsVariableName: if let Some(ref name) = self - .pipeline_launch_params_variable_name { - name.as_ptr() - } else { - std::ptr::null() - }, - usesPrimitiveTypeFlags: self.primitive_type_flags.bits() as u32, - } - } + std::ptr::null() + }, + usesPrimitiveTypeFlags: self.primitive_type_flags.bits() as u32, + #[cfg(optix_pipeline_compile_options_reserved)] + reserved: 0, + 
#[cfg(optix_pipeline_compile_options_reserved)] + reserved2: 0, } } @@ -386,8 +351,17 @@ impl Module { builtin_is_module_type: PrimitiveType, uses_motion_blur: bool, ) -> Result { - let is_options = sys::OptixBuiltinISOptions { - builtinISModuleType: builtin_is_module_type as _, + use optix_sys::OptixPrimitiveType::*; + + let is_options = optix_sys::OptixBuiltinISOptions { + builtinISModuleType: match builtin_is_module_type { + PrimitiveType::RoundQuadraticBspline => { + OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE + } + PrimitiveType::RoundCubicBspline => OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, + PrimitiveType::RoundLinear => OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, + PrimitiveType::Triangle => OPTIX_PRIMITIVE_TYPE_TRIANGLE, + }, usesMotionBlur: if uses_motion_blur { 1 } else { 0 }, }; @@ -410,7 +384,7 @@ impl Module { impl Drop for Module { fn drop(&mut self) { unsafe { - sys::optixModuleDestroy(self.raw); + optix_sys::optixModuleDestroy(self.raw); } } } @@ -513,7 +487,7 @@ impl<'m> ProgramGroupDesc<'m> { /// FIXME (AL): make this sound by storing module lifetimes here #[repr(transparent)] pub struct ProgramGroup { - pub(crate) raw: sys::OptixProgramGroup, + pub(crate) raw: optix_sys::OptixProgramGroup, } impl ProgramGroup { @@ -548,18 +522,18 @@ impl ProgramGroup { ctx: &mut DeviceContext, desc: &[ProgramGroupDesc], ) -> Result<(Vec, String)> { - cfg_if::cfg_if! 
{ - if #[cfg(any(feature="optix73"))] { - let pg_options = sys::OptixProgramGroupOptions { reserved: 0 }; - } else { - let pg_options = sys::OptixProgramGroupOptions { placeholder: 0 }; - } - } + let pg_options = optix_sys::OptixProgramGroupOptions { + #[cfg(optix_program_group_options_reserved)] + reserved: 0, + #[cfg(not(optix_program_group_options_reserved))] + placeholder: 0, + }; let mut log = [0u8; 4096]; let mut log_len = log.len(); - let pg_desc: Vec = desc.iter().map(|d| d.into()).collect(); + let pg_desc: Vec = + desc.iter().map(|d| d.into()).collect(); let mut raws = vec![std::ptr::null_mut(); pg_desc.len()]; @@ -594,18 +568,17 @@ impl ProgramGroup { ctx: &mut DeviceContext, desc: &ProgramGroupDesc, ) -> Result<(ProgramGroup, String)> { - cfg_if::cfg_if! { - if #[cfg(any(feature="optix73"))] { - let pg_options = sys::OptixProgramGroupOptions { reserved: 0 }; - } else { - let pg_options = sys::OptixProgramGroupOptions { placeholder: 0 }; - } - } + let pg_options = optix_sys::OptixProgramGroupOptions { + #[cfg(optix_program_group_options_reserved)] + reserved: 0, + #[cfg(not(optix_program_group_options_reserved))] + placeholder: 0, + }; let mut log = [0u8; 4096]; let mut log_len = log.len(); - let pg_desc: sys::OptixProgramGroupDesc = desc.into(); + let pg_desc: optix_sys::OptixProgramGroupDesc = desc.into(); let mut raw = std::ptr::null_mut(); @@ -678,21 +651,21 @@ impl ProgramGroup { impl Drop for ProgramGroup { fn drop(&mut self) { unsafe { - sys::optixProgramGroupDestroy(self.raw); + optix_sys::optixProgramGroupDestroy(self.raw); } } } -impl<'m> From<&ProgramGroupDesc<'m>> for sys::OptixProgramGroupDesc { - fn from(desc: &ProgramGroupDesc<'m>) -> sys::OptixProgramGroupDesc { +impl<'m> From<&ProgramGroupDesc<'m>> for optix_sys::OptixProgramGroupDesc { + fn from(desc: &ProgramGroupDesc<'m>) -> optix_sys::OptixProgramGroupDesc { match &desc { ProgramGroupDesc::Raygen(ProgramGroupModule { module, entry_function_name, - }) => sys::OptixProgramGroupDesc { - 
kind: sys::OptixProgramGroupKind::OPTIX_PROGRAM_GROUP_KIND_RAYGEN, - __bindgen_anon_1: sys::OptixProgramGroupDesc__bindgen_ty_1 { - raygen: sys::OptixProgramGroupSingleModule { + }) => optix_sys::OptixProgramGroupDesc { + kind: optix_sys::OptixProgramGroupKind::OPTIX_PROGRAM_GROUP_KIND_RAYGEN, + __bindgen_anon_1: optix_sys::OptixProgramGroupDesc__bindgen_ty_1 { + raygen: optix_sys::OptixProgramGroupSingleModule { module: module.raw, entryFunctionName: entry_function_name.as_ptr(), }, @@ -702,10 +675,10 @@ impl<'m> From<&ProgramGroupDesc<'m>> for sys::OptixProgramGroupDesc { ProgramGroupDesc::Miss(ProgramGroupModule { module, entry_function_name, - }) => sys::OptixProgramGroupDesc { - kind: sys::OptixProgramGroupKind::OPTIX_PROGRAM_GROUP_KIND_MISS, - __bindgen_anon_1: sys::OptixProgramGroupDesc__bindgen_ty_1 { - miss: sys::OptixProgramGroupSingleModule { + }) => optix_sys::OptixProgramGroupDesc { + kind: optix_sys::OptixProgramGroupKind::OPTIX_PROGRAM_GROUP_KIND_MISS, + __bindgen_anon_1: optix_sys::OptixProgramGroupDesc__bindgen_ty_1 { + miss: optix_sys::OptixProgramGroupSingleModule { module: module.raw, entryFunctionName: entry_function_name.as_ptr(), }, @@ -715,10 +688,10 @@ impl<'m> From<&ProgramGroupDesc<'m>> for sys::OptixProgramGroupDesc { ProgramGroupDesc::Exception(ProgramGroupModule { module, entry_function_name, - }) => sys::OptixProgramGroupDesc { - kind: sys::OptixProgramGroupKind::OPTIX_PROGRAM_GROUP_KIND_EXCEPTION, - __bindgen_anon_1: sys::OptixProgramGroupDesc__bindgen_ty_1 { - miss: sys::OptixProgramGroupSingleModule { + }) => optix_sys::OptixProgramGroupDesc { + kind: optix_sys::OptixProgramGroupKind::OPTIX_PROGRAM_GROUP_KIND_EXCEPTION, + __bindgen_anon_1: optix_sys::OptixProgramGroupDesc__bindgen_ty_1 { + miss: optix_sys::OptixProgramGroupSingleModule { module: module.raw, entryFunctionName: entry_function_name.as_ptr(), }, @@ -751,10 +724,10 @@ impl<'m> From<&ProgramGroupDesc<'m>> for sys::OptixProgramGroupDesc { std::ptr::null_mut() }; - 
sys::OptixProgramGroupDesc { - kind: sys::OptixProgramGroupKind::OPTIX_PROGRAM_GROUP_KIND_HITGROUP, - __bindgen_anon_1: sys::OptixProgramGroupDesc__bindgen_ty_1 { - hitgroup: sys::OptixProgramGroupHitgroup { + optix_sys::OptixProgramGroupDesc { + kind: optix_sys::OptixProgramGroupKind::OPTIX_PROGRAM_GROUP_KIND_HITGROUP, + __bindgen_anon_1: optix_sys::OptixProgramGroupDesc__bindgen_ty_1 { + hitgroup: optix_sys::OptixProgramGroupHitgroup { moduleCH: module_ch, entryFunctionNameCH: efn_ch_ptr, moduleAH: module_ah, @@ -779,10 +752,10 @@ impl<'m> From<&ProgramGroupDesc<'m>> for sys::OptixProgramGroupDesc { (std::ptr::null_mut(), std::ptr::null()) }; - sys::OptixProgramGroupDesc { - kind: sys::OptixProgramGroupKind::OPTIX_PROGRAM_GROUP_KIND_CALLABLES, - __bindgen_anon_1: sys::OptixProgramGroupDesc__bindgen_ty_1 { - callables: sys::OptixProgramGroupCallables { + optix_sys::OptixProgramGroupDesc { + kind: optix_sys::OptixProgramGroupKind::OPTIX_PROGRAM_GROUP_KIND_CALLABLES, + __bindgen_anon_1: optix_sys::OptixProgramGroupDesc__bindgen_ty_1 { + callables: optix_sys::OptixProgramGroupCallables { moduleDC: module_dc, entryFunctionNameDC: efn_dc, moduleCC: module_cc, diff --git a/crates/optix/src/shader_binding_table.rs b/crates/optix/src/shader_binding_table.rs index f0edd95a..2adf9991 100644 --- a/crates/optix/src/shader_binding_table.rs +++ b/crates/optix/src/shader_binding_table.rs @@ -1,9 +1,22 @@ use crate::{const_assert, const_assert_eq}; -use crate::{error::Error, optix_call, pipeline::ProgramGroup, sys}; +use crate::{error::Error, optix_call, pipeline::ProgramGroup}; use cust::memory::{DeviceCopy, DeviceSlice}; type Result = std::result::Result; +// The SBT record header is an opaque blob used by optix +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct SbtRecordHeader { + header: [u8; optix_sys::OptixSbtRecordHeaderSize as usize], +} + +impl SbtRecordHeader { + pub fn as_mut_ptr(&mut self) -> *mut std::os::raw::c_void { + self.header.as_mut_ptr() as *mut 
std::os::raw::c_void + } +} + #[repr(C)] #[repr(align(16))] #[derive(Copy, Clone)] @@ -11,7 +24,7 @@ pub struct SbtRecord where T: Copy, { - header: sys::SbtRecordHeader, + header: SbtRecordHeader, data: T, } @@ -23,7 +36,7 @@ where { pub fn pack(data: T, program_group: &ProgramGroup) -> Result> { let mut rec = SbtRecord { - header: sys::SbtRecordHeader::default(), + header: SbtRecordHeader::default(), data, }; @@ -40,12 +53,12 @@ where unsafe impl DeviceCopy for SbtRecord {} #[repr(transparent)] -pub struct ShaderBindingTable(pub(crate) sys::OptixShaderBindingTable); +pub struct ShaderBindingTable(pub(crate) optix_sys::OptixShaderBindingTable); impl ShaderBindingTable { pub fn new(buf_raygen_record: &DeviceSlice>) -> Self { let raygen_record = buf_raygen_record.as_device_ptr().as_raw(); - ShaderBindingTable(sys::OptixShaderBindingTable { + ShaderBindingTable(optix_sys::OptixShaderBindingTable { raygenRecord: raygen_record, exceptionRecord: 0, missRecordBase: 0, @@ -110,9 +123,9 @@ impl ShaderBindingTable { const_assert_eq!( std::mem::align_of::(), - std::mem::align_of::(), + std::mem::align_of::(), ); const_assert_eq!( std::mem::size_of::(), - std::mem::size_of::() + std::mem::size_of::() ); diff --git a/crates/optix/src/sys.rs b/crates/optix/src/sys.rs deleted file mode 100644 index 7243625a..00000000 --- a/crates/optix/src/sys.rs +++ /dev/null @@ -1,59 +0,0 @@ -#![allow(warnings)] - -use cust_raw::*; - -use std::mem::ManuallyDrop; - -type size_t = usize; - -include!(concat!(env!("OUT_DIR"), "/optix_wrapper.rs")); - -extern "C" { - pub fn optixInit() -> OptixResult; -} - -// The SBT record header is an opaque blob used by optix -#[repr(C)] -#[derive(Default, Clone, Copy)] -pub struct SbtRecordHeader { - header: [u8; OptixSbtRecordHeaderSize as usize], -} - -impl SbtRecordHeader { - pub fn as_mut_ptr(&mut self) -> *mut std::os::raw::c_void { - self.header.as_mut_ptr() as *mut std::os::raw::c_void - } -} - -// Manually define the build input union as the bindgen is 
pretty nasty -#[repr(C)] -pub union OptixBuildInputUnion { - pub triangle_array: ManuallyDrop, - pub curve_array: ManuallyDrop, - pub custom_primitive_array: ManuallyDrop, - pub instance_array: ManuallyDrop, - pad: [std::os::raw::c_char; 1024], -} - -impl Default for OptixBuildInputUnion { - fn default() -> OptixBuildInputUnion { - OptixBuildInputUnion { pad: [0i8; 1024] } - } -} - -#[repr(C)] -pub struct OptixBuildInput { - pub type_: OptixBuildInputType, - pub input: OptixBuildInputUnion, -} - -// Sanity check that the size of this union we're defining matches the one in -// optix header so we don't get any nasty surprises -fn _size_check() { - unsafe { - std::mem::transmute::(OptixBuildInput { - type_: OptixBuildInputType_OPTIX_BUILD_INPUT_TYPE_TRIANGLES, - input: { OptixBuildInputUnion { pad: [0; 1024] } }, - }); - } -} diff --git a/crates/ptx_compiler/Cargo.toml b/crates/ptx_compiler/Cargo.toml index b6902bcb..0785596a 100644 --- a/crates/ptx_compiler/Cargo.toml +++ b/crates/ptx_compiler/Cargo.toml @@ -7,5 +7,5 @@ description = "High level bindings to CUDA's ptx compilation APIs" repository = "https://github.com/Rust-GPU/Rust-CUDA" readme = "../../README.md" -[build-dependencies] -find_cuda_helper = { path = "../find_cuda_helper", version = "0.2" } +[dependencies] +cust_raw = { path = "../cust_raw", default-features = false, features = ["nvptx-compiler"] } diff --git a/crates/ptx_compiler/build.rs b/crates/ptx_compiler/build.rs deleted file mode 100644 index a6c47b89..00000000 --- a/crates/ptx_compiler/build.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - find_cuda_helper::include_cuda(); -} diff --git a/crates/ptx_compiler/src/lib.rs b/crates/ptx_compiler/src/lib.rs index d2b41fa8..25881295 100644 --- a/crates/ptx_compiler/src/lib.rs +++ b/crates/ptx_compiler/src/lib.rs @@ -3,35 +3,27 @@ use std::mem::MaybeUninit; -#[allow(warnings)] -pub mod sys; +use cust_raw::nvptx_compiler_sys; trait ToResult { fn to_result(self) -> Result<(), NvptxError>; } -impl 
ToResult for sys::nvPTXCompileResult { +impl ToResult for nvptx_compiler_sys::nvPTXCompileResult { fn to_result(self) -> Result<(), NvptxError> { + use cust_raw::nvptx_compiler_sys::nvPTXCompileResult::*; match self { - sys::nvPTXCompileResult_NVPTXCOMPILE_SUCCESS => Ok(()), - sys::nvPTXCompileResult_NVPTXCOMPILE_ERROR_INVALID_INPUT => { - Err(NvptxError::InvalidInput) - } - sys::nvPTXCompileResult_NVPTXCOMPILE_ERROR_COMPILATION_FAILURE => { - Err(NvptxError::CompilationFailure) - } - sys::nvPTXCompileResult_NVPTXCOMPILE_ERROR_INTERNAL => Err(NvptxError::Internal), - sys::nvPTXCompileResult_NVPTXCOMPILE_ERROR_OUT_OF_MEMORY => { - Err(NvptxError::OutOfMemory) - } - sys::nvPTXCompileResult_NVPTXCOMPILE_ERROR_UNSUPPORTED_PTX_VERSION => { - Err(NvptxError::UnsupportedPtxVersion) - } + NVPTXCOMPILE_SUCCESS => Ok(()), + NVPTXCOMPILE_ERROR_INVALID_INPUT => Err(NvptxError::InvalidInput), + NVPTXCOMPILE_ERROR_COMPILATION_FAILURE => Err(NvptxError::CompilationFailure), + NVPTXCOMPILE_ERROR_INTERNAL => Err(NvptxError::Internal), + NVPTXCOMPILE_ERROR_OUT_OF_MEMORY => Err(NvptxError::OutOfMemory), + NVPTXCOMPILE_ERROR_UNSUPPORTED_PTX_VERSION => Err(NvptxError::UnsupportedPtxVersion), // these two are statically prevented so they should never happen - sys::nvPTXCompileResult_NVPTXCOMPILE_ERROR_COMPILER_INVOCATION_INCOMPLETE => { + NVPTXCOMPILE_ERROR_COMPILER_INVOCATION_INCOMPLETE => { unreachable!("nvptx yielded an incomplete invocation error") } - sys::nvPTXCompileResult_NVPTXCOMPILE_ERROR_INVALID_COMPILER_HANDLE => { + NVPTXCOMPILE_ERROR_INVALID_COMPILER_HANDLE => { unreachable!("nvptx yielded an invalid handle err") } _ => unreachable!(), @@ -53,7 +45,7 @@ pub enum NvptxError { #[repr(transparent)] #[derive(Debug)] pub struct NvptxCompiler { - raw: sys::nvPTXCompilerHandle, + raw: nvptx_compiler_sys::nvPTXCompilerHandle, } impl NvptxCompiler { @@ -63,8 +55,12 @@ impl NvptxCompiler { let mut raw = MaybeUninit::uninit(); unsafe { - sys::nvPTXCompilerCreate(raw.as_mut_ptr(), 
ptx.len() as u64, ptx.as_ptr().cast()) - .to_result()?; + nvptx_compiler_sys::nvPTXCompilerCreate( + raw.as_mut_ptr(), + ptx.len(), + ptx.as_ptr().cast(), + ) + .to_result()?; let raw = raw.assume_init(); Ok(Self { raw }) } @@ -74,7 +70,7 @@ impl NvptxCompiler { impl Drop for NvptxCompiler { fn drop(&mut self) { unsafe { - sys::nvPTXCompilerDestroy(&mut self.raw as *mut _) + nvptx_compiler_sys::nvPTXCompilerDestroy(&mut self.raw as *mut _) .to_result() .expect("failed to destroy nvptx compiler"); } @@ -84,13 +80,13 @@ impl Drop for NvptxCompiler { #[derive(Debug)] pub struct CompilerFailure { pub error: NvptxError, - handle: sys::nvPTXCompilerHandle, + handle: nvptx_compiler_sys::nvPTXCompilerHandle, } impl Drop for CompilerFailure { fn drop(&mut self) { unsafe { - sys::nvPTXCompilerDestroy(&mut self.handle as *mut _) + nvptx_compiler_sys::nvPTXCompilerDestroy(&mut self.handle as *mut _) .to_result() .expect("failed to destroy nvptx compiler failure"); } @@ -101,10 +97,12 @@ impl CompilerFailure { pub fn error_log(&self) -> NvptxResult { let mut size = MaybeUninit::uninit(); unsafe { - sys::nvPTXCompilerGetErrorLogSize(self.handle, size.as_mut_ptr()).to_result()?; - let size = size.assume_init() as usize; + nvptx_compiler_sys::nvPTXCompilerGetErrorLogSize(self.handle, size.as_mut_ptr()) + .to_result()?; + let size = size.assume_init(); let mut vec = Vec::with_capacity(size); - sys::nvPTXCompilerGetErrorLog(self.handle, vec.as_mut_ptr() as *mut i8).to_result()?; + nvptx_compiler_sys::nvPTXCompilerGetErrorLog(self.handle, vec.as_mut_ptr() as *mut i8) + .to_result()?; vec.set_len(size); Ok(String::from_utf8_lossy(&vec).to_string()) } @@ -115,13 +113,13 @@ impl CompilerFailure { #[derive(Debug)] pub struct CompiledProgram { pub cubin: Vec, - handle: sys::nvPTXCompilerHandle, + handle: nvptx_compiler_sys::nvPTXCompilerHandle, } impl Drop for CompiledProgram { fn drop(&mut self) { unsafe { - sys::nvPTXCompilerDestroy(&mut self.handle as *mut _) + 
nvptx_compiler_sys::nvPTXCompilerDestroy(&mut self.handle as *mut _) .to_result() .expect("failed to destroy nvptx compiled program"); } @@ -132,10 +130,12 @@ impl CompiledProgram { pub fn info_log(&self) -> NvptxResult { let mut size = MaybeUninit::uninit(); unsafe { - sys::nvPTXCompilerGetInfoLogSize(self.handle, size.as_mut_ptr()).to_result()?; - let size = size.assume_init() as usize; + nvptx_compiler_sys::nvPTXCompilerGetInfoLogSize(self.handle, size.as_mut_ptr()) + .to_result()?; + let size = size.assume_init(); let mut vec = Vec::with_capacity(size); - sys::nvPTXCompilerGetInfoLog(self.handle, vec.as_mut_ptr() as *mut i8).to_result()?; + nvptx_compiler_sys::nvPTXCompilerGetInfoLog(self.handle, vec.as_mut_ptr() as *mut i8) + .to_result()?; vec.set_len(size); Ok(String::from_utf8_lossy(&vec).to_string()) } diff --git a/crates/ptx_compiler/src/sys.rs b/crates/ptx_compiler/src/sys.rs deleted file mode 100644 index 88e7dd02..00000000 --- a/crates/ptx_compiler/src/sys.rs +++ /dev/null @@ -1,236 +0,0 @@ -/* automatically generated by rust-bindgen 0.58.1 */ - -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct nvPTXCompiler { - _unused: [u8; 0], -} -#[doc = " \\ingroup handle"] -#[doc = " \\brief nvPTXCompilerHandle represents a handle to the PTX Compiler."] -#[doc = ""] -#[doc = " To compile a PTX program string, an instance of nvPTXCompiler"] -#[doc = " must be created and the handle to it must be obtained using the"] -#[doc = " API nvPTXCompilerCreate(). 
Then the compilation can be done"] -#[doc = " using the API nvPTXCompilerCompile()."] -#[doc = ""] -pub type nvPTXCompilerHandle = *mut nvPTXCompiler; -pub const nvPTXCompileResult_NVPTXCOMPILE_SUCCESS: nvPTXCompileResult = 0; -pub const nvPTXCompileResult_NVPTXCOMPILE_ERROR_INVALID_COMPILER_HANDLE: nvPTXCompileResult = 1; -pub const nvPTXCompileResult_NVPTXCOMPILE_ERROR_INVALID_INPUT: nvPTXCompileResult = 2; -pub const nvPTXCompileResult_NVPTXCOMPILE_ERROR_COMPILATION_FAILURE: nvPTXCompileResult = 3; -pub const nvPTXCompileResult_NVPTXCOMPILE_ERROR_INTERNAL: nvPTXCompileResult = 4; -pub const nvPTXCompileResult_NVPTXCOMPILE_ERROR_OUT_OF_MEMORY: nvPTXCompileResult = 5; -pub const nvPTXCompileResult_NVPTXCOMPILE_ERROR_COMPILER_INVOCATION_INCOMPLETE: nvPTXCompileResult = - 6; -pub const nvPTXCompileResult_NVPTXCOMPILE_ERROR_UNSUPPORTED_PTX_VERSION: nvPTXCompileResult = 7; -#[doc = " \\ingroup error"] -#[doc = ""] -#[doc = " \\brief The nvPTXCompiler APIs return the nvPTXCompileResult codes to indicate the call result"] -pub type nvPTXCompileResult = ::std::os::raw::c_int; -extern "C" { - #[doc = " \\ingroup versioning"] - #[doc = ""] - #[doc = " \\brief Queries the current \\p major and \\p minor version of"] - #[doc = " PTX Compiler APIs being used"] - #[doc = ""] - #[doc = " \\param [out] major Major version of the PTX Compiler APIs"] - #[doc = " \\param [out] minor Minor version of the PTX Compiler APIs"] - #[doc = " \\note The version of PTX Compiler APIs follows the CUDA Toolkit versioning."] - #[doc = " The PTX ISA version supported by a PTX Compiler API version is listed"] - #[doc = " here."] - #[doc = ""] - #[doc = " \\return"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_SUCCESS \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INTERNAL \\endlink"] - pub fn nvPTXCompilerGetVersion( - major: *mut ::std::os::raw::c_uint, - minor: *mut ::std::os::raw::c_uint, - ) -> nvPTXCompileResult; -} -extern "C" { - #[doc = " \\ingroup 
compilation"] - #[doc = ""] - #[doc = " \\brief Obtains the handle to an instance of the PTX compiler"] - #[doc = " initialized with the given PTX program \\p ptxCode"] - #[doc = ""] - #[doc = " \\param [out] compiler Returns a handle to PTX compiler initialized"] - #[doc = " with the PTX program \\p ptxCode"] - #[doc = " \\param [in] ptxCodeLen Size of the PTX program \\p ptxCode passed as string"] - #[doc = " \\param [in] ptxCode The PTX program which is to be compiled passed as string."] - #[doc = ""] - #[doc = ""] - #[doc = " \\return"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_SUCCESS \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_OUT_OF_MEMORY \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INTERNAL \\endlink"] - pub fn nvPTXCompilerCreate( - compiler: *mut nvPTXCompilerHandle, - ptxCodeLen: size_t, - ptxCode: *const ::std::os::raw::c_char, - ) -> nvPTXCompileResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = ""] - #[doc = " \\brief Destroys and cleans the already created PTX compiler"] - #[doc = ""] - #[doc = " \\param [in] compiler A handle to the PTX compiler which is to be destroyed"] - #[doc = ""] - #[doc = " \\return"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_SUCCESS \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_OUT_OF_MEMORY \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INTERNAL \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INVALID_PROGRAM_HANDLE \\endlink"] - #[doc = ""] - pub fn nvPTXCompilerDestroy(compiler: *mut nvPTXCompilerHandle) -> nvPTXCompileResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = ""] - #[doc = " \\brief Compile a PTX program with the given compiler options"] - #[doc = ""] - #[doc = " \\param [in,out] compiler A handle to PTX compiler initialized with the"] - #[doc = " PTX program which is to be compiled."] - #[doc = " The compiled program can 
be accessed using the handle"] - #[doc = " \\param [in] numCompileOptions Length of the array \\p compileOptions"] - #[doc = " \\param [in] compileOptions Compiler options with which compilation should be done."] - #[doc = " The compiler options string is a null terminated character array."] - #[doc = " A valid list of compiler options is at"] - #[doc = " link."] - #[doc = " \\note --gpu-name (-arch) is a mandatory option."] - #[doc = ""] - #[doc = " \\return"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_SUCCESS \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_OUT_OF_MEMORY \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INTERNAL \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INVALID_PROGRAM_HANDLE \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_COMPILATION_FAILURE \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_UNSUPPORTED_PTX_VERSION \\endlink"] - #[doc = ""] - pub fn nvPTXCompilerCompile( - compiler: nvPTXCompilerHandle, - numCompileOptions: ::std::os::raw::c_int, - compileOptions: *const *const ::std::os::raw::c_char, - ) -> nvPTXCompileResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = ""] - #[doc = " \\brief Obtains the size of the image of the compiled program"] - #[doc = ""] - #[doc = " \\param [in] compiler A handle to PTX compiler on which nvPTXCompilerCompile() has been performed."] - #[doc = " \\param [out] binaryImageSize The size of the image of the compiled program"] - #[doc = ""] - #[doc = " \\return"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_SUCCESS \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INTERNAL \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INVALID_PROGRAM_HANDLE \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_COMPILER_INVOCATION_INCOMPLETE \\endlink"] - #[doc = ""] - #[doc = " \\note 
nvPTXCompilerCompile() API should be invoked for the handle before calling this API."] - #[doc = " Otherwise, NVPTXCOMPILE_ERROR_COMPILER_INVOCATION_INCOMPLETE is returned."] - pub fn nvPTXCompilerGetCompiledProgramSize( - compiler: nvPTXCompilerHandle, - binaryImageSize: *mut size_t, - ) -> nvPTXCompileResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = ""] - #[doc = " \\brief Obtains the image of the compiled program"] - #[doc = ""] - #[doc = " \\param [in] compiler A handle to PTX compiler on which nvPTXCompilerCompile() has been performed."] - #[doc = " \\param [out] binaryImage The image of the compiled program."] - #[doc = " Client should allocate memory for \\p binaryImage"] - #[doc = ""] - #[doc = " \\return"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_SUCCESS \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INTERNAL \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INVALID_PROGRAM_HANDLE \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_COMPILER_INVOCATION_INCOMPLETE \\endlink"] - #[doc = ""] - #[doc = " \\note nvPTXCompilerCompile() API should be invoked for the handle before calling this API."] - #[doc = " Otherwise, NVPTXCOMPILE_ERROR_COMPILER_INVOCATION_INCOMPLETE is returned."] - #[doc = ""] - pub fn nvPTXCompilerGetCompiledProgram( - compiler: nvPTXCompilerHandle, - binaryImage: *mut ::std::os::raw::c_void, - ) -> nvPTXCompileResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = ""] - #[doc = " \\brief Query the size of the error message that was seen previously for the handle"] - #[doc = ""] - #[doc = " \\param [in] compiler A handle to PTX compiler on which nvPTXCompilerCompile() has been performed."] - #[doc = " \\param [out] errorLogSize The size of the error log in bytes which was produced"] - #[doc = " in previous call to nvPTXCompilerCompiler()."] - #[doc = ""] - #[doc = " \\return"] - #[doc = " - \\link #nvPTXCompileResult 
NVPTXCOMPILE_SUCCESS \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INTERNAL \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INVALID_PROGRAM_HANDLE \\endlink"] - #[doc = ""] - pub fn nvPTXCompilerGetErrorLogSize( - compiler: nvPTXCompilerHandle, - errorLogSize: *mut size_t, - ) -> nvPTXCompileResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = ""] - #[doc = " \\brief Query the error message that was seen previously for the handle"] - #[doc = ""] - #[doc = " \\param [in] compiler A handle to PTX compiler on which nvPTXCompilerCompile() has been performed."] - #[doc = " \\param [out] errorLog The error log which was produced in previous call to nvPTXCompilerCompiler()."] - #[doc = " Clients should allocate memory for \\p errorLog"] - #[doc = ""] - #[doc = " \\return"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_SUCCESS \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INTERNAL \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INVALID_PROGRAM_HANDLE \\endlink"] - #[doc = ""] - pub fn nvPTXCompilerGetErrorLog( - compiler: nvPTXCompilerHandle, - errorLog: *mut ::std::os::raw::c_char, - ) -> nvPTXCompileResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = ""] - #[doc = " \\brief Query the size of the information message that was seen previously for the handle"] - #[doc = ""] - #[doc = " \\param [in] compiler A handle to PTX compiler on which nvPTXCompilerCompile() has been performed."] - #[doc = " \\param [out] infoLogSize The size of the information log in bytes which was produced"] - #[doc = " in previous call to nvPTXCompilerCompiler()."] - #[doc = ""] - #[doc = " \\return"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_SUCCESS \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INTERNAL \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INVALID_PROGRAM_HANDLE \\endlink"] - #[doc = 
""] - pub fn nvPTXCompilerGetInfoLogSize( - compiler: nvPTXCompilerHandle, - infoLogSize: *mut size_t, - ) -> nvPTXCompileResult; -} -extern "C" { - #[doc = " \\ingroup compilation"] - #[doc = ""] - #[doc = " \\brief Query the information message that was seen previously for the handle"] - #[doc = ""] - #[doc = " \\param [in] compiler A handle to PTX compiler on which nvPTXCompilerCompile() has been performed."] - #[doc = " \\param [out] infoLog The information log which was produced in previous call to nvPTXCompilerCompiler()."] - #[doc = " Clients should allocate memory for \\p infoLog"] - #[doc = ""] - #[doc = " \\return"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_SUCCESS \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INTERNAL \\endlink"] - #[doc = " - \\link #nvPTXCompileResult NVPTXCOMPILE_ERROR_INVALID_PROGRAM_HANDLE \\endlink"] - #[doc = ""] - pub fn nvPTXCompilerGetInfoLog( - compiler: nvPTXCompilerHandle, - infoLog: *mut ::std::os::raw::c_char, - ) -> nvPTXCompileResult; -} -pub type size_t = ::std::os::raw::c_ulonglong; diff --git a/crates/rustc_codegen_nvvm/Cargo.toml b/crates/rustc_codegen_nvvm/Cargo.toml index 054a0927..e09bb407 100644 --- a/crates/rustc_codegen_nvvm/Cargo.toml +++ b/crates/rustc_codegen_nvvm/Cargo.toml @@ -15,6 +15,7 @@ readme = "../../README.md" crate-type = ["dylib"] [dependencies] +cust_raw = { path = "../cust_raw" } nvvm = { version = "0.1", path = "../nvvm" } rustc-demangle = "0.1.24" libc = "0.2.169" @@ -26,7 +27,6 @@ bitflags = "2.8.0" # by `rustc_codegen_ssa` via its `thorin-dwp` dependency. 
gimli = "0.30" tracing = { version = "0.1.41", features = ["release_max_level_debug"] } -find_cuda_helper = { version = "0.2", path = "../find_cuda_helper" } tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } rustc_codegen_nvvm_macros = { version = "0.1", path = "../rustc_codegen_nvvm_macros" } smallvec = { version = "1.14.0", features = ["union", "may_dangle"] } diff --git a/crates/rustc_codegen_nvvm/src/nvvm.rs b/crates/rustc_codegen_nvvm/src/nvvm.rs index 88f5e8ef..358b5449 100644 --- a/crates/rustc_codegen_nvvm/src/nvvm.rs +++ b/crates/rustc_codegen_nvvm/src/nvvm.rs @@ -6,15 +6,13 @@ use crate::common::AsCCharPtr; use crate::context::CodegenArgs; use crate::llvm::*; use crate::lto::ThinBuffer; -use find_cuda_helper::find_cuda_root; +use cust_raw::nvvm_sys; use nvvm::*; use rustc_codegen_ssa::traits::ThinBufferMethods; use rustc_session::{Session, config::DebugInfo}; -use std::ffi::OsStr; use std::fmt::Display; use std::marker::PhantomData; -use std::path::Path; -use std::{fs, ptr}; +use std::ptr; use tracing::debug; // see libintrinsics.ll on what this is. @@ -107,18 +105,7 @@ pub fn codegen_bitcode_modules( let buf = ThinBuffer::new(module); prog.add_module(buf.data(), "merged".to_string())?; - - let libdevice = if let Some(bc) = find_libdevice() { - bc - } else { - // i would put a more helpful error here, but to actually use the codegen - // it needs to find libnvvm before this, and libdevice is in the nvvm directory - // so if it can find libnvvm there is almost no way it can't find libdevice. - sess.dcx() - .fatal("Could not find the libdevice library (libdevice.10.bc) in the CUDA directory") - }; - - prog.add_lazy_module(&libdevice, "libdevice".to_string())?; + prog.add_lazy_module(nvvm_sys::LIBDEVICE_BITCODE, "libdevice".to_string())?; prog.add_lazy_module(LIBINTRINSICS, "libintrinsics".to_string())?; // for now, while the codegen is young, we always run verification on the program. 
@@ -151,21 +138,6 @@ pub fn codegen_bitcode_modules( Ok(res) } -/// Find the libdevice bitcode library which contains math intrinsics and is -/// linked when building the nvvm program. -pub fn find_libdevice() -> Option<Vec<u8>> { - if let Some(base_path) = find_cuda_root() { - let libdevice_file = fs::read_dir(Path::new(&base_path).join("nvvm").join("libdevice")) - .ok()? - .filter_map(Result::ok) - .find(|f| f.path().extension() == Some(OsStr::new("bc")))? - .path(); - fs::read(libdevice_file).ok() - } else { - None - } -} - unsafe fn cleanup_dicompileunit(module: &Module) { unsafe { let mut cu1 = ptr::null_mut();