Skip to content

Commit 89909f3

Browse files
authored
bump dlpack header to 1.1 (#667)
1 parent 8a49923 commit 89909f3

File tree

1 file changed

+38
-4
lines changed
  • cuda_core/cuda/core/experimental

1 file changed

+38
-4
lines changed

cuda_core/cuda/core/experimental/dlpack.h

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
#define DLPACK_MAJOR_VERSION 1
2020

2121
/*! \brief The current minor version of dlpack */
22-
#define DLPACK_MINOR_VERSION 0
22+
#define DLPACK_MINOR_VERSION 1
2323

2424
/*! \brief DLPACK_DLL prefix for windows */
2525
#ifdef _WIN32
@@ -157,6 +157,26 @@ typedef enum {
157157
kDLComplex = 5U,
158158
/*! \brief boolean */
159159
kDLBool = 6U,
160+
/*! \brief FP8 data types */
161+
kDLFloat8_e3m4 = 7U,
162+
kDLFloat8_e4m3 = 8U,
163+
kDLFloat8_e4m3b11fnuz = 9U,
164+
kDLFloat8_e4m3fn = 10U,
165+
kDLFloat8_e4m3fnuz = 11U,
166+
kDLFloat8_e5m2 = 12U,
167+
kDLFloat8_e5m2fnuz = 13U,
168+
kDLFloat8_e8m0fnu = 14U,
169+
/*! \brief FP6 data types
170+
* Setting bits != 6 is currently unspecified; the producer must ensure bits is set to 6,
171+
* while the consumer must stop importing if the value is unexpected.
172+
*/
173+
kDLFloat6_e2m3fn = 15U,
174+
kDLFloat6_e3m2fn = 16U,
175+
/*! \brief FP4 data types
176+
* Setting bits != 4 is currently unspecified; the producer must ensure bits is set to 4,
177+
* while the consumer must stop importing if the value is unexpected.
178+
*/
179+
kDLFloat4_e2m1fn = 17U,
160180
} DLDataTypeCode;
161181

162182
/*!
@@ -170,6 +190,12 @@ typedef enum {
170190
* - int8: type_code = 0, bits = 8, lanes = 1
171191
* - std::complex<float>: type_code = 5, bits = 64, lanes = 1
172192
* - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, the underlying storage size of bool is 8 bits)
193+
* - float8_e4m3: type_code = 8, bits = 8, lanes = 1 (packed in memory)
194+
* - float6_e3m2fn: type_code = 16, bits = 6, lanes = 1 (packed in memory)
195+
* - float4_e2m1fn: type_code = 17, bits = 4, lanes = 1 (packed in memory)
196+
*
197+
* When a sub-byte type is packed, DLPack requires the data to be in little bit-endian, i.e.,
198+
* for a packed data set D, ((D >> (i * bits)) & bit_mask) stores the i-th element.
173199
*/
174200
typedef struct {
175201
/*!
@@ -196,8 +222,8 @@ typedef struct {
196222
* types. This pointer is always aligned to 256 bytes as in CUDA. The
197223
* `byte_offset` field should be used to point to the beginning of the data.
198224
*
199-
* Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
200-
* TVM, perhaps others) do not adhere to this 256 byte aligment requirement
225+
* Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
226+
* TVM, perhaps others) do not adhere to this 256 byte alignment requirement
201227
* on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
202228
* (after which this note will be updated); at the moment it is recommended
203229
* to not rely on the data pointer being correctly aligned.
@@ -267,7 +293,7 @@ typedef struct DLManagedTensor {
267293
void (*deleter)(struct DLManagedTensor * self);
268294
} DLManagedTensor;
269295

270-
// bit masks used in in the DLManagedTensorVersioned
296+
// bit masks used in the DLManagedTensorVersioned
271297

272298
/*! \brief bit mask to indicate that the tensor is read only. */
273299
#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)
@@ -280,6 +306,14 @@ typedef struct DLManagedTensor {
280306
*/
281307
#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)
282308

309+
/*!
310+
* \brief bit mask to indicate whether a sub-byte type is packed or padded.
311+
*
312+
* The default for sub-byte types (ex: fp4/fp6) is assumed packed. This flag can
313+
* be set by the producer to signal that a tensor of sub-byte type is padded.
314+
*/
315+
#define DLPACK_FLAG_BITMASK_IS_SUBBYTE_TYPE_PADDED (1UL << 2UL)
316+
283317
/*!
284318
* \brief A versioned and managed C Tensor object, manage memory of DLTensor.
285319
*

0 commit comments

Comments
 (0)