19
19
#define DLPACK_MAJOR_VERSION 1
20
20
21
21
/* ! \brief The current minor version of dlpack */
22
- #define DLPACK_MINOR_VERSION 0
22
+ #define DLPACK_MINOR_VERSION 1
23
23
24
24
/* ! \brief DLPACK_DLL prefix for windows */
25
25
#ifdef _WIN32
@@ -157,6 +157,26 @@ typedef enum {
157
157
kDLComplex = 5U ,
158
158
/* ! \brief boolean */
159
159
kDLBool = 6U ,
160
+ /* ! \brief FP8 data types */
161
+ kDLFloat8_e3m4 = 7U ,
162
+ kDLFloat8_e4m3 = 8U ,
163
+ kDLFloat8_e4m3b11fnuz = 9U ,
164
+ kDLFloat8_e4m3fn = 10U ,
165
+ kDLFloat8_e4m3fnuz = 11U ,
166
+ kDLFloat8_e5m2 = 12U ,
167
+ kDLFloat8_e5m2fnuz = 13U ,
168
+ kDLFloat8_e8m0fnu = 14U ,
169
+ /* ! \brief FP6 data types
170
+ * Setting bits != 6 is currently unspecified; the producer must ensure bits is set to 6,
171
+ * while the consumer must stop importing if the value is unexpected.
172
+ */
173
+ kDLFloat6_e2m3fn = 15U ,
174
+ kDLFloat6_e3m2fn = 16U ,
175
+ /* ! \brief FP4 data types
176
+ * Setting bits != 4 is currently unspecified; the producer must ensure bits is set to 4,
177
+ * while the consumer must stop importing if the value is unexpected.
178
+ */
179
+ kDLFloat4_e2m1fn = 17U ,
160
180
} DLDataTypeCode;
161
181
162
182
/* !
@@ -170,6 +190,12 @@ typedef enum {
170
190
* - int8: type_code = 0, bits = 8, lanes = 1
171
191
* - std::complex<float>: type_code = 5, bits = 64, lanes = 1
172
192
* - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, the underlying storage size of bool is 8 bits)
193
+ * - float8_e4m3: type_code = 8, bits = 8, lanes = 1 (packed in memory)
194
+ * - float6_e3m2fn: type_code = 16, bits = 6, lanes = 1 (packed in memory)
195
+ * - float4_e2m1fn: type_code = 17, bits = 4, lanes = 1 (packed in memory)
196
+ *
197
+ * When a sub-byte type is packed, DLPack requires the data to be in little bit-endian, i.e.,
198
+ * for a packed data set D ((D >> (i * bits)) & bit_mask) stores the i-th element.
173
199
*/
174
200
typedef struct {
175
201
/* !
@@ -196,8 +222,8 @@ typedef struct {
196
222
* types. This pointer is always aligned to 256 bytes as in CUDA. The
197
223
* `byte_offset` field should be used to point to the beginning of the data.
198
224
*
199
- * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
200
- * TVM, perhaps others) do not adhere to this 256 byte aligment requirement
225
+ * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
226
+ * TVM, perhaps others) do not adhere to this 256 byte alignment requirement
201
227
* on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
202
228
* (after which this note will be updated); at the moment it is recommended
203
229
* to not rely on the data pointer being correctly aligned.
@@ -267,7 +293,7 @@ typedef struct DLManagedTensor {
267
293
void (*deleter)(struct DLManagedTensor * self);
268
294
} DLManagedTensor;
269
295
270
- // bit masks used in in the DLManagedTensorVersioned
296
+ // bit masks used in the DLManagedTensorVersioned
271
297
272
298
/* ! \brief bit mask to indicate that the tensor is read only. */
273
299
#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL )
@@ -280,6 +306,14 @@ typedef struct DLManagedTensor {
280
306
*/
281
307
#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL )
282
308
309
+ /* !
310
+ * \brief bit mask to indicate whether a sub-byte type is packed or padded.
311
+ *
312
+ * The default for sub-byte types (e.g., fp4/fp6) is assumed to be packed. This flag can
313
+ * be set by the producer to signal that a tensor of sub-byte type is padded.
314
+ */
315
+ #define DLPACK_FLAG_BITMASK_IS_SUBBYTE_TYPE_PADDED (1UL << 2UL )
316
+
283
317
/* !
284
318
* \brief A versioned and managed C Tensor object, manage memory of DLTensor.
285
319
*
0 commit comments