Skip to content

Commit 9c02cbf

Browse files
authored
Update intrinsics.rs (the correct one this time)
1 parent b84a2ea commit 9c02cbf

File tree

1 file changed

+169
-113
lines changed

1 file changed

+169
-113
lines changed

crates/cuda_std/src/atomic/intrinsics.rs

Lines changed: 169 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,25 @@ macro_rules! ptx_type {
165165
};
166166
}
167167

168+
// Maps a Rust scalar type name to the PTX type suffix used by the bitwise
// atomic operations emitted through this macro (`and`, `or`, `xor`, `exch`).
//
// PTX defines those `atom` operations only for the untyped `.b32` / `.b64`
// types, so every supported scalar is mapped to the bit type of the same
// width, regardless of signedness or of being a float. Without the explicit
// `f32` / `f64` arms the float rows would fall through to the catch-all arm
// and produce `.f32` / `.f64`, which is not a valid suffix for these
// operations.
#[allow(unused_macros)]
macro_rules! ptx_type_for_logic_op {
    (u32) => {
        "b32"
    };
    (u64) => {
        "b64"
    };
    (i32) => {
        "b32"
    };
    (i64) => {
        "b64"
    };
    (f32) => {
        "b32"
    };
    (f64) => {
        "b64"
    };
    // Catch-all kept for backward compatibility: any other identifier is
    // passed through verbatim as its own name.
    ($ty:ident) => {
        stringify!($ty)
    };
}
186+
168187
#[allow(unused_macros)]
169188
macro_rules! ordering {
170189
(volatile) => {
@@ -363,7 +382,7 @@ atomic_fetch_op_2_reg! {
363382
volatile, dec, 64, i64, system, sys,
364383
}
365384

366-
macro_rules! atomic_fetch_op_3_reg {
385+
macro_rules! atomic_fetch_logic_op_3_reg {
367386
($($ordering:ident, $op:ident, $width:literal, $type:ty, $scope:ident, $scope_asm:ident),* $(,)*) => {
368387
$(
369388
paste! {
@@ -384,7 +403,7 @@ macro_rules! atomic_fetch_op_3_reg {
384403
".",
385404
stringify!($op),
386405
".",
387-
ptx_type!($type),
406+
ptx_type_for_logic_op!($type),
388407
" {}, [{}], {};"
389408
),
390409
out([<reg $width>]) out,
@@ -398,7 +417,7 @@ macro_rules! atomic_fetch_op_3_reg {
398417
};
399418
}
400419

401-
atomic_fetch_op_3_reg! {
420+
atomic_fetch_logic_op_3_reg! {
402421
// and
403422

404423
relaxed, and, 32, u32, device, gpu,
@@ -729,6 +748,153 @@ atomic_fetch_op_3_reg! {
729748
acqrel, xor, 64, f64, system, sys,
730749
volatile, xor, 64, f64, system, sys,
731750

751+
// exchange
752+
753+
relaxed, exch, 32, u32, device, gpu,
754+
acquire, exch, 32, u32, device, gpu,
755+
release, exch, 32, u32, device, gpu,
756+
acqrel, exch, 32, u32, device, gpu,
757+
volatile, exch, 32, u32, device, gpu,
758+
759+
relaxed, exch, 64, u64, device, gpu,
760+
acquire, exch, 64, u64, device, gpu,
761+
release, exch, 64, u64, device, gpu,
762+
acqrel, exch, 64, u64, device, gpu,
763+
volatile, exch, 64, u64, device, gpu,
764+
765+
relaxed, exch, 32, u32, block, cta,
766+
acquire, exch, 32, u32, block, cta,
767+
release, exch, 32, u32, block, cta,
768+
acqrel, exch, 32, u32, block, cta,
769+
volatile, exch, 32, u32, block, cta,
770+
771+
relaxed, exch, 64, u64, block, cta,
772+
acquire, exch, 64, u64, block, cta,
773+
release, exch, 64, u64, block, cta,
774+
acqrel, exch, 64, u64, block, cta,
775+
volatile, exch, 64, u64, block, cta,
776+
777+
relaxed, exch, 32, u32, system, sys,
778+
acquire, exch, 32, u32, system, sys,
779+
release, exch, 32, u32, system, sys,
780+
acqrel, exch, 32, u32, system, sys,
781+
volatile, exch, 32, u32, system, sys,
782+
783+
relaxed, exch, 64, u64, system, sys,
784+
acquire, exch, 64, u64, system, sys,
785+
release, exch, 64, u64, system, sys,
786+
acqrel, exch, 64, u64, system, sys,
787+
volatile, exch, 64, u64, system, sys,
788+
789+
relaxed, exch, 32, i32, device, gpu,
790+
acquire, exch, 32, i32, device, gpu,
791+
release, exch, 32, i32, device, gpu,
792+
acqrel, exch, 32, i32, device, gpu,
793+
volatile, exch, 32, i32, device, gpu,
794+
795+
relaxed, exch, 64, i64, device, gpu,
796+
acquire, exch, 64, i64, device, gpu,
797+
release, exch, 64, i64, device, gpu,
798+
acqrel, exch, 64, i64, device, gpu,
799+
volatile, exch, 64, i64, device, gpu,
800+
801+
relaxed, exch, 32, i32, block, cta,
802+
acquire, exch, 32, i32, block, cta,
803+
release, exch, 32, i32, block, cta,
804+
acqrel, exch, 32, i32, block, cta,
805+
volatile, exch, 32, i32, block, cta,
806+
807+
relaxed, exch, 64, i64, block, cta,
808+
acquire, exch, 64, i64, block, cta,
809+
release, exch, 64, i64, block, cta,
810+
acqrel, exch, 64, i64, block, cta,
811+
volatile, exch, 64, i64, block, cta,
812+
813+
relaxed, exch, 32, i32, system, sys,
814+
acquire, exch, 32, i32, system, sys,
815+
release, exch, 32, i32, system, sys,
816+
acqrel, exch, 32, i32, system, sys,
817+
volatile, exch, 32, i32, system, sys,
818+
819+
relaxed, exch, 64, i64, system, sys,
820+
acquire, exch, 64, i64, system, sys,
821+
release, exch, 64, i64, system, sys,
822+
acqrel, exch, 64, i64, system, sys,
823+
volatile, exch, 64, i64, system, sys,
824+
825+
relaxed, exch, 32, f32, device, gpu,
826+
acquire, exch, 32, f32, device, gpu,
827+
release, exch, 32, f32, device, gpu,
828+
acqrel, exch, 32, f32, device, gpu,
829+
volatile, exch, 32, f32, device, gpu,
830+
831+
relaxed, exch, 64, f64, device, gpu,
832+
acquire, exch, 64, f64, device, gpu,
833+
release, exch, 64, f64, device, gpu,
834+
acqrel, exch, 64, f64, device, gpu,
835+
volatile, exch, 64, f64, device, gpu,
836+
837+
relaxed, exch, 32, f32, block, cta,
838+
acquire, exch, 32, f32, block, cta,
839+
release, exch, 32, f32, block, cta,
840+
acqrel, exch, 32, f32, block, cta,
841+
volatile, exch, 32, f32, block, cta,
842+
843+
relaxed, exch, 64, f64, block, cta,
844+
acquire, exch, 64, f64, block, cta,
845+
release, exch, 64, f64, block, cta,
846+
acqrel, exch, 64, f64, block, cta,
847+
volatile, exch, 64, f64, block, cta,
848+
849+
relaxed, exch, 32, f32, system, sys,
850+
acquire, exch, 32, f32, system, sys,
851+
release, exch, 32, f32, system, sys,
852+
acqrel, exch, 32, f32, system, sys,
853+
volatile, exch, 32, f32, system, sys,
854+
855+
relaxed, exch, 64, f64, system, sys,
856+
acquire, exch, 64, f64, system, sys,
857+
release, exch, 64, f64, system, sys,
858+
acqrel, exch, 64, f64, system, sys,
859+
volatile, exch, 64, f64, system, sys,
860+
}
861+
862+
// Expands every comma-separated row
// `ordering, op, width, type, scope, scope_asm` into one function named
// `atomic_fetch_<op>_<ordering>_<type>_<scope>`.
//
// Each generated function issues a single three-operand PTX `atom`
// instruction (destination register, address, value) and returns the
// destination register. `width` picks the `reg32` / `reg64` register class
// for the value and the result; the pointer always goes in a `reg64`.
macro_rules! atomic_fetch_op_3_reg {
    ($($sem:ident, $operation:ident, $bits:literal, $elem:ty, $scope_name:ident, $scope_ptx:ident),* $(,)*) => {
        $(
            paste! {
                #[$crate::gpu_only]
                #[allow(clippy::missing_safety_doc)]
                #[doc = concat!(
                    "Fetches the value in ptr, performs a ",
                    stringify!($operation),
                    ", and returns the original value"
                )]
                pub unsafe fn [<atomic_fetch_ $operation _ $sem _ $elem _ $scope_name>](ptr: *mut $elem, val: $elem) -> $elem {
                    // Written only by the asm block below.
                    let mut previous: $elem;
                    asm!(
                        // No separator is inserted between the ordering and
                        // the scope here, so `ordering!` presumably supplies
                        // its own trailing dot (its expansion is defined
                        // elsewhere in the file).
                        concat!(
                            "atom.",
                            ordering!($sem),
                            stringify!($scope_ptx),
                            ".",
                            stringify!($operation),
                            ".",
                            ptx_type!($elem),
                            " {}, [{}], {};"
                        ),
                        // Operand order matches the three `{}` placeholders:
                        // result, address, value.
                        out([<reg $bits>]) previous,
                        in(reg64) ptr,
                        in([<reg $bits>]) val,
                    );
                    previous
                }
            }
        )*
    };
}
896+
897+
atomic_fetch_op_3_reg! {
732898
// add (unsigned)
733899

734900
relaxed, add, 32, u32, device, gpu,
@@ -994,116 +1160,6 @@ atomic_fetch_op_3_reg! {
9941160
release, max, 64, i64, system, sys,
9951161
acqrel, max, 64, i64, system, sys,
9961162
volatile, max, 64, i64, system, sys,
997-
998-
// exchange
999-
1000-
relaxed, exch, 32, u32, device, gpu,
1001-
acquire, exch, 32, u32, device, gpu,
1002-
release, exch, 32, u32, device, gpu,
1003-
acqrel, exch, 32, u32, device, gpu,
1004-
volatile, exch, 32, u32, device, gpu,
1005-
1006-
relaxed, exch, 64, u64, device, gpu,
1007-
acquire, exch, 64, u64, device, gpu,
1008-
release, exch, 64, u64, device, gpu,
1009-
acqrel, exch, 64, u64, device, gpu,
1010-
volatile, exch, 64, u64, device, gpu,
1011-
1012-
relaxed, exch, 32, u32, block, cta,
1013-
acquire, exch, 32, u32, block, cta,
1014-
release, exch, 32, u32, block, cta,
1015-
acqrel, exch, 32, u32, block, cta,
1016-
volatile, exch, 32, u32, block, cta,
1017-
1018-
relaxed, exch, 64, u64, block, cta,
1019-
acquire, exch, 64, u64, block, cta,
1020-
release, exch, 64, u64, block, cta,
1021-
acqrel, exch, 64, u64, block, cta,
1022-
volatile, exch, 64, u64, block, cta,
1023-
1024-
relaxed, exch, 32, u32, system, sys,
1025-
acquire, exch, 32, u32, system, sys,
1026-
release, exch, 32, u32, system, sys,
1027-
acqrel, exch, 32, u32, system, sys,
1028-
volatile, exch, 32, u32, system, sys,
1029-
1030-
relaxed, exch, 64, u64, system, sys,
1031-
acquire, exch, 64, u64, system, sys,
1032-
release, exch, 64, u64, system, sys,
1033-
acqrel, exch, 64, u64, system, sys,
1034-
volatile, exch, 64, u64, system, sys,
1035-
1036-
relaxed, exch, 32, i32, device, gpu,
1037-
acquire, exch, 32, i32, device, gpu,
1038-
release, exch, 32, i32, device, gpu,
1039-
acqrel, exch, 32, i32, device, gpu,
1040-
volatile, exch, 32, i32, device, gpu,
1041-
1042-
relaxed, exch, 64, i64, device, gpu,
1043-
acquire, exch, 64, i64, device, gpu,
1044-
release, exch, 64, i64, device, gpu,
1045-
acqrel, exch, 64, i64, device, gpu,
1046-
volatile, exch, 64, i64, device, gpu,
1047-
1048-
relaxed, exch, 32, i32, block, cta,
1049-
acquire, exch, 32, i32, block, cta,
1050-
release, exch, 32, i32, block, cta,
1051-
acqrel, exch, 32, i32, block, cta,
1052-
volatile, exch, 32, i32, block, cta,
1053-
1054-
relaxed, exch, 64, i64, block, cta,
1055-
acquire, exch, 64, i64, block, cta,
1056-
release, exch, 64, i64, block, cta,
1057-
acqrel, exch, 64, i64, block, cta,
1058-
volatile, exch, 64, i64, block, cta,
1059-
1060-
relaxed, exch, 32, i32, system, sys,
1061-
acquire, exch, 32, i32, system, sys,
1062-
release, exch, 32, i32, system, sys,
1063-
acqrel, exch, 32, i32, system, sys,
1064-
volatile, exch, 32, i32, system, sys,
1065-
1066-
relaxed, exch, 64, i64, system, sys,
1067-
acquire, exch, 64, i64, system, sys,
1068-
release, exch, 64, i64, system, sys,
1069-
acqrel, exch, 64, i64, system, sys,
1070-
volatile, exch, 64, i64, system, sys,
1071-
1072-
relaxed, exch, 32, f32, device, gpu,
1073-
acquire, exch, 32, f32, device, gpu,
1074-
release, exch, 32, f32, device, gpu,
1075-
acqrel, exch, 32, f32, device, gpu,
1076-
volatile, exch, 32, f32, device, gpu,
1077-
1078-
relaxed, exch, 64, f64, device, gpu,
1079-
acquire, exch, 64, f64, device, gpu,
1080-
release, exch, 64, f64, device, gpu,
1081-
acqrel, exch, 64, f64, device, gpu,
1082-
volatile, exch, 64, f64, device, gpu,
1083-
1084-
relaxed, exch, 32, f32, block, cta,
1085-
acquire, exch, 32, f32, block, cta,
1086-
release, exch, 32, f32, block, cta,
1087-
acqrel, exch, 32, f32, block, cta,
1088-
volatile, exch, 32, f32, block, cta,
1089-
1090-
relaxed, exch, 64, f64, block, cta,
1091-
acquire, exch, 64, f64, block, cta,
1092-
release, exch, 64, f64, block, cta,
1093-
acqrel, exch, 64, f64, block, cta,
1094-
volatile, exch, 64, f64, block, cta,
1095-
1096-
relaxed, exch, 32, f32, system, sys,
1097-
acquire, exch, 32, f32, system, sys,
1098-
release, exch, 32, f32, system, sys,
1099-
acqrel, exch, 32, f32, system, sys,
1100-
volatile, exch, 32, f32, system, sys,
1101-
1102-
relaxed, exch, 64, f64, system, sys,
1103-
acquire, exch, 64, f64, system, sys,
1104-
release, exch, 64, f64, system, sys,
1105-
acqrel, exch, 64, f64, system, sys,
1106-
volatile, exch, 64, f64, system, sys,
11071163
}
11081164

11091165
macro_rules! atomic_fetch_op_4_reg {

0 commit comments

Comments
 (0)