@@ -165,6 +165,25 @@ macro_rules! ptx_type {
165165 } ;
166166}
167167
// Maps a Rust scalar type name to the PTX type suffix used by the bitwise /
// exchange flavours of the `atom` instruction (and, or, xor, exch).
//
// Those PTX operations are only defined for the untyped bit-size suffixes
// (`b32` / `b64`), so every supported 32-bit type maps to "b32" and every
// supported 64-bit type maps to "b64". Floating point types are included
// because the logic-op macro is instantiated for `f32` / `f64` as well;
// emitting ".f32" / ".f64" for them would produce PTX that does not assemble.
//
// Any other identifier is passed through verbatim via `stringify!`, so a
// caller may still spell a PTX suffix directly (e.g. `b32`).
#[allow(unused_macros)]
macro_rules! ptx_type_for_logic_op {
    (u32) => {
        "b32"
    };
    (u64) => {
        "b64"
    };
    (i32) => {
        "b32"
    };
    (i64) => {
        "b64"
    };
    (f32) => {
        "b32"
    };
    (f64) => {
        "b64"
    };
    // Fallback: use the identifier itself as the PTX type suffix.
    ($ty:ident) => {
        stringify!($ty)
    };
}
186+
168187#[ allow( unused_macros) ]
169188macro_rules! ordering {
170189 ( volatile) => {
@@ -363,7 +382,7 @@ atomic_fetch_op_2_reg! {
363382 volatile, dec, 64 , i64 , system, sys,
364383}
365384
366- macro_rules! atomic_fetch_op_3_reg {
385+ macro_rules! atomic_fetch_logic_op_3_reg {
367386 ( $( $ordering: ident, $op: ident, $width: literal, $type: ty, $scope: ident, $scope_asm: ident) ,* $( , ) * ) => {
368387 $(
369388 paste! {
@@ -384,7 +403,7 @@ macro_rules! atomic_fetch_op_3_reg {
384403 "." ,
385404 stringify!( $op) ,
386405 "." ,
387- ptx_type !( $type) ,
406+ ptx_type_for_logic_op !( $type) ,
388407 " {}, [{}], {};"
389408 ) ,
390409 out( [ <reg $width>] ) out,
@@ -398,7 +417,7 @@ macro_rules! atomic_fetch_op_3_reg {
398417 } ;
399418}
400419
401- atomic_fetch_op_3_reg ! {
420+ atomic_fetch_logic_op_3_reg ! {
402421 // and
403422
404423 relaxed, and, 32 , u32 , device, gpu,
@@ -729,6 +748,153 @@ atomic_fetch_op_3_reg! {
729748 acqrel, xor, 64 , f64 , system, sys,
730749 volatile, xor, 64 , f64 , system, sys,
731750
751+ // exchange
752+
753+ relaxed, exch, 32 , u32 , device, gpu,
754+ acquire, exch, 32 , u32 , device, gpu,
755+ release, exch, 32 , u32 , device, gpu,
756+ acqrel, exch, 32 , u32 , device, gpu,
757+ volatile, exch, 32 , u32 , device, gpu,
758+
759+ relaxed, exch, 64 , u64 , device, gpu,
760+ acquire, exch, 64 , u64 , device, gpu,
761+ release, exch, 64 , u64 , device, gpu,
762+ acqrel, exch, 64 , u64 , device, gpu,
763+ volatile, exch, 64 , u64 , device, gpu,
764+
765+ relaxed, exch, 32 , u32 , block, cta,
766+ acquire, exch, 32 , u32 , block, cta,
767+ release, exch, 32 , u32 , block, cta,
768+ acqrel, exch, 32 , u32 , block, cta,
769+ volatile, exch, 32 , u32 , block, cta,
770+
771+ relaxed, exch, 64 , u64 , block, cta,
772+ acquire, exch, 64 , u64 , block, cta,
773+ release, exch, 64 , u64 , block, cta,
774+ acqrel, exch, 64 , u64 , block, cta,
775+ volatile, exch, 64 , u64 , block, cta,
776+
777+ relaxed, exch, 32 , u32 , system, sys,
778+ acquire, exch, 32 , u32 , system, sys,
779+ release, exch, 32 , u32 , system, sys,
780+ acqrel, exch, 32 , u32 , system, sys,
781+ volatile, exch, 32 , u32 , system, sys,
782+
783+ relaxed, exch, 64 , u64 , system, sys,
784+ acquire, exch, 64 , u64 , system, sys,
785+ release, exch, 64 , u64 , system, sys,
786+ acqrel, exch, 64 , u64 , system, sys,
787+ volatile, exch, 64 , u64 , system, sys,
788+
789+ relaxed, exch, 32 , i32 , device, gpu,
790+ acquire, exch, 32 , i32 , device, gpu,
791+ release, exch, 32 , i32 , device, gpu,
792+ acqrel, exch, 32 , i32 , device, gpu,
793+ volatile, exch, 32 , i32 , device, gpu,
794+
795+ relaxed, exch, 64 , i64 , device, gpu,
796+ acquire, exch, 64 , i64 , device, gpu,
797+ release, exch, 64 , i64 , device, gpu,
798+ acqrel, exch, 64 , i64 , device, gpu,
799+ volatile, exch, 64 , i64 , device, gpu,
800+
801+ relaxed, exch, 32 , i32 , block, cta,
802+ acquire, exch, 32 , i32 , block, cta,
803+ release, exch, 32 , i32 , block, cta,
804+ acqrel, exch, 32 , i32 , block, cta,
805+ volatile, exch, 32 , i32 , block, cta,
806+
807+ relaxed, exch, 64 , i64 , block, cta,
808+ acquire, exch, 64 , i64 , block, cta,
809+ release, exch, 64 , i64 , block, cta,
810+ acqrel, exch, 64 , i64 , block, cta,
811+ volatile, exch, 64 , i64 , block, cta,
812+
813+ relaxed, exch, 32 , i32 , system, sys,
814+ acquire, exch, 32 , i32 , system, sys,
815+ release, exch, 32 , i32 , system, sys,
816+ acqrel, exch, 32 , i32 , system, sys,
817+ volatile, exch, 32 , i32 , system, sys,
818+
819+ relaxed, exch, 64 , i64 , system, sys,
820+ acquire, exch, 64 , i64 , system, sys,
821+ release, exch, 64 , i64 , system, sys,
822+ acqrel, exch, 64 , i64 , system, sys,
823+ volatile, exch, 64 , i64 , system, sys,
824+
825+ relaxed, exch, 32 , f32 , device, gpu,
826+ acquire, exch, 32 , f32 , device, gpu,
827+ release, exch, 32 , f32 , device, gpu,
828+ acqrel, exch, 32 , f32 , device, gpu,
829+ volatile, exch, 32 , f32 , device, gpu,
830+
831+ relaxed, exch, 64 , f64 , device, gpu,
832+ acquire, exch, 64 , f64 , device, gpu,
833+ release, exch, 64 , f64 , device, gpu,
834+ acqrel, exch, 64 , f64 , device, gpu,
835+ volatile, exch, 64 , f64 , device, gpu,
836+
837+ relaxed, exch, 32 , f32 , block, cta,
838+ acquire, exch, 32 , f32 , block, cta,
839+ release, exch, 32 , f32 , block, cta,
840+ acqrel, exch, 32 , f32 , block, cta,
841+ volatile, exch, 32 , f32 , block, cta,
842+
843+ relaxed, exch, 64 , f64 , block, cta,
844+ acquire, exch, 64 , f64 , block, cta,
845+ release, exch, 64 , f64 , block, cta,
846+ acqrel, exch, 64 , f64 , block, cta,
847+ volatile, exch, 64 , f64 , block, cta,
848+
849+ relaxed, exch, 32 , f32 , system, sys,
850+ acquire, exch, 32 , f32 , system, sys,
851+ release, exch, 32 , f32 , system, sys,
852+ acqrel, exch, 32 , f32 , system, sys,
853+ volatile, exch, 32 , f32 , system, sys,
854+
855+ relaxed, exch, 64 , f64 , system, sys,
856+ acquire, exch, 64 , f64 , system, sys,
857+ release, exch, 64 , f64 , system, sys,
858+ acqrel, exch, 64 , f64 , system, sys,
859+ volatile, exch, 64 , f64 , system, sys,
860+ }
861+
// Generates one `pub unsafe fn atomic_fetch_<op>_<ordering>_<type>_<scope>`
// per comma-separated entry (trailing commas are accepted).
//
// Entry fields, in order:
//   $ordering  - passed to `ordering!` to build the start of the mnemonic
//   $op        - operation name, stringified into the mnemonic and the docs
//   $width     - register width literal, pasted into the `reg<width>` class
//   $type      - Rust type of the pointee, the operand and the return value
//   $scope     - scope name used in the generated function name
//   $scope_asm - scope spelling stringified into the mnemonic
//
// Each generated function issues a single `atom` instruction with three
// operands: the output register, the address held in a 64-bit register, and
// the value operand. The value written to the output register is returned.
macro_rules! atomic_fetch_op_3_reg {
    ($($ordering:ident, $op:ident, $width:literal, $type:ty, $scope:ident, $scope_asm:ident),* $(,)*) => {
        $(
            paste! {
                #[$crate::gpu_only]
                #[allow(clippy::missing_safety_doc)]
                #[doc = concat!(
                    "Fetches the value in ptr, performs a ",
                    stringify!($op),
                    ", and returns the original value"
                )]
                pub unsafe fn [<atomic_fetch_ $op _ $ordering _ $type _ $scope>](ptr: *mut $type, val: $type) -> $type {
                    // Receives the instruction's result; its type is inferred
                    // from the function's return type.
                    let mut previous;
                    asm!(
                        // Mnemonic: "atom." <ordering> <scope> "." <op> "." <ptx type>
                        concat!(
                            "atom.",
                            ordering!($ordering),
                            stringify!($scope_asm),
                            ".",
                            stringify!($op),
                            ".",
                            ptx_type!($type),
                            " {}, [{}], {};"
                        ),
                        out([<reg $width>]) previous,
                        in(reg64) ptr,
                        in([<reg $width>]) val,
                    );
                    previous
                }
            }
        )*
    };
}
896+
897+ atomic_fetch_op_3_reg ! {
732898 // add (unsigned)
733899
734900 relaxed, add, 32 , u32 , device, gpu,
@@ -994,116 +1160,6 @@ atomic_fetch_op_3_reg! {
9941160 release, max, 64 , i64 , system, sys,
9951161 acqrel, max, 64 , i64 , system, sys,
9961162 volatile, max, 64 , i64 , system, sys,
997-
998- // exchange
999-
1000- relaxed, exch, 32 , u32 , device, gpu,
1001- acquire, exch, 32 , u32 , device, gpu,
1002- release, exch, 32 , u32 , device, gpu,
1003- acqrel, exch, 32 , u32 , device, gpu,
1004- volatile, exch, 32 , u32 , device, gpu,
1005-
1006- relaxed, exch, 64 , u64 , device, gpu,
1007- acquire, exch, 64 , u64 , device, gpu,
1008- release, exch, 64 , u64 , device, gpu,
1009- acqrel, exch, 64 , u64 , device, gpu,
1010- volatile, exch, 64 , u64 , device, gpu,
1011-
1012- relaxed, exch, 32 , u32 , block, cta,
1013- acquire, exch, 32 , u32 , block, cta,
1014- release, exch, 32 , u32 , block, cta,
1015- acqrel, exch, 32 , u32 , block, cta,
1016- volatile, exch, 32 , u32 , block, cta,
1017-
1018- relaxed, exch, 64 , u64 , block, cta,
1019- acquire, exch, 64 , u64 , block, cta,
1020- release, exch, 64 , u64 , block, cta,
1021- acqrel, exch, 64 , u64 , block, cta,
1022- volatile, exch, 64 , u64 , block, cta,
1023-
1024- relaxed, exch, 32 , u32 , system, sys,
1025- acquire, exch, 32 , u32 , system, sys,
1026- release, exch, 32 , u32 , system, sys,
1027- acqrel, exch, 32 , u32 , system, sys,
1028- volatile, exch, 32 , u32 , system, sys,
1029-
1030- relaxed, exch, 64 , u64 , system, sys,
1031- acquire, exch, 64 , u64 , system, sys,
1032- release, exch, 64 , u64 , system, sys,
1033- acqrel, exch, 64 , u64 , system, sys,
1034- volatile, exch, 64 , u64 , system, sys,
1035-
1036- relaxed, exch, 32 , i32 , device, gpu,
1037- acquire, exch, 32 , i32 , device, gpu,
1038- release, exch, 32 , i32 , device, gpu,
1039- acqrel, exch, 32 , i32 , device, gpu,
1040- volatile, exch, 32 , i32 , device, gpu,
1041-
1042- relaxed, exch, 64 , i64 , device, gpu,
1043- acquire, exch, 64 , i64 , device, gpu,
1044- release, exch, 64 , i64 , device, gpu,
1045- acqrel, exch, 64 , i64 , device, gpu,
1046- volatile, exch, 64 , i64 , device, gpu,
1047-
1048- relaxed, exch, 32 , i32 , block, cta,
1049- acquire, exch, 32 , i32 , block, cta,
1050- release, exch, 32 , i32 , block, cta,
1051- acqrel, exch, 32 , i32 , block, cta,
1052- volatile, exch, 32 , i32 , block, cta,
1053-
1054- relaxed, exch, 64 , i64 , block, cta,
1055- acquire, exch, 64 , i64 , block, cta,
1056- release, exch, 64 , i64 , block, cta,
1057- acqrel, exch, 64 , i64 , block, cta,
1058- volatile, exch, 64 , i64 , block, cta,
1059-
1060- relaxed, exch, 32 , i32 , system, sys,
1061- acquire, exch, 32 , i32 , system, sys,
1062- release, exch, 32 , i32 , system, sys,
1063- acqrel, exch, 32 , i32 , system, sys,
1064- volatile, exch, 32 , i32 , system, sys,
1065-
1066- relaxed, exch, 64 , i64 , system, sys,
1067- acquire, exch, 64 , i64 , system, sys,
1068- release, exch, 64 , i64 , system, sys,
1069- acqrel, exch, 64 , i64 , system, sys,
1070- volatile, exch, 64 , i64 , system, sys,
1071-
1072- relaxed, exch, 32 , f32 , device, gpu,
1073- acquire, exch, 32 , f32 , device, gpu,
1074- release, exch, 32 , f32 , device, gpu,
1075- acqrel, exch, 32 , f32 , device, gpu,
1076- volatile, exch, 32 , f32 , device, gpu,
1077-
1078- relaxed, exch, 64 , f64 , device, gpu,
1079- acquire, exch, 64 , f64 , device, gpu,
1080- release, exch, 64 , f64 , device, gpu,
1081- acqrel, exch, 64 , f64 , device, gpu,
1082- volatile, exch, 64 , f64 , device, gpu,
1083-
1084- relaxed, exch, 32 , f32 , block, cta,
1085- acquire, exch, 32 , f32 , block, cta,
1086- release, exch, 32 , f32 , block, cta,
1087- acqrel, exch, 32 , f32 , block, cta,
1088- volatile, exch, 32 , f32 , block, cta,
1089-
1090- relaxed, exch, 64 , f64 , block, cta,
1091- acquire, exch, 64 , f64 , block, cta,
1092- release, exch, 64 , f64 , block, cta,
1093- acqrel, exch, 64 , f64 , block, cta,
1094- volatile, exch, 64 , f64 , block, cta,
1095-
1096- relaxed, exch, 32 , f32 , system, sys,
1097- acquire, exch, 32 , f32 , system, sys,
1098- release, exch, 32 , f32 , system, sys,
1099- acqrel, exch, 32 , f32 , system, sys,
1100- volatile, exch, 32 , f32 , system, sys,
1101-
1102- relaxed, exch, 64 , f64 , system, sys,
1103- acquire, exch, 64 , f64 , system, sys,
1104- release, exch, 64 , f64 , system, sys,
1105- acqrel, exch, 64 , f64 , system, sys,
1106- volatile, exch, 64 , f64 , system, sys,
11071163}
11081164
11091165macro_rules! atomic_fetch_op_4_reg {
0 commit comments