@@ -244,6 +244,8 @@ pub fn wavefrontsize() -> u32 {
244244/// Synchronize all wavefronts in a workgroup.
245245///
246246/// Each wavefront in a workgroup waits at the barrier until all wavefronts in the workgroup arrive at the barrier.
247+ ///
248+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
247249#[ inline]
248250#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
249251pub fn s_barrier ( ) {
@@ -253,6 +255,8 @@ pub fn s_barrier() {
253255/// Signal a specific barrier type.
254256///
255257/// Only for non-named barriers.
258+ ///
259+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
256260#[ inline]
257261#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
258262pub unsafe fn s_barrier_signal < const BARRIER_TYPE : i32 > ( ) {
@@ -265,6 +269,8 @@ pub unsafe fn s_barrier_signal<const BARRIER_TYPE: i32>() {
265269/// Provides access to the s_barrier_signal_first instruction;
266270/// additionally ensures that the result value is valid even when
267271/// the intrinsic is used from a wavefront that is not running in a workgroup.
272+ ///
273+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
268274#[ inline]
269275#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
270276pub unsafe fn s_barrier_signal_isfirst < const BARRIER_TYPE : i32 > ( ) -> bool {
@@ -274,6 +280,8 @@ pub unsafe fn s_barrier_signal_isfirst<const BARRIER_TYPE: i32>() -> bool {
274280/// Wait for a specific barrier type.
275281///
276282/// Only for non-named barriers.
283+ ///
284+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
277285#[ inline]
278286#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
279287pub unsafe fn s_barrier_wait < const BARRIER_TYPE : i16 > ( ) {
@@ -283,6 +291,8 @@ pub unsafe fn s_barrier_wait<const BARRIER_TYPE: i16>() {
283291/// Get the state of a specific barrier type.
284292///
285293/// The `barrier_type` argument must be uniform, otherwise behavior is undefined.
294+ ///
295+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
286296#[ inline]
287297#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
288298pub unsafe fn s_get_barrier_state < const BARRIER_TYPE : i32 > ( ) -> u32 {
@@ -292,6 +302,8 @@ pub unsafe fn s_get_barrier_state<const BARRIER_TYPE: i32>() -> u32 {
292302/// A barrier for only the threads within the current wavefront.
293303///
294304/// Does not result in an instruction but restricts the compiler.
305+ ///
306+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
295307#[ inline]
296308#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
297309pub fn wave_barrier ( ) {
@@ -315,6 +327,8 @@ pub fn wave_barrier() {
315327/// - 0x0100: All DS read instructions may be scheduled across `sched_barrier`.
316328/// - 0x0200: All DS write instructions may be scheduled across `sched_barrier`.
317329/// - 0x0400: All Transcendental (e.g. V_EXP) instructions may be scheduled across `sched_barrier`.
330+ ///
331+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
318332#[ inline]
319333#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
320334pub unsafe fn sched_barrier < const MASK : u32 > ( ) {
@@ -345,6 +359,8 @@ pub unsafe fn sched_barrier<const MASK: u32>() {
345359/// // 5 MFMA
346360/// sched_group_barrier::<8, 5, 0>()
347361/// ```
362+ ///
363+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
348364#[ inline]
349365#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
350366pub unsafe fn sched_group_barrier < const MASK : u32 , const SIZE : u32 , const SYNC_ID : u32 > ( ) {
@@ -366,6 +382,8 @@ pub fn s_sleep<const COUNT: u32>() {
366382/// Stop execution of the kernel.
367383///
368384/// This usually signals an error state.
385+ ///
386+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
369387#[ inline]
370388#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
371389pub fn s_sethalt < const VALUE : u32 > ( ) -> ! {
@@ -407,6 +425,8 @@ pub fn mbcnt_hi(value: u32, init: u32) -> u32 {
407425
408426/// Returns a bitfield (`u32` or `u64`) containing the result of its `bool` argument
409427/// in all active lanes, and zero in all inactive lanes.
428+ ///
429+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
410430#[ inline]
411431#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
412432pub fn ballot ( b : bool ) -> u64 {
@@ -419,6 +439,8 @@ pub fn ballot(b: bool) -> u64 {
419439/// While [`ballot`] converts a `bool` to a mask, `inverse_ballot` converts a mask back to a `bool`.
420440/// This means `inverse_ballot(ballot(b)) == b`.
421441/// The reverse, `ballot(inverse_ballot(value)) == value`, does not always hold because `ballot` sets inactive lanes to zero.
442+ ///
443+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
422444#[ inline]
423445#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
424446pub fn inverse_ballot ( value : u64 ) -> bool {
@@ -433,6 +455,8 @@ pub fn inverse_ballot(value: u64) -> bool {
433455/// - 2: DPP
434456///
435457/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
458+ ///
459+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
436460#[ inline]
437461#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
438462pub fn wave_reduce_umin < const STRATEGY : u32 > ( value : u32 ) -> u32 {
@@ -447,6 +471,8 @@ pub fn wave_reduce_umin<const STRATEGY: u32>(value: u32) -> u32 {
447471/// - 2: DPP
448472///
449473/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
474+ ///
475+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
450476#[ inline]
451477#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
452478pub fn wave_reduce_min < const STRATEGY : u32 > ( value : i32 ) -> i32 {
@@ -462,6 +488,8 @@ pub fn wave_reduce_min<const STRATEGY: u32>(value: i32) -> i32 {
462488/// - 2: DPP
463489///
464490/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
491+ ///
492+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
465493#[ inline]
466494#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
467495pub fn wave_reduce_umax < const STRATEGY : u32 > ( value : u32 ) -> u32 {
@@ -476,6 +504,8 @@ pub fn wave_reduce_umax<const STRATEGY: u32>(value: u32) -> u32 {
476504/// - 2: DPP
477505///
478506/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
507+ ///
508+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
479509#[ inline]
480510#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
481511pub fn wave_reduce_max < const STRATEGY : u32 > ( value : i32 ) -> i32 {
@@ -491,6 +521,8 @@ pub fn wave_reduce_max<const STRATEGY: u32>(value: i32) -> i32 {
491521/// - 2: DPP
492522///
493523/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
524+ ///
525+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
494526#[ inline]
495527#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
496528pub fn wave_reduce_add < const STRATEGY : u32 > ( value : u32 ) -> u32 {
@@ -506,6 +538,8 @@ pub fn wave_reduce_add<const STRATEGY: u32>(value: u32) -> u32 {
506538/// - 2: DPP
507539///
508540/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
541+ ///
542+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
509543#[ inline]
510544#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
511545pub fn wave_reduce_and < const STRATEGY : u32 > ( value : u32 ) -> u32 {
@@ -520,6 +554,8 @@ pub fn wave_reduce_and<const STRATEGY: u32>(value: u32) -> u32 {
520554/// - 2: DPP
521555///
522556/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
557+ ///
558+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
523559#[ inline]
524560#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
525561pub fn wave_reduce_or < const STRATEGY : u32 > ( value : u32 ) -> u32 {
@@ -534,6 +570,8 @@ pub fn wave_reduce_or<const STRATEGY: u32>(value: u32) -> u32 {
534570/// - 2: DPP
535571///
536572/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
573+ ///
574+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
537575#[ inline]
538576#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
539577pub fn wave_reduce_xor < const STRATEGY : u32 > ( value : u32 ) -> u32 {
@@ -544,12 +582,16 @@ pub fn wave_reduce_xor<const STRATEGY: u32>(value: u32) -> u32 {
544582// The following intrinsics can have multiple sizes
545583
546584/// Get `value` from the first active lane in the wavefront.
585+ ///
586+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
547587#[ inline]
548588#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
549589pub fn readfirstlane_u32 ( value : u32 ) -> u32 {
550590 llvm_readfirstlane_u32 ( value)
551591}
552592/// Get `value` from the first active lane in the wavefront.
593+ ///
594+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
553595#[ inline]
554596#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
555597pub fn readfirstlane_u64 ( value : u64 ) -> u64 {
@@ -559,6 +601,8 @@ pub fn readfirstlane_u64(value: u64) -> u64 {
559601///
560602/// The lane argument must be uniform across the currently active threads
561603/// of the current wavefront. Otherwise, the result is undefined.
604+ ///
605+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
562606#[ inline]
563607#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
564608pub unsafe fn readlane_u32 ( value : u32 , lane : u32 ) -> u32 {
@@ -568,6 +612,8 @@ pub unsafe fn readlane_u32(value: u32, lane: u32) -> u32 {
568612///
569613/// The lane argument must be uniform across the currently active threads
570614/// of the current wavefront. Otherwise, the result is undefined.
615+ ///
616+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
571617#[ inline]
572618#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
573619pub unsafe fn readlane_u64 ( value : u64 , lane : u32 ) -> u64 {
@@ -582,6 +628,8 @@ pub unsafe fn readlane_u64(value: u64, lane: u32) -> u64 {
582628///
583629/// `value` is the value returned by `lane`.
584630/// `default` is the value returned by all lanes other than `lane`.
631+ ///
632+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
585633#[ inline]
586634#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
587635pub unsafe fn writelane_u32 ( value : u32 , lane : u32 , default : u32 ) -> u32 {
@@ -596,6 +644,8 @@ pub unsafe fn writelane_u32(value: u32, lane: u32, default: u32) -> u32 {
596644///
597645/// `value` is the value returned by `lane`.
598646/// `default` is the value returned by all lanes other than `lane`.
647+ ///
648+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
599649#[ inline]
600650#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
601651pub unsafe fn writelane_u64 ( value : u64 , lane : u32 , default : u64 ) -> u64 {
@@ -605,6 +655,8 @@ pub unsafe fn writelane_u64(value: u64, lane: u32, default: u64) -> u64 {
605655/// Stop execution of the wavefront.
606656///
607657/// This usually signals the end of a successful execution.
658+ ///
659+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
608660#[ inline]
609661#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
610662pub fn endpgm ( ) -> ! {
@@ -621,6 +673,8 @@ pub fn endpgm() -> ! {
621673/// v_mov_b32 <dest> <old>
622674/// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
623675/// ```
676+ ///
677+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
624678#[ inline]
625679#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
626680pub unsafe fn update_dpp <
@@ -651,6 +705,8 @@ pub fn s_memrealtime() -> u64 {
651705///
652706/// Reading from inactive lanes returns `0`.
653707/// In case multiple values get written to the same `lane`, the value from the source lane with the higher index is taken.
708+ ///
709+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
654710#[ inline]
655711#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
656712pub unsafe fn ds_permute ( lane : u32 , value : u32 ) -> u32 {
@@ -661,6 +717,8 @@ pub unsafe fn ds_permute(lane: u32, value: u32) -> u32 {
661717/// Returns the `value` given to `ds_permute` by lane `lane`.
662718///
663719/// Reading from inactive lanes returns `0`.
720+ ///
721+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
664722#[ inline]
665723#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
666724pub unsafe fn ds_bpermute ( lane : u32 , value : u32 ) -> u32 {
@@ -680,6 +738,8 @@ pub unsafe fn perm(src0: u32, src1: u32, selector: u32) -> u32 {
680738///
681739/// The third and fourth inputs must be uniform across the current wavefront.
682740/// These are combined into a single 64-bit value representing lane selects used to swizzle within each row.
741+ ///
742+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
683743#[ inline]
684744#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
685745pub unsafe fn permlane16_u32 < const FI : bool , const BOUND_CONTROL : bool > (
@@ -696,6 +756,8 @@ pub unsafe fn permlane16_u32<const FI: bool, const BOUND_CONTROL: bool>(
696756///
697757/// The third and fourth inputs must be uniform across the current wavefront.
698758/// These are combined into a single 64-bit value representing lane selects used to swizzle within each row.
759+ ///
760+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
699761#[ inline]
700762#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
701763pub unsafe fn permlanex16_u32 < const FI : bool , const BOUND_CONTROL : bool > (
@@ -718,6 +780,8 @@ pub fn s_get_waveid_in_workgroup() -> u32 {
718780/// Swap `value` between upper and lower 32 lanes in a wavefront.
719781///
720782/// Does nothing for wave32.
783+ ///
784+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
721785#[ inline]
722786#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
723787pub unsafe fn permlane64_u32 ( value : u32 ) -> u32 {
@@ -728,6 +792,8 @@ pub unsafe fn permlane64_u32(value: u32) -> u32 {
728792/// Performs arbitrary gather-style operation within a row (16 contiguous lanes) of the second input operand.
729793///
730794/// In contrast to [`permlane16_u32`], allows each lane to specify its own gather lane.
795+ ///
796+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
731797#[ inline]
732798#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
733799pub unsafe fn permlane16_var < const FI : bool , const BOUND_CONTROL : bool > (
@@ -742,6 +808,8 @@ pub unsafe fn permlane16_var<const FI: bool, const BOUND_CONTROL: bool>(
742808/// Performs arbitrary gather-style operation across two rows (16 contiguous lanes) of the second input operand.
743809///
744810/// In contrast to [`permlanex16_u32`], allows each lane to specify its own gather lane.
811+ ///
812+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
745813#[ inline]
746814#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
747815pub unsafe fn permlanex16_var < const FI : bool , const BOUND_CONTROL : bool > (
@@ -766,6 +834,8 @@ pub fn wave_id() -> u32 {
766834/// Odd rows of the first operand are swapped with even rows of the second operand (one row is 16 lanes).
767835/// Returns a pair for the swapped registers.
768836/// The first element of the return corresponds to the swapped element of the first argument.
837+ ///
838+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
769839#[ inline]
770840#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
771841pub unsafe fn permlane16_swap < const FI : bool , const BOUND_CONTROL : bool > (
@@ -782,6 +852,8 @@ pub unsafe fn permlane16_swap<const FI: bool, const BOUND_CONTROL: bool>(
782852/// Rows 2 and 3 of the first operand are swapped with rows 0 and 1 of the second operand (one row is 16 lanes).
783853/// Returns a pair for the swapped registers.
784854/// The first element of the return corresponds to the swapped element of the first argument.
855+ ///
856+ #[ doc = include_str ! ( "intrinsic_is_convergent.md" ) ]
785857#[ inline]
786858#[ unstable( feature = "stdarch_amdgpu" , issue = "149988" ) ]
787859pub unsafe fn permlane32_swap < const FI : bool , const BOUND_CONTROL : bool > (
0 commit comments