@@ -1351,28 +1351,30 @@ define bfloat @test_roundeven(bfloat %a) {
1351
1351
define bfloat @test_maximum (bfloat %a , bfloat %b ) {
1352
1352
; SM70-LABEL: test_maximum(
1353
1353
; SM70: {
1354
- ; SM70-NEXT: .reg .pred %p<5>;
1355
- ; SM70-NEXT: .reg .b16 %rs<7>;
1354
+ ; SM70-NEXT: .reg .pred %p<6>;
1355
+ ; SM70-NEXT: .reg .b16 %rs<8>;
1356
1356
; SM70-NEXT: .reg .b32 %r<7>;
1357
1357
; SM70-EMPTY:
1358
1358
; SM70-NEXT: // %bb.0:
1359
1359
; SM70-NEXT: ld.param.b16 %rs1, [test_maximum_param_0];
1360
- ; SM70-NEXT: setp.eq.s16 %p1, %rs1, 0;
1361
1360
; SM70-NEXT: ld.param.b16 %rs2, [test_maximum_param_1];
1362
- ; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
1363
1361
; SM70-NEXT: cvt.u32.u16 %r1, %rs2;
1364
1362
; SM70-NEXT: shl.b32 %r2, %r1, 16;
1365
1363
; SM70-NEXT: cvt.u32.u16 %r3, %rs1;
1366
1364
; SM70-NEXT: shl.b32 %r4, %r3, 16;
1367
- ; SM70-NEXT: setp.gt.f32 %p2, %r4, %r2;
1368
- ; SM70-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2;
1369
- ; SM70-NEXT: setp.nan.f32 %p3, %r4, %r2;
1370
- ; SM70-NEXT: selp.b16 %rs5, 0x7FC0, %rs4, %p3;
1371
- ; SM70-NEXT: cvt.u32.u16 %r5, %rs5;
1365
+ ; SM70-NEXT: setp.gt.f32 %p1, %r4, %r2;
1366
+ ; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
1367
+ ; SM70-NEXT: setp.nan.f32 %p2, %r4, %r2;
1368
+ ; SM70-NEXT: selp.b16 %rs4, 0x7FC0, %rs3, %p2;
1369
+ ; SM70-NEXT: setp.eq.s16 %p3, %rs1, 0;
1370
+ ; SM70-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
1371
+ ; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0;
1372
+ ; SM70-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
1373
+ ; SM70-NEXT: cvt.u32.u16 %r5, %rs4;
1372
1374
; SM70-NEXT: shl.b32 %r6, %r5, 16;
1373
- ; SM70-NEXT: setp.eq.f32 %p4, %r6, 0f00000000;
1374
- ; SM70-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4;
1375
- ; SM70-NEXT: st.param.b16 [func_retval0], %rs6;
1375
+ ; SM70-NEXT: setp.eq.f32 %p5, %r6, 0f00000000;
1376
+ ; SM70-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5;
1377
+ ; SM70-NEXT: st.param.b16 [func_retval0], %rs7;
1376
1378
; SM70-NEXT: ret;
1377
1379
;
1378
1380
; SM80-LABEL: test_maximum(
@@ -1473,44 +1475,48 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) {
1473
1475
define <2 x bfloat> @test_maximum_v2 (<2 x bfloat> %a , <2 x bfloat> %b ) {
1474
1476
; SM70-LABEL: test_maximum_v2(
1475
1477
; SM70: {
1476
- ; SM70-NEXT: .reg .pred %p<9>;
1477
- ; SM70-NEXT: .reg .b16 %rs<15>;
1478
+ ; SM70-NEXT: .reg .pred %p<11>;
1479
+ ; SM70-NEXT: .reg .b16 %rs<19>;
1478
1480
; SM70-NEXT: .reg .b32 %r<16>;
1479
1481
; SM70-EMPTY:
1480
1482
; SM70-NEXT: // %bb.0:
1481
1483
; SM70-NEXT: ld.param.b32 %r1, [test_maximum_v2_param_0];
1482
1484
; SM70-NEXT: ld.param.b32 %r2, [test_maximum_v2_param_1];
1483
1485
; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r2;
1484
- ; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1;
1485
- ; SM70-NEXT: setp.eq.s16 %p1, %rs4, 0;
1486
- ; SM70-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1;
1487
1486
; SM70-NEXT: cvt.u32.u16 %r3, %rs2;
1488
1487
; SM70-NEXT: shl.b32 %r4, %r3, 16;
1488
+ ; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1;
1489
1489
; SM70-NEXT: cvt.u32.u16 %r5, %rs4;
1490
1490
; SM70-NEXT: shl.b32 %r6, %r5, 16;
1491
- ; SM70-NEXT: setp.gt.f32 %p2, %r6, %r4;
1492
- ; SM70-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2;
1493
- ; SM70-NEXT: setp.nan.f32 %p3, %r6, %r4;
1494
- ; SM70-NEXT: selp.b16 %rs9, 0x7FC0, %rs8, %p3;
1495
- ; SM70-NEXT: cvt.u32.u16 %r7, %rs9;
1491
+ ; SM70-NEXT: setp.gt.f32 %p1, %r6, %r4;
1492
+ ; SM70-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
1493
+ ; SM70-NEXT: setp.nan.f32 %p2, %r6, %r4;
1494
+ ; SM70-NEXT: selp.b16 %rs6, 0x7FC0, %rs5, %p2;
1495
+ ; SM70-NEXT: setp.eq.s16 %p3, %rs4, 0;
1496
+ ; SM70-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3;
1497
+ ; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0;
1498
+ ; SM70-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4;
1499
+ ; SM70-NEXT: cvt.u32.u16 %r7, %rs6;
1496
1500
; SM70-NEXT: shl.b32 %r8, %r7, 16;
1497
- ; SM70-NEXT: setp.eq.f32 %p4, %r8, 0f00000000;
1498
- ; SM70-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4;
1499
- ; SM70-NEXT: setp.eq.s16 %p5, %rs3, 0;
1500
- ; SM70-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5;
1501
+ ; SM70-NEXT: setp.eq.f32 %p5, %r8, 0f00000000;
1502
+ ; SM70-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5;
1501
1503
; SM70-NEXT: cvt.u32.u16 %r9, %rs1;
1502
1504
; SM70-NEXT: shl.b32 %r10, %r9, 16;
1503
1505
; SM70-NEXT: cvt.u32.u16 %r11, %rs3;
1504
1506
; SM70-NEXT: shl.b32 %r12, %r11, 16;
1505
1507
; SM70-NEXT: setp.gt.f32 %p6, %r12, %r10;
1506
- ; SM70-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6;
1508
+ ; SM70-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6;
1507
1509
; SM70-NEXT: setp.nan.f32 %p7, %r12, %r10;
1508
- ; SM70-NEXT: selp.b16 %rs13, 0x7FC0, %rs12, %p7;
1509
- ; SM70-NEXT: cvt.u32.u16 %r13, %rs13;
1510
+ ; SM70-NEXT: selp.b16 %rs15, 0x7FC0, %rs14, %p7;
1511
+ ; SM70-NEXT: setp.eq.s16 %p8, %rs3, 0;
1512
+ ; SM70-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8;
1513
+ ; SM70-NEXT: setp.eq.s16 %p9, %rs1, 0;
1514
+ ; SM70-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9;
1515
+ ; SM70-NEXT: cvt.u32.u16 %r13, %rs15;
1510
1516
; SM70-NEXT: shl.b32 %r14, %r13, 16;
1511
- ; SM70-NEXT: setp.eq.f32 %p8, %r14, 0f00000000;
1512
- ; SM70-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8;
1513
- ; SM70-NEXT: mov.b32 %r15, {%rs14, %rs10};
1517
+ ; SM70-NEXT: setp.eq.f32 %p10, %r14, 0f00000000;
1518
+ ; SM70-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10;
1519
+ ; SM70-NEXT: mov.b32 %r15, {%rs18, %rs13};
1514
1520
; SM70-NEXT: st.param.b32 [func_retval0], %r15;
1515
1521
; SM70-NEXT: ret;
1516
1522
;
0 commit comments