Skip to content

Commit f5031c6

Browse files
committed
[AArch64] Fix postinc operands for Cortex-A510 scheduling
Similar to D159254, this fixes the order of the WriteAdr operands on post/pre-inc loads/stores in the Cortex-A510 scheduling model. I will make the same change for the other models too, but this one will be the most impactful because it is the default CPU scheduling model. Closes #68518.
1 parent 19d1da5 commit f5031c6

File tree

9 files changed

+1167
-1167
lines changed

9 files changed

+1167
-1167
lines changed

llvm/lib/Target/AArch64/AArch64SchedA510.td

+34-34
Original file line numberDiff line numberDiff line change
@@ -295,49 +295,49 @@ def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
295295
def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
296296
def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
297297

298-
def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
299-
def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
300-
def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
301-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
302-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
303-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
304-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
305-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
306-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
307-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
298+
def : InstRW<[WriteAdr, CortexA510WriteVLD1], (instregex "LD1i(8|16|32|64)_POST$")>;
299+
def : InstRW<[WriteAdr, CortexA510WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
300+
def : InstRW<[WriteAdr, CortexA510WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
301+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
302+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
303+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
304+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
305+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
306+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
307+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
308308

309309
// 2-element structures
310310
def : InstRW<[CortexA510WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
311311
def : InstRW<[CortexA510WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
312312
def : InstRW<[CortexA510WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
313313
def : InstRW<[CortexA510WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
314314

315-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
316-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
317-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
318-
def : InstRW<[CortexA510WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
315+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
316+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
317+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
318+
def : InstRW<[WriteAdr, CortexA510WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
319319

320320
// 3-element structures
321321
def : InstRW<[CortexA510WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
322322
def : InstRW<[CortexA510WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
323323
def : InstRW<[CortexA510WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
324324
def : InstRW<[CortexA510WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
325325

326-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
327-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
328-
def : InstRW<[CortexA510WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
329-
def : InstRW<[CortexA510WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
326+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD3i(8|16|32|64)_POST$")>;
327+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
328+
def : InstRW<[WriteAdr, CortexA510WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
329+
def : InstRW<[WriteAdr, CortexA510WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
330330

331331
// 4-element structures
332332
def : InstRW<[CortexA510WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs.
333333
def : InstRW<[CortexA510WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
334334
def : InstRW<[CortexA510WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs.
335335
def : InstRW<[CortexA510WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
336336

337-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
338-
def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
339-
def : InstRW<[CortexA510WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
340-
def : InstRW<[CortexA510WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
337+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD4i(8|16|32|64)_POST$")>;
338+
def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
339+
def : InstRW<[WriteAdr, CortexA510WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
340+
def : InstRW<[WriteAdr, CortexA510WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
341341

342342
//---
343343
// Vector Stores
@@ -347,28 +347,28 @@ def : InstRW<[CortexA510WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d
347347
def : InstRW<[CortexA510WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
348348
def : InstRW<[CortexA510WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
349349
def : InstRW<[CortexA510WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
350-
def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
351-
def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
352-
def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
353-
def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
354-
def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
350+
def : InstRW<[WriteAdr, CortexA510WriteVST1], (instregex "ST1i(8|16|32|64)_POST$")>;
351+
def : InstRW<[WriteAdr, CortexA510WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
352+
def : InstRW<[WriteAdr, CortexA510WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
353+
def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
354+
def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
355355

356356
def : InstRW<[CortexA510WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
357357
def : InstRW<[CortexA510WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
358358
def : InstRW<[CortexA510WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
359-
def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
360-
def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
361-
def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
359+
def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST2i(8|16|32|64)_POST$")>;
360+
def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
361+
def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
362362

363363
def : InstRW<[CortexA510WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
364364
def : InstRW<[CortexA510WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
365-
def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
366-
def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
365+
def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST3i(8|16|32|64)_POST$")>;
366+
def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
367367

368368
def : InstRW<[CortexA510WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
369369
def : InstRW<[CortexA510WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
370-
def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
371-
def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
370+
def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST4i(8|16|32|64)_POST$")>;
371+
def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
372372

373373
//---
374374
// Floating Point Conversions, MAC, DIV, SQRT

llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ define void @f_undef_1(<8 x i64> %a, ptr %dst) {
4747
; CHECK-LABEL: f_undef_1:
4848
; CHECK: // %bb.0: // %BB
4949
; CHECK-NEXT: mov v16.16b, v0.16b
50-
; CHECK-NEXT: mov x8, x0
5150
; CHECK-NEXT: mov v5.16b, v2.16b
5251
; CHECK-NEXT: // kill: def $q1 killed $q1 def $q1_q2
5352
; CHECK-NEXT: // kill: def $q3 killed $q3 def $q3_q4
53+
; CHECK-NEXT: mov x8, x0
5454
; CHECK-NEXT: mov v2.16b, v1.16b
5555
; CHECK-NEXT: mov v4.16b, v3.16b
5656
; CHECK-NEXT: mov v17.16b, v16.16b

llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll

+9-9
Original file line numberDiff line numberDiff line change
@@ -14320,8 +14320,8 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) {
1432014320
; CHECK: ; %bb.0:
1432114321
; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
1432214322
; CHECK-NEXT: ld1.b { v0 }[1], [x0], #1
14323-
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1432414323
; CHECK-NEXT: str x0, [x1]
14324+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1432514325
; CHECK-NEXT: ret
1432614326
;
1432714327
; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1lane:
@@ -14345,8 +14345,8 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i
1434514345
; CHECK: ; %bb.0:
1434614346
; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
1434714347
; CHECK-NEXT: ld1.b { v0 }[1], [x0], x2
14348-
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1434914348
; CHECK-NEXT: str x0, [x1]
14349+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1435014350
; CHECK-NEXT: ret
1435114351
;
1435214352
; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1lane:
@@ -14413,8 +14413,8 @@ define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A)
1441314413
; CHECK: ; %bb.0:
1441414414
; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
1441514415
; CHECK-NEXT: ld1.h { v0 }[1], [x0], #2
14416-
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1441714416
; CHECK-NEXT: str x0, [x1]
14417+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1441814418
; CHECK-NEXT: ret
1441914419
;
1442014420
; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1lane:
@@ -14439,8 +14439,8 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
1443914439
; CHECK-NEXT: lsl x8, x2, #1
1444014440
; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
1444114441
; CHECK-NEXT: ld1.h { v0 }[1], [x0], x8
14442-
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1444314442
; CHECK-NEXT: str x0, [x1]
14443+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1444414444
; CHECK-NEXT: ret
1444514445
;
1444614446
; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1lane:
@@ -14507,8 +14507,8 @@ define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A)
1450714507
; CHECK: ; %bb.0:
1450814508
; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
1450914509
; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4
14510-
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1451114510
; CHECK-NEXT: str x0, [x1]
14511+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1451214512
; CHECK-NEXT: ret
1451314513
;
1451414514
; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1lane:
@@ -14533,8 +14533,8 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
1453314533
; CHECK-NEXT: lsl x8, x2, #2
1453414534
; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
1453514535
; CHECK-NEXT: ld1.s { v0 }[1], [x0], x8
14536-
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1453714536
; CHECK-NEXT: str x0, [x1]
14537+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1453814538
; CHECK-NEXT: ret
1453914539
;
1454014540
; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld1lane:
@@ -14644,8 +14644,8 @@ define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float>
1464414644
; CHECK: ; %bb.0:
1464514645
; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
1464614646
; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4
14647-
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1464814647
; CHECK-NEXT: str x0, [x1]
14648+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1464914649
; CHECK-NEXT: ret
1465014650
;
1465114651
; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1lane:
@@ -14670,8 +14670,8 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2
1467014670
; CHECK-NEXT: lsl x8, x2, #2
1467114671
; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
1467214672
; CHECK-NEXT: ld1.s { v0 }[1], [x0], x8
14673-
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1467414673
; CHECK-NEXT: str x0, [x1]
14674+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1467514675
; CHECK-NEXT: ret
1467614676
;
1467714677
; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld1lane:
@@ -14776,9 +14776,9 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr,
1477614776
; CHECK-NEXT: lsl x8, x2, #1
1477714777
; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
1477814778
; CHECK-NEXT: ld1.h { v0 }[1], [x0], x8
14779-
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1478014779
; CHECK-NEXT: str x0, [x1]
1478114780
; CHECK-NEXT: ldr d1, [x3]
14781+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
1478214782
; CHECK-NEXT: cnt.8b v1, v1
1478314783
; CHECK-NEXT: uaddlp.4h v1, v1
1478414784
; CHECK-NEXT: uaddlp.2s v1, v1

0 commit comments

Comments
 (0)