diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td index fab2cda878075..1afbc5d9102ca 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA510.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td @@ -295,16 +295,16 @@ def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Threev(16b|8h|4s|2d)$")>; def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD1], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; // 2-element structures def : InstRW<[CortexA510WriteVLD2], (instregex "LD2i(8|16|32|64)$")>; @@ -312,10 +312,10 @@ def : InstRW<[CortexA510WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$ def : InstRW<[CortexA510WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; def : InstRW<[CortexA510WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; -def : InstRW<[CortexA510WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD2i(8|16|32|64)(_POST)?$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; // 3-element structures def : InstRW<[CortexA510WriteVLD2], (instregex "LD3i(8|16|32|64)$")>; @@ -323,10 +323,10 @@ def : InstRW<[CortexA510WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$ def : InstRW<[CortexA510WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>; def : InstRW<[CortexA510WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA510WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA510WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; // 4-element structures def : InstRW<[CortexA510WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs. @@ -334,10 +334,10 @@ def : InstRW<[CortexA510WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$ def : InstRW<[CortexA510WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs. def : InstRW<[CortexA510WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA510WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA510WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; //--- // Vector Stores @@ -347,28 +347,28 @@ def : InstRW<[CortexA510WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d def : InstRW<[CortexA510WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[CortexA510WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[CortexA510WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST1], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; def : InstRW<[CortexA510WriteVST2], (instregex "ST2i(8|16|32|64)$")>; def : InstRW<[CortexA510WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>; def : InstRW<[CortexA510WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; def : InstRW<[CortexA510WriteVST2], (instregex "ST3i(8|16|32|64)$")>; def : InstRW<[CortexA510WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>; def : InstRW<[CortexA510WriteVST2], (instregex "ST4i(8|16|32|64)$")>; def : InstRW<[CortexA510WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; //--- // Floating Point Conversions, MAC, DIV, SQRT diff --git a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll index cbda7b027587d..07fbe5d7310f6 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll @@ -47,10 +47,10 @@ define void @f_undef_1(<8 x i64> %a, ptr %dst) { ; CHECK-LABEL: f_undef_1: ; CHECK: // %bb.0: // %BB ; CHECK-NEXT: mov v16.16b, v0.16b -; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: mov v5.16b, v2.16b ; CHECK-NEXT: // kill: def $q1 killed $q1 def $q1_q2 ; CHECK-NEXT: // kill: def $q3 killed $q3 def $q3_q4 +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: mov v4.16b, v3.16b ; CHECK-NEXT: mov v17.16b, v16.16b diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 6657b19d24929..7d73e1c6c1d7f 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -14320,8 +14320,8 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) { ; CHECK: ; %bb.0: ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ld1.b { v0 }[1], [x0], #1 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1lane: @@ -14345,8 +14345,8 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i ; CHECK: ; %bb.0: ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ld1.b { v0 }[1], [x0], x2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1lane: @@ -14413,8 +14413,8 @@ define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A) ; CHECK: ; %bb.0: ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ld1.h { v0 }[1], [x0], #2 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1lane: @@ -14439,8 +14439,8 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x ; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ld1.h { v0 }[1], [x0], x8 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1lane: @@ -14507,8 +14507,8 @@ define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A) ; CHECK: ; %bb.0: ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1lane: @@ -14533,8 +14533,8 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x ; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ld1.s { v0 }[1], [x0], x8 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld1lane: @@ -14644,8 +14644,8 @@ define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float> ; CHECK: ; %bb.0: ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1lane: @@ -14670,8 +14670,8 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 ; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ld1.s { v0 }[1], [x0], x8 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret ; ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld1lane: @@ -14776,9 +14776,9 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr, ; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ld1.h { v0 }[1], [x0], x8 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: str x0, [x1] ; CHECK-NEXT: ldr d1, [x3] +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: cnt.8b v1, v1 ; CHECK-NEXT: uaddlp.4h v1, v1 ; CHECK-NEXT: uaddlp.2s v1, v1 diff --git a/llvm/test/CodeGen/AArch64/extbinopload.ll b/llvm/test/CodeGen/AArch64/extbinopload.ll index 99f573795489a..849fc7aa00a8e 100644 --- a/llvm/test/CodeGen/AArch64/extbinopload.ll +++ b/llvm/test/CodeGen/AArch64/extbinopload.ll @@ -365,15 +365,15 @@ define <12 x i32> @load_bv_3xv4i8_i32(ptr %p, ptr %q, ptr %r) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s0, s1, [x0] ; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4 +; CHECK-NEXT: ld1 { v1.s }[1], [x1] ; CHECK-NEXT: ldp s3, s2, [x2] +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ld1 { v1.s }[1], [x1] ; CHECK-NEXT: ushll v2.8h, v2.8b, #0 ; CHECK-NEXT: ushll v3.8h, v3.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll v2.4s, v2.4h, #3 ; CHECK-NEXT: ushll2 v4.4s, v1.8h, #3 ; CHECK-NEXT: ushll v1.4s, v1.4h, #3 +; CHECK-NEXT: ushll v2.4s, v2.4h, #3 ; CHECK-NEXT: uaddw v2.4s, v2.4s, v3.4h ; CHECK-NEXT: uaddw2 v3.4s, v4.4s, v0.8h ; CHECK-NEXT: uaddw v0.4s, v1.4s, v0.4h @@ -407,10 +407,10 @@ define <16 x i16> @load_bv_4xv4i8_i32(ptr %p, ptr %q, ptr %r, ptr %s) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s0, s1, [x0] ; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4 -; CHECK-NEXT: ldp s2, s3, [x2] ; CHECK-NEXT: ld1 { v1.s }[1], [x1] -; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 +; CHECK-NEXT: ldp s2, s3, [x2] ; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b +; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 ; CHECK-NEXT: ld1 { v3.s }[1], [x3] ; CHECK-NEXT: uaddl v1.8h, v2.8b, v3.8b ; CHECK-NEXT: ret @@ -444,10 +444,10 @@ define <8 x i32> @double_bv_2xv4i8_i32(ptr %p, ptr %q, ptr %r, ptr %s) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s0, s1, [x0] ; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4 -; CHECK-NEXT: ldp s2, s3, [x2] ; CHECK-NEXT: ld1 { v1.s }[1], [x1] -; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 +; CHECK-NEXT: ldp s2, s3, [x2] ; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b +; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 ; CHECK-NEXT: ld1 { v3.s }[1], [x3] ; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b ; CHECK-NEXT: shll v3.4s, v2.4h, #16 @@ -489,18 +489,18 @@ define <16 x i32> @double_bv_4xv4i8_i32(ptr %p, ptr %q, ptr %r, ptr %s, ptr %t, ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s0, s1, [x0] ; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4 -; CHECK-NEXT: ldp s2, s3, [x2] ; CHECK-NEXT: ld1 { v1.s }[1], [x1] -; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 -; CHECK-NEXT: ldp s4, s5, [x4] +; CHECK-NEXT: ldp s2, s3, [x2] ; CHECK-NEXT: usubl v1.8h, v0.8b, v1.8b +; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 ; CHECK-NEXT: ld1 { v3.s }[1], [x3] -; CHECK-NEXT: ld1 { v4.s }[1], [x5], #4 -; CHECK-NEXT: ldp s6, s7, [x6] +; CHECK-NEXT: ldp s4, s5, [x4] ; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b +; CHECK-NEXT: ld1 { v4.s }[1], [x5], #4 ; CHECK-NEXT: ld1 { v5.s }[1], [x5] -; CHECK-NEXT: ld1 { v6.s }[1], [x7], #4 +; CHECK-NEXT: ldp s6, s7, [x6] ; CHECK-NEXT: usubl v4.8h, v4.8b, v5.8b +; CHECK-NEXT: ld1 { v6.s }[1], [x7], #4 ; CHECK-NEXT: ld1 { v7.s }[1], [x7] ; CHECK-NEXT: usubl v5.8h, v6.8b, v7.8b ; CHECK-NEXT: shll v0.4s, v4.4h, #16 @@ -647,7 +647,7 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s1, [x0] ; CHECK-NEXT: add x8, x3, #8 -; CHECK-NEXT: add x11, x1, #12 +; CHECK-NEXT: add x11, x3, #12 ; CHECK-NEXT: str s1, [x4] ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: ldp s0, s5, [x2] @@ -664,16 +664,16 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) { ; CHECK-NEXT: add x9, x1, #4 ; CHECK-NEXT: uzp1 v1.8b, v1.8b, v2.8b ; CHECK-NEXT: mov v0.b[11], w10 -; CHECK-NEXT: add x10, x3, #12 +; CHECK-NEXT: add x10, x1, #12 ; CHECK-NEXT: ld1 { v0.s }[3], [x3], #4 ; CHECK-NEXT: ldr s4, [x0, #12] ; CHECK-NEXT: ldp s3, s16, [x0, #4] -; CHECK-NEXT: ldp s6, s7, [x2, #8] -; CHECK-NEXT: ld1 { v4.s }[1], [x11] ; CHECK-NEXT: ld1 { v5.s }[1], [x3] +; CHECK-NEXT: ldp s6, s7, [x2, #8] +; CHECK-NEXT: ld1 { v4.s }[1], [x10] ; CHECK-NEXT: ld1 { v3.s }[1], [x9] ; CHECK-NEXT: ld1 { v6.s }[1], [x8] -; CHECK-NEXT: ld1 { v7.s }[1], [x10] +; CHECK-NEXT: ld1 { v7.s }[1], [x11] ; CHECK-NEXT: add x8, x1, #8 ; CHECK-NEXT: ld1 { v16.s }[1], [x8] ; CHECK-NEXT: uaddl v2.8h, v3.8b, v4.8b @@ -757,39 +757,39 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) { define <16 x i32> @extrause_shuffle(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) { ; CHECK-LABEL: extrause_shuffle: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp s2, s7, [x0, #8] -; CHECK-NEXT: add x8, x3, #8 -; CHECK-NEXT: ldr s18, [x1, #12] -; CHECK-NEXT: ldp s0, s1, [x2] -; CHECK-NEXT: ldp s3, s16, [x0] -; CHECK-NEXT: add x9, x1, #8 -; CHECK-NEXT: mov v4.16b, v7.16b -; CHECK-NEXT: ldp s6, s17, [x2, #8] +; CHECK-NEXT: ldp s0, s1, [x0, #8] +; CHECK-NEXT: add x8, x1, #8 +; CHECK-NEXT: ldr s6, [x1, #12] +; CHECK-NEXT: ldp s17, s18, [x2, #8] +; CHECK-NEXT: ldp s2, s3, [x2] +; CHECK-NEXT: add x9, x3, #8 +; CHECK-NEXT: mov v4.16b, v1.16b +; CHECK-NEXT: ldp s7, s16, [x0] ; CHECK-NEXT: ldr s5, [x3, #12] -; CHECK-NEXT: mov v7.s[1], v18.s[0] -; CHECK-NEXT: ld1 { v0.s }[1], [x3], #4 -; CHECK-NEXT: mov v4.s[1], v18.s[0] -; CHECK-NEXT: ld1 { v3.s }[1], [x1], #4 -; CHECK-NEXT: ld1 { v2.s }[1], [x9] -; CHECK-NEXT: ld1 { v6.s }[1], [x8] -; CHECK-NEXT: ld1 { v1.s }[1], [x3] +; CHECK-NEXT: mov v1.s[1], v6.s[0] +; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 +; CHECK-NEXT: mov v4.s[1], v6.s[0] +; CHECK-NEXT: ld1 { v7.s }[1], [x1], #4 ; CHECK-NEXT: ld1 { v16.s }[1], [x1] -; CHECK-NEXT: mov v4.s[2], v17.s[0] -; CHECK-NEXT: mov v17.s[1], v5.s[0] -; CHECK-NEXT: uaddl v2.8h, v3.8b, v2.8b -; CHECK-NEXT: uaddl v6.8h, v0.8b, v6.8b -; CHECK-NEXT: uaddl v7.8h, v16.8b, v7.8b -; CHECK-NEXT: uaddl v1.8h, v1.8b, v17.8b +; CHECK-NEXT: ld1 { v3.s }[1], [x3] +; CHECK-NEXT: ld1 { v0.s }[1], [x8] +; CHECK-NEXT: ld1 { v17.s }[1], [x9] +; CHECK-NEXT: mov v4.s[2], v18.s[0] +; CHECK-NEXT: mov v18.s[1], v5.s[0] +; CHECK-NEXT: uaddl v1.8h, v16.8b, v1.8b +; CHECK-NEXT: uaddl v6.8h, v7.8b, v0.8b +; CHECK-NEXT: uaddl v2.8h, v2.8b, v17.8b +; CHECK-NEXT: uaddl v3.8h, v3.8b, v18.8b +; CHECK-NEXT: ushll v0.4s, v1.4h, #3 +; CHECK-NEXT: ushll2 v1.4s, v1.8h, #3 ; CHECK-NEXT: mov v4.s[3], v5.s[0] -; CHECK-NEXT: ushll v0.4s, v7.4h, #3 -; CHECK-NEXT: ushll v16.4s, v1.4h, #3 -; CHECK-NEXT: ushll2 v3.4s, v1.8h, #3 -; CHECK-NEXT: ushll2 v1.4s, v7.8h, #3 -; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h +; CHECK-NEXT: uaddw v0.4s, v0.4s, v6.4h +; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v6.8h +; CHECK-NEXT: ushll v7.4s, v3.4h, #3 +; CHECK-NEXT: ushll2 v3.4s, v3.8h, #3 ; CHECK-NEXT: str q4, [x4] -; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v2.8h -; CHECK-NEXT: uaddw2 v3.4s, v3.4s, v6.8h -; CHECK-NEXT: uaddw v2.4s, v16.4s, v6.4h +; CHECK-NEXT: uaddw2 v3.4s, v3.4s, v2.8h +; CHECK-NEXT: uaddw v2.4s, v7.4s, v2.4h ; CHECK-NEXT: ret %lp1 = load <4 x i8>, ptr %p %p2 = getelementptr i8, ptr %p, i32 4 @@ -859,32 +859,32 @@ define <16 x i32> @extrause_ext(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) { ; CHECK-LABEL: extrause_ext: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s1, s2, [x2] -; CHECK-NEXT: add x8, x3, #8 -; CHECK-NEXT: ldp s3, s5, [x0] -; CHECK-NEXT: add x9, x1, #8 ; CHECK-NEXT: add x10, x3, #12 +; CHECK-NEXT: ldp s3, s5, [x0] +; CHECK-NEXT: add x11, x1, #12 ; CHECK-NEXT: ldp s6, s0, [x2, #8] +; CHECK-NEXT: add x8, x3, #8 ; CHECK-NEXT: ldp s7, s4, [x0, #8] -; CHECK-NEXT: add x11, x1, #12 +; CHECK-NEXT: add x9, x1, #8 ; CHECK-NEXT: ld1 { v1.s }[1], [x3], #4 ; CHECK-NEXT: ld1 { v3.s }[1], [x1], #4 -; CHECK-NEXT: ld1 { v0.s }[1], [x10] +; CHECK-NEXT: ld1 { v5.s }[1], [x1] ; CHECK-NEXT: ld1 { v4.s }[1], [x11] +; CHECK-NEXT: ld1 { v2.s }[1], [x3] +; CHECK-NEXT: ld1 { v0.s }[1], [x10] ; CHECK-NEXT: ld1 { v7.s }[1], [x9] ; CHECK-NEXT: ld1 { v6.s }[1], [x8] -; CHECK-NEXT: ld1 { v2.s }[1], [x3] -; CHECK-NEXT: ld1 { v5.s }[1], [x1] +; CHECK-NEXT: uaddl v5.8h, v5.8b, v4.8b +; CHECK-NEXT: uaddl v2.8h, v2.8b, v0.8b ; CHECK-NEXT: ushll v16.8h, v0.8b, #0 ; CHECK-NEXT: uaddl v3.8h, v3.8b, v7.8b ; CHECK-NEXT: uaddl v6.8h, v1.8b, v6.8b -; CHECK-NEXT: uaddl v2.8h, v2.8b, v0.8b -; CHECK-NEXT: uaddl v5.8h, v5.8b, v4.8b ; CHECK-NEXT: ushll v4.8h, v4.8b, #0 +; CHECK-NEXT: ushll v1.4s, v5.4h, #3 ; CHECK-NEXT: ushll v7.4s, v2.4h, #3 ; CHECK-NEXT: ushll2 v2.4s, v2.8h, #3 -; CHECK-NEXT: stp q4, q16, [x4] -; CHECK-NEXT: ushll v1.4s, v5.4h, #3 ; CHECK-NEXT: ushll2 v5.4s, v5.8h, #3 +; CHECK-NEXT: stp q4, q16, [x4] ; CHECK-NEXT: uaddw v0.4s, v1.4s, v3.4h ; CHECK-NEXT: uaddw2 v1.4s, v5.4s, v3.8h ; CHECK-NEXT: uaddw2 v3.4s, v2.4s, v6.8h @@ -958,33 +958,33 @@ define <16 x i32> @extrause_add(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) { ; CHECK-LABEL: extrause_add: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s0, s1, [x0] -; CHECK-NEXT: add x8, x3, #8 -; CHECK-NEXT: ldp s2, s3, [x2] -; CHECK-NEXT: add x9, x1, #8 ; CHECK-NEXT: add x10, x3, #12 +; CHECK-NEXT: ldp s2, s3, [x2] +; CHECK-NEXT: add x11, x1, #12 ; CHECK-NEXT: ldp s4, s5, [x0, #8] +; CHECK-NEXT: add x8, x3, #8 ; CHECK-NEXT: ldp s6, s7, [x2, #8] -; CHECK-NEXT: add x11, x1, #12 +; CHECK-NEXT: add x9, x1, #8 ; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 ; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4 +; CHECK-NEXT: ld1 { v1.s }[1], [x1] ; CHECK-NEXT: ld1 { v5.s }[1], [x11] +; CHECK-NEXT: ld1 { v3.s }[1], [x3] ; CHECK-NEXT: ld1 { v7.s }[1], [x10] ; CHECK-NEXT: ld1 { v4.s }[1], [x9] ; CHECK-NEXT: ld1 { v6.s }[1], [x8] -; CHECK-NEXT: ld1 { v3.s }[1], [x3] -; CHECK-NEXT: ld1 { v1.s }[1], [x1] -; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b -; CHECK-NEXT: uaddl v7.8h, v3.8b, v7.8b ; CHECK-NEXT: uaddl v5.8h, v1.8b, v5.8b +; CHECK-NEXT: uaddl v7.8h, v3.8b, v7.8b ; CHECK-NEXT: uaddl v1.8h, v0.8b, v4.8b +; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b +; CHECK-NEXT: ushll v0.4s, v5.4h, #3 ; CHECK-NEXT: ushll v4.4s, v7.4h, #3 ; CHECK-NEXT: ushll2 v3.4s, v7.8h, #3 -; CHECK-NEXT: ushll v0.4s, v5.4h, #3 ; CHECK-NEXT: ushll2 v6.4s, v5.8h, #3 ; CHECK-NEXT: stp q5, q7, [x4] +; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h ; CHECK-NEXT: uaddw2 v3.4s, v3.4s, v2.8h ; CHECK-NEXT: uaddw v2.4s, v4.4s, v2.4h -; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h ; CHECK-NEXT: uaddw2 v1.4s, v6.4s, v1.8h ; CHECK-NEXT: ret %lp1 = load <4 x i8>, ptr %p @@ -1055,25 +1055,25 @@ define <16 x i32> @extrause_ext2(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) { ; CHECK-LABEL: extrause_ext2: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s0, s1, [x2] -; CHECK-NEXT: add x8, x3, #8 -; CHECK-NEXT: ldp s2, s3, [x0] -; CHECK-NEXT: add x9, x1, #8 ; CHECK-NEXT: add x10, x3, #12 +; CHECK-NEXT: ldp s2, s3, [x0] +; CHECK-NEXT: add x11, x1, #12 ; CHECK-NEXT: ldp s4, s5, [x2, #8] +; CHECK-NEXT: add x8, x3, #8 ; CHECK-NEXT: ldp s6, s7, [x0, #8] -; CHECK-NEXT: add x11, x1, #12 +; CHECK-NEXT: add x9, x1, #8 ; CHECK-NEXT: ld1 { v0.s }[1], [x3], #4 ; CHECK-NEXT: ld1 { v2.s }[1], [x1], #4 -; CHECK-NEXT: ld1 { v5.s }[1], [x10] +; CHECK-NEXT: ld1 { v3.s }[1], [x1] ; CHECK-NEXT: ld1 { v7.s }[1], [x11] +; CHECK-NEXT: ld1 { v1.s }[1], [x3] +; CHECK-NEXT: ld1 { v5.s }[1], [x10] ; CHECK-NEXT: ld1 { v6.s }[1], [x9] ; CHECK-NEXT: ld1 { v4.s }[1], [x8] -; CHECK-NEXT: ld1 { v1.s }[1], [x3] -; CHECK-NEXT: ld1 { v3.s }[1], [x1] -; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b -; CHECK-NEXT: uaddl v4.8h, v0.8b, v4.8b ; CHECK-NEXT: uaddl v7.8h, v3.8b, v7.8b ; CHECK-NEXT: uaddl v3.8h, v1.8b, v5.8b +; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b +; CHECK-NEXT: uaddl v4.8h, v0.8b, v4.8b ; CHECK-NEXT: ushll v0.4s, v7.4h, #3 ; CHECK-NEXT: ushll2 v1.4s, v7.8h, #3 ; CHECK-NEXT: ushll v5.4s, v3.4h, #3 @@ -1157,35 +1157,35 @@ define <16 x i32> @extrause_shl(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) { ; CHECK-LABEL: extrause_shl: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s0, s1, [x0] -; CHECK-NEXT: add x8, x3, #8 -; CHECK-NEXT: ldp s2, s3, [x2] -; CHECK-NEXT: add x9, x1, #8 ; CHECK-NEXT: add x10, x3, #12 +; CHECK-NEXT: ldp s2, s3, [x2] +; CHECK-NEXT: add x11, x1, #12 ; CHECK-NEXT: ldp s4, s5, [x0, #8] +; CHECK-NEXT: add x8, x3, #8 ; CHECK-NEXT: ldp s6, s7, [x2, #8] -; CHECK-NEXT: add x11, x1, #12 +; CHECK-NEXT: add x9, x1, #8 ; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 ; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4 +; CHECK-NEXT: ld1 { v1.s }[1], [x1] ; CHECK-NEXT: ld1 { v5.s }[1], [x11] +; CHECK-NEXT: ld1 { v3.s }[1], [x3] ; CHECK-NEXT: ld1 { v7.s }[1], [x10] ; CHECK-NEXT: ld1 { v4.s }[1], [x9] ; CHECK-NEXT: ld1 { v6.s }[1], [x8] -; CHECK-NEXT: ld1 { v3.s }[1], [x3] -; CHECK-NEXT: ld1 { v1.s }[1], [x1] +; CHECK-NEXT: uaddl v1.8h, v1.8b, v5.8b +; CHECK-NEXT: uaddl v3.8h, v3.8b, v7.8b ; CHECK-NEXT: uaddl v4.8h, v0.8b, v4.8b ; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b -; CHECK-NEXT: uaddl v3.8h, v3.8b, v7.8b -; CHECK-NEXT: uaddl v1.8h, v1.8b, v5.8b -; CHECK-NEXT: ushll v6.4s, v3.4h, #3 -; CHECK-NEXT: ushll2 v16.4s, v3.8h, #3 ; CHECK-NEXT: ushll v5.4s, v1.4h, #3 +; CHECK-NEXT: ushll v6.4s, v3.4h, #3 ; CHECK-NEXT: ushll2 v7.4s, v1.8h, #3 -; CHECK-NEXT: uaddw2 v3.4s, v16.4s, v2.8h -; CHECK-NEXT: uaddw v2.4s, v6.4s, v2.4h -; CHECK-NEXT: stp q6, q16, [x4, #32] +; CHECK-NEXT: ushll2 v16.4s, v3.8h, #3 ; CHECK-NEXT: uaddw v0.4s, v5.4s, v4.4h ; CHECK-NEXT: uaddw2 v1.4s, v7.4s, v4.8h ; CHECK-NEXT: stp q5, q7, [x4] +; CHECK-NEXT: uaddw2 v3.4s, v16.4s, v2.8h +; CHECK-NEXT: uaddw v2.4s, v6.4s, v2.4h +; CHECK-NEXT: stp q6, q16, [x4, #32] ; CHECK-NEXT: ret %lp1 = load <4 x i8>, ptr %p %p2 = getelementptr i8, ptr %p, i32 4 diff --git a/llvm/test/CodeGen/AArch64/ld1postmul.ll b/llvm/test/CodeGen/AArch64/ld1postmul.ll index 1553aab9046ed..658010d09e5ba 100644 --- a/llvm/test/CodeGen/AArch64/ld1postmul.ll +++ b/llvm/test/CodeGen/AArch64/ld1postmul.ll @@ -81,8 +81,8 @@ define ptr @fmla_v4f16(ptr %p, ptr %ps, <4 x half> %t, <4 x half> %u) { ; CHECK-NOFP16: // %bb.0: ; CHECK-NOFP16-NEXT: ld1r { v2.4h }, [x0], #2 ; CHECK-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NOFP16-NEXT: fcvtl v2.4s, v2.4h ; CHECK-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NOFP16-NEXT: fcvtl v2.4s, v2.4h ; CHECK-NOFP16-NEXT: fmul v0.4s, v2.4s, v0.4s ; CHECK-NOFP16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-NEXT: fcvtl v0.4s, v0.4h diff --git a/llvm/test/CodeGen/AArch64/machine-cse-profitable-check.ll b/llvm/test/CodeGen/AArch64/machine-cse-profitable-check.ll index c89d99328ceb3..d12240a9f4f32 100644 --- a/llvm/test/CodeGen/AArch64/machine-cse-profitable-check.ll +++ b/llvm/test/CodeGen/AArch64/machine-cse-profitable-check.ll @@ -17,9 +17,9 @@ define void @foo(ptr %buf, <8 x i16> %a) { ; CHECK-AGGRESSIVE-CSE: // %bb.0: // %entry ; CHECK-AGGRESSIVE-CSE-NEXT: // kill: def $q0 killed $q0 def $q0_q1 ; CHECK-AGGRESSIVE-CSE-NEXT: movi v1.2d, #0000000000000000 +; CHECK-AGGRESSIVE-CSE-NEXT: zip2 v2.8h, v0.8h, v1.8h ; CHECK-AGGRESSIVE-CSE-NEXT: st2 { v0.4h, v1.4h }, [x0], #16 -; CHECK-AGGRESSIVE-CSE-NEXT: zip2 v0.8h, v0.8h, v1.8h -; CHECK-AGGRESSIVE-CSE-NEXT: str q0, [x0] +; CHECK-AGGRESSIVE-CSE-NEXT: str q2, [x0] ; CHECK-AGGRESSIVE-CSE-NEXT: ret entry: %vzip.i = shufflevector <8 x i16> %a, <8 x i16> , <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll index be3df664c8876..b63d540fb8e02 100644 --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -199,13 +199,13 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: .LBB1_9: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld2 { v1.4s, v2.4s }, [x1], #32 +; CHECK-NEXT: subs x12, x12, #4 ; CHECK-NEXT: fcmgt v3.4s, v1.4s, v0.4s ; CHECK-NEXT: fcmgt v4.4s, v2.4s, v0.4s ; CHECK-NEXT: fcmlt v5.4s, v1.4s, #0.0 -; CHECK-NEXT: subs x12, x12, #4 ; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b -; CHECK-NEXT: fcmlt v1.4s, v2.4s, #0.0 ; CHECK-NEXT: bsl v4.16b, v0.16b, v2.16b +; CHECK-NEXT: fcmlt v1.4s, v2.4s, #0.0 ; CHECK-NEXT: bic v2.16b, v3.16b, v5.16b ; CHECK-NEXT: bic v1.16b, v4.16b, v1.16b ; CHECK-NEXT: fcvtzs v2.4s, v2.4s @@ -346,17 +346,17 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: .LBB2_4: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld3 { v2.4s, v3.4s, v4.4s }, [x1], #48 +; CHECK-NEXT: add x13, x0, #8 +; CHECK-NEXT: subs x12, x12, #4 ; CHECK-NEXT: fcmgt v5.4s, v2.4s, v0.4s ; CHECK-NEXT: fcmgt v6.4s, v3.4s, v0.4s ; CHECK-NEXT: fcmgt v7.4s, v4.4s, v0.4s ; CHECK-NEXT: fcmlt v16.4s, v2.4s, #0.0 ; CHECK-NEXT: fcmlt v17.4s, v3.4s, #0.0 -; CHECK-NEXT: add x13, x0, #8 -; CHECK-NEXT: subs x12, x12, #4 ; CHECK-NEXT: bsl v5.16b, v0.16b, v2.16b -; CHECK-NEXT: fcmlt v2.4s, v4.4s, #0.0 ; CHECK-NEXT: bsl v6.16b, v0.16b, v3.16b ; CHECK-NEXT: bsl v7.16b, v0.16b, v4.16b +; CHECK-NEXT: fcmlt v2.4s, v4.4s, #0.0 ; CHECK-NEXT: bic v3.16b, v5.16b, v16.16b ; CHECK-NEXT: bic v4.16b, v6.16b, v17.16b ; CHECK-NEXT: bic v2.16b, v7.16b, v2.16b @@ -599,19 +599,19 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: .LBB3_9: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x1], #64 +; CHECK-NEXT: subs x12, x12, #4 ; CHECK-NEXT: fcmgt v6.4s, v2.4s, v0.4s ; CHECK-NEXT: fcmgt v7.4s, v3.4s, v0.4s ; CHECK-NEXT: fcmgt v16.4s, v4.4s, v0.4s ; CHECK-NEXT: fcmgt v17.4s, v5.4s, v0.4s ; CHECK-NEXT: fcmlt v18.4s, v2.4s, #0.0 ; CHECK-NEXT: fcmlt v19.4s, v3.4s, #0.0 -; CHECK-NEXT: subs x12, x12, #4 ; CHECK-NEXT: fcmlt v20.4s, v4.4s, #0.0 ; CHECK-NEXT: bsl v6.16b, v0.16b, v2.16b -; CHECK-NEXT: fcmlt v2.4s, v5.4s, #0.0 ; CHECK-NEXT: bsl v7.16b, v0.16b, v3.16b ; CHECK-NEXT: bsl v16.16b, v0.16b, v4.16b ; CHECK-NEXT: bsl v17.16b, v0.16b, v5.16b +; CHECK-NEXT: fcmlt v2.4s, v5.4s, #0.0 ; CHECK-NEXT: bic v3.16b, v6.16b, v18.16b ; CHECK-NEXT: bic v4.16b, v7.16b, v19.16b ; CHECK-NEXT: bic v5.16b, v16.16b, v20.16b diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll index 55d3943bbc7d8..5c73ba16972be 100644 --- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll +++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll @@ -88,10 +88,10 @@ define void @vld4(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 ; CHECK-NEXT: .LBB2_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0], #64 -; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s ; CHECK-NEXT: add x9, x1, x8 ; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 +; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s ; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s ; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s ; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s @@ -257,10 +257,10 @@ define void @vld4_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture % ; CHECK-NEXT: .LBB6_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0], #64 -; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s ; CHECK-NEXT: add x9, x1, x8 ; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 +; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s ; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s ; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s ; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-writeback.s index f9b4509531d35..94439acafe370 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-writeback.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-writeback.s @@ -1162,28 +1162,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2201 +# CHECK-NEXT: Total Cycles: 1701 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.68 -# CHECK-NEXT: IPC: 0.45 +# CHECK-NEXT: uOps Per Cycle: 0.88 +# CHECK-NEXT: IPC: 0.59 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: 01234567 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeE . . . . . ld1 { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,1] . DE. . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE . . . . ld1 { v1.2d }, [x27], #16 -# CHECK-NEXT: [0,3] . . DE. . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeE . . . ld1 { v1.2s }, [x27], #8 -# CHECK-NEXT: [0,5] . . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeE . . ld1 { v1.4h }, [x27], #8 -# CHECK-NEXT: [0,7] . . . .DE . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.4s }, [x27], #16 -# CHECK-NEXT: [0,9] . . . . .DE add x0, x27, #1 +# CHECK: [0,0] DeeE . . . . ld1 { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.2d }, [x27], #16 +# CHECK-NEXT: [0,3] . .DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeE . . ld1 { v1.2s }, [x27], #8 +# CHECK-NEXT: [0,5] . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeE . . ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,7] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeE ld1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,9] . . . .DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1208,28 +1208,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2301 +# CHECK-NEXT: Total Cycles: 1801 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.65 -# CHECK-NEXT: IPC: 0.43 +# CHECK-NEXT: uOps Per Cycle: 0.83 +# CHECK-NEXT: IPC: 0.56 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 +# CHECK-NEXT: 012345678 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeE . . . . . ld1 { v1.8b }, [x27], #8 -# CHECK-NEXT: [0,1] . DE. . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE . . . . ld1 { v1.8h }, [x27], #16 -# CHECK-NEXT: [0,3] . . DE. . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE . . . ld1 { v1.16b }, [x27], #16 -# CHECK-NEXT: [0,5] . . . DE. . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeE . . ld1 { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeE . . . . ld1 { v1.8b }, [x27], #8 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.8h }, [x27], #16 +# CHECK-NEXT: [0,3] . .DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.16b }, [x27], #16 +# CHECK-NEXT: [0,5] . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . .DeeE. . ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeE ld1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1254,28 +1254,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2201 +# CHECK-NEXT: Total Cycles: 1701 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.68 -# CHECK-NEXT: IPC: 0.45 +# CHECK-NEXT: uOps Per Cycle: 0.88 +# CHECK-NEXT: IPC: 0.59 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: 01234567 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeE . . . . . ld1 { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,1] . DE. . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeE . . . . ld1 { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE . . . ld1 { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeE . . ld1 { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,7] . . . .DE . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . .DE add x0, x27, #1 +# CHECK: [0,0] DeeE . . . . ld1 { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeE . . . ld1 { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeE . . ld1 { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeE . . ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeE ld1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . . . .DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1300,28 +1300,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld1 { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1346,28 +1346,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1392,28 +1392,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1438,28 +1438,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1484,28 +1484,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1530,28 +1530,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1576,28 +1576,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1622,28 +1622,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1668,28 +1668,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1714,28 +1714,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2401 +# CHECK-NEXT: Total Cycles: 1901 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.62 -# CHECK-NEXT: IPC: 0.42 +# CHECK-NEXT: uOps Per Cycle: 0.79 +# CHECK-NEXT: IPC: 0.53 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeE. . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeE. ld1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeE ld1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1760,28 +1760,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2001 +# CHECK-NEXT: Total Cycles: 1501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.75 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 1.00 +# CHECK-NEXT: IPC: 0.67 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0 +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeE . . . . ld1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeE . . . ld1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeE . . ld1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeE . ld1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeE. ld1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeE . . . ld1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: [0,1] . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeE . . ld1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: [0,3] . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeE. . ld1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: [0,5] . . DE. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeE . ld1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,7] . . .DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeE ld1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,9] . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1806,28 +1806,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2001 +# CHECK-NEXT: Total Cycles: 1501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.75 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 1.00 +# CHECK-NEXT: IPC: 0.67 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0 +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeE . . . . ld1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeE . . . ld1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeE . . ld1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeE . ld1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeE. ld1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeE . . . ld1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeE . . ld1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: [0,3] . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeE. . ld1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: [0,5] . . DE. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeE . ld1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . .DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeE ld1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,9] . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1852,28 +1852,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2001 +# CHECK-NEXT: Total Cycles: 1501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.75 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 1.00 +# CHECK-NEXT: IPC: 0.67 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0 +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeE . . . . ld1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeE . . . ld1r { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeE . . ld1r { v1.2d }, [x27], #8 -# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeE . ld1r { v1.2s }, [x27], #4 -# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeE. ld1r { v1.4h }, [x27], #2 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeE . . . ld1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeE . . ld1r { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,3] . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeE. . ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: [0,5] . . DE. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeE . ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: [0,7] . . .DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeE ld1r { v1.4h }, [x27], #2 +# CHECK-NEXT: [0,9] . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1898,28 +1898,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2001 +# CHECK-NEXT: Total Cycles: 1501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.75 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 1.00 +# CHECK-NEXT: IPC: 0.67 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0 +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeE . . . . ld1r { v1.4s }, [x27], #4 -# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeE . . . ld1r { v1.8b }, [x27], #1 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeE . . ld1r { v1.8h }, [x27], #2 -# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeE . ld1r { v1.16b }, [x27], #1 -# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeE. ld1r { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeE . . . ld1r { v1.4s }, [x27], #4 +# CHECK-NEXT: [0,1] . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeE . . ld1r { v1.8b }, [x27], #1 +# CHECK-NEXT: [0,3] . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeE. . ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: [0,5] . . DE. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeE . ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: [0,7] . . .DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeE ld1r { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1944,28 +1944,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2001 +# CHECK-NEXT: Total Cycles: 1501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.75 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 1.00 +# CHECK-NEXT: IPC: 0.67 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0 +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeE . . . . ld1r { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeE . . . ld1r { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeE . . ld1r { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeE . ld1r { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeE. ld1r { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeE . . . ld1r { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeE . . ld1r { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeE. . ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . . DE. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeE . ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . . .DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeE ld1r { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1990,28 +1990,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeE . . . . . ld1r { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,1] . DE. . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeE . . . . ld1r { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeeE. . . ld2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeE . . . . ld1r { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeE . . . ld1r { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeeE . . ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2036,28 +2036,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3301 +# CHECK-NEXT: Total Cycles: 2801 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.45 -# CHECK-NEXT: IPC: 0.30 +# CHECK-NEXT: uOps Per Cycle: 0.54 +# CHECK-NEXT: IPC: 0.36 # CHECK-NEXT: Block RThroughput: 9.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345678 -# CHECK: [0,0] DeeeeeE . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,1] . .DE . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,3] . . .DE . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . . DeeeeeE . . . . ld2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,5] . . . . DE. . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . . DeeeeeE . . ld2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,7] . . . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . . .DeeeeeE. ld2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeeeE . . . . . ld2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,1] . DE . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . .DeeeE . . . . ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,3] . . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeE . . . ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,5] . . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . .DeeeeeE . . ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,7] . . . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . . DeeeeeE ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2082,28 +2082,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2901 +# CHECK-NEXT: Total Cycles: 2401 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.52 -# CHECK-NEXT: IPC: 0.34 +# CHECK-NEXT: uOps Per Cycle: 0.62 +# CHECK-NEXT: IPC: 0.42 # CHECK-NEXT: Block RThroughput: 7.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 01234 -# CHECK: [0,0] DeeeE. . . . . . ld2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . . ld2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeeE . . . ld2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,5] . . . .DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . . DeeeE . . ld2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . .DE . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . . DeeeeeE. ld2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . . ld2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . . ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeE. . . ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . . . DE. . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . . ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeeE ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2128,28 +2128,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2701 +# CHECK-NEXT: Total Cycles: 2201 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.56 -# CHECK-NEXT: IPC: 0.37 +# CHECK-NEXT: uOps Per Cycle: 0.68 +# CHECK-NEXT: IPC: 0.45 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01234567 - -# CHECK: [0,0] DeeeeeE . . . . . ld2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,1] . .DE . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . . DeeeE . . . . ld2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: [0,3] . . .DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . . DeeeE . . . ld2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: [0,5] . . . .DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . . DeeeE . . ld2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: [0,7] . . . . .DE . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . . DeeeE. ld2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: [0,9] . . . . . .DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012 + +# CHECK: [0,0] DeeeeeE . . . . ld2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . .DeeeE . . . ld2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE. . . ld2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: [0,5] . . . DE. . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . . ld2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeE ld2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: [0,9] . . . . .DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2174,28 +2174,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2220,28 +2220,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld2r { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld2r { v1.2d, v2.2d }, [x27], #16 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2266,28 +2266,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld2r { v1.2s, v2.2s }, [x27], #8 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld2r { v1.4h, v2.4h }, [x27], #4 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld2r { v1.4s, v2.4s }, [x27], #8 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld2r { v1.8b, v2.8b }, [x27], #2 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld2r { v1.8h, v2.8h }, [x27], #4 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld2r { v1.2s, v2.2s }, [x27], #8 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2312,28 +2312,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld2r { v1.16b, v2.16b }, [x27], #2 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld2r { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld2r { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld2r { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld2r { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld2r { v1.16b, v2.16b }, [x27], #2 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2358,28 +2358,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2601 +# CHECK-NEXT: Total Cycles: 2101 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.58 -# CHECK-NEXT: IPC: 0.38 +# CHECK-NEXT: uOps Per Cycle: 0.71 +# CHECK-NEXT: IPC: 0.48 # CHECK-NEXT: Block RThroughput: 5.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeE. . . . .. ld2r { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . .. ld2r { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . .. ld2r { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. .. ld2r { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 01 + +# CHECK: [0,0] DeeeE. . . .. ld2r { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . .. ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . .. ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE .. ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeeE ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2404,28 +2404,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3001 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 0.60 +# CHECK-NEXT: IPC: 0.40 # CHECK-NEXT: Block RThroughput: 7.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345 -# CHECK: [0,0] DeeeeE . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,1] . DE . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . .DeeeeE . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,3] . . .DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . . DeeeeE . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . . DeeeeE . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,7] . . . . . DE. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . . DeeeeE. ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeeE . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2450,28 +2450,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3001 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 0.60 +# CHECK-NEXT: IPC: 0.40 # CHECK-NEXT: Block RThroughput: 7.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345 -# CHECK: [0,0] DeeeeE . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,1] . DE . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . .DeeeeE . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,3] . . .DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . . DeeeeE . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . . DeeeeE . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . . DE. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . . DeeeeE. ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeeE . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2496,28 +2496,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2801 +# CHECK-NEXT: Total Cycles: 2301 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.54 -# CHECK-NEXT: IPC: 0.36 +# CHECK-NEXT: uOps Per Cycle: 0.65 +# CHECK-NEXT: IPC: 0.43 # CHECK-NEXT: Block RThroughput: 6.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345678 +# CHECK-NEXT: Index 0123456789 0123 -# CHECK: [0,0] DeeeeE . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . .DeeeeE . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,3] . . .DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . . DeeeeE . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . . DeeeE . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,7] . . . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . . DeeeE. ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeeE . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE. . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,7] . . . . DE. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeE ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2542,28 +2542,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2588,28 +2588,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2634,28 +2634,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2680,28 +2680,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2726,28 +2726,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2772,28 +2772,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3301 +# CHECK-NEXT: Total Cycles: 2801 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.45 -# CHECK-NEXT: IPC: 0.30 +# CHECK-NEXT: uOps Per Cycle: 0.54 +# CHECK-NEXT: IPC: 0.36 # CHECK-NEXT: Block RThroughput: 9.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345678 -# CHECK: [0,0] DeeeE. . . . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeeE . . . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,3] . . .DE . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . . DeeeeeE . . . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,5] . . . . DE. . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . . DeeeeeE . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,7] . . . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . . .DeeeeeE. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,9] . . . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeE . . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,3] . . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeE . . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,5] . . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . .DeeeeeE . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,7] . . . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . . DeeeeeE ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2818,28 +2818,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3501 +# CHECK-NEXT: Total Cycles: 3001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.43 -# CHECK-NEXT: IPC: 0.29 +# CHECK-NEXT: uOps Per Cycle: 0.50 +# CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 10.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012345 +# CHECK-NEXT: 0123456789 0 # CHECK-NEXT: Index 0123456789 0123456789 -# CHECK: [0,0] DeeeeeE . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,1] . .DE . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,3] . . . DE. . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . . DeeeeeE . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . . .DeeeeeE . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . . . DeeeeeE. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeeeE . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,1] . DE . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . .DeeeeeE . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,3] . . .DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . . DeeeeeE . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . . DeeeeeE. . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,7] . . . . . DE. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . . DeeeeeE ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2864,28 +2864,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3501 +# CHECK-NEXT: Total Cycles: 3001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.43 -# CHECK-NEXT: IPC: 0.29 +# CHECK-NEXT: uOps Per Cycle: 0.50 +# CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 10.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012345 +# CHECK-NEXT: 0123456789 0 # CHECK-NEXT: Index 0123456789 0123456789 -# CHECK: [0,0] DeeeeeE . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,1] . .DE . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,3] . . . DE. . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . . DeeeeeE . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . . .DeeeeeE . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . . . DeeeeeE. ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeeeE . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . .DeeeeeE . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,3] . . .DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . . DeeeeeE . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . . DeeeeeE. . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,7] . . . . . DE. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . . DeeeeeE ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2910,28 +2910,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2956,28 +2956,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3002,28 +3002,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3048,28 +3048,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3094,28 +3094,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2501 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 - -# CHECK: [0,0] DeeeE. . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE. . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0 + +# CHECK: [0,0] DeeeE. . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3140,28 +3140,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2101 +# CHECK-NEXT: Total Cycles: 1801 # CHECK-NEXT: Total uOps: 1700 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.81 -# CHECK-NEXT: IPC: 0.48 +# CHECK-NEXT: uOps Per Cycle: 0.94 +# CHECK-NEXT: IPC: 0.56 # CHECK-NEXT: Block RThroughput: 5.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01 +# CHECK-NEXT: 012345678 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeE. . . .. ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE. . .. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. .. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeE .. ldp s1, s2, [x27], #248 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeE ldp d1, d2, [x27], #496 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeE . ldp s1, s2, [x27], #248 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeE ldp d1, d2, [x27], #496 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3526,9 +3526,9 @@ add x0, x27, 1 # CHECK-NEXT: [0,3] . DE. . . add x0, x27, #1 # CHECK-NEXT: [0,4] . DeE . . ldrsw x1, [x27, #254]! # CHECK-NEXT: [0,5] . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeE. . st1 { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,6] . .DeeeE . st1 { v1.1d }, [x27], #8 # CHECK-NEXT: [0,7] . . DE . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . DeeeE. st1 { v1.2d }, [x27], #16 +# CHECK-NEXT: [0,8] . . DeeeE st1 { v1.2d }, [x27], #16 # CHECK-NEXT: [0,9] . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): @@ -3554,7 +3554,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2002 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -3564,18 +3564,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . .. st1 { v1.2s }, [x27], #8 -# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.4h }, [x27], #8 -# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.4s }, [x27], #16 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.8b }, [x27], #8 -# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.8h }, [x27], #16 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . st1 { v1.2s }, [x27], #8 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.8h }, [x27], #16 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3600,7 +3600,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2002 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -3610,18 +3610,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . .. st1 { v1.16b }, [x27], #16 -# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . st1 { v1.16b }, [x27], #16 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3646,7 +3646,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2002 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -3656,18 +3656,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . .. st1 { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . st1 { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3692,7 +3692,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2002 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -3702,18 +3702,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . .. st1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . st1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3738,7 +3738,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2002 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -3748,18 +3748,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . .. st1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . st1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3784,7 +3784,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2002 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -3794,18 +3794,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . .. st1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . st1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3830,7 +3830,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -3840,18 +3840,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3876,7 +3876,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -3886,18 +3886,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3922,7 +3922,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -3932,18 +3932,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3968,7 +3968,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -3978,18 +3978,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4014,7 +4014,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4024,18 +4024,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4060,7 +4060,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4070,18 +4070,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4106,7 +4106,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2202 +# CHECK-NEXT: Total Cycles: 2201 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4116,18 +4116,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 +# CHECK-NEXT: Index 0123456789 012 -# CHECK: [0,0] DeeeeE . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE. . . st1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE . . st1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: [0,7] . . . . DE. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeE. st1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeeE . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE. . . st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,5] . . . DE. . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . . st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: [0,7] . . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeE st1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . . .DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4152,7 +4152,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2002 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4162,18 +4162,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeE. . . .. st1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . . st1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4198,7 +4198,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2102 +# CHECK-NEXT: Total Cycles: 2101 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4208,18 +4208,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: Index 0123456789 01 -# CHECK: [0,0] DeeeE. . . . . st1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeE . . . . st1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: [0,3] . . DE. . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeE . . . st1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: [0,5] . . . DE . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeE . . st1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: [0,7] . . . .DE . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . .DeeeeE. st2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,9] . . . . .DE add x0, x27, #1 +# CHECK: [0,0] DeeeE. . . .. st1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: [0,1] . DE. . . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,5] . . .DE . .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . . DE .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeeE st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4244,7 +4244,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4254,18 +4254,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4290,7 +4290,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4300,18 +4300,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4336,7 +4336,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4346,18 +4346,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4382,7 +4382,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4392,18 +4392,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4428,7 +4428,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4438,18 +4438,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4474,7 +4474,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 -# CHECK-NEXT: Total Cycles: 1502 +# CHECK-NEXT: Total Cycles: 1501 # CHECK-NEXT: Total uOps: 900 # CHECK: Dispatch Width: 3 @@ -4483,15 +4483,15 @@ add x0, x27, 1 # CHECK-NEXT: Block RThroughput: 12.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeE . .. st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,1] . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE .. st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,3] . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE. st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,5] . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeeE . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,1] . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,3] . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,5] . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4512,7 +4512,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4522,18 +4522,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4558,7 +4558,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4568,18 +4568,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4604,7 +4604,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4614,18 +4614,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4650,7 +4650,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4660,18 +4660,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4696,7 +4696,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4706,18 +4706,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4742,7 +4742,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4752,18 +4752,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4788,7 +4788,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4798,18 +4798,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4834,7 +4834,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4844,18 +4844,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4880,7 +4880,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2502 +# CHECK-NEXT: Total Cycles: 2501 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 3 @@ -4890,18 +4890,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeE . . . .. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . . .. st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE . .. st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE .. st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . . . DeeeeE. st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 012345 + +# CHECK: [0,0] DeeeeE . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . . DeeeeE st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4926,7 +4926,7 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 2002 +# CHECK-NEXT: Total Cycles: 2001 # CHECK-NEXT: Total uOps: 1200 # CHECK: Dispatch Width: 3 @@ -4936,16 +4936,16 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01 +# CHECK-NEXT: Index 0123456789 0 -# CHECK: [0,0] DeeeeE . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeE . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: [0,3] . . DE . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . . DeeeeE .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,5] . . . DE .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . . . DeeeeE. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,7] . . . . DE add x0, x27, #1 +# CHECK: [0,0] DeeeeE . . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,1] . DE . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeE . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,3] . . DE . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeE . st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,5] . . . DE . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeE st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . . . DE add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions