@@ -5676,6 +5676,22 @@ void emitter::emitIns_R_R_I(
5676
5676
{
5677
5677
return;
5678
5678
}
5679
+
5680
+ if (emitComp->opts.OptimizationEnabled() && IsOptimisableLdrStr(ins, reg1, reg2, imm, size, fmt))
5681
+ {
5682
+ regNumber oldReg1 = emitLastIns->idReg1();
5683
+ ssize_t oldImm =
5684
+ emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns();
5685
+ instruction optIns = (ins == INS_ldr) ? INS_ldp : INS_stp;
5686
+ ssize_t scaledOldImm = oldImm * size;
5687
+
5688
+ // Overwrite the "sub-optimal" instruction with the *optimised* instruction, directly
5689
+ // into the output buffer.
5690
+ emitIns_R_R_R_I(optIns, attr, oldReg1, reg1, reg2, scaledOldImm, INS_OPTS_NONE, EA_UNKNOWN, emitLastIns);
5691
+
5692
+ // And now stop here, as the second instruction descriptor is no longer emitted.
5693
+ return;
5694
+ }
5679
5695
}
5680
5696
else if (isAddSub)
5681
5697
{
@@ -6491,7 +6507,8 @@ void emitter::emitIns_R_R_R_I(instruction ins,
6491
6507
regNumber reg3,
6492
6508
ssize_t imm,
6493
6509
insOpts opt /* = INS_OPTS_NONE */,
6494
- emitAttr attrReg2 /* = EA_UNKNOWN */)
6510
+ emitAttr attrReg2 /* = EA_UNKNOWN */,
6511
+ instrDesc* reuseInstr /* = nullptr */)
6495
6512
{
6496
6513
emitAttr size = EA_SIZE(attr);
6497
6514
emitAttr elemsize = EA_UNKNOWN;
@@ -6626,6 +6643,7 @@ void emitter::emitIns_R_R_R_I(instruction ins,
6626
6643
scale = (size == EA_8BYTE) ? 3 : 2;
6627
6644
}
6628
6645
isLdSt = true;
6646
+ fmt = IF_LS_3C;
6629
6647
break;
6630
6648
6631
6649
case INS_ld1:
@@ -6906,7 +6924,58 @@ void emitter::emitIns_R_R_R_I(instruction ins,
6906
6924
}
6907
6925
assert(fmt != IF_NONE);
6908
6926
6909
- instrDesc* id = emitNewInstrCns(attr, imm);
6927
+ // An "instrDesc" will *always* be required.
6928
+ // Under normal circumstances the instruction
6929
+ // will be added to the emitted group. However,
6930
+ // this is not correct for instructions that
6931
+ // are going to overwrite already-emitted
6932
+ // instructions and we therefore need space to
6933
+ // hold the new instruction descriptor.
6934
+ instrDesc* id;
6935
+
6936
+ // One cannot simply instantiate an instruction
6937
+ // descriptor, so this array will be used to
6938
+ // hold the instruction being built.
6939
+ unsigned char tempInstrDesc[sizeof(instrDesc)];
6940
+
6941
+ // Now the instruction is either emitted OR
6942
+ // used to overwrite the previously-emitted
6943
+ // instruction.
6944
+ if (reuseInstr == nullptr)
6945
+ {
6946
+ id = emitNewInstrCns(attr, imm);
6947
+ }
6948
+ else
6949
+ {
6950
+ id = (instrDesc*)tempInstrDesc;
6951
+
6952
+ memset(id, 0, sizeof(tempInstrDesc));
6953
+
6954
+ // Store the size and handle the two special
6955
+ // values that indicate GCref and ByRef
6956
+
6957
+ if (EA_IS_GCREF(attr))
6958
+ {
6959
+ // A special value indicates a GCref pointer value
6960
+
6961
+ id->idGCref(GCT_GCREF);
6962
+ id->idOpSize(EA_PTRSIZE);
6963
+ }
6964
+ else if (EA_IS_BYREF(attr))
6965
+ {
6966
+ // A special value indicates a Byref pointer value
6967
+
6968
+ id->idGCref(GCT_BYREF);
6969
+ id->idOpSize(EA_PTRSIZE);
6970
+ }
6971
+ else
6972
+ {
6973
+ id->idGCref(GCT_NONE);
6974
+ id->idOpSize(EA_SIZE(attr));
6975
+ }
6976
+
6977
+ id->idSmallCns(imm);
6978
+ }
6910
6979
6911
6980
id->idIns(ins);
6912
6981
id->idInsFmt(fmt);
@@ -6932,8 +7001,18 @@ void emitter::emitIns_R_R_R_I(instruction ins,
6932
7001
}
6933
7002
}
6934
7003
6935
- dispIns(id);
6936
- appendToCurIG(id);
7004
+ // Now the instruction is EITHER emitted OR used to overwrite the previously-emitted instruction.
7005
+ if (reuseInstr == nullptr)
7006
+ {
7007
+ // Then this is the standard exit path and the instruction is to be appended to the instruction group.
7008
+ dispIns(id);
7009
+ appendToCurIG(id);
7010
+ }
7011
+ else
7012
+ {
7013
+ // The instruction is copied over the last emitted instruction.
7014
+ memcpy(reuseInstr, id, sizeof(tempInstrDesc));
7015
+ }
6937
7016
}
6938
7017
6939
7018
/*****************************************************************************
@@ -7623,8 +7702,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
7623
7702
{
7624
7703
bool useRegForImm = false;
7625
7704
ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
7626
-
7627
- imm = disp;
7705
+ imm = disp;
7628
7706
if (imm == 0)
7629
7707
{
7630
7708
fmt = IF_LS_2A;
@@ -7670,6 +7748,25 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
7670
7748
7671
7749
assert(fmt != IF_NONE);
7672
7750
7751
+ // Try to merge this LDR/STR with the previously emitted one into a single LDP/STP.
7752
+ if (emitComp->opts.OptimizationEnabled() && IsOptimisableLdrStr(ins, reg1, reg2, imm, size, fmt))
7753
+ {
7754
+ regNumber oldReg1 = emitLastIns->idReg1();
7755
+ ssize_t oldImm =
7756
+ emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns();
7757
+ instruction optIns = (ins == INS_ldr) ? INS_ldp : INS_stp;
7758
+ ssize_t scaledOldImm = oldImm * size;
7759
+
7760
+ // Overwrite the "sub-optimal" instruction with the *optimised* instruction, directly
7761
+ // into the output buffer.
7762
+ emitIns_R_R_R_I(optIns, attr, oldReg1, reg1, reg2, scaledOldImm, INS_OPTS_NONE, EA_UNKNOWN, emitLastIns);
7763
+
7764
+ // And now stop here, as the second instruction descriptor is no longer emitted.
7765
+ return;
7766
+ }
7767
+
7768
+ // We need to simply emit the instruction unchanged
7769
+
7673
7770
instrDesc* id = emitNewInstrCns(attr, imm);
7674
7771
7675
7772
id->idIns(ins);
@@ -7901,6 +7998,22 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
7901
7998
7902
7999
assert(fmt != IF_NONE);
7903
8000
8001
+ if (emitComp->opts.OptimizationEnabled() && IsOptimisableLdrStr(ins, reg1, reg2, imm, size, fmt))
8002
+ {
8003
+ regNumber oldReg1 = emitLastIns->idReg1();
8004
+ ssize_t oldImm =
8005
+ emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns();
8006
+ instruction optIns = (ins == INS_ldr) ? INS_ldp : INS_stp;
8007
+ ssize_t scaledOldImm = oldImm * size;
8008
+
8009
+ // Overwrite the "sub-optimal" instruction with the *optimised* instruction, directly
8010
+ // into the output buffer.
8011
+ emitIns_R_R_R_I(optIns, attr, oldReg1, reg1, reg2, scaledOldImm, INS_OPTS_NONE, EA_UNKNOWN, emitLastIns);
8012
+
8013
+ // And now stop here, as the second instruction descriptor is no longer emitted.
8014
+ return;
8015
+ }
8016
+
7904
8017
instrDesc* id = emitNewInstrCns(attr, imm);
7905
8018
7906
8019
id->idIns(ins);
@@ -16128,4 +16241,100 @@ bool emitter::IsRedundantLdStr(
16128
16241
16129
16242
return false;
16130
16243
}
16244
+
16245
+ //-----------------------------------------------------------------------------------
16246
+ // IsOptimisableLdrStr: Check if it is possible to optimise two "ldr" or "str"
16247
+ // instructions into a single "ldp" or "stp" instruction.
16248
+ //
16249
+ // Arguments:
16250
+ // ins - The instruction code
16251
+ // reg1 - Register 1 number
16252
+ // reg2 - Register 2 number
16253
+ // imm - Immediate offset, prior to scaling by operand size
16254
+ // size - Operand size
16255
+ // fmt - Instruction format
16256
+ //
16257
+
16258
+ bool emitter::IsOptimisableLdrStr(
16259
+ instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt)
16260
+ {
16261
+ bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0);
16262
+
16263
+ if (((ins != INS_ldr) && (ins != INS_str)) || (isFirstInstrInBlock) || (emitLastIns == nullptr))
16264
+ {
16265
+ return false;
16266
+ }
16267
+
16268
+ regNumber prevReg1 = emitLastIns->idReg1();
16269
+ regNumber prevReg2 = emitLastIns->idReg2();
16270
+ insFormat lastInsFmt = emitLastIns->idInsFmt();
16271
+ emitAttr prevSize = emitLastIns->idOpSize();
16272
+ ssize_t prevImm = emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns();
16273
+
16274
+ // Signed, *raw* immediate value fits in 7 bits, so
16275
+ // for LDP/ STP the raw value is from -64 to +63.
16276
+ // For LDR/ STR, there are 9 bits, so we need to
16277
+ // limit the range explicitly in software.
16278
+ if ((imm < -64) || (imm > 63) || (prevImm < -64) || (prevImm > 63))
16279
+ {
16280
+ // Then one or more of the immediate values is
16281
+ // out of range, so we cannot optimise.
16282
+ return false;
16283
+ }
16284
+
16285
+ if ((!isGeneralRegisterOrZR(reg1)) || (!isGeneralRegisterOrZR(prevReg1)))
16286
+ {
16287
+ // Either register 1 is not a general register
16288
+ // or previous register 1 is not a general register
16289
+ // or the zero register, so we cannot optimise.
16290
+ return false;
16291
+ }
16292
+
16293
+ if (!((ins == emitLastIns->idIns()) && (ins == INS_ldr || ins == INS_str)))
16294
+ {
16295
+ // Not successive ldr or str instructions
16296
+ return false;
16297
+ }
16298
+
16299
+ if (lastInsFmt != fmt)
16300
+ {
16301
+ // The formats of the two instructions differ.
16302
+ return false;
16303
+ }
16304
+
16305
+ if ((emitInsIsLoad(ins)) && (reg1 == prevReg1))
16306
+ {
16307
+ // Cannot load to the same register twice.
16308
+ return false;
16309
+ }
16310
+
16311
+ if (prevSize != size)
16312
+ {
16313
+ // Operand sizes differ.
16314
+ return false;
16315
+ }
16316
+
16317
+ if (imm != (prevImm + 1))
16318
+ {
16319
+ // Not consecutive immediate values.
16320
+ return false;
16321
+ }
16322
+
16323
+ if (emitSizeOfInsDsc(emitLastIns) != sizeof(instrDesc))
16324
+ {
16325
+ // Not instruction descriptors of the
16326
+ // same, standard size.
16327
+ return false;
16328
+ }
16329
+
16330
+ if (!((reg2 == prevReg2) && isGeneralRegisterOrSP(reg2)))
16331
+ {
16332
+ // The "register 2" numbers need to be
16333
+ // the same AND general registers or
16334
+ // the stack pointer.
16335
+ return false;
16336
+ }
16337
+ return true;
16338
+ }
16339
+
16131
16340
#endif // defined(TARGET_ARM64)
0 commit comments