Skip to content

Commit 2a52876

Browse files
committed
[AVR] Fix an issue of writing 16-bit ports
For 16-bit ports, the normal devices reqiure writing high byte first and then low byte. But the XMEGA devices require the reverse order. Fixes #58395 Reviewed By: aykevl, jacquesguan Differential Revision: https://reviews.llvm.org/D141752
1 parent 82fdd5b commit 2a52876

21 files changed

+225
-133
lines changed

llvm/lib/Target/AVR/AVRDevices.td

+9-2
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,12 @@ def FeatureTinyEncoding
120120
"The device has Tiny core specific "
121121
"instruction encodings">;
122122

123+
// When writing a 16-bit port or storing a 16-bit word, do the low byte first.
124+
def FeatureLowByteFirst
125+
: SubtargetFeature<"lowbytefirst", "m_hasLowByteFirst", "true",
126+
"Do the low byte first when writing a 16-bit port or "
127+
"storing a 16-bit word">;
128+
123129
// The device has CPU registers mapped in data address space
124130
def FeatureMMR : SubtargetFeature<"memmappedregs", "m_hasMemMappedGPR", "true",
125131
"The device has CPU registers "
@@ -195,14 +201,15 @@ def FamilyXMEGA3 : Family<"xmega3",
195201
[FamilyAVR0, FeatureLPM, FeatureIJMPCALL,
196202
FeatureADDSUBIW, FeatureSRAM, FeatureJMPCALL,
197203
FeatureMultiplication, FeatureMOVW, FeatureLPMX,
198-
FeatureBREAK]>;
204+
FeatureBREAK, FeatureLowByteFirst]>;
199205

200206
def FamilyXMEGA : Family<"xmega",
201207
[FamilyAVR0, FeatureLPM, FeatureIJMPCALL,
202208
FeatureADDSUBIW, FeatureSRAM, FeatureJMPCALL,
203209
FeatureMultiplication, FeatureMOVW, FeatureLPMX,
204210
FeatureSPM, FeatureBREAK, FeatureEIJMPCALL,
205-
FeatureSPMX, FeatureDES, FeatureELPM, FeatureELPMX]>;
211+
FeatureSPMX, FeatureDES, FeatureELPM, FeatureELPMX,
212+
FeatureLowByteFirst]>;
206213

207214
def FamilyXMEGAU : Family<"xmegau", [FamilyXMEGA, FeatureRMW]>;
208215

llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp

+91-49
Original file line numberDiff line numberDiff line change
@@ -1057,45 +1057,66 @@ bool AVRExpandPseudo::expand<AVR::AtomicFence>(Block &MBB, BlockIt MBBI) {
10571057

10581058
template <>
10591059
bool AVRExpandPseudo::expand<AVR::STSWKRr>(Block &MBB, BlockIt MBBI) {
1060+
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
10601061
MachineInstr &MI = *MBBI;
10611062
Register SrcLoReg, SrcHiReg;
10621063
Register SrcReg = MI.getOperand(1).getReg();
10631064
bool SrcIsKill = MI.getOperand(1).isKill();
1064-
unsigned OpLo = AVR::STSKRr;
1065-
unsigned OpHi = AVR::STSKRr;
10661065
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
10671066

1068-
// Write the high byte first in case this address belongs to a special
1069-
// I/O address with a special temporary register.
1070-
auto MIBHI = buildMI(MBB, MBBI, OpHi);
1071-
auto MIBLO = buildMI(MBB, MBBI, OpLo);
1067+
auto MIB0 = buildMI(MBB, MBBI, AVR::STSKRr);
1068+
auto MIB1 = buildMI(MBB, MBBI, AVR::STSKRr);
10721069

10731070
switch (MI.getOperand(0).getType()) {
10741071
case MachineOperand::MO_GlobalAddress: {
10751072
const GlobalValue *GV = MI.getOperand(0).getGlobal();
10761073
int64_t Offs = MI.getOperand(0).getOffset();
10771074
unsigned TF = MI.getOperand(0).getTargetFlags();
10781075

1079-
MIBLO.addGlobalAddress(GV, Offs, TF);
1080-
MIBHI.addGlobalAddress(GV, Offs + 1, TF);
1076+
if (STI.hasLowByteFirst()) {
1077+
// Write the low byte first for XMEGA devices.
1078+
MIB0.addGlobalAddress(GV, Offs, TF);
1079+
MIB1.addGlobalAddress(GV, Offs + 1, TF);
1080+
} else {
1081+
// Write the high byte first for traditional devices.
1082+
MIB0.addGlobalAddress(GV, Offs + 1, TF);
1083+
MIB1.addGlobalAddress(GV, Offs, TF);
1084+
}
1085+
10811086
break;
10821087
}
10831088
case MachineOperand::MO_Immediate: {
10841089
unsigned Imm = MI.getOperand(0).getImm();
10851090

1086-
MIBLO.addImm(Imm);
1087-
MIBHI.addImm(Imm + 1);
1091+
if (STI.hasLowByteFirst()) {
1092+
// Write the low byte first for XMEGA devices.
1093+
MIB0.addImm(Imm);
1094+
MIB1.addImm(Imm + 1);
1095+
} else {
1096+
// Write the high byte first for traditional devices.
1097+
MIB0.addImm(Imm + 1);
1098+
MIB1.addImm(Imm);
1099+
}
1100+
10881101
break;
10891102
}
10901103
default:
10911104
llvm_unreachable("Unknown operand type!");
10921105
}
10931106

1094-
MIBLO.addReg(SrcLoReg, getKillRegState(SrcIsKill));
1095-
MIBHI.addReg(SrcHiReg, getKillRegState(SrcIsKill));
1096-
1097-
MIBLO.setMemRefs(MI.memoperands());
1098-
MIBHI.setMemRefs(MI.memoperands());
1107+
if (STI.hasLowByteFirst()) {
1108+
// Write the low byte first for XMEGA devices.
1109+
MIB0.addReg(SrcLoReg, getKillRegState(SrcIsKill))
1110+
.setMemRefs(MI.memoperands());
1111+
MIB1.addReg(SrcHiReg, getKillRegState(SrcIsKill))
1112+
.setMemRefs(MI.memoperands());
1113+
} else {
1114+
// Write the high byte first for traditional devices.
1115+
MIB0.addReg(SrcHiReg, getKillRegState(SrcIsKill))
1116+
.setMemRefs(MI.memoperands());
1117+
MIB1.addReg(SrcLoReg, getKillRegState(SrcIsKill))
1118+
.setMemRefs(MI.memoperands());
1119+
}
10991120

11001121
MI.eraseFromParent();
11011122
return true;
@@ -1126,16 +1147,27 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
11261147
} else {
11271148
Register SrcLoReg, SrcHiReg;
11281149
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
1129-
buildMI(MBB, MBBI, AVR::STPtrRr)
1130-
.addReg(DstReg, getUndefRegState(DstIsUndef))
1131-
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
1132-
.setMemRefs(MI.memoperands());
1133-
1134-
buildMI(MBB, MBBI, AVR::STDPtrQRr)
1135-
.addReg(DstReg, getUndefRegState(DstIsUndef))
1136-
.addImm(1)
1137-
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
1138-
.setMemRefs(MI.memoperands());
1150+
if (STI.hasLowByteFirst()) {
1151+
buildMI(MBB, MBBI, AVR::STPtrRr)
1152+
.addReg(DstReg, getUndefRegState(DstIsUndef))
1153+
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
1154+
.setMemRefs(MI.memoperands());
1155+
buildMI(MBB, MBBI, AVR::STDPtrQRr)
1156+
.addReg(DstReg, getUndefRegState(DstIsUndef))
1157+
.addImm(1)
1158+
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
1159+
.setMemRefs(MI.memoperands());
1160+
} else {
1161+
buildMI(MBB, MBBI, AVR::STDPtrQRr)
1162+
.addReg(DstReg, getUndefRegState(DstIsUndef))
1163+
.addImm(1)
1164+
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
1165+
.setMemRefs(MI.memoperands());
1166+
buildMI(MBB, MBBI, AVR::STPtrRr)
1167+
.addReg(DstReg, getUndefRegState(DstIsUndef))
1168+
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
1169+
.setMemRefs(MI.memoperands());
1170+
}
11391171
}
11401172

11411173
MI.eraseFromParent();
@@ -1252,23 +1284,32 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
12521284
.addImm(Imm + 2);
12531285
}
12541286
} else {
1255-
unsigned OpLo = AVR::STDPtrQRr;
1256-
unsigned OpHi = AVR::STDPtrQRr;
12571287
Register SrcLoReg, SrcHiReg;
12581288
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
12591289

1260-
auto MIBLO = buildMI(MBB, MBBI, OpLo)
1261-
.addReg(DstReg)
1262-
.addImm(Imm)
1263-
.addReg(SrcLoReg, getKillRegState(SrcIsKill));
1264-
1265-
auto MIBHI = buildMI(MBB, MBBI, OpHi)
1266-
.addReg(DstReg, getKillRegState(DstIsKill))
1267-
.addImm(Imm + 1)
1268-
.addReg(SrcHiReg, getKillRegState(SrcIsKill));
1269-
1270-
MIBLO.setMemRefs(MI.memoperands());
1271-
MIBHI.setMemRefs(MI.memoperands());
1290+
if (STI.hasLowByteFirst()) {
1291+
buildMI(MBB, MBBI, AVR::STDPtrQRr)
1292+
.addReg(DstReg)
1293+
.addImm(Imm)
1294+
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
1295+
.setMemRefs(MI.memoperands());
1296+
buildMI(MBB, MBBI, AVR::STDPtrQRr)
1297+
.addReg(DstReg, getKillRegState(DstIsKill))
1298+
.addImm(Imm + 1)
1299+
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
1300+
.setMemRefs(MI.memoperands());
1301+
} else {
1302+
buildMI(MBB, MBBI, AVR::STDPtrQRr)
1303+
.addReg(DstReg)
1304+
.addImm(Imm + 1)
1305+
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
1306+
.setMemRefs(MI.memoperands());
1307+
buildMI(MBB, MBBI, AVR::STDPtrQRr)
1308+
.addReg(DstReg, getKillRegState(DstIsKill))
1309+
.addImm(Imm)
1310+
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
1311+
.setMemRefs(MI.memoperands());
1312+
}
12721313
}
12731314

12741315
MI.eraseFromParent();
@@ -1347,27 +1388,28 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
13471388

13481389
template <>
13491390
bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
1391+
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
13501392
MachineInstr &MI = *MBBI;
13511393
Register SrcLoReg, SrcHiReg;
13521394
unsigned Imm = MI.getOperand(0).getImm();
13531395
Register SrcReg = MI.getOperand(1).getReg();
13541396
bool SrcIsKill = MI.getOperand(1).isKill();
1355-
unsigned OpLo = AVR::OUTARr;
1356-
unsigned OpHi = AVR::OUTARr;
13571397
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
13581398

13591399
// Since we add 1 to the Imm value for the high byte below, and 63 is the
13601400
// highest Imm value allowed for the instruction, 62 is the limit here.
13611401
assert(Imm <= 62 && "Address is out of range");
13621402

1363-
// 16 bit I/O writes need the high byte first
1364-
auto MIBHI = buildMI(MBB, MBBI, OpHi)
1365-
.addImm(Imm + 1)
1366-
.addReg(SrcHiReg, getKillRegState(SrcIsKill));
1367-
1368-
auto MIBLO = buildMI(MBB, MBBI, OpLo)
1369-
.addImm(Imm)
1370-
.addReg(SrcLoReg, getKillRegState(SrcIsKill));
1403+
// 16 bit I/O writes need the high byte first on normal AVR devices,
1404+
// and in reverse order for the XMEGA/XMEGA3/XMEGAU families.
1405+
auto MIBHI = buildMI(MBB, MBBI, AVR::OUTARr)
1406+
.addImm(STI.hasLowByteFirst() ? Imm : Imm + 1)
1407+
.addReg(STI.hasLowByteFirst() ? SrcLoReg : SrcHiReg,
1408+
getKillRegState(SrcIsKill));
1409+
auto MIBLO = buildMI(MBB, MBBI, AVR::OUTARr)
1410+
.addImm(STI.hasLowByteFirst() ? Imm + 1 : Imm)
1411+
.addReg(STI.hasLowByteFirst() ? SrcHiReg : SrcLoReg,
1412+
getKillRegState(SrcIsKill));
13711413

13721414
MIBLO.setMemRefs(MI.memoperands());
13731415
MIBHI.setMemRefs(MI.memoperands());

llvm/lib/Target/AVR/AVRISelLowering.cpp

+8-2
Original file line numberDiff line numberDiff line change
@@ -1128,9 +1128,15 @@ bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
11281128
return false;
11291129
} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
11301130
VT = ST->getMemoryVT();
1131-
if (AVR::isProgramMemoryAccess(ST)) {
1131+
// We can not store to program memory.
1132+
if (AVR::isProgramMemoryAccess(ST))
1133+
return false;
1134+
// Since the high byte need to be stored first, we can not emit
1135+
// i16 post increment store like:
1136+
// st X+, r24
1137+
// st X+, r25
1138+
if (VT == MVT::i16 && !Subtarget.hasLowByteFirst())
11321139
return false;
1133-
}
11341140
} else {
11351141
return false;
11361142
}

llvm/lib/Target/AVR/AVRSubtarget.h

+2
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ class AVRSubtarget : public AVRGenSubtargetInfo {
8181
bool hasBREAK() const { return m_hasBREAK; }
8282
bool hasTinyEncoding() const { return m_hasTinyEncoding; }
8383
bool hasMemMappedGPR() const { return m_hasMemMappedGPR; }
84+
bool hasLowByteFirst() const { return m_hasLowByteFirst; }
8485

8586
uint8_t getIORegisterOffset() const { return hasMemMappedGPR() ? 0x20 : 0x0; }
8687

@@ -134,6 +135,7 @@ class AVRSubtarget : public AVRGenSubtargetInfo {
134135
bool m_supportsMultiplication = false;
135136
bool m_hasBREAK = false;
136137
bool m_hasTinyEncoding = false;
138+
bool m_hasLowByteFirst = false;
137139
bool m_hasMemMappedGPR = false;
138140

139141
// Dummy member, used by FeatureSet's. We cannot have a SubtargetFeature with

llvm/test/CodeGen/AVR/PR37143.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
; CHECK: ld {{r[0-9]+}}, [[PTR:[XYZ]]]
44
; CHECK: ldd {{r[0-9]+}}, [[PTR]]+1
5-
; CHECK: st [[PTR2:[XYZ]]], {{r[0-9]+}}
6-
; CHECK: std [[PTR2]]+1, {{r[0-9]+}}
5+
; CHECK: std [[PTR2:[XYZ]]]+1, {{r[0-9]+}}
6+
; CHECK: st [[PTR2]], {{r[0-9]+}}
77
define void @load_store_16(i16* nocapture %ptr) local_unnamed_addr #1 {
88
entry:
99
%0 = load i16, i16* %ptr, align 2

llvm/test/CodeGen/AVR/alloca.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,12 @@ define i16 @alloca_write(i16 %x) {
4646
entry:
4747
; CHECK-LABEL: alloca_write:
4848
; Small offset here
49-
; CHECK: std Y+23, {{.*}}
5049
; CHECK: std Y+24, {{.*}}
50+
; CHECK: std Y+23, {{.*}}
5151
; Big offset here
5252
; CHECK: adiw r28, 57
53-
; CHECK: std Y+62, {{.*}}
5453
; CHECK: std Y+63, {{.*}}
54+
; CHECK: std Y+62, {{.*}}
5555
; CHECK: sbiw r28, 57
5656
%p = alloca [15 x i16]
5757
%k = alloca [14 x i16]
@@ -71,8 +71,8 @@ define void @alloca_write_huge() {
7171
; CHECK-LABEL: alloca_write_huge:
7272
; CHECK: subi r28, 41
7373
; CHECK: sbci r29, 255
74-
; CHECK: std Y+62, {{.*}}
7574
; CHECK: std Y+63, {{.*}}
75+
; CHECK: std Y+62, {{.*}}
7676
; CHECK: subi r28, 215
7777
; CHECK: sbci r29, 0
7878
%k = alloca [140 x i16]

llvm/test/CodeGen/AVR/atomics/load16.ll

+5-5
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ define i16 @atomic_load_cmp_swap16(i16* %foo) {
3333
; CHECK-NEXT: ldd [[RDH:r[0-9]+]], [[RR]]+1
3434
; CHECK-NEXT: add [[RR1L:r[0-9]+]], [[RDL]]
3535
; CHECK-NEXT: adc [[RR1H:r[0-9]+]], [[RDH]]
36-
; CHECK-NEXT: st [[RR]], [[RR1L]]
3736
; CHECK-NEXT: std [[RR]]+1, [[RR1H]]
37+
; CHECK-NEXT: st [[RR]], [[RR1L]]
3838
; CHECK-NEXT: out 63, r0
3939
define i16 @atomic_load_add16(i16* %foo) {
4040
%val = atomicrmw add i16* %foo, i16 13 seq_cst
@@ -49,8 +49,8 @@ define i16 @atomic_load_add16(i16* %foo) {
4949
; CHECK-NEXT: movw [[TMPL:r[0-9]+]], [[RDL]]
5050
; CHECK-NEXT: sub [[TMPL]], [[RR1L:r[0-9]+]]
5151
; CHECK-NEXT: sbc [[TMPH:r[0-9]+]], [[RR1H:r[0-9]+]]
52-
; CHECK-NEXT: st [[RR]], [[TMPL]]
5352
; CHECK-NEXT: std [[RR]]+1, [[TMPH]]
53+
; CHECK-NEXT: st [[RR]], [[TMPL]]
5454
; CHECK-NEXT: out 63, r0
5555
define i16 @atomic_load_sub16(i16* %foo) {
5656
%val = atomicrmw sub i16* %foo, i16 13 seq_cst
@@ -64,8 +64,8 @@ define i16 @atomic_load_sub16(i16* %foo) {
6464
; CHECK-NEXT: ldd [[RDH:r[0-9]+]], [[RR]]+1
6565
; CHECK-NEXT: and [[RD1L:r[0-9]+]], [[RDL]]
6666
; CHECK-NEXT: and [[RD1H:r[0-9]+]], [[RDH]]
67-
; CHECK-NEXT: st [[RR]], [[RD1L]]
6867
; CHECK-NEXT: std [[RR]]+1, [[RD1H]]
68+
; CHECK-NEXT: st [[RR]], [[RD1L]]
6969
; CHECK-NEXT: out 63, r0
7070
define i16 @atomic_load_and16(i16* %foo) {
7171
%val = atomicrmw and i16* %foo, i16 13 seq_cst
@@ -79,8 +79,8 @@ define i16 @atomic_load_and16(i16* %foo) {
7979
; CHECK-NEXT: ldd [[RDH:r[0-9]+]], [[RR]]+1
8080
; CHECK-NEXT: or [[RD1L:r[0-9]+]], [[RDL]]
8181
; CHECK-NEXT: or [[RD1H:r[0-9]+]], [[RDH]]
82-
; CHECK-NEXT: st [[RR]], [[RD1L]]
8382
; CHECK-NEXT: std [[RR]]+1, [[RD1H]]
83+
; CHECK-NEXT: st [[RR]], [[RD1L]]
8484
; CHECK-NEXT: out 63, r0
8585
define i16 @atomic_load_or16(i16* %foo) {
8686
%val = atomicrmw or i16* %foo, i16 13 seq_cst
@@ -94,8 +94,8 @@ define i16 @atomic_load_or16(i16* %foo) {
9494
; CHECK-NEXT: ldd [[RDH:r[0-9]+]], [[RR]]+1
9595
; CHECK-NEXT: eor [[RD1L:r[0-9]+]], [[RDL]]
9696
; CHECK-NEXT: eor [[RD1H:r[0-9]+]], [[RDH]]
97-
; CHECK-NEXT: st [[RR]], [[RD1L]]
9897
; CHECK-NEXT: std [[RR]]+1, [[RD1H]]
98+
; CHECK-NEXT: st [[RR]], [[RD1L]]
9999
; CHECK-NEXT: out 63, r0
100100
define i16 @atomic_load_xor16(i16* %foo) {
101101
%val = atomicrmw xor i16* %foo, i16 13 seq_cst

llvm/test/CodeGen/AVR/atomics/store.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ define void @atomic_store8(i8* %foo) {
1313
; CHECK-LABEL: atomic_store16
1414
; CHECK: in r0, 63
1515
; CHECK-NEXT: cli
16-
; CHECK-NEXT: st [[RD:(X|Y|Z)]], [[RR:r[0-9]+]]
17-
; CHECK-NEXT: std [[RD]]+1, [[RR:r[0-9]+]]
16+
; CHECK-NEXT: std [[RD:(X|Y|Z)]]+1, [[RR:r[0-9]+]]
17+
; CHECK-NEXT: st [[RD]], [[RR:r[0-9]+]]
1818
; CHECK-NEXT: out 63, r0
1919
define void @atomic_store16(i16* %foo) {
2020
store atomic i16 1, i16* %foo unordered, align 2

llvm/test/CodeGen/AVR/atomics/store16.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
; CHECK-LABEL: atomic_store16
44
; CHECK: in r0, 63
55
; CHECK-NEXT: cli
6-
; CHECK-NEXT: st [[RD:(X|Y|Z)]], [[RR:r[0-9]+]]
76
; CHECK-NEXT: std [[RD:(X|Y|Z)]]+1, [[RR:r[0-9]+]]
7+
; CHECK-NEXT: st [[RD:(X|Y|Z)]], [[RR:r[0-9]+]]
88
; CHECK-NEXT: out 63, r0
99
define void @atomic_store16(i16* %foo) {
1010
store atomic i16 1, i16* %foo unordered, align 2
@@ -14,8 +14,8 @@ define void @atomic_store16(i16* %foo) {
1414
; CHECK-LABEL: monotonic
1515
; CHECK: in r0, 63
1616
; CHECK-NEXT: cli
17-
; CHECK-NEXT: st Z, r24
1817
; CHECK-NEXT: std Z+1, r25
18+
; CHECK-NEXT: st Z, r24
1919
; CHECK-NEXT: out 63, r0
2020
define void @monotonic(i16) {
2121
entry-block:

0 commit comments

Comments
 (0)