@@ -89,7 +89,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
89
89
// Assuming ECC is enabled is the conservative default.
90
90
SmallString<256 > FullFS (" +promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack," );
91
91
92
- if (isAmdHsaOS ()) // Turn on FlatForGlobal for HSA.
92
+ // Turn on features that HSA ABI requires. Also turn on FlatForGlobal by default.
93
+ if (isAmdHsaOS ())
93
94
FullFS += " +flat-for-global,+unaligned-access-mode,+trap-handler," ;
94
95
95
96
FullFS += " +enable-prt-strict-null," ; // This is overridden by a disable in FS
@@ -108,15 +109,36 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
108
109
109
110
ParseSubtargetFeatures (GPU, /* TuneCPU*/ GPU, FullFS);
110
111
112
+ // Implement the "generic" processors, which act as the default when no
113
+ // generation features are enabled (e.g. for -mcpu=''). HSA OS defaults to
114
+ // the first amdgcn target that supports flat addressing. Other OSes default
115
+ // to the first amdgcn target.
116
+ if (Gen == AMDGPUSubtarget::INVALID) {
117
+ Gen = TT.getOS () == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS
118
+ : AMDGPUSubtarget::SOUTHERN_ISLANDS;
119
+ }
120
+
111
121
// We don't support FP64 for EG/NI atm.
112
122
assert (!hasFP64 () || (getGeneration () >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
113
123
114
- // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
115
- // on VI and newer hardware to avoid assertion failures due to missing ADDR64
116
- // variants of MUBUF instructions.
117
- if (!hasAddr64 () && !FS.contains (" flat-for-global" )) {
124
+ // Targets must either support 64-bit offsets for MUBUF instructions, and/or
125
+ // support flat operations, otherwise they cannot access a 64-bit global
126
+ // address space.
127
+ assert (hasAddr64 () || hasFlat ());
128
+ // Unless +-flat-for-global is specified, turn on FlatForGlobal for targets
129
+ // that do not support ADDR64 variants of MUBUF instructions. Such targets
130
+ // cannot use a 64 bit offset with a MUBUF instruction to access the global
131
+ // address space.
132
+ if (!hasAddr64 () && !FS.contains (" flat-for-global" ) && !FlatForGlobal) {
133
+ ToggleFeature (AMDGPU::FeatureFlatForGlobal);
118
134
FlatForGlobal = true ;
119
135
}
136
+ // Unless +-flat-for-global is specified, use MUBUF instructions for global
137
+ // address space access if flat operations are not available.
138
+ if (!hasFlat () && !FS.contains (" flat-for-global" ) && FlatForGlobal) {
139
+ ToggleFeature (AMDGPU::FeatureFlatForGlobal);
140
+ FlatForGlobal = false ;
141
+ }
120
142
121
143
// Set defaults if needed.
122
144
if (MaxPrivateElementSize == 0 )
@@ -182,7 +204,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
182
204
AMDGPUGenSubtargetInfo(TT, GPU, /* TuneCPU*/ GPU, FS),
183
205
AMDGPUSubtarget(TT),
184
206
TargetTriple(TT),
185
- Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS ),
207
+ Gen(INVALID ),
186
208
InstrItins(getInstrItineraryForCPU(GPU)),
187
209
LDSBankCount(0 ),
188
210
MaxPrivateElementSize(0 ),
0 commit comments