1 //===- AMDGPUMCExpr.cpp - AMDGPU specific MC expression classes -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "AMDGPUMCExpr.h"
10 #include "GCNSubtarget.h"
11 #include "Utils/AMDGPUBaseInfo.h"
12 #include "llvm/IR/Function.h"
13 #include "llvm/MC/MCAssembler.h"
14 #include "llvm/MC/MCContext.h"
15 #include "llvm/MC/MCStreamer.h"
16 #include "llvm/MC/MCSymbol.h"
17 #include "llvm/MC/MCValue.h"
18 #include "llvm/Support/Allocator.h"
19 #include "llvm/Support/raw_ostream.h"
20 #include <optional>
21
22 using namespace llvm;
23 using namespace llvm::AMDGPU;
24
AMDGPUMCExpr(VariantKind Kind,ArrayRef<const MCExpr * > Args,MCContext & Ctx)25 AMDGPUMCExpr::AMDGPUMCExpr(VariantKind Kind, ArrayRef<const MCExpr *> Args,
26 MCContext &Ctx)
27 : Kind(Kind), Ctx(Ctx) {
28 assert(Args.size() >= 1 && "Needs a minimum of one expression.");
29 assert(Kind != AGVK_None && "Cannot construct AMDGPUMCExpr of kind none.");
30
31 // Allocating the variadic arguments through the same allocation mechanism
32 // that the object itself is allocated with so they end up in the same memory.
33 //
34 // Will result in an asan failure if allocated on the heap through standard
35 // allocation (e.g., through SmallVector's grow).
36 RawArgs = static_cast<const MCExpr **>(
37 Ctx.allocate(sizeof(const MCExpr *) * Args.size()));
38 std::uninitialized_copy(Args.begin(), Args.end(), RawArgs);
39 this->Args = ArrayRef<const MCExpr *>(RawArgs, Args.size());
40 }
41
~AMDGPUMCExpr()42 AMDGPUMCExpr::~AMDGPUMCExpr() { Ctx.deallocate(RawArgs); }
43
create(VariantKind Kind,ArrayRef<const MCExpr * > Args,MCContext & Ctx)44 const AMDGPUMCExpr *AMDGPUMCExpr::create(VariantKind Kind,
45 ArrayRef<const MCExpr *> Args,
46 MCContext &Ctx) {
47 return new (Ctx) AMDGPUMCExpr(Kind, Args, Ctx);
48 }
49
getSubExpr(size_t Index) const50 const MCExpr *AMDGPUMCExpr::getSubExpr(size_t Index) const {
51 assert(Index < Args.size() && "Indexing out of bounds AMDGPUMCExpr sub-expr");
52 return Args[Index];
53 }
54
printImpl(raw_ostream & OS,const MCAsmInfo * MAI) const55 void AMDGPUMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
56 switch (Kind) {
57 default:
58 llvm_unreachable("Unknown AMDGPUMCExpr kind.");
59 case AGVK_Or:
60 OS << "or(";
61 break;
62 case AGVK_Max:
63 OS << "max(";
64 break;
65 case AGVK_ExtraSGPRs:
66 OS << "extrasgprs(";
67 break;
68 case AGVK_TotalNumVGPRs:
69 OS << "totalnumvgprs(";
70 break;
71 case AGVK_AlignTo:
72 OS << "alignto(";
73 break;
74 case AGVK_Occupancy:
75 OS << "occupancy(";
76 break;
77 }
78 for (auto It = Args.begin(); It != Args.end(); ++It) {
79 (*It)->print(OS, MAI, /*InParens=*/false);
80 if ((It + 1) != Args.end())
81 OS << ", ";
82 }
83 OS << ')';
84 }
85
op(AMDGPUMCExpr::VariantKind Kind,int64_t Arg1,int64_t Arg2)86 static int64_t op(AMDGPUMCExpr::VariantKind Kind, int64_t Arg1, int64_t Arg2) {
87 switch (Kind) {
88 default:
89 llvm_unreachable("Unknown AMDGPUMCExpr kind.");
90 case AMDGPUMCExpr::AGVK_Max:
91 return std::max(Arg1, Arg2);
92 case AMDGPUMCExpr::AGVK_Or:
93 return Arg1 | Arg2;
94 }
95 }
96
evaluateExtraSGPRs(MCValue & Res,const MCAssembler * Asm,const MCFixup * Fixup) const97 bool AMDGPUMCExpr::evaluateExtraSGPRs(MCValue &Res, const MCAssembler *Asm,
98 const MCFixup *Fixup) const {
99 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
100 MCValue MCVal;
101 if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute())
102 return false;
103
104 ConstantValue = MCVal.getConstant();
105 return true;
106 };
107
108 assert(Args.size() == 3 &&
109 "AMDGPUMCExpr Argument count incorrect for ExtraSGPRs");
110 const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
111 uint64_t VCCUsed = 0, FlatScrUsed = 0, XNACKUsed = 0;
112
113 bool Success = TryGetMCExprValue(Args[2], XNACKUsed);
114
115 assert(Success && "Arguments 3 for ExtraSGPRs should be a known constant");
116 if (!Success || !TryGetMCExprValue(Args[0], VCCUsed) ||
117 !TryGetMCExprValue(Args[1], FlatScrUsed))
118 return false;
119
120 uint64_t ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
121 STI, (bool)VCCUsed, (bool)FlatScrUsed, (bool)XNACKUsed);
122 Res = MCValue::get(ExtraSGPRs);
123 return true;
124 }
125
evaluateTotalNumVGPR(MCValue & Res,const MCAssembler * Asm,const MCFixup * Fixup) const126 bool AMDGPUMCExpr::evaluateTotalNumVGPR(MCValue &Res, const MCAssembler *Asm,
127 const MCFixup *Fixup) const {
128 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
129 MCValue MCVal;
130 if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute())
131 return false;
132
133 ConstantValue = MCVal.getConstant();
134 return true;
135 };
136 assert(Args.size() == 2 &&
137 "AMDGPUMCExpr Argument count incorrect for TotalNumVGPRs");
138 const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
139 uint64_t NumAGPR = 0, NumVGPR = 0;
140
141 bool Has90AInsts = AMDGPU::isGFX90A(*STI);
142
143 if (!TryGetMCExprValue(Args[0], NumAGPR) ||
144 !TryGetMCExprValue(Args[1], NumVGPR))
145 return false;
146
147 uint64_t TotalNum = Has90AInsts && NumAGPR ? alignTo(NumVGPR, 4) + NumAGPR
148 : std::max(NumVGPR, NumAGPR);
149 Res = MCValue::get(TotalNum);
150 return true;
151 }
152
evaluateAlignTo(MCValue & Res,const MCAssembler * Asm,const MCFixup * Fixup) const153 bool AMDGPUMCExpr::evaluateAlignTo(MCValue &Res, const MCAssembler *Asm,
154 const MCFixup *Fixup) const {
155 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
156 MCValue MCVal;
157 if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute())
158 return false;
159
160 ConstantValue = MCVal.getConstant();
161 return true;
162 };
163
164 assert(Args.size() == 2 &&
165 "AMDGPUMCExpr Argument count incorrect for AlignTo");
166 uint64_t Value = 0, Align = 0;
167 if (!TryGetMCExprValue(Args[0], Value) || !TryGetMCExprValue(Args[1], Align))
168 return false;
169
170 Res = MCValue::get(alignTo(Value, Align));
171 return true;
172 }
173
evaluateOccupancy(MCValue & Res,const MCAssembler * Asm,const MCFixup * Fixup) const174 bool AMDGPUMCExpr::evaluateOccupancy(MCValue &Res, const MCAssembler *Asm,
175 const MCFixup *Fixup) const {
176 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
177 MCValue MCVal;
178 if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute())
179 return false;
180
181 ConstantValue = MCVal.getConstant();
182 return true;
183 };
184 assert(Args.size() == 7 &&
185 "AMDGPUMCExpr Argument count incorrect for Occupancy");
186 uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation,
187 NumSGPRs, NumVGPRs;
188
189 bool Success = true;
190 Success &= TryGetMCExprValue(Args[0], MaxWaves);
191 Success &= TryGetMCExprValue(Args[1], Granule);
192 Success &= TryGetMCExprValue(Args[2], TargetTotalNumVGPRs);
193 Success &= TryGetMCExprValue(Args[3], Generation);
194 Success &= TryGetMCExprValue(Args[4], InitOccupancy);
195
196 assert(Success && "Arguments 1 to 5 for Occupancy should be known constants");
197
198 if (!Success || !TryGetMCExprValue(Args[5], NumSGPRs) ||
199 !TryGetMCExprValue(Args[6], NumVGPRs))
200 return false;
201
202 unsigned Occupancy = InitOccupancy;
203 if (NumSGPRs)
204 Occupancy = std::min(
205 Occupancy, IsaInfo::getOccupancyWithNumSGPRs(
206 NumSGPRs, MaxWaves,
207 static_cast<AMDGPUSubtarget::Generation>(Generation)));
208 if (NumVGPRs)
209 Occupancy = std::min(Occupancy,
210 IsaInfo::getNumWavesPerEUWithNumVGPRs(
211 NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs));
212
213 Res = MCValue::get(Occupancy);
214 return true;
215 }
216
evaluateAsRelocatableImpl(MCValue & Res,const MCAssembler * Asm,const MCFixup * Fixup) const217 bool AMDGPUMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
218 const MCAssembler *Asm,
219 const MCFixup *Fixup) const {
220 std::optional<int64_t> Total;
221 switch (Kind) {
222 default:
223 break;
224 case AGVK_ExtraSGPRs:
225 return evaluateExtraSGPRs(Res, Asm, Fixup);
226 case AGVK_AlignTo:
227 return evaluateAlignTo(Res, Asm, Fixup);
228 case AGVK_TotalNumVGPRs:
229 return evaluateTotalNumVGPR(Res, Asm, Fixup);
230 case AGVK_Occupancy:
231 return evaluateOccupancy(Res, Asm, Fixup);
232 }
233
234 for (const MCExpr *Arg : Args) {
235 MCValue ArgRes;
236 if (!Arg->evaluateAsRelocatable(ArgRes, Asm, Fixup) || !ArgRes.isAbsolute())
237 return false;
238
239 if (!Total.has_value())
240 Total = ArgRes.getConstant();
241 Total = op(Kind, *Total, ArgRes.getConstant());
242 }
243
244 Res = MCValue::get(*Total);
245 return true;
246 }
247
visitUsedExpr(MCStreamer & Streamer) const248 void AMDGPUMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
249 for (const MCExpr *Arg : Args)
250 Streamer.visitUsedExpr(*Arg);
251 }
252
findAssociatedFragment() const253 MCFragment *AMDGPUMCExpr::findAssociatedFragment() const {
254 for (const MCExpr *Arg : Args) {
255 if (Arg->findAssociatedFragment())
256 return Arg->findAssociatedFragment();
257 }
258 return nullptr;
259 }
260
261 /// Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed
262 /// are unresolvable but needed for further MCExprs). Derived from
263 /// implementation of IsaInfo::getNumExtraSGPRs in AMDGPUBaseInfo.cpp.
264 ///
createExtraSGPRs(const MCExpr * VCCUsed,const MCExpr * FlatScrUsed,bool XNACKUsed,MCContext & Ctx)265 const AMDGPUMCExpr *AMDGPUMCExpr::createExtraSGPRs(const MCExpr *VCCUsed,
266 const MCExpr *FlatScrUsed,
267 bool XNACKUsed,
268 MCContext &Ctx) {
269
270 return create(AGVK_ExtraSGPRs,
271 {VCCUsed, FlatScrUsed, MCConstantExpr::create(XNACKUsed, Ctx)},
272 Ctx);
273 }
274
createTotalNumVGPR(const MCExpr * NumAGPR,const MCExpr * NumVGPR,MCContext & Ctx)275 const AMDGPUMCExpr *AMDGPUMCExpr::createTotalNumVGPR(const MCExpr *NumAGPR,
276 const MCExpr *NumVGPR,
277 MCContext &Ctx) {
278 return create(AGVK_TotalNumVGPRs, {NumAGPR, NumVGPR}, Ctx);
279 }
280
281 /// Mimics GCNSubtarget::computeOccupancy for MCExpr.
282 ///
283 /// Remove dependency on GCNSubtarget and depend only only the necessary values
284 /// for said occupancy computation. Should match computeOccupancy implementation
285 /// without passing \p STM on.
createOccupancy(unsigned InitOcc,const MCExpr * NumSGPRs,const MCExpr * NumVGPRs,const GCNSubtarget & STM,MCContext & Ctx)286 const AMDGPUMCExpr *AMDGPUMCExpr::createOccupancy(unsigned InitOcc,
287 const MCExpr *NumSGPRs,
288 const MCExpr *NumVGPRs,
289 const GCNSubtarget &STM,
290 MCContext &Ctx) {
291 unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM);
292 unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM);
293 unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM);
294 unsigned Generation = STM.getGeneration();
295
296 auto CreateExpr = [&Ctx](unsigned Value) {
297 return MCConstantExpr::create(Value, Ctx);
298 };
299
300 return create(AGVK_Occupancy,
301 {CreateExpr(MaxWaves), CreateExpr(Granule),
302 CreateExpr(TargetTotalNumVGPRs), CreateExpr(Generation),
303 CreateExpr(InitOcc), NumSGPRs, NumVGPRs},
304 Ctx);
305 }
306