xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp (revision 3a56015a2f5d630910177fa79a522bb95511ccf7)
1 //===- AMDGPUMCExpr.cpp - AMDGPU specific MC expression classes -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUMCExpr.h"
10 #include "GCNSubtarget.h"
11 #include "Utils/AMDGPUBaseInfo.h"
12 #include "llvm/IR/Function.h"
13 #include "llvm/MC/MCAssembler.h"
14 #include "llvm/MC/MCContext.h"
15 #include "llvm/MC/MCStreamer.h"
16 #include "llvm/MC/MCSymbol.h"
17 #include "llvm/MC/MCValue.h"
18 #include "llvm/Support/Allocator.h"
19 #include "llvm/Support/raw_ostream.h"
20 #include <optional>
21 
22 using namespace llvm;
23 using namespace llvm::AMDGPU;
24 
25 AMDGPUMCExpr::AMDGPUMCExpr(VariantKind Kind, ArrayRef<const MCExpr *> Args,
26                            MCContext &Ctx)
27     : Kind(Kind), Ctx(Ctx) {
28   assert(Args.size() >= 1 && "Needs a minimum of one expression.");
29   assert(Kind != AGVK_None && "Cannot construct AMDGPUMCExpr of kind none.");
30 
31   // Allocating the variadic arguments through the same allocation mechanism
32   // that the object itself is allocated with so they end up in the same memory.
33   //
34   // Will result in an asan failure if allocated on the heap through standard
35   // allocation (e.g., through SmallVector's grow).
36   RawArgs = static_cast<const MCExpr **>(
37       Ctx.allocate(sizeof(const MCExpr *) * Args.size()));
38   std::uninitialized_copy(Args.begin(), Args.end(), RawArgs);
39   this->Args = ArrayRef<const MCExpr *>(RawArgs, Args.size());
40 }
41 
42 AMDGPUMCExpr::~AMDGPUMCExpr() { Ctx.deallocate(RawArgs); }
43 
44 const AMDGPUMCExpr *AMDGPUMCExpr::create(VariantKind Kind,
45                                          ArrayRef<const MCExpr *> Args,
46                                          MCContext &Ctx) {
47   return new (Ctx) AMDGPUMCExpr(Kind, Args, Ctx);
48 }
49 
50 const MCExpr *AMDGPUMCExpr::getSubExpr(size_t Index) const {
51   assert(Index < Args.size() && "Indexing out of bounds AMDGPUMCExpr sub-expr");
52   return Args[Index];
53 }
54 
55 void AMDGPUMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
56   switch (Kind) {
57   default:
58     llvm_unreachable("Unknown AMDGPUMCExpr kind.");
59   case AGVK_Or:
60     OS << "or(";
61     break;
62   case AGVK_Max:
63     OS << "max(";
64     break;
65   case AGVK_ExtraSGPRs:
66     OS << "extrasgprs(";
67     break;
68   case AGVK_TotalNumVGPRs:
69     OS << "totalnumvgprs(";
70     break;
71   case AGVK_AlignTo:
72     OS << "alignto(";
73     break;
74   case AGVK_Occupancy:
75     OS << "occupancy(";
76     break;
77   }
78   for (auto It = Args.begin(); It != Args.end(); ++It) {
79     (*It)->print(OS, MAI, /*InParens=*/false);
80     if ((It + 1) != Args.end())
81       OS << ", ";
82   }
83   OS << ')';
84 }
85 
86 static int64_t op(AMDGPUMCExpr::VariantKind Kind, int64_t Arg1, int64_t Arg2) {
87   switch (Kind) {
88   default:
89     llvm_unreachable("Unknown AMDGPUMCExpr kind.");
90   case AMDGPUMCExpr::AGVK_Max:
91     return std::max(Arg1, Arg2);
92   case AMDGPUMCExpr::AGVK_Or:
93     return Arg1 | Arg2;
94   }
95 }
96 
97 bool AMDGPUMCExpr::evaluateExtraSGPRs(MCValue &Res, const MCAssembler *Asm,
98                                       const MCFixup *Fixup) const {
99   auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
100     MCValue MCVal;
101     if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute())
102       return false;
103 
104     ConstantValue = MCVal.getConstant();
105     return true;
106   };
107 
108   assert(Args.size() == 3 &&
109          "AMDGPUMCExpr Argument count incorrect for ExtraSGPRs");
110   const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
111   uint64_t VCCUsed = 0, FlatScrUsed = 0, XNACKUsed = 0;
112 
113   bool Success = TryGetMCExprValue(Args[2], XNACKUsed);
114 
115   assert(Success && "Arguments 3 for ExtraSGPRs should be a known constant");
116   if (!Success || !TryGetMCExprValue(Args[0], VCCUsed) ||
117       !TryGetMCExprValue(Args[1], FlatScrUsed))
118     return false;
119 
120   uint64_t ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
121       STI, (bool)VCCUsed, (bool)FlatScrUsed, (bool)XNACKUsed);
122   Res = MCValue::get(ExtraSGPRs);
123   return true;
124 }
125 
126 bool AMDGPUMCExpr::evaluateTotalNumVGPR(MCValue &Res, const MCAssembler *Asm,
127                                         const MCFixup *Fixup) const {
128   auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
129     MCValue MCVal;
130     if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute())
131       return false;
132 
133     ConstantValue = MCVal.getConstant();
134     return true;
135   };
136   assert(Args.size() == 2 &&
137          "AMDGPUMCExpr Argument count incorrect for TotalNumVGPRs");
138   const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
139   uint64_t NumAGPR = 0, NumVGPR = 0;
140 
141   bool Has90AInsts = AMDGPU::isGFX90A(*STI);
142 
143   if (!TryGetMCExprValue(Args[0], NumAGPR) ||
144       !TryGetMCExprValue(Args[1], NumVGPR))
145     return false;
146 
147   uint64_t TotalNum = Has90AInsts && NumAGPR ? alignTo(NumVGPR, 4) + NumAGPR
148                                              : std::max(NumVGPR, NumAGPR);
149   Res = MCValue::get(TotalNum);
150   return true;
151 }
152 
153 bool AMDGPUMCExpr::evaluateAlignTo(MCValue &Res, const MCAssembler *Asm,
154                                    const MCFixup *Fixup) const {
155   auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
156     MCValue MCVal;
157     if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute())
158       return false;
159 
160     ConstantValue = MCVal.getConstant();
161     return true;
162   };
163 
164   assert(Args.size() == 2 &&
165          "AMDGPUMCExpr Argument count incorrect for AlignTo");
166   uint64_t Value = 0, Align = 0;
167   if (!TryGetMCExprValue(Args[0], Value) || !TryGetMCExprValue(Args[1], Align))
168     return false;
169 
170   Res = MCValue::get(alignTo(Value, Align));
171   return true;
172 }
173 
174 bool AMDGPUMCExpr::evaluateOccupancy(MCValue &Res, const MCAssembler *Asm,
175                                      const MCFixup *Fixup) const {
176   auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
177     MCValue MCVal;
178     if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute())
179       return false;
180 
181     ConstantValue = MCVal.getConstant();
182     return true;
183   };
184   assert(Args.size() == 7 &&
185          "AMDGPUMCExpr Argument count incorrect for Occupancy");
186   uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation,
187       NumSGPRs, NumVGPRs;
188 
189   bool Success = true;
190   Success &= TryGetMCExprValue(Args[0], MaxWaves);
191   Success &= TryGetMCExprValue(Args[1], Granule);
192   Success &= TryGetMCExprValue(Args[2], TargetTotalNumVGPRs);
193   Success &= TryGetMCExprValue(Args[3], Generation);
194   Success &= TryGetMCExprValue(Args[4], InitOccupancy);
195 
196   assert(Success && "Arguments 1 to 5 for Occupancy should be known constants");
197 
198   if (!Success || !TryGetMCExprValue(Args[5], NumSGPRs) ||
199       !TryGetMCExprValue(Args[6], NumVGPRs))
200     return false;
201 
202   unsigned Occupancy = InitOccupancy;
203   if (NumSGPRs)
204     Occupancy = std::min(
205         Occupancy, IsaInfo::getOccupancyWithNumSGPRs(
206                        NumSGPRs, MaxWaves,
207                        static_cast<AMDGPUSubtarget::Generation>(Generation)));
208   if (NumVGPRs)
209     Occupancy = std::min(Occupancy,
210                          IsaInfo::getNumWavesPerEUWithNumVGPRs(
211                              NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs));
212 
213   Res = MCValue::get(Occupancy);
214   return true;
215 }
216 
217 bool AMDGPUMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
218                                              const MCAssembler *Asm,
219                                              const MCFixup *Fixup) const {
220   std::optional<int64_t> Total;
221   switch (Kind) {
222   default:
223     break;
224   case AGVK_ExtraSGPRs:
225     return evaluateExtraSGPRs(Res, Asm, Fixup);
226   case AGVK_AlignTo:
227     return evaluateAlignTo(Res, Asm, Fixup);
228   case AGVK_TotalNumVGPRs:
229     return evaluateTotalNumVGPR(Res, Asm, Fixup);
230   case AGVK_Occupancy:
231     return evaluateOccupancy(Res, Asm, Fixup);
232   }
233 
234   for (const MCExpr *Arg : Args) {
235     MCValue ArgRes;
236     if (!Arg->evaluateAsRelocatable(ArgRes, Asm, Fixup) || !ArgRes.isAbsolute())
237       return false;
238 
239     if (!Total.has_value())
240       Total = ArgRes.getConstant();
241     Total = op(Kind, *Total, ArgRes.getConstant());
242   }
243 
244   Res = MCValue::get(*Total);
245   return true;
246 }
247 
248 void AMDGPUMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
249   for (const MCExpr *Arg : Args)
250     Streamer.visitUsedExpr(*Arg);
251 }
252 
253 MCFragment *AMDGPUMCExpr::findAssociatedFragment() const {
254   for (const MCExpr *Arg : Args) {
255     if (Arg->findAssociatedFragment())
256       return Arg->findAssociatedFragment();
257   }
258   return nullptr;
259 }
260 
261 /// Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed
262 /// are unresolvable but needed for further MCExprs). Derived from
263 /// implementation of IsaInfo::getNumExtraSGPRs in AMDGPUBaseInfo.cpp.
264 ///
265 const AMDGPUMCExpr *AMDGPUMCExpr::createExtraSGPRs(const MCExpr *VCCUsed,
266                                                    const MCExpr *FlatScrUsed,
267                                                    bool XNACKUsed,
268                                                    MCContext &Ctx) {
269 
270   return create(AGVK_ExtraSGPRs,
271                 {VCCUsed, FlatScrUsed, MCConstantExpr::create(XNACKUsed, Ctx)},
272                 Ctx);
273 }
274 
275 const AMDGPUMCExpr *AMDGPUMCExpr::createTotalNumVGPR(const MCExpr *NumAGPR,
276                                                      const MCExpr *NumVGPR,
277                                                      MCContext &Ctx) {
278   return create(AGVK_TotalNumVGPRs, {NumAGPR, NumVGPR}, Ctx);
279 }
280 
281 /// Mimics GCNSubtarget::computeOccupancy for MCExpr.
282 ///
283 /// Remove dependency on GCNSubtarget and depend only only the necessary values
284 /// for said occupancy computation. Should match computeOccupancy implementation
285 /// without passing \p STM on.
286 const AMDGPUMCExpr *AMDGPUMCExpr::createOccupancy(unsigned InitOcc,
287                                                   const MCExpr *NumSGPRs,
288                                                   const MCExpr *NumVGPRs,
289                                                   const GCNSubtarget &STM,
290                                                   MCContext &Ctx) {
291   unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM);
292   unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM);
293   unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM);
294   unsigned Generation = STM.getGeneration();
295 
296   auto CreateExpr = [&Ctx](unsigned Value) {
297     return MCConstantExpr::create(Value, Ctx);
298   };
299 
300   return create(AGVK_Occupancy,
301                 {CreateExpr(MaxWaves), CreateExpr(Granule),
302                  CreateExpr(TargetTotalNumVGPRs), CreateExpr(Generation),
303                  CreateExpr(InitOcc), NumSGPRs, NumVGPRs},
304                 Ctx);
305 }
306