1 //===- AMDGPUMCExpr.cpp - AMDGPU specific MC expression classes -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPUMCExpr.h" 10 #include "GCNSubtarget.h" 11 #include "Utils/AMDGPUBaseInfo.h" 12 #include "llvm/IR/Function.h" 13 #include "llvm/MC/MCAssembler.h" 14 #include "llvm/MC/MCContext.h" 15 #include "llvm/MC/MCStreamer.h" 16 #include "llvm/MC/MCSymbol.h" 17 #include "llvm/MC/MCValue.h" 18 #include "llvm/Support/Allocator.h" 19 #include "llvm/Support/raw_ostream.h" 20 #include <optional> 21 22 using namespace llvm; 23 using namespace llvm::AMDGPU; 24 25 AMDGPUMCExpr::AMDGPUMCExpr(VariantKind Kind, ArrayRef<const MCExpr *> Args, 26 MCContext &Ctx) 27 : Kind(Kind), Ctx(Ctx) { 28 assert(Args.size() >= 1 && "Needs a minimum of one expression."); 29 assert(Kind != AGVK_None && "Cannot construct AMDGPUMCExpr of kind none."); 30 31 // Allocating the variadic arguments through the same allocation mechanism 32 // that the object itself is allocated with so they end up in the same memory. 33 // 34 // Will result in an asan failure if allocated on the heap through standard 35 // allocation (e.g., through SmallVector's grow). 36 RawArgs = static_cast<const MCExpr **>( 37 Ctx.allocate(sizeof(const MCExpr *) * Args.size())); 38 std::uninitialized_copy(Args.begin(), Args.end(), RawArgs); 39 this->Args = ArrayRef<const MCExpr *>(RawArgs, Args.size()); 40 } 41 42 AMDGPUMCExpr::~AMDGPUMCExpr() { Ctx.deallocate(RawArgs); } 43 44 const AMDGPUMCExpr *AMDGPUMCExpr::create(VariantKind Kind, 45 ArrayRef<const MCExpr *> Args, 46 MCContext &Ctx) { 47 return new (Ctx) AMDGPUMCExpr(Kind, Args, Ctx); 48 } 49 50 const MCExpr *AMDGPUMCExpr::getSubExpr(size_t Index) const { 51 assert(Index < Args.size() && "Indexing out of bounds AMDGPUMCExpr sub-expr"); 52 return Args[Index]; 53 } 54 55 void AMDGPUMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { 56 switch (Kind) { 57 default: 58 llvm_unreachable("Unknown AMDGPUMCExpr kind."); 59 case AGVK_Or: 60 OS << "or("; 61 break; 62 case AGVK_Max: 63 OS << "max("; 64 break; 65 case AGVK_ExtraSGPRs: 66 OS << "extrasgprs("; 67 break; 68 case AGVK_TotalNumVGPRs: 69 OS << "totalnumvgprs("; 70 break; 71 case AGVK_AlignTo: 72 OS << "alignto("; 73 break; 74 case AGVK_Occupancy: 75 OS << "occupancy("; 76 break; 77 } 78 for (auto It = Args.begin(); It != Args.end(); ++It) { 79 (*It)->print(OS, MAI, /*InParens=*/false); 80 if ((It + 1) != Args.end()) 81 OS << ", "; 82 } 83 OS << ')'; 84 } 85 86 static int64_t op(AMDGPUMCExpr::VariantKind Kind, int64_t Arg1, int64_t Arg2) { 87 switch (Kind) { 88 default: 89 llvm_unreachable("Unknown AMDGPUMCExpr kind."); 90 case AMDGPUMCExpr::AGVK_Max: 91 return std::max(Arg1, Arg2); 92 case AMDGPUMCExpr::AGVK_Or: 93 return Arg1 | Arg2; 94 } 95 } 96 97 bool AMDGPUMCExpr::evaluateExtraSGPRs(MCValue &Res, const MCAssembler *Asm, 98 const MCFixup *Fixup) const { 99 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) { 100 MCValue MCVal; 101 if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute()) 102 return false; 103 104 ConstantValue = MCVal.getConstant(); 105 return true; 106 }; 107 108 assert(Args.size() == 3 && 109 "AMDGPUMCExpr Argument count incorrect for ExtraSGPRs"); 110 const MCSubtargetInfo *STI = Ctx.getSubtargetInfo(); 111 uint64_t VCCUsed = 0, FlatScrUsed = 0, XNACKUsed = 0; 112 113 bool Success = TryGetMCExprValue(Args[2], XNACKUsed); 114 115 assert(Success && "Arguments 3 for ExtraSGPRs should be a known constant"); 116 if (!Success || !TryGetMCExprValue(Args[0], VCCUsed) || 117 !TryGetMCExprValue(Args[1], FlatScrUsed)) 118 return false; 119 120 uint64_t ExtraSGPRs = IsaInfo::getNumExtraSGPRs( 121 STI, (bool)VCCUsed, (bool)FlatScrUsed, (bool)XNACKUsed); 122 Res = MCValue::get(ExtraSGPRs); 123 return true; 124 } 125 126 bool AMDGPUMCExpr::evaluateTotalNumVGPR(MCValue &Res, const MCAssembler *Asm, 127 const MCFixup *Fixup) const { 128 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) { 129 MCValue MCVal; 130 if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute()) 131 return false; 132 133 ConstantValue = MCVal.getConstant(); 134 return true; 135 }; 136 assert(Args.size() == 2 && 137 "AMDGPUMCExpr Argument count incorrect for TotalNumVGPRs"); 138 const MCSubtargetInfo *STI = Ctx.getSubtargetInfo(); 139 uint64_t NumAGPR = 0, NumVGPR = 0; 140 141 bool Has90AInsts = AMDGPU::isGFX90A(*STI); 142 143 if (!TryGetMCExprValue(Args[0], NumAGPR) || 144 !TryGetMCExprValue(Args[1], NumVGPR)) 145 return false; 146 147 uint64_t TotalNum = Has90AInsts && NumAGPR ? alignTo(NumVGPR, 4) + NumAGPR 148 : std::max(NumVGPR, NumAGPR); 149 Res = MCValue::get(TotalNum); 150 return true; 151 } 152 153 bool AMDGPUMCExpr::evaluateAlignTo(MCValue &Res, const MCAssembler *Asm, 154 const MCFixup *Fixup) const { 155 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) { 156 MCValue MCVal; 157 if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute()) 158 return false; 159 160 ConstantValue = MCVal.getConstant(); 161 return true; 162 }; 163 164 assert(Args.size() == 2 && 165 "AMDGPUMCExpr Argument count incorrect for AlignTo"); 166 uint64_t Value = 0, Align = 0; 167 if (!TryGetMCExprValue(Args[0], Value) || !TryGetMCExprValue(Args[1], Align)) 168 return false; 169 170 Res = MCValue::get(alignTo(Value, Align)); 171 return true; 172 } 173 174 bool AMDGPUMCExpr::evaluateOccupancy(MCValue &Res, const MCAssembler *Asm, 175 const MCFixup *Fixup) const { 176 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) { 177 MCValue MCVal; 178 if (!Arg->evaluateAsRelocatable(MCVal, Asm, Fixup) || !MCVal.isAbsolute()) 179 return false; 180 181 ConstantValue = MCVal.getConstant(); 182 return true; 183 }; 184 assert(Args.size() == 7 && 185 "AMDGPUMCExpr Argument count incorrect for Occupancy"); 186 uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation, 187 NumSGPRs, NumVGPRs; 188 189 bool Success = true; 190 Success &= TryGetMCExprValue(Args[0], MaxWaves); 191 Success &= TryGetMCExprValue(Args[1], Granule); 192 Success &= TryGetMCExprValue(Args[2], TargetTotalNumVGPRs); 193 Success &= TryGetMCExprValue(Args[3], Generation); 194 Success &= TryGetMCExprValue(Args[4], InitOccupancy); 195 196 assert(Success && "Arguments 1 to 5 for Occupancy should be known constants"); 197 198 if (!Success || !TryGetMCExprValue(Args[5], NumSGPRs) || 199 !TryGetMCExprValue(Args[6], NumVGPRs)) 200 return false; 201 202 unsigned Occupancy = InitOccupancy; 203 if (NumSGPRs) 204 Occupancy = std::min( 205 Occupancy, IsaInfo::getOccupancyWithNumSGPRs( 206 NumSGPRs, MaxWaves, 207 static_cast<AMDGPUSubtarget::Generation>(Generation))); 208 if (NumVGPRs) 209 Occupancy = std::min(Occupancy, 210 IsaInfo::getNumWavesPerEUWithNumVGPRs( 211 NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs)); 212 213 Res = MCValue::get(Occupancy); 214 return true; 215 } 216 217 bool AMDGPUMCExpr::evaluateAsRelocatableImpl(MCValue &Res, 218 const MCAssembler *Asm, 219 const MCFixup *Fixup) const { 220 std::optional<int64_t> Total; 221 switch (Kind) { 222 default: 223 break; 224 case AGVK_ExtraSGPRs: 225 return evaluateExtraSGPRs(Res, Asm, Fixup); 226 case AGVK_AlignTo: 227 return evaluateAlignTo(Res, Asm, Fixup); 228 case AGVK_TotalNumVGPRs: 229 return evaluateTotalNumVGPR(Res, Asm, Fixup); 230 case AGVK_Occupancy: 231 return evaluateOccupancy(Res, Asm, Fixup); 232 } 233 234 for (const MCExpr *Arg : Args) { 235 MCValue ArgRes; 236 if (!Arg->evaluateAsRelocatable(ArgRes, Asm, Fixup) || !ArgRes.isAbsolute()) 237 return false; 238 239 if (!Total.has_value()) 240 Total = ArgRes.getConstant(); 241 Total = op(Kind, *Total, ArgRes.getConstant()); 242 } 243 244 Res = MCValue::get(*Total); 245 return true; 246 } 247 248 void AMDGPUMCExpr::visitUsedExpr(MCStreamer &Streamer) const { 249 for (const MCExpr *Arg : Args) 250 Streamer.visitUsedExpr(*Arg); 251 } 252 253 MCFragment *AMDGPUMCExpr::findAssociatedFragment() const { 254 for (const MCExpr *Arg : Args) { 255 if (Arg->findAssociatedFragment()) 256 return Arg->findAssociatedFragment(); 257 } 258 return nullptr; 259 } 260 261 /// Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed 262 /// are unresolvable but needed for further MCExprs). Derived from 263 /// implementation of IsaInfo::getNumExtraSGPRs in AMDGPUBaseInfo.cpp. 264 /// 265 const AMDGPUMCExpr *AMDGPUMCExpr::createExtraSGPRs(const MCExpr *VCCUsed, 266 const MCExpr *FlatScrUsed, 267 bool XNACKUsed, 268 MCContext &Ctx) { 269 270 return create(AGVK_ExtraSGPRs, 271 {VCCUsed, FlatScrUsed, MCConstantExpr::create(XNACKUsed, Ctx)}, 272 Ctx); 273 } 274 275 const AMDGPUMCExpr *AMDGPUMCExpr::createTotalNumVGPR(const MCExpr *NumAGPR, 276 const MCExpr *NumVGPR, 277 MCContext &Ctx) { 278 return create(AGVK_TotalNumVGPRs, {NumAGPR, NumVGPR}, Ctx); 279 } 280 281 /// Mimics GCNSubtarget::computeOccupancy for MCExpr. 282 /// 283 /// Remove dependency on GCNSubtarget and depend only only the necessary values 284 /// for said occupancy computation. Should match computeOccupancy implementation 285 /// without passing \p STM on. 286 const AMDGPUMCExpr *AMDGPUMCExpr::createOccupancy(unsigned InitOcc, 287 const MCExpr *NumSGPRs, 288 const MCExpr *NumVGPRs, 289 const GCNSubtarget &STM, 290 MCContext &Ctx) { 291 unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM); 292 unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM); 293 unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM); 294 unsigned Generation = STM.getGeneration(); 295 296 auto CreateExpr = [&Ctx](unsigned Value) { 297 return MCConstantExpr::create(Value, Ctx); 298 }; 299 300 return create(AGVK_Occupancy, 301 {CreateExpr(MaxWaves), CreateExpr(Granule), 302 CreateExpr(TargetTotalNumVGPRs), CreateExpr(Generation), 303 CreateExpr(InitOcc), NumSGPRs, NumVGPRs}, 304 Ctx); 305 } 306