//===-- AMDGPULateCodeGenPrepare.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR *just* before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"

#define DEBUG_TYPE "amdgpu-late-codegenprepare"

using namespace llvm;

// Scalar load widening needs to run after the LoadStoreVectorizer, as that
// pass doesn't handle overlapping cases. In addition, this pass extends the
// widening to cases where scalar sub-dword loads are only naturally aligned
// rather than dword aligned.
static cl::opt<bool>
    WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
               cl::desc("Widen sub-dword constant address space loads in "
                        "AMDGPULateCodeGenPrepare"),
               cl::ReallyHidden, cl::init(true));

namespace {

class AMDGPULateCodeGenPrepare
    : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
  Function &F;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  AssumptionCache *const AC;
  UniformityInfo &UA;

  SmallVector<WeakTrackingVH, 8> DeadInsts;

public:
  AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
                           AssumptionCache *AC, UniformityInfo &UA)
      : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
  bool run();
  bool visitInstruction(Instruction &) { return false; }

  // Check if the specified value is at least DWORD aligned.
  bool isDWORDAligned(const Value *V) const {
    KnownBits Known = computeKnownBits(V, DL, AC);
    return Known.countMinTrailingZeros() >= 2;
  }

  bool canWidenScalarExtLoad(LoadInst &LI) const;
  bool visitLoadInst(LoadInst &LI);
};

using ValueToValueMap = DenseMap<const Value *, Value *>;

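// An illustrative sketch of the transform (the IR value names below are
// invented for the example): a vector of an illegal element type that is live
// across a block boundary, e.g.
//
//   %v = phi <4 x i8> [ %a, %bb0 ], [ %b, %bb1 ]
//
// would be scalarized and widened during selection, with each element living
// in its own register. LiveRegOptimizer instead carries the value across the
// boundary as a single legal scalar (here an i32):
//
//   %a.bc = bitcast <4 x i8> %a to i32                ; in %bb0
//   %b.bc = bitcast <4 x i8> %b to i32                ; in %bb1
//   %v.tc = phi i32 [ %a.bc, %bb0 ], [ %b.bc, %bb1 ]
//   %v.recast = bitcast i32 %v.tc to <4 x i8>         ; before the uses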
class LiveRegOptimizer {
private:
  Module &Mod;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  /// The scalar type to convert to.
  Type *const ConvertToScalar;
  /// Map of Value -> Converted Value.
  ValueToValueMap ValMap;
  /// Per-BB map of conversions from the optimized type back to the original
  /// type.
  DenseMap<BasicBlock *, ValueToValueMap> BBUseValMap;

public:
  /// Calculate and return the type to convert to given a problematic \p
  /// OriginalType. In some instances, we may widen the type (e.g. v2i8 ->
  /// i32).
  Type *calculateConvertType(Type *OriginalType);
  /// Convert the virtual register defined by \p V to a compatible vector of
  /// legal type.
  Value *convertToOptType(Instruction *V, BasicBlock::iterator &InstPt);
  /// Convert the virtual register defined by \p V back to the original type
  /// \p ConvertType, stripping away the MSBs in cases where there was an
  /// imperfect fit (e.g. v2i32 -> v7i8).
  Value *convertFromOptType(Type *ConvertType, Instruction *V,
                            BasicBlock::iterator &InstPt,
                            BasicBlock *InsertBlock);
  /// Check for problematic PHI nodes or cross-bb values based on the value
  /// defined by \p I, and coerce to legal types if necessary. For a
  /// problematic PHI node, we coerce all incoming values in a single
  /// invocation.
  bool optimizeLiveType(Instruction *I,
                        SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  // Whether or not the type should be replaced to avoid inefficient
  // legalization code.
  bool shouldReplace(Type *ITy) {
    FixedVectorType *VTy = dyn_cast<FixedVectorType>(ITy);
    if (!VTy)
      return false;

    const auto *TLI = ST.getTargetLowering();

    Type *EltTy = VTy->getElementType();
    // If the element type is wider than the scalar type we convert to, we
    // can't do any bit packing.
    if (!EltTy->isIntegerTy() ||
        EltTy->getScalarSizeInBits() > ConvertToScalar->getScalarSizeInBits())
      return false;

    // Only coerce illegal types.
    TargetLoweringBase::LegalizeKind LK =
        TLI->getTypeConversion(EltTy->getContext(), EVT::getEVT(EltTy, false));
    return LK.first != TargetLoweringBase::TypeLegal;
  }

  bool isOpLegal(Instruction *I) { return isa<StoreInst, IntrinsicInst>(I); }

  bool isCoercionProfitable(Instruction *II) {
    SmallPtrSet<Instruction *, 4> CVisited;
    SmallVector<Instruction *, 4> UserList;

    // Check users for profitable conditions (a cross-block user which can
    // natively handle the illegal vector).
    for (User *V : II->users())
      if (auto *UseInst = dyn_cast<Instruction>(V))
        UserList.push_back(UseInst);

    auto IsLookThru = [](Instruction *II) {
      if (const auto *Intr = dyn_cast<IntrinsicInst>(II))
        return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;
      return isa<PHINode, ShuffleVectorInst, InsertElementInst,
                 ExtractElementInst, CastInst>(II);
    };

    while (!UserList.empty()) {
      auto CII = UserList.pop_back_val();
      if (!CVisited.insert(CII).second)
        continue;

      if (CII->getParent() == II->getParent() && !IsLookThru(II))
        continue;

      if (isOpLegal(CII))
        return true;

      if (IsLookThru(CII))
        for (User *V : CII->users())
          if (auto *UseInst = dyn_cast<Instruction>(V))
            UserList.push_back(UseInst);
    }
    return false;
  }

  LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
      : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
        ConvertToScalar(Type::getInt32Ty(Mod.getContext())) {}
};

} // end anonymous namespace

bool AMDGPULateCodeGenPrepare::run() {
  // "Optimize" the virtual regs that cross basic block boundaries. When
  // building the SelectionDAG, vectors of illegal types that cross basic
  // blocks will be scalarized and widened, with each scalar living in its
  // own register. To work around this, this optimization converts the
  // vectors to equivalent vectors of legal type (which are converted back
  // before uses in subsequent blocks), to pack the bits into fewer physical
  // registers (used in CopyToReg/CopyFromReg pairs).
  LiveRegOptimizer LRO(*F.getParent(), ST);

  bool Changed = false;

  bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();

  for (auto &BB : reverse(F))
    for (Instruction &I : make_early_inc_range(reverse(BB))) {
      Changed |= !HasScalarSubwordLoads && visit(I);
      Changed |= LRO.optimizeLiveType(&I, DeadInsts);
    }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
  return Changed;
}

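// A few illustrative mappings (the convert-to scalar is i32): a type that
// fits within one scalar is widened to it (v2i8 -> i32, v3i8 -> i32), while
// larger types become vectors of the scalar, rounding the element count up
// (v8i8 -> v2i32, v7i8 -> v2i32).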
Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {
  assert(OriginalType->getScalarSizeInBits() <=
         ConvertToScalar->getScalarSizeInBits());

  FixedVectorType *VTy = cast<FixedVectorType>(OriginalType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize ConvertScalarSize = DL.getTypeSizeInBits(ConvertToScalar);
  unsigned ConvertEltCount =
      (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;

  if (OriginalSize <= ConvertScalarSize)
    return IntegerType::get(Mod.getContext(), ConvertScalarSize);

  return VectorType::get(Type::getIntNTy(Mod.getContext(), ConvertScalarSize),
                         ConvertEltCount, false);
}

Value *LiveRegOptimizer::convertToOptType(Instruction *V,
                                          BasicBlock::iterator &InsertPt) {
  FixedVectorType *VTy = cast<FixedVectorType>(V->getType());
  Type *NewTy = calculateConvertType(V->getType());

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);

  IRBuilder<> Builder(V->getParent(), InsertPt);
  // If there is a bitsize match, we can fit the old vector into a new vector
  // of the desired type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, we must use a wider vector.
  assert(NewSize > OriginalSize);
  uint64_t ExpandedVecElementCount = NewSize / VTy->getScalarSizeInBits();

  SmallVector<int, 8> ShuffleMask;
  uint64_t OriginalElementCount = VTy->getElementCount().getFixedValue();
  for (unsigned I = 0; I < OriginalElementCount; I++)
    ShuffleMask.push_back(I);

  for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)
    ShuffleMask.push_back(OriginalElementCount);

  Value *ExpandedVec = Builder.CreateShuffleVector(V, ShuffleMask);
  return Builder.CreateBitCast(ExpandedVec, NewTy, V->getName() + ".bc");
}

Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
                                            BasicBlock::iterator &InsertPt,
                                            BasicBlock *InsertBB) {
  FixedVectorType *NewVTy = cast<FixedVectorType>(ConvertType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(V->getType());
  TypeSize NewSize = DL.getTypeSizeInBits(NewVTy);

  IRBuilder<> Builder(InsertBB, InsertPt);
  // If there is a bitsize match, we simply convert back to the original type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewVTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, then we must have used a wider value to
  // hold the bits.
  assert(OriginalSize > NewSize);
  // For wide scalars, we can just truncate the value.
  if (!V->getType()->isVectorTy()) {
    Instruction *Trunc = cast<Instruction>(
        Builder.CreateTrunc(V, IntegerType::get(Mod.getContext(), NewSize)));
    return cast<Instruction>(Builder.CreateBitCast(Trunc, NewVTy));
  }

  // For wider vectors, we must strip the MSBs to convert back to the original
  // type.
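  // E.g. converting a v2i32 back to a v7i8 (a sketch): bitcast the two i32s
  // to <8 x i8>, then shuffle out the low seven elements; the top element is
  // the zero padding added by convertToOptType.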
  VectorType *ExpandedVT = VectorType::get(
      Type::getIntNTy(Mod.getContext(), NewVTy->getScalarSizeInBits()),
      (OriginalSize / NewVTy->getScalarSizeInBits()), false);
  Instruction *Converted =
      cast<Instruction>(Builder.CreateBitCast(V, ExpandedVT));

  unsigned NarrowElementCount = NewVTy->getElementCount().getFixedValue();
  SmallVector<int, 8> ShuffleMask(NarrowElementCount);
  std::iota(ShuffleMask.begin(), ShuffleMask.end(), 0);

  return Builder.CreateShuffleVector(Converted, ShuffleMask);
}

bool LiveRegOptimizer::optimizeLiveType(
    Instruction *I, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  SmallVector<Instruction *, 4> Worklist;
  SmallPtrSet<PHINode *, 4> PhiNodes;
  SmallPtrSet<Instruction *, 4> Defs;
  SmallPtrSet<Instruction *, 4> Uses;
  SmallPtrSet<Instruction *, 4> Visited;

  Worklist.push_back(cast<Instruction>(I));
  while (!Worklist.empty()) {
    Instruction *II = Worklist.pop_back_val();

    if (!Visited.insert(II).second)
      continue;

    if (!shouldReplace(II->getType()))
      continue;

    if (!isCoercionProfitable(II))
      continue;

    if (PHINode *Phi = dyn_cast<PHINode>(II)) {
      PhiNodes.insert(Phi);
      // Collect all the incoming values of problematic PHI nodes.
      for (Value *V : Phi->incoming_values()) {
        // Repeat the collection process for newly found PHI nodes.
        if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
          if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
            Worklist.push_back(OpPhi);
          continue;
        }

        Instruction *IncInst = dyn_cast<Instruction>(V);
        // Other incoming value types (e.g. vector literals) are unhandled.
        if (!IncInst && !isa<ConstantAggregateZero>(V))
          return false;

        // Collect all other incoming values for coercion.
        if (IncInst)
          Defs.insert(IncInst);
      }
    }

    // Collect all relevant uses.
    for (User *V : II->users()) {
      // Repeat the collection process for problematic PHI nodes.
      if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
        if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
          Worklist.push_back(OpPhi);
        continue;
      }

      Instruction *UseInst = cast<Instruction>(V);
      // Collect all uses of PHINodes and any use that crosses BB boundaries.
      if (UseInst->getParent() != II->getParent() || isa<PHINode>(II)) {
        Uses.insert(UseInst);
        if (!isa<PHINode>(II))
          Defs.insert(II);
      }
    }
  }

  // Coerce and track the defs.
  for (Instruction *D : Defs) {
    if (!ValMap.contains(D)) {
      BasicBlock::iterator InsertPt = std::next(D->getIterator());
      Value *ConvertVal = convertToOptType(D, InsertPt);
      assert(ConvertVal);
      ValMap[D] = ConvertVal;
    }
  }

  // Construct new-typed PHI nodes.
  for (PHINode *Phi : PhiNodes) {
    ValMap[Phi] = PHINode::Create(calculateConvertType(Phi->getType()),
                                  Phi->getNumIncomingValues(),
                                  Phi->getName() + ".tc", Phi->getIterator());
  }

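  // The new PHIs are created empty first and wired up in a second pass so
  // that mutually referencing PHIs can use each other's converted value as
  // an incoming value.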
  // Connect all the PHI nodes with their new incoming values.
  for (PHINode *Phi : PhiNodes) {
    PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
    bool MissingIncVal = false;
    for (int I = 0, E = Phi->getNumIncomingValues(); I < E; I++) {
      Value *IncVal = Phi->getIncomingValue(I);
      if (isa<ConstantAggregateZero>(IncVal)) {
        Type *NewType = calculateConvertType(Phi->getType());
        NewPhi->addIncoming(ConstantInt::get(NewType, 0, false),
                            Phi->getIncomingBlock(I));
      } else if (Value *Val = ValMap.lookup(IncVal))
        NewPhi->addIncoming(Val, Phi->getIncomingBlock(I));
      else
        MissingIncVal = true;
    }
    if (MissingIncVal) {
      Value *DeadVal = ValMap[Phi];
      // The coercion chain of the PHI is broken. Delete the Phi
      // from the ValMap and any connected / user Phis.
      SmallVector<Value *, 4> PHIWorklist;
      SmallPtrSet<Value *, 4> VisitedPhis;
      PHIWorklist.push_back(DeadVal);
      while (!PHIWorklist.empty()) {
        Value *NextDeadValue = PHIWorklist.pop_back_val();
        VisitedPhis.insert(NextDeadValue);
        auto OriginalPhi =
            llvm::find_if(PhiNodes, [this, &NextDeadValue](PHINode *CandPhi) {
              return ValMap[CandPhi] == NextDeadValue;
            });
        // This PHI may have already been removed from maps when
        // unwinding a previous Phi.
        if (OriginalPhi != PhiNodes.end())
          ValMap.erase(*OriginalPhi);

        DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));

        for (User *U : NextDeadValue->users()) {
          if (!VisitedPhis.contains(cast<PHINode>(U)))
            PHIWorklist.push_back(U);
        }
      }
    } else {
      DeadInsts.emplace_back(cast<Instruction>(Phi));
    }
  }
  // Coerce back to the original type and replace the uses.
  for (Instruction *U : Uses) {
    // Replace all converted operands of a use.
    for (auto [OpIdx, Op] : enumerate(U->operands())) {
      if (Value *Val = ValMap.lookup(Op)) {
        Value *NewVal = nullptr;
        if (BBUseValMap.contains(U->getParent()) &&
            BBUseValMap[U->getParent()].contains(Val))
          NewVal = BBUseValMap[U->getParent()][Val];
        else {
          BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
          // We may pick up ops that were previously converted for users in
          // other blocks. If there is an originally typed definition of the
          // Op already in this block, simply reuse it.
          if (isa<Instruction>(Op) && !isa<PHINode>(Op) &&
              U->getParent() == cast<Instruction>(Op)->getParent()) {
            NewVal = Op;
          } else {
            NewVal =
                convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
                                   InsertPt, U->getParent());
            BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
          }
        }
        assert(NewVal);
        U->setOperand(OpIdx, NewVal);
      }
    }
  }

  return true;
}

bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
  unsigned AS = LI.getPointerAddressSpace();
  // Skip non-constant address spaces.
  if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
      AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return false;
  // Skip non-simple loads.
  if (!LI.isSimple())
    return false;
  Type *Ty = LI.getType();
  // Skip aggregate types.
  if (Ty->isAggregateType())
    return false;
  unsigned TySize = DL.getTypeStoreSize(Ty);
  // Only handle sub-DWORD loads.
  if (TySize >= 4)
    return false;
  // The load must be at least naturally aligned.
  if (LI.getAlign() < DL.getABITypeAlign(Ty))
    return false;
  // It should be uniform, i.e. a scalar load.
  return UA.isUniform(&LI);
}

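// A sketch of the widening (the IR names are illustrative): a naturally
// aligned i16 load at byte offset 2 from a DWORD-aligned base,
//
//   %v = load i16, ptr addrspace(4) %gep, align 2
//
// becomes a DWORD load at offset 0 followed by a shift and truncate:
//
//   %w = load i32, ptr addrspace(4) %base, align 4
//   %s = lshr i32 %w, 16
//   %v = trunc i32 %s to i16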
bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
  if (!WidenLoads)
    return false;

  // Skip if the load is already at least DWORD aligned, as that case is
  // handled in the SDAG.
  if (LI.getAlign() >= 4)
    return false;

  if (!canWidenScalarExtLoad(LI))
    return false;

  int64_t Offset = 0;
  auto *Base =
      GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, DL);
  // If the base is not DWORD aligned, it's not safe to perform the following
  // transforms.
  if (!isDWORDAligned(Base))
    return false;

  int64_t Adjust = Offset & 0x3;
  if (Adjust == 0) {
    // With a zero adjust, the original alignment can simply be promoted.
    LI.setAlignment(Align(4));
    return true;
  }

  IRBuilder<> IRB(&LI);
  IRB.SetCurrentDebugLocation(LI.getDebugLoc());

  unsigned LdBits = DL.getTypeStoreSizeInBits(LI.getType());
  auto *IntNTy = Type::getIntNTy(LI.getContext(), LdBits);

  auto *NewPtr = IRB.CreateConstGEP1_64(
      IRB.getInt8Ty(),
      IRB.CreateAddrSpaceCast(Base, LI.getPointerOperand()->getType()),
      Offset - Adjust);

  LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4));
  NewLd->copyMetadata(LI);
  NewLd->setMetadata(LLVMContext::MD_range, nullptr);

  unsigned ShAmt = Adjust * 8;
  Value *NewVal = IRB.CreateBitCast(
      IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt),
                      DL.typeSizeEqualsStoreSize(LI.getType()) ? IntNTy
                                                               : LI.getType()),
      LI.getType());
  LI.replaceAllUsesWith(NewVal);
  DeadInsts.emplace_back(&LI);

  return true;
}

PreservedAnalyses
AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
  UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);

  bool Changed = AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();

  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA = PreservedAnalyses::none();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

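// Legacy pass manager wrapper around the same transformation.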
class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
public:
  static char ID;

  AMDGPULateCodeGenPrepareLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override {
    return "AMDGPU IR late optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<UniformityInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override;
};

bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  UniformityInfo &UI =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

  return AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
}

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                      "AMDGPU IR late optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                    "AMDGPU IR late optimizations", false, false)

char AMDGPULateCodeGenPrepareLegacy::ID = 0;

FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
  return new AMDGPULateCodeGenPrepareLegacy();
}