//===-- AMDGPULateCodeGenPrepare.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR *just* before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"

#define DEBUG_TYPE "amdgpu-late-codegenprepare"

using namespace llvm;

// Scalar load widening needs to run after the load-store-vectorizer, as that
// pass doesn't handle overlapping cases. In addition, this pass enhances the
// widening to handle cases where scalar sub-dword loads are only naturally
// aligned but not dword aligned.
static cl::opt<bool>
    WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
               cl::desc("Widen sub-dword constant address space loads in "
                        "AMDGPULateCodeGenPrepare"),
               cl::ReallyHidden, cl::init(true));

namespace {

class AMDGPULateCodeGenPrepare
    : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
  Function &F;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  AssumptionCache *const AC;
  UniformityInfo &UA;

  SmallVector<WeakTrackingVH, 8> DeadInsts;

public:
  AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
                           AssumptionCache *AC, UniformityInfo &UA)
      : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
  bool run();
  bool visitInstruction(Instruction &) { return false; }

  // Check if the specified value is at least DWORD aligned.
  bool isDWORDAligned(const Value *V) const {
    KnownBits Known = computeKnownBits(V, DL, AC);
    return Known.countMinTrailingZeros() >= 2;
  }

  bool canWidenScalarExtLoad(LoadInst &LI) const;
  bool visitLoadInst(LoadInst &LI);
};

using ValueToValueMap = DenseMap<const Value *, Value *>;

class LiveRegOptimizer {
private:
  Module &Mod;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  /// The scalar type to convert to
  Type *const ConvertToScalar;
  /// Map of Value -> Converted Value
  ValueToValueMap ValMap;
  /// Map containing the conversions from the optimal type back to the
  /// original type, per BB.
  DenseMap<BasicBlock *, ValueToValueMap> BBUseValMap;

public:
  /// Calculate and return the type to convert to, given a problematic
  /// \p OriginalType. In some instances, we may widen the type (e.g. v2i8 ->
  /// i32).
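  /// For instance, a 24-bit v3i8 fits in a single i32, while a 96-bit v6i16
  /// converts to a <3 x i32> (illustrative examples).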
  Type *calculateConvertType(Type *OriginalType);
  /// Convert the virtual register defined by \p V to a compatible vector of
  /// legal type.
  Value *convertToOptType(Instruction *V, BasicBlock::iterator &InstPt);
  /// Convert the virtual register defined by \p V back to the original type \p
  /// ConvertType, stripping away the MSBs in cases where there was an imperfect
  /// fit (e.g. v2i32 -> v7i8).
  Value *convertFromOptType(Type *ConvertType, Instruction *V,
                            BasicBlock::iterator &InstPt,
                            BasicBlock *InsertBlock);
  /// Check for problematic PHI nodes or cross-BB values based on the value
  /// defined by \p I, and coerce to legal types if necessary. For a problematic
  /// PHI node, we coerce all incoming values in a single invocation.
  bool optimizeLiveType(Instruction *I,
                        SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  // Whether or not the type should be replaced to avoid inefficient
  // legalization code.
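  // For example, on typical GCN subtargets a <4 x i8> has an illegal i8
  // element type and is a coercion candidate, while <2 x i32> already has a
  // legal element type and is left alone (illustrative, subtarget-dependent).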
  bool shouldReplace(Type *ITy) {
    FixedVectorType *VTy = dyn_cast<FixedVectorType>(ITy);
    if (!VTy)
      return false;

    const auto *TLI = ST.getTargetLowering();

    Type *EltTy = VTy->getElementType();
    // If the element size is larger than the convert-to scalar size, then we
    // can't do any bit packing.
    if (!EltTy->isIntegerTy() ||
        EltTy->getScalarSizeInBits() > ConvertToScalar->getScalarSizeInBits())
      return false;

    // Only coerce illegal types
    TargetLoweringBase::LegalizeKind LK =
        TLI->getTypeConversion(EltTy->getContext(), EVT::getEVT(EltTy, false));
    return LK.first != TargetLoweringBase::TypeLegal;
  }

  bool isOpLegal(Instruction *I) { return isa<StoreInst, IntrinsicInst>(I); }

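  // Heuristic: coercing the value defined by II is treated as profitable only
  // if some user, possibly reached by looking through PHIs, shuffles, casts,
  // and similar instructions, is an operation (a store or an intrinsic) that
  // can take the illegal vector directly.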
  bool isCoercionProfitable(Instruction *II) {
    SmallPtrSet<Instruction *, 4> CVisited;
    SmallVector<Instruction *, 4> UserList;

    // Check users for profitable conditions (across-block users which can
    // natively handle the illegal vector).
    for (User *V : II->users())
      if (auto *UseInst = dyn_cast<Instruction>(V))
        UserList.push_back(UseInst);

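    // "Look-through" instructions forward or repackage the vector value
    // without needing it in legal form themselves, so the search continues
    // through them.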
    auto IsLookThru = [](Instruction *II) {
      if (const auto *Intr = dyn_cast<IntrinsicInst>(II))
        return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;
      return isa<PHINode, ShuffleVectorInst, InsertElementInst,
                 ExtractElementInst, CastInst>(II);
    };

    while (!UserList.empty()) {
      auto CII = UserList.pop_back_val();
      if (!CVisited.insert(CII).second)
        continue;

      if (CII->getParent() == II->getParent() && !IsLookThru(II))
        continue;

      if (isOpLegal(CII))
        return true;

      if (IsLookThru(CII))
        for (User *V : CII->users())
          if (auto *UseInst = dyn_cast<Instruction>(V))
            UserList.push_back(UseInst);
    }
    return false;
  }

  LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
      : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
        ConvertToScalar(Type::getInt32Ty(Mod.getContext())) {}
};

} // end anonymous namespace

bool AMDGPULateCodeGenPrepare::run() {
  // "Optimize" the virtual regs that cross basic block boundaries. When
  // building the SelectionDAG, vectors of illegal types that cross basic blocks
  // will be scalarized and widened, with each scalar living in its own
  // register. To work around this, the optimization converts the vectors to
  // equivalent vectors of legal type (which are converted back before uses in
  // subsequent blocks), to pack the bits into fewer physical registers (used
  // in CopyToReg/CopyFromReg pairs).
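  // For example, a <4 x i8> produced in one block and consumed in another
  // would otherwise be split into four separate 32-bit virtual registers at
  // the block boundary; packing it into a single i32 keeps it in one register
  // (illustrative example).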
  LiveRegOptimizer LRO(*F.getParent(), ST);

  bool Changed = false;

  bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();

  for (auto &BB : reverse(F))
    for (Instruction &I : make_early_inc_range(reverse(BB))) {
      Changed |= !HasScalarSubwordLoads && visit(I);
      Changed |= LRO.optimizeLiveType(&I, DeadInsts);
    }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
  return Changed;
}

Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {
  assert(OriginalType->getScalarSizeInBits() <=
         ConvertToScalar->getScalarSizeInBits());

  FixedVectorType *VTy = cast<FixedVectorType>(OriginalType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize ConvertScalarSize = DL.getTypeSizeInBits(ConvertToScalar);
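  // Round up so that a partially filled trailing scalar still gets its own
  // lane (e.g. a 48-bit vector converts to <2 x i32>).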
  unsigned ConvertEltCount =
      (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;

  if (OriginalSize <= ConvertScalarSize)
    return IntegerType::get(Mod.getContext(), ConvertScalarSize);

  return VectorType::get(Type::getIntNTy(Mod.getContext(), ConvertScalarSize),
                         ConvertEltCount, false);
}

Value *LiveRegOptimizer::convertToOptType(Instruction *V,
                                          BasicBlock::iterator &InsertPt) {
  FixedVectorType *VTy = cast<FixedVectorType>(V->getType());
  Type *NewTy = calculateConvertType(V->getType());

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);

  IRBuilder<> Builder(V->getParent(), InsertPt);
  // If there is a bitsize match, we can fit the old vector into a new vector of
  // desired type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, we must use a wider vector.
  assert(NewSize > OriginalSize);
  uint64_t ExpandedVecElementCount = NewSize / VTy->getScalarSizeInBits();

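  // Pad the source out to the expanded element count with poison lanes, then
  // bitcast to the optimal type. For example (illustrative), widening v3i8 to
  // i32 shuffles to a v4i8 whose extra lane is poison, then bitcasts that
  // v4i8 to i32.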
  SmallVector<int, 8> ShuffleMask;
  uint64_t OriginalElementCount = VTy->getElementCount().getFixedValue();
  for (unsigned I = 0; I < OriginalElementCount; I++)
    ShuffleMask.push_back(I);

  for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)
    ShuffleMask.push_back(OriginalElementCount);

  Value *ExpandedVec = Builder.CreateShuffleVector(V, ShuffleMask);
  return Builder.CreateBitCast(ExpandedVec, NewTy, V->getName() + ".bc");
}

Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
                                            BasicBlock::iterator &InsertPt,
                                            BasicBlock *InsertBB) {
  FixedVectorType *NewVTy = cast<FixedVectorType>(ConvertType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(V->getType());
  TypeSize NewSize = DL.getTypeSizeInBits(NewVTy);

  IRBuilder<> Builder(InsertBB, InsertPt);
  // If there is a bitsize match, we simply convert back to the original type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewVTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, then we must have used a wider value to
  // hold the bits.
  assert(OriginalSize > NewSize);
  // For wide scalars, we can just truncate the value.
  if (!V->getType()->isVectorTy()) {
    Instruction *Trunc = cast<Instruction>(
        Builder.CreateTrunc(V, IntegerType::get(Mod.getContext(), NewSize)));
    return cast<Instruction>(Builder.CreateBitCast(Trunc, NewVTy));
  }

  // For wider vectors, we must strip the MSBs to convert back to the original
  // type.
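  // For example (illustrative), converting <2 x i32> back to v7i8 bitcasts to
  // v8i8 and then shuffles out the low 7 lanes.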
  VectorType *ExpandedVT = VectorType::get(
      Type::getIntNTy(Mod.getContext(), NewVTy->getScalarSizeInBits()),
      (OriginalSize / NewVTy->getScalarSizeInBits()), false);
  Instruction *Converted =
      cast<Instruction>(Builder.CreateBitCast(V, ExpandedVT));

  unsigned NarrowElementCount = NewVTy->getElementCount().getFixedValue();
  SmallVector<int, 8> ShuffleMask(NarrowElementCount);
  std::iota(ShuffleMask.begin(), ShuffleMask.end(), 0);

  return Builder.CreateShuffleVector(Converted, ShuffleMask);
}

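// Descriptive summary of the flow below: walk the def-use web reachable from
// I to collect problematic PHI nodes, their defs, and cross-block uses; coerce
// each def to the optimal type right after its definition; rebuild the PHI
// nodes with the coerced type; then convert back to the original type at the
// top of each using block and rewrite the uses.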
bool LiveRegOptimizer::optimizeLiveType(
    Instruction *I, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  SmallVector<Instruction *, 4> Worklist;
  SmallPtrSet<PHINode *, 4> PhiNodes;
  SmallPtrSet<Instruction *, 4> Defs;
  SmallPtrSet<Instruction *, 4> Uses;
  SmallPtrSet<Instruction *, 4> Visited;

  Worklist.push_back(cast<Instruction>(I));
  while (!Worklist.empty()) {
    Instruction *II = Worklist.pop_back_val();

    if (!Visited.insert(II).second)
      continue;

    if (!shouldReplace(II->getType()))
      continue;

    if (!isCoercionProfitable(II))
      continue;

    if (PHINode *Phi = dyn_cast<PHINode>(II)) {
      PhiNodes.insert(Phi);
      // Collect all the incoming values of problematic PHI nodes.
      for (Value *V : Phi->incoming_values()) {
        // Repeat the collection process for newly found PHI nodes.
        if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
          if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
            Worklist.push_back(OpPhi);
          continue;
        }

        Instruction *IncInst = dyn_cast<Instruction>(V);
        // Other incoming value types (e.g. vector literals) are unhandled
        if (!IncInst && !isa<ConstantAggregateZero>(V))
          return false;

        // Collect all other incoming values for coercion.
        if (IncInst)
          Defs.insert(IncInst);
      }
    }

    // Collect all relevant uses.
    for (User *V : II->users()) {
      // Repeat the collection process for problematic PHI nodes.
      if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
        if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
          Worklist.push_back(OpPhi);
        continue;
      }

      Instruction *UseInst = cast<Instruction>(V);
      // Collect all uses of PHI nodes and any use that crosses BB boundaries.
      if (UseInst->getParent() != II->getParent() || isa<PHINode>(II)) {
        Uses.insert(UseInst);
        if (!isa<PHINode>(II))
          Defs.insert(II);
      }
    }
  }

  // Coerce and track the defs.
  for (Instruction *D : Defs) {
    if (!ValMap.contains(D)) {
      BasicBlock::iterator InsertPt = std::next(D->getIterator());
      Value *ConvertVal = convertToOptType(D, InsertPt);
      assert(ConvertVal);
      ValMap[D] = ConvertVal;
    }
  }

  // Construct new-typed PHI nodes.
  for (PHINode *Phi : PhiNodes) {
    ValMap[Phi] = PHINode::Create(calculateConvertType(Phi->getType()),
                                  Phi->getNumIncomingValues(),
                                  Phi->getName() + ".tc", Phi->getIterator());
  }

  // Connect all the PHI nodes with their new incoming values.
  for (PHINode *Phi : PhiNodes) {
    PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
    bool MissingIncVal = false;
    for (int I = 0, E = Phi->getNumIncomingValues(); I < E; I++) {
      Value *IncVal = Phi->getIncomingValue(I);
      if (isa<ConstantAggregateZero>(IncVal)) {
        Type *NewType = calculateConvertType(Phi->getType());
        NewPhi->addIncoming(ConstantInt::get(NewType, 0, false),
                            Phi->getIncomingBlock(I));
      } else if (Value *Val = ValMap.lookup(IncVal))
        NewPhi->addIncoming(Val, Phi->getIncomingBlock(I));
      else
        MissingIncVal = true;
    }
    if (MissingIncVal) {
      Value *DeadVal = ValMap[Phi];
      // The coercion chain of the PHI is broken. Delete the Phi
      // from the ValMap and any connected / user Phis.
      SmallVector<Value *, 4> PHIWorklist;
      SmallPtrSet<Value *, 4> VisitedPhis;
      PHIWorklist.push_back(DeadVal);
      while (!PHIWorklist.empty()) {
        Value *NextDeadValue = PHIWorklist.pop_back_val();
        VisitedPhis.insert(NextDeadValue);
        auto OriginalPhi =
            llvm::find_if(PhiNodes, [this, &NextDeadValue](PHINode *CandPhi) {
              return ValMap[CandPhi] == NextDeadValue;
            });
        // This PHI may have already been removed from maps when
        // unwinding a previous Phi
        if (OriginalPhi != PhiNodes.end())
          ValMap.erase(*OriginalPhi);

        DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));

        for (User *U : NextDeadValue->users()) {
          if (!VisitedPhis.contains(cast<PHINode>(U)))
            PHIWorklist.push_back(U);
        }
      }
    } else {
      DeadInsts.emplace_back(cast<Instruction>(Phi));
    }
  }
  // Coerce back to the original type and replace the uses.
  for (Instruction *U : Uses) {
    // Replace all converted operands for a use.
    for (auto [OpIdx, Op] : enumerate(U->operands())) {
      if (Value *Val = ValMap.lookup(Op)) {
        Value *NewVal = nullptr;
        if (BBUseValMap.contains(U->getParent()) &&
            BBUseValMap[U->getParent()].contains(Val))
          NewVal = BBUseValMap[U->getParent()][Val];
        else {
          BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
          // We may pick up ops that were previously converted for users in
          // other blocks. If there is an originally typed definition of the Op
          // already in this block, simply reuse it.
          if (isa<Instruction>(Op) && !isa<PHINode>(Op) &&
              U->getParent() == cast<Instruction>(Op)->getParent()) {
            NewVal = Op;
          } else {
            NewVal =
                convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
                                   InsertPt, U->getParent());
            BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
          }
        }
        assert(NewVal);
        U->setOperand(OpIdx, NewVal);
      }
    }
  }

  return true;
}

bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
  unsigned AS = LI.getPointerAddressSpace();
  // Skip non-constant address space.
  if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
      AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return false;
  // Skip non-simple loads.
  if (!LI.isSimple())
    return false;
  Type *Ty = LI.getType();
  // Skip aggregate types.
  if (Ty->isAggregateType())
    return false;
  unsigned TySize = DL.getTypeStoreSize(Ty);
  // Only handle sub-DWORD loads.
  if (TySize >= 4)
    return false;
  // The load must be at least naturally aligned.
  if (LI.getAlign() < DL.getABITypeAlign(Ty))
    return false;
  // It should be uniform, i.e. a scalar load.
  return UA.isUniform(&LI);
}

bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
  if (!WidenLoads)
    return false;

  // Skip if the load is already at least DWORD aligned, as that case is
  // handled in SDAG.
  if (LI.getAlign() >= 4)
    return false;

  if (!canWidenScalarExtLoad(LI))
    return false;

  int64_t Offset = 0;
  auto *Base =
      GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, DL);
  // If the base is not DWORD aligned, it's not safe to perform the following
  // transforms.
  if (!isDWORDAligned(Base))
    return false;

  int64_t Adjust = Offset & 0x3;
  if (Adjust == 0) {
    // With a zero adjust, the original alignment can be promoted to a better
    // one.
    LI.setAlignment(Align(4));
    return true;
  }

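  // Otherwise, load the enclosing DWORD-aligned i32 and recover the original
  // value by shifting and truncating. For example (illustrative IR), an i16
  // load at base+2 becomes roughly:
  //   %wide = load i32, ptr addrspace(4) %base, align 4
  //   %shift = lshr i32 %wide, 16
  //   %val = trunc i32 %shift to i16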
  IRBuilder<> IRB(&LI);
  IRB.SetCurrentDebugLocation(LI.getDebugLoc());

  unsigned LdBits = DL.getTypeStoreSizeInBits(LI.getType());
  auto *IntNTy = Type::getIntNTy(LI.getContext(), LdBits);

  auto *NewPtr = IRB.CreateConstGEP1_64(
      IRB.getInt8Ty(),
      IRB.CreateAddrSpaceCast(Base, LI.getPointerOperand()->getType()),
      Offset - Adjust);

  LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4));
  NewLd->copyMetadata(LI);
  NewLd->setMetadata(LLVMContext::MD_range, nullptr);

  unsigned ShAmt = Adjust * 8;
  Value *NewVal = IRB.CreateBitCast(
      IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt),
                      DL.typeSizeEqualsStoreSize(LI.getType()) ? IntNTy
                                                               : LI.getType()),
      LI.getType());
  LI.replaceAllUsesWith(NewVal);
  DeadInsts.emplace_back(&LI);

  return true;
}

PreservedAnalyses
AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
  UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);

  bool Changed = AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();

  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA = PreservedAnalyses::none();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
public:
  static char ID;

  AMDGPULateCodeGenPrepareLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override {
    return "AMDGPU IR late optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<UniformityInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override;
};

bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  UniformityInfo &UI =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

  return AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
}

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                      "AMDGPU IR late optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                    "AMDGPU IR late optimizations", false, false)

char AMDGPULateCodeGenPrepareLegacy::ID = 0;

FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
  return new AMDGPULateCodeGenPrepareLegacy();
}