//===-- AMDGPULateCodeGenPrepare.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR *just* before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"

#define DEBUG_TYPE "amdgpu-late-codegenprepare"

using namespace llvm;

// Scalar load widening needs to run after load-store-vectorizer as that pass
// doesn't handle overlapping cases. In addition, this pass enhances the
// widening to handle cases where scalar sub-dword loads are only naturally
// aligned and not dword aligned.
static cl::opt<bool>
    WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
               cl::desc("Widen sub-dword constant address space loads in "
                        "AMDGPULateCodeGenPrepare"),
               cl::ReallyHidden, cl::init(true));

namespace {

class AMDGPULateCodeGenPrepare
    : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
  Function &F;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  AssumptionCache *const AC;
  UniformityInfo &UA;

  SmallVector<WeakTrackingVH, 8> DeadInsts;

public:
  AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
                           AssumptionCache *AC, UniformityInfo &UA)
      : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
  bool run();
  bool visitInstruction(Instruction &) { return false; }

  // Check if the specified value is at least DWORD aligned.
  bool isDWORDAligned(const Value *V) const {
    KnownBits Known = computeKnownBits(V, DL, AC);
    return Known.countMinTrailingZeros() >= 2;
  }

  bool canWidenScalarExtLoad(LoadInst &LI) const;
  bool visitLoadInst(LoadInst &LI);
};

using ValueToValueMap = DenseMap<const Value *, Value *>;

class LiveRegOptimizer {
private:
  Module &Mod;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  /// The scalar type to convert to.
  Type *const ConvertToScalar;
  /// Map of Value -> Converted Value.
  ValueToValueMap ValMap;
  /// Map containing the conversions from the optimal type back to the original
  /// type, per BB.
  DenseMap<BasicBlock *, ValueToValueMap> BBUseValMap;

public:
  /// Calculate and \p return the type to convert to, given a problematic \p
  /// OriginalType. In some instances, we may widen the type (e.g. v2i8 -> i32).
  Type *calculateConvertType(Type *OriginalType);
  /// Convert the virtual register defined by \p V to the compatible vector of
  /// legal type.
  Value *convertToOptType(Instruction *V, BasicBlock::iterator &InstPt);
  /// Convert the virtual register defined by \p V back to the original type \p
  /// ConvertType, stripping away the MSBs in cases where there was an imperfect
  /// fit (e.g. v2i32 -> v7i8).
  Value *convertFromOptType(Type *ConvertType, Instruction *V,
                            BasicBlock::iterator &InstPt,
                            BasicBlock *InsertBlock);
  /// Check for problematic PHI nodes or cross-bb values based on the value
  /// defined by \p I, and coerce to legal types if necessary. For a problematic
  /// PHI node, we coerce all incoming values in a single invocation.
  bool optimizeLiveType(Instruction *I,
                        SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  // Whether or not the type should be replaced to avoid inefficient
  // legalization code.
  bool shouldReplace(Type *ITy) {
    FixedVectorType *VTy = dyn_cast<FixedVectorType>(ITy);
    if (!VTy)
      return false;

    const auto *TLI = ST.getTargetLowering();

    Type *EltTy = VTy->getElementType();
    // If the element size is not less than the convert-to-scalar size, then we
    // can't do any bit packing.
    if (!EltTy->isIntegerTy() ||
        EltTy->getScalarSizeInBits() > ConvertToScalar->getScalarSizeInBits())
      return false;

    // Only coerce illegal types.
    TargetLoweringBase::LegalizeKind LK =
        TLI->getTypeConversion(EltTy->getContext(), EVT::getEVT(EltTy, false));
    return LK.first != TargetLoweringBase::TypeLegal;
  }

  bool isOpLegal(Instruction *I) { return isa<StoreInst, IntrinsicInst>(I); }

  bool isCoercionProfitable(Instruction *II) {
    SmallPtrSet<Instruction *, 4> CVisited;
    SmallVector<Instruction *, 4> UserList;

    // Check users for profitable conditions (i.e. a cross-block user which can
    // natively handle the illegal vector).
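    // The walk below looks through "transparent" operations (PHIs, shuffles,
    // insert/extract element, casts and amdgcn.perm), so a legal consumer that
    // is only reached through such operations still makes coercion profitable.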
    for (User *V : II->users())
      if (auto *UseInst = dyn_cast<Instruction>(V))
        UserList.push_back(UseInst);

    auto IsLookThru = [](Instruction *II) {
      if (const auto *Intr = dyn_cast<IntrinsicInst>(II))
        return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;
      return isa<PHINode, ShuffleVectorInst, InsertElementInst,
                 ExtractElementInst, CastInst>(II);
    };

    while (!UserList.empty()) {
      auto CII = UserList.pop_back_val();
      if (!CVisited.insert(CII).second)
        continue;

      if (CII->getParent() == II->getParent() && !IsLookThru(II))
        continue;

      if (isOpLegal(CII))
        return true;

      if (IsLookThru(CII))
        for (User *V : CII->users())
          if (auto *UseInst = dyn_cast<Instruction>(V))
            UserList.push_back(UseInst);
    }
    return false;
  }

  LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
      : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
        ConvertToScalar(Type::getInt32Ty(Mod.getContext())) {}
};

} // end anonymous namespace

bool AMDGPULateCodeGenPrepare::run() {
  // "Optimize" the virtual regs that cross basic block boundaries. When
  // building the SelectionDAG, vectors of illegal types that cross basic blocks
  // will be scalarized and widened, with each scalar living in its
  // own register. To work around this, this optimization converts the
  // vectors to equivalent vectors of legal type (which are converted back
  // before uses in subsequent blocks), to pack the bits into fewer physical
  // registers (used in CopyToReg/CopyFromReg pairs).
  LiveRegOptimizer LRO(*F.getParent(), ST);

  bool Changed = false;

  bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();

  for (auto &BB : reverse(F))
    for (Instruction &I : make_early_inc_range(reverse(BB))) {
      Changed |= !HasScalarSubwordLoads && visit(I);
      Changed |= LRO.optimizeLiveType(&I, DeadInsts);
    }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
  return Changed;
}

Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {
  assert(OriginalType->getScalarSizeInBits() <=
         ConvertToScalar->getScalarSizeInBits());

  FixedVectorType *VTy = cast<FixedVectorType>(OriginalType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize ConvertScalarSize = DL.getTypeSizeInBits(ConvertToScalar);
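  // Round up to whole ConvertToScalar words, e.g. a <7 x i8> (56 bits) needs
  // ceil(56 / 32) = 2 lanes of i32.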
  unsigned ConvertEltCount =
      (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;

  if (OriginalSize <= ConvertScalarSize)
    return IntegerType::get(Mod.getContext(), ConvertScalarSize);

  return VectorType::get(Type::getIntNTy(Mod.getContext(), ConvertScalarSize),
                         ConvertEltCount, false);
}

Value *LiveRegOptimizer::convertToOptType(Instruction *V,
                                          BasicBlock::iterator &InsertPt) {
  FixedVectorType *VTy = cast<FixedVectorType>(V->getType());
  Type *NewTy = calculateConvertType(V->getType());

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);

  IRBuilder<> Builder(V->getParent(), InsertPt);
  // If there is a bitsize match, we can fit the old vector into a new vector of
  // desired type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, we must use a wider vector.
  assert(NewSize > OriginalSize);
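  // Pad the original vector out to the wider element count first. The extra
  // shuffle lanes index the implicit second (poison) operand, so their
  // contents are irrelevant and get dropped again in convertFromOptType.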
  uint64_t ExpandedVecElementCount = NewSize / VTy->getScalarSizeInBits();

  SmallVector<int, 8> ShuffleMask;
  uint64_t OriginalElementCount = VTy->getElementCount().getFixedValue();
  for (unsigned I = 0; I < OriginalElementCount; I++)
    ShuffleMask.push_back(I);

  for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)
    ShuffleMask.push_back(OriginalElementCount);

  Value *ExpandedVec = Builder.CreateShuffleVector(V, ShuffleMask);
  return Builder.CreateBitCast(ExpandedVec, NewTy, V->getName() + ".bc");
}

Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
                                            BasicBlock::iterator &InsertPt,
                                            BasicBlock *InsertBB) {
  FixedVectorType *NewVTy = cast<FixedVectorType>(ConvertType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(V->getType());
  TypeSize NewSize = DL.getTypeSizeInBits(NewVTy);

  IRBuilder<> Builder(InsertBB, InsertPt);
  // If there is a bitsize match, we simply convert back to the original type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewVTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, then we must have used a wider value to
  // hold the bits.
  assert(OriginalSize > NewSize);
  // For wide scalars, we can just truncate the value.
  if (!V->getType()->isVectorTy()) {
    Instruction *Trunc = cast<Instruction>(
        Builder.CreateTrunc(V, IntegerType::get(Mod.getContext(), NewSize)));
    return cast<Instruction>(Builder.CreateBitCast(Trunc, NewVTy));
  }

  // For wider vectors, we must strip the MSBs to convert back to the original
  // type.
  VectorType *ExpandedVT = VectorType::get(
      Type::getIntNTy(Mod.getContext(), NewVTy->getScalarSizeInBits()),
      (OriginalSize / NewVTy->getScalarSizeInBits()), false);
  Instruction *Converted =
      cast<Instruction>(Builder.CreateBitCast(V, ExpandedVT));

  unsigned NarrowElementCount = NewVTy->getElementCount().getFixedValue();
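  // The identity mask 0..NarrowElementCount-1 keeps only the low lanes,
  // discarding the padding lanes added in convertToOptType.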
  SmallVector<int, 8> ShuffleMask(NarrowElementCount);
  std::iota(ShuffleMask.begin(), ShuffleMask.end(), 0);

  return Builder.CreateShuffleVector(Converted, ShuffleMask);
}

bool LiveRegOptimizer::optimizeLiveType(
    Instruction *I, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
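  // Transitively collect the values to coerce: PHI nodes of problematic type,
  // the non-PHI definitions feeding them (Defs), and the PHI / cross-block
  // users that will need a conversion back to the original type (Uses).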
  SmallVector<Instruction *, 4> Worklist;
  SmallPtrSet<PHINode *, 4> PhiNodes;
  SmallPtrSet<Instruction *, 4> Defs;
  SmallPtrSet<Instruction *, 4> Uses;
  SmallPtrSet<Instruction *, 4> Visited;

  Worklist.push_back(cast<Instruction>(I));
  while (!Worklist.empty()) {
    Instruction *II = Worklist.pop_back_val();

    if (!Visited.insert(II).second)
      continue;

    if (!shouldReplace(II->getType()))
      continue;

    if (!isCoercionProfitable(II))
      continue;

    if (PHINode *Phi = dyn_cast<PHINode>(II)) {
      PhiNodes.insert(Phi);
      // Collect all the incoming values of problematic PHI nodes.
      for (Value *V : Phi->incoming_values()) {
        // Repeat the collection process for newly found PHI nodes.
        if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
          if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
            Worklist.push_back(OpPhi);
          continue;
        }

        Instruction *IncInst = dyn_cast<Instruction>(V);
        // Other incoming value types (e.g. vector literals) are unhandled.
        if (!IncInst && !isa<ConstantAggregateZero>(V))
          return false;

        // Collect all other incoming values for coercion.
        if (IncInst)
          Defs.insert(IncInst);
      }
    }

    // Collect all relevant uses.
    for (User *V : II->users()) {
      // Repeat the collection process for problematic PHI nodes.
      if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
        if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
          Worklist.push_back(OpPhi);
        continue;
      }

      Instruction *UseInst = cast<Instruction>(V);
      // Collect all uses of PHI nodes and any use that crosses BB boundaries.
      if (UseInst->getParent() != II->getParent() || isa<PHINode>(II)) {
        Uses.insert(UseInst);
        if (!isa<PHINode>(II))
          Defs.insert(II);
      }
    }
  }

  // Coerce and track the defs.
  for (Instruction *D : Defs) {
    if (!ValMap.contains(D)) {
      BasicBlock::iterator InsertPt = std::next(D->getIterator());
      Value *ConvertVal = convertToOptType(D, InsertPt);
      assert(ConvertVal);
      ValMap[D] = ConvertVal;
    }
  }

  // Construct new-typed PHI nodes.
  for (PHINode *Phi : PhiNodes) {
    ValMap[Phi] = PHINode::Create(calculateConvertType(Phi->getType()),
                                  Phi->getNumIncomingValues(),
                                  Phi->getName() + ".tc", Phi->getIterator());
  }

  // Connect all the PHI nodes with their new incoming values.
  for (PHINode *Phi : PhiNodes) {
    PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
    bool MissingIncVal = false;
    for (int I = 0, E = Phi->getNumIncomingValues(); I < E; I++) {
      Value *IncVal = Phi->getIncomingValue(I);
      if (isa<ConstantAggregateZero>(IncVal)) {
        Type *NewType = calculateConvertType(Phi->getType());
        NewPhi->addIncoming(ConstantInt::get(NewType, 0, false),
                            Phi->getIncomingBlock(I));
      } else if (Value *Val = ValMap.lookup(IncVal))
        NewPhi->addIncoming(Val, Phi->getIncomingBlock(I));
      else
        MissingIncVal = true;
    }
    if (MissingIncVal) {
      Value *DeadVal = ValMap[Phi];
      // The coercion chain of the PHI is broken. Delete the Phi
      // from the ValMap and any connected / user Phis.
      SmallVector<Value *, 4> PHIWorklist;
      SmallPtrSet<Value *, 4> VisitedPhis;
      PHIWorklist.push_back(DeadVal);
      while (!PHIWorklist.empty()) {
        Value *NextDeadValue = PHIWorklist.pop_back_val();
        VisitedPhis.insert(NextDeadValue);
        auto OriginalPhi =
            llvm::find_if(PhiNodes, [this, &NextDeadValue](PHINode *CandPhi) {
              return ValMap[CandPhi] == NextDeadValue;
            });
        // This PHI may have already been removed from maps when
        // unwinding a previous Phi.
        if (OriginalPhi != PhiNodes.end())
          ValMap.erase(*OriginalPhi);

        DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));

        for (User *U : NextDeadValue->users()) {
          if (!VisitedPhis.contains(cast<PHINode>(U)))
            PHIWorklist.push_back(U);
        }
      }
    } else {
      DeadInsts.emplace_back(cast<Instruction>(Phi));
    }
  }
  // Coerce back to the original type and replace the uses.
  for (Instruction *U : Uses) {
    // Replace all converted operands for a use.
    for (auto [OpIdx, Op] : enumerate(U->operands())) {
      if (Value *Val = ValMap.lookup(Op)) {
        Value *NewVal = nullptr;
        if (BBUseValMap.contains(U->getParent()) &&
            BBUseValMap[U->getParent()].contains(Val))
          NewVal = BBUseValMap[U->getParent()][Val];
        else {
          BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
          // We may pick up ops that were previously converted for users in
          // other blocks. If there is an originally typed definition of the Op
          // already in this block, simply reuse it.
          if (isa<Instruction>(Op) && !isa<PHINode>(Op) &&
              U->getParent() == cast<Instruction>(Op)->getParent()) {
            NewVal = Op;
          } else {
            NewVal =
                convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
                                   InsertPt, U->getParent());
            BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
          }
        }
        assert(NewVal);
        U->setOperand(OpIdx, NewVal);
      }
    }
  }

  return true;
}

bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
  unsigned AS = LI.getPointerAddressSpace();
  // Skip non-constant address spaces.
  if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
      AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return false;
  // Skip non-simple loads.
  if (!LI.isSimple())
    return false;
  Type *Ty = LI.getType();
  // Skip aggregate types.
  if (Ty->isAggregateType())
    return false;
  unsigned TySize = DL.getTypeStoreSize(Ty);
  // Only handle sub-DWORD loads.
  if (TySize >= 4)
    return false;
  // The load must be at least naturally aligned.
  if (LI.getAlign() < DL.getABITypeAlign(Ty))
    return false;
  // It should be uniform, i.e. a scalar load.
  return UA.isUniform(&LI);
}

bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
  if (!WidenLoads)
    return false;

  // Skip if the load is already at least DWORD aligned, as that case is
  // handled in SDAG.
  if (LI.getAlign() >= 4)
    return false;

  if (!canWidenScalarExtLoad(LI))
    return false;

  int64_t Offset = 0;
  auto *Base =
      GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, DL);
  // If the base is not DWORD aligned, it's not safe to perform the following
  // transforms.
  if (!isDWORDAligned(Base))
    return false;

  int64_t Adjust = Offset & 0x3;
  if (Adjust == 0) {
    // With a zero adjust, the original load can simply be promoted to the
    // better alignment.
    LI.setAlignment(Align(4));
    return true;
  }

  IRBuilder<> IRB(&LI);
  IRB.SetCurrentDebugLocation(LI.getDebugLoc());

  unsigned LdBits = DL.getTypeStoreSizeInBits(LI.getType());
  auto *IntNTy = Type::getIntNTy(LI.getContext(), LdBits);

  auto *NewPtr = IRB.CreateConstGEP1_64(
      IRB.getInt8Ty(),
      IRB.CreateAddrSpaceCast(Base, LI.getPointerOperand()->getType()),
      Offset - Adjust);

  LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4));
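  // NewLd loads the whole naturally aligned dword that contains the requested
  // bytes: Base is DWORD aligned, so Offset - Adjust is a multiple of 4.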
  NewLd->copyMetadata(LI);
  NewLd->setMetadata(LLVMContext::MD_range, nullptr);

  unsigned ShAmt = Adjust * 8;
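  // Shift the requested bytes down into the low bits of the wide load, then
  // truncate and bitcast back to the original load type.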
  Value *NewVal = IRB.CreateBitCast(
      IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt),
                      DL.typeSizeEqualsStoreSize(LI.getType()) ? IntNTy
                                                               : LI.getType()),
      LI.getType());
  LI.replaceAllUsesWith(NewVal);
  DeadInsts.emplace_back(&LI);

  return true;
}

PreservedAnalyses
AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
  UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);

  bool Changed = AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();

  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA = PreservedAnalyses::none();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
public:
  static char ID;

  AMDGPULateCodeGenPrepareLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override {
    return "AMDGPU IR late optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<UniformityInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override;
};

bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  UniformityInfo &UI =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

  return AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
}

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                      "AMDGPU IR late optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                    "AMDGPU IR late optimizations", false, false)

char AMDGPULateCodeGenPrepareLegacy::ID = 0;

FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
  return new AMDGPULateCodeGenPrepareLegacy();
}