1e8d8bef9SDimitry Andric //===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9e8d8bef9SDimitry Andric /// \file 10e8d8bef9SDimitry Andric /// This pass does misc. AMDGPU optimizations on IR *just* before instruction 11e8d8bef9SDimitry Andric /// selection. 12e8d8bef9SDimitry Andric // 13e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 14e8d8bef9SDimitry Andric 15e8d8bef9SDimitry Andric #include "AMDGPU.h" 16e8d8bef9SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 17*06c3fb27SDimitry Andric #include "llvm/Analysis/UniformityAnalysis.h" 18e8d8bef9SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 19e8d8bef9SDimitry Andric #include "llvm/IR/IRBuilder.h" 20e8d8bef9SDimitry Andric #include "llvm/IR/InstVisitor.h" 21e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 22e8d8bef9SDimitry Andric #include "llvm/Support/CommandLine.h" 23e8d8bef9SDimitry Andric #include "llvm/Support/KnownBits.h" 24e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 25e8d8bef9SDimitry Andric 26e8d8bef9SDimitry Andric #define DEBUG_TYPE "amdgpu-late-codegenprepare" 27e8d8bef9SDimitry Andric 28e8d8bef9SDimitry Andric using namespace llvm; 29e8d8bef9SDimitry Andric 30e8d8bef9SDimitry Andric // Scalar load widening needs running after load-store-vectorizer as that pass 31e8d8bef9SDimitry Andric // doesn't handle overlapping cases. In addition, this pass enhances the 32e8d8bef9SDimitry Andric // widening to handle cases where scalar sub-dword loads are naturally aligned 33e8d8bef9SDimitry Andric // only but not dword aligned. 34e8d8bef9SDimitry Andric static cl::opt<bool> 35e8d8bef9SDimitry Andric WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads", 36e8d8bef9SDimitry Andric cl::desc("Widen sub-dword constant address space loads in " 37e8d8bef9SDimitry Andric "AMDGPULateCodeGenPrepare"), 38e8d8bef9SDimitry Andric cl::ReallyHidden, cl::init(true)); 39e8d8bef9SDimitry Andric 40e8d8bef9SDimitry Andric namespace { 41e8d8bef9SDimitry Andric 42e8d8bef9SDimitry Andric class AMDGPULateCodeGenPrepare 43e8d8bef9SDimitry Andric : public FunctionPass, 44e8d8bef9SDimitry Andric public InstVisitor<AMDGPULateCodeGenPrepare, bool> { 45e8d8bef9SDimitry Andric Module *Mod = nullptr; 46e8d8bef9SDimitry Andric const DataLayout *DL = nullptr; 47e8d8bef9SDimitry Andric 48e8d8bef9SDimitry Andric AssumptionCache *AC = nullptr; 49*06c3fb27SDimitry Andric UniformityInfo *UA = nullptr; 50e8d8bef9SDimitry Andric 51e8d8bef9SDimitry Andric public: 52e8d8bef9SDimitry Andric static char ID; 53e8d8bef9SDimitry Andric 54e8d8bef9SDimitry Andric AMDGPULateCodeGenPrepare() : FunctionPass(ID) {} 55e8d8bef9SDimitry Andric 56e8d8bef9SDimitry Andric StringRef getPassName() const override { 57e8d8bef9SDimitry Andric return "AMDGPU IR late optimizations"; 58e8d8bef9SDimitry Andric } 59e8d8bef9SDimitry Andric 60e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 61e8d8bef9SDimitry Andric AU.addRequired<AssumptionCacheTracker>(); 62*06c3fb27SDimitry Andric AU.addRequired<UniformityInfoWrapperPass>(); 63e8d8bef9SDimitry Andric AU.setPreservesAll(); 64e8d8bef9SDimitry Andric } 65e8d8bef9SDimitry Andric 66e8d8bef9SDimitry Andric bool doInitialization(Module &M) override; 67e8d8bef9SDimitry Andric bool runOnFunction(Function &F) override; 68e8d8bef9SDimitry Andric 69e8d8bef9SDimitry Andric bool visitInstruction(Instruction &) { return false; } 70e8d8bef9SDimitry Andric 71e8d8bef9SDimitry Andric // Check if the specified value is at least DWORD aligned. 72e8d8bef9SDimitry Andric bool isDWORDAligned(const Value *V) const { 73e8d8bef9SDimitry Andric KnownBits Known = computeKnownBits(V, *DL, 0, AC); 74e8d8bef9SDimitry Andric return Known.countMinTrailingZeros() >= 2; 75e8d8bef9SDimitry Andric } 76e8d8bef9SDimitry Andric 77e8d8bef9SDimitry Andric bool canWidenScalarExtLoad(LoadInst &LI) const; 78e8d8bef9SDimitry Andric bool visitLoadInst(LoadInst &LI); 79e8d8bef9SDimitry Andric }; 80e8d8bef9SDimitry Andric 81e8d8bef9SDimitry Andric } // end anonymous namespace 82e8d8bef9SDimitry Andric 83e8d8bef9SDimitry Andric bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) { 84e8d8bef9SDimitry Andric Mod = &M; 85e8d8bef9SDimitry Andric DL = &Mod->getDataLayout(); 86e8d8bef9SDimitry Andric return false; 87e8d8bef9SDimitry Andric } 88e8d8bef9SDimitry Andric 89e8d8bef9SDimitry Andric bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) { 90e8d8bef9SDimitry Andric if (skipFunction(F)) 91e8d8bef9SDimitry Andric return false; 92e8d8bef9SDimitry Andric 93e8d8bef9SDimitry Andric AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 94*06c3fb27SDimitry Andric UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); 95e8d8bef9SDimitry Andric 96e8d8bef9SDimitry Andric bool Changed = false; 97e8d8bef9SDimitry Andric for (auto &BB : F) 98349cc55cSDimitry Andric for (Instruction &I : llvm::make_early_inc_range(BB)) 99349cc55cSDimitry Andric Changed |= visit(I); 100e8d8bef9SDimitry Andric 101e8d8bef9SDimitry Andric return Changed; 102e8d8bef9SDimitry Andric } 103e8d8bef9SDimitry Andric 104e8d8bef9SDimitry Andric bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const { 105e8d8bef9SDimitry Andric unsigned AS = LI.getPointerAddressSpace(); 106e8d8bef9SDimitry Andric // Skip non-constant address space. 107e8d8bef9SDimitry Andric if (AS != AMDGPUAS::CONSTANT_ADDRESS && 108e8d8bef9SDimitry Andric AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT) 109e8d8bef9SDimitry Andric return false; 110e8d8bef9SDimitry Andric // Skip non-simple loads. 111e8d8bef9SDimitry Andric if (!LI.isSimple()) 112e8d8bef9SDimitry Andric return false; 113e8d8bef9SDimitry Andric auto *Ty = LI.getType(); 114e8d8bef9SDimitry Andric // Skip aggregate types. 115e8d8bef9SDimitry Andric if (Ty->isAggregateType()) 116e8d8bef9SDimitry Andric return false; 117e8d8bef9SDimitry Andric unsigned TySize = DL->getTypeStoreSize(Ty); 118e8d8bef9SDimitry Andric // Only handle sub-DWORD loads. 119e8d8bef9SDimitry Andric if (TySize >= 4) 120e8d8bef9SDimitry Andric return false; 121e8d8bef9SDimitry Andric // That load must be at least naturally aligned. 122e8d8bef9SDimitry Andric if (LI.getAlign() < DL->getABITypeAlign(Ty)) 123e8d8bef9SDimitry Andric return false; 124e8d8bef9SDimitry Andric // It should be uniform, i.e. a scalar load. 125*06c3fb27SDimitry Andric return UA->isUniform(&LI); 126e8d8bef9SDimitry Andric } 127e8d8bef9SDimitry Andric 128e8d8bef9SDimitry Andric bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) { 129e8d8bef9SDimitry Andric if (!WidenLoads) 130e8d8bef9SDimitry Andric return false; 131e8d8bef9SDimitry Andric 132e8d8bef9SDimitry Andric // Skip if that load is already aligned on DWORD at least as it's handled in 133e8d8bef9SDimitry Andric // SDAG. 134e8d8bef9SDimitry Andric if (LI.getAlign() >= 4) 135e8d8bef9SDimitry Andric return false; 136e8d8bef9SDimitry Andric 137e8d8bef9SDimitry Andric if (!canWidenScalarExtLoad(LI)) 138e8d8bef9SDimitry Andric return false; 139e8d8bef9SDimitry Andric 140e8d8bef9SDimitry Andric int64_t Offset = 0; 141e8d8bef9SDimitry Andric auto *Base = 142e8d8bef9SDimitry Andric GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, *DL); 143e8d8bef9SDimitry Andric // If that base is not DWORD aligned, it's not safe to perform the following 144e8d8bef9SDimitry Andric // transforms. 145e8d8bef9SDimitry Andric if (!isDWORDAligned(Base)) 146e8d8bef9SDimitry Andric return false; 147e8d8bef9SDimitry Andric 148e8d8bef9SDimitry Andric int64_t Adjust = Offset & 0x3; 149e8d8bef9SDimitry Andric if (Adjust == 0) { 150e8d8bef9SDimitry Andric // With a zero adjust, the original alignment could be promoted with a 151e8d8bef9SDimitry Andric // better one. 152e8d8bef9SDimitry Andric LI.setAlignment(Align(4)); 153e8d8bef9SDimitry Andric return true; 154e8d8bef9SDimitry Andric } 155e8d8bef9SDimitry Andric 156e8d8bef9SDimitry Andric IRBuilder<> IRB(&LI); 157e8d8bef9SDimitry Andric IRB.SetCurrentDebugLocation(LI.getDebugLoc()); 158e8d8bef9SDimitry Andric 159*06c3fb27SDimitry Andric unsigned LdBits = DL->getTypeStoreSizeInBits(LI.getType()); 160e8d8bef9SDimitry Andric auto IntNTy = Type::getIntNTy(LI.getContext(), LdBits); 161e8d8bef9SDimitry Andric 162*06c3fb27SDimitry Andric auto *NewPtr = IRB.CreateConstGEP1_64( 163fe6060f1SDimitry Andric IRB.getInt8Ty(), 164*06c3fb27SDimitry Andric IRB.CreateAddrSpaceCast(Base, LI.getPointerOperand()->getType()), 165*06c3fb27SDimitry Andric Offset - Adjust); 166*06c3fb27SDimitry Andric 167fe6060f1SDimitry Andric LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4)); 168e8d8bef9SDimitry Andric NewLd->copyMetadata(LI); 169e8d8bef9SDimitry Andric NewLd->setMetadata(LLVMContext::MD_range, nullptr); 170e8d8bef9SDimitry Andric 171e8d8bef9SDimitry Andric unsigned ShAmt = Adjust * 8; 172e8d8bef9SDimitry Andric auto *NewVal = IRB.CreateBitCast( 173e8d8bef9SDimitry Andric IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt), IntNTy), LI.getType()); 174e8d8bef9SDimitry Andric LI.replaceAllUsesWith(NewVal); 175e8d8bef9SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(&LI); 176e8d8bef9SDimitry Andric 177e8d8bef9SDimitry Andric return true; 178e8d8bef9SDimitry Andric } 179e8d8bef9SDimitry Andric 180e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE, 181e8d8bef9SDimitry Andric "AMDGPU IR late optimizations", false, false) 182e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 183*06c3fb27SDimitry Andric INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) 184e8d8bef9SDimitry Andric INITIALIZE_PASS_END(AMDGPULateCodeGenPrepare, DEBUG_TYPE, 185e8d8bef9SDimitry Andric "AMDGPU IR late optimizations", false, false) 186e8d8bef9SDimitry Andric 187e8d8bef9SDimitry Andric char AMDGPULateCodeGenPrepare::ID = 0; 188e8d8bef9SDimitry Andric 189e8d8bef9SDimitry Andric FunctionPass *llvm::createAMDGPULateCodeGenPreparePass() { 190e8d8bef9SDimitry Andric return new AMDGPULateCodeGenPrepare(); 191e8d8bef9SDimitry Andric } 192