1*0b57cec5SDimitry Andric //==- AArch64PromoteConstant.cpp - Promote constant to global for AArch64 --==// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // This file implements the AArch64PromoteConstant pass which promotes constants 10*0b57cec5SDimitry Andric // to global variables when this is likely to be more efficient. Currently only 11*0b57cec5SDimitry Andric // types related to constant vector (i.e., constant vector, array of constant 12*0b57cec5SDimitry Andric // vectors, constant structure with a constant vector field, etc.) are promoted 13*0b57cec5SDimitry Andric // to global variables. Constant vectors are likely to be lowered in target 14*0b57cec5SDimitry Andric // constant pool during instruction selection already; therefore, the access 15*0b57cec5SDimitry Andric // will remain the same (memory load), but the structure types are not split 16*0b57cec5SDimitry Andric // into different constant pool accesses for each field. A bonus side effect is 17*0b57cec5SDimitry Andric // that created globals may be merged by the global merge pass. 18*0b57cec5SDimitry Andric // 19*0b57cec5SDimitry Andric // FIXME: This pass may be useful for other targets too. 20*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 21*0b57cec5SDimitry Andric 22*0b57cec5SDimitry Andric #include "AArch64.h" 23*0b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 24*0b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 25*0b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 26*0b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 27*0b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 28*0b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 29*0b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 30*0b57cec5SDimitry Andric #include "llvm/IR/Function.h" 31*0b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 32*0b57cec5SDimitry Andric #include "llvm/IR/GlobalVariable.h" 33*0b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 34*0b57cec5SDimitry Andric #include "llvm/IR/InlineAsm.h" 35*0b57cec5SDimitry Andric #include "llvm/IR/InstIterator.h" 36*0b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 37*0b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 38*0b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 39*0b57cec5SDimitry Andric #include "llvm/IR/Module.h" 40*0b57cec5SDimitry Andric #include "llvm/IR/Type.h" 41*0b57cec5SDimitry Andric #include "llvm/Pass.h" 42*0b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 43*0b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 44*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 45*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 46*0b57cec5SDimitry Andric #include <algorithm> 47*0b57cec5SDimitry Andric #include <cassert> 48*0b57cec5SDimitry Andric #include <utility> 49*0b57cec5SDimitry Andric 50*0b57cec5SDimitry Andric using namespace llvm; 51*0b57cec5SDimitry Andric 52*0b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-promote-const" 53*0b57cec5SDimitry Andric 54*0b57cec5SDimitry Andric // Stress testing mode - disable heuristics. 55*0b57cec5SDimitry Andric static cl::opt<bool> Stress("aarch64-stress-promote-const", cl::Hidden, 56*0b57cec5SDimitry Andric cl::desc("Promote all vector constants")); 57*0b57cec5SDimitry Andric 58*0b57cec5SDimitry Andric STATISTIC(NumPromoted, "Number of promoted constants"); 59*0b57cec5SDimitry Andric STATISTIC(NumPromotedUses, "Number of promoted constants uses"); 60*0b57cec5SDimitry Andric 61*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 62*0b57cec5SDimitry Andric // AArch64PromoteConstant 63*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 64*0b57cec5SDimitry Andric 65*0b57cec5SDimitry Andric namespace { 66*0b57cec5SDimitry Andric 67*0b57cec5SDimitry Andric /// Promotes interesting constant into global variables. 68*0b57cec5SDimitry Andric /// The motivating example is: 69*0b57cec5SDimitry Andric /// static const uint16_t TableA[32] = { 70*0b57cec5SDimitry Andric /// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768, 71*0b57cec5SDimitry Andric /// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215, 72*0b57cec5SDimitry Andric /// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846, 73*0b57cec5SDimitry Andric /// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725, 74*0b57cec5SDimitry Andric /// }; 75*0b57cec5SDimitry Andric /// 76*0b57cec5SDimitry Andric /// uint8x16x4_t LoadStatic(void) { 77*0b57cec5SDimitry Andric /// uint8x16x4_t ret; 78*0b57cec5SDimitry Andric /// ret.val[0] = vld1q_u16(TableA + 0); 79*0b57cec5SDimitry Andric /// ret.val[1] = vld1q_u16(TableA + 8); 80*0b57cec5SDimitry Andric /// ret.val[2] = vld1q_u16(TableA + 16); 81*0b57cec5SDimitry Andric /// ret.val[3] = vld1q_u16(TableA + 24); 82*0b57cec5SDimitry Andric /// return ret; 83*0b57cec5SDimitry Andric /// } 84*0b57cec5SDimitry Andric /// 85*0b57cec5SDimitry Andric /// The constants in this example are folded into the uses. Thus, 4 different 86*0b57cec5SDimitry Andric /// constants are created. 87*0b57cec5SDimitry Andric /// 88*0b57cec5SDimitry Andric /// As their type is vector the cheapest way to create them is to load them 89*0b57cec5SDimitry Andric /// for the memory. 90*0b57cec5SDimitry Andric /// 91*0b57cec5SDimitry Andric /// Therefore the final assembly final has 4 different loads. With this pass 92*0b57cec5SDimitry Andric /// enabled, only one load is issued for the constants. 93*0b57cec5SDimitry Andric class AArch64PromoteConstant : public ModulePass { 94*0b57cec5SDimitry Andric public: 95*0b57cec5SDimitry Andric struct PromotedConstant { 96*0b57cec5SDimitry Andric bool ShouldConvert = false; 97*0b57cec5SDimitry Andric GlobalVariable *GV = nullptr; 98*0b57cec5SDimitry Andric }; 99*0b57cec5SDimitry Andric using PromotionCacheTy = SmallDenseMap<Constant *, PromotedConstant, 16>; 100*0b57cec5SDimitry Andric 101*0b57cec5SDimitry Andric struct UpdateRecord { 102*0b57cec5SDimitry Andric Constant *C; 103*0b57cec5SDimitry Andric Instruction *User; 104*0b57cec5SDimitry Andric unsigned Op; 105*0b57cec5SDimitry Andric 106*0b57cec5SDimitry Andric UpdateRecord(Constant *C, Instruction *User, unsigned Op) 107*0b57cec5SDimitry Andric : C(C), User(User), Op(Op) {} 108*0b57cec5SDimitry Andric }; 109*0b57cec5SDimitry Andric 110*0b57cec5SDimitry Andric static char ID; 111*0b57cec5SDimitry Andric 112*0b57cec5SDimitry Andric AArch64PromoteConstant() : ModulePass(ID) { 113*0b57cec5SDimitry Andric initializeAArch64PromoteConstantPass(*PassRegistry::getPassRegistry()); 114*0b57cec5SDimitry Andric } 115*0b57cec5SDimitry Andric 116*0b57cec5SDimitry Andric StringRef getPassName() const override { return "AArch64 Promote Constant"; } 117*0b57cec5SDimitry Andric 118*0b57cec5SDimitry Andric /// Iterate over the functions and promote the interesting constants into 119*0b57cec5SDimitry Andric /// global variables with module scope. 120*0b57cec5SDimitry Andric bool runOnModule(Module &M) override { 121*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << getPassName() << '\n'); 122*0b57cec5SDimitry Andric if (skipModule(M)) 123*0b57cec5SDimitry Andric return false; 124*0b57cec5SDimitry Andric bool Changed = false; 125*0b57cec5SDimitry Andric PromotionCacheTy PromotionCache; 126*0b57cec5SDimitry Andric for (auto &MF : M) { 127*0b57cec5SDimitry Andric Changed |= runOnFunction(MF, PromotionCache); 128*0b57cec5SDimitry Andric } 129*0b57cec5SDimitry Andric return Changed; 130*0b57cec5SDimitry Andric } 131*0b57cec5SDimitry Andric 132*0b57cec5SDimitry Andric private: 133*0b57cec5SDimitry Andric /// Look for interesting constants used within the given function. 134*0b57cec5SDimitry Andric /// Promote them into global variables, load these global variables within 135*0b57cec5SDimitry Andric /// the related function, so that the number of inserted load is minimal. 136*0b57cec5SDimitry Andric bool runOnFunction(Function &F, PromotionCacheTy &PromotionCache); 137*0b57cec5SDimitry Andric 138*0b57cec5SDimitry Andric // This transformation requires dominator info 139*0b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 140*0b57cec5SDimitry Andric AU.setPreservesCFG(); 141*0b57cec5SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 142*0b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 143*0b57cec5SDimitry Andric } 144*0b57cec5SDimitry Andric 145*0b57cec5SDimitry Andric /// Type to store a list of Uses. 146*0b57cec5SDimitry Andric using Uses = SmallVector<std::pair<Instruction *, unsigned>, 4>; 147*0b57cec5SDimitry Andric /// Map an insertion point to all the uses it dominates. 148*0b57cec5SDimitry Andric using InsertionPoints = DenseMap<Instruction *, Uses>; 149*0b57cec5SDimitry Andric 150*0b57cec5SDimitry Andric /// Find the closest point that dominates the given Use. 151*0b57cec5SDimitry Andric Instruction *findInsertionPoint(Instruction &User, unsigned OpNo); 152*0b57cec5SDimitry Andric 153*0b57cec5SDimitry Andric /// Check if the given insertion point is dominated by an existing 154*0b57cec5SDimitry Andric /// insertion point. 155*0b57cec5SDimitry Andric /// If true, the given use is added to the list of dominated uses for 156*0b57cec5SDimitry Andric /// the related existing point. 157*0b57cec5SDimitry Andric /// \param NewPt the insertion point to be checked 158*0b57cec5SDimitry Andric /// \param User the user of the constant 159*0b57cec5SDimitry Andric /// \param OpNo the operand number of the use 160*0b57cec5SDimitry Andric /// \param InsertPts existing insertion points 161*0b57cec5SDimitry Andric /// \pre NewPt and all instruction in InsertPts belong to the same function 162*0b57cec5SDimitry Andric /// \return true if one of the insertion point in InsertPts dominates NewPt, 163*0b57cec5SDimitry Andric /// false otherwise 164*0b57cec5SDimitry Andric bool isDominated(Instruction *NewPt, Instruction *User, unsigned OpNo, 165*0b57cec5SDimitry Andric InsertionPoints &InsertPts); 166*0b57cec5SDimitry Andric 167*0b57cec5SDimitry Andric /// Check if the given insertion point can be merged with an existing 168*0b57cec5SDimitry Andric /// insertion point in a common dominator. 169*0b57cec5SDimitry Andric /// If true, the given use is added to the list of the created insertion 170*0b57cec5SDimitry Andric /// point. 171*0b57cec5SDimitry Andric /// \param NewPt the insertion point to be checked 172*0b57cec5SDimitry Andric /// \param User the user of the constant 173*0b57cec5SDimitry Andric /// \param OpNo the operand number of the use 174*0b57cec5SDimitry Andric /// \param InsertPts existing insertion points 175*0b57cec5SDimitry Andric /// \pre NewPt and all instruction in InsertPts belong to the same function 176*0b57cec5SDimitry Andric /// \pre isDominated returns false for the exact same parameters. 177*0b57cec5SDimitry Andric /// \return true if it exists an insertion point in InsertPts that could 178*0b57cec5SDimitry Andric /// have been merged with NewPt in a common dominator, 179*0b57cec5SDimitry Andric /// false otherwise 180*0b57cec5SDimitry Andric bool tryAndMerge(Instruction *NewPt, Instruction *User, unsigned OpNo, 181*0b57cec5SDimitry Andric InsertionPoints &InsertPts); 182*0b57cec5SDimitry Andric 183*0b57cec5SDimitry Andric /// Compute the minimal insertion points to dominates all the interesting 184*0b57cec5SDimitry Andric /// uses of value. 185*0b57cec5SDimitry Andric /// Insertion points are group per function and each insertion point 186*0b57cec5SDimitry Andric /// contains a list of all the uses it dominates within the related function 187*0b57cec5SDimitry Andric /// \param User the user of the constant 188*0b57cec5SDimitry Andric /// \param OpNo the operand number of the constant 189*0b57cec5SDimitry Andric /// \param[out] InsertPts output storage of the analysis 190*0b57cec5SDimitry Andric void computeInsertionPoint(Instruction *User, unsigned OpNo, 191*0b57cec5SDimitry Andric InsertionPoints &InsertPts); 192*0b57cec5SDimitry Andric 193*0b57cec5SDimitry Andric /// Insert a definition of a new global variable at each point contained in 194*0b57cec5SDimitry Andric /// InsPtsPerFunc and update the related uses (also contained in 195*0b57cec5SDimitry Andric /// InsPtsPerFunc). 196*0b57cec5SDimitry Andric void insertDefinitions(Function &F, GlobalVariable &GV, 197*0b57cec5SDimitry Andric InsertionPoints &InsertPts); 198*0b57cec5SDimitry Andric 199*0b57cec5SDimitry Andric /// Do the constant promotion indicated by the Updates records, keeping track 200*0b57cec5SDimitry Andric /// of globals in PromotionCache. 201*0b57cec5SDimitry Andric void promoteConstants(Function &F, SmallVectorImpl<UpdateRecord> &Updates, 202*0b57cec5SDimitry Andric PromotionCacheTy &PromotionCache); 203*0b57cec5SDimitry Andric 204*0b57cec5SDimitry Andric /// Transfer the list of dominated uses of IPI to NewPt in InsertPts. 205*0b57cec5SDimitry Andric /// Append Use to this list and delete the entry of IPI in InsertPts. 206*0b57cec5SDimitry Andric static void appendAndTransferDominatedUses(Instruction *NewPt, 207*0b57cec5SDimitry Andric Instruction *User, unsigned OpNo, 208*0b57cec5SDimitry Andric InsertionPoints::iterator &IPI, 209*0b57cec5SDimitry Andric InsertionPoints &InsertPts) { 210*0b57cec5SDimitry Andric // Record the dominated use. 211*0b57cec5SDimitry Andric IPI->second.emplace_back(User, OpNo); 212*0b57cec5SDimitry Andric // Transfer the dominated uses of IPI to NewPt 213*0b57cec5SDimitry Andric // Inserting into the DenseMap may invalidate existing iterator. 214*0b57cec5SDimitry Andric // Keep a copy of the key to find the iterator to erase. Keep a copy of the 215*0b57cec5SDimitry Andric // value so that we don't have to dereference IPI->second. 216*0b57cec5SDimitry Andric Instruction *OldInstr = IPI->first; 217*0b57cec5SDimitry Andric Uses OldUses = std::move(IPI->second); 218*0b57cec5SDimitry Andric InsertPts[NewPt] = std::move(OldUses); 219*0b57cec5SDimitry Andric // Erase IPI. 220*0b57cec5SDimitry Andric InsertPts.erase(OldInstr); 221*0b57cec5SDimitry Andric } 222*0b57cec5SDimitry Andric }; 223*0b57cec5SDimitry Andric 224*0b57cec5SDimitry Andric } // end anonymous namespace 225*0b57cec5SDimitry Andric 226*0b57cec5SDimitry Andric char AArch64PromoteConstant::ID = 0; 227*0b57cec5SDimitry Andric 228*0b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const", 229*0b57cec5SDimitry Andric "AArch64 Promote Constant Pass", false, false) 230*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 231*0b57cec5SDimitry Andric INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const", 232*0b57cec5SDimitry Andric "AArch64 Promote Constant Pass", false, false) 233*0b57cec5SDimitry Andric 234*0b57cec5SDimitry Andric ModulePass *llvm::createAArch64PromoteConstantPass() { 235*0b57cec5SDimitry Andric return new AArch64PromoteConstant(); 236*0b57cec5SDimitry Andric } 237*0b57cec5SDimitry Andric 238*0b57cec5SDimitry Andric /// Check if the given type uses a vector type. 239*0b57cec5SDimitry Andric static bool isConstantUsingVectorTy(const Type *CstTy) { 240*0b57cec5SDimitry Andric if (CstTy->isVectorTy()) 241*0b57cec5SDimitry Andric return true; 242*0b57cec5SDimitry Andric if (CstTy->isStructTy()) { 243*0b57cec5SDimitry Andric for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements(); 244*0b57cec5SDimitry Andric EltIdx < EndEltIdx; ++EltIdx) 245*0b57cec5SDimitry Andric if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx))) 246*0b57cec5SDimitry Andric return true; 247*0b57cec5SDimitry Andric } else if (CstTy->isArrayTy()) 248*0b57cec5SDimitry Andric return isConstantUsingVectorTy(CstTy->getArrayElementType()); 249*0b57cec5SDimitry Andric return false; 250*0b57cec5SDimitry Andric } 251*0b57cec5SDimitry Andric 252*0b57cec5SDimitry Andric /// Check if the given use (Instruction + OpIdx) of Cst should be converted into 253*0b57cec5SDimitry Andric /// a load of a global variable initialized with Cst. 254*0b57cec5SDimitry Andric /// A use should be converted if it is legal to do so. 255*0b57cec5SDimitry Andric /// For instance, it is not legal to turn the mask operand of a shuffle vector 256*0b57cec5SDimitry Andric /// into a load of a global variable. 257*0b57cec5SDimitry Andric static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr, 258*0b57cec5SDimitry Andric unsigned OpIdx) { 259*0b57cec5SDimitry Andric // shufflevector instruction expects a const for the mask argument, i.e., the 260*0b57cec5SDimitry Andric // third argument. Do not promote this use in that case. 261*0b57cec5SDimitry Andric if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2) 262*0b57cec5SDimitry Andric return false; 263*0b57cec5SDimitry Andric 264*0b57cec5SDimitry Andric // extractvalue instruction expects a const idx. 265*0b57cec5SDimitry Andric if (isa<const ExtractValueInst>(Instr) && OpIdx > 0) 266*0b57cec5SDimitry Andric return false; 267*0b57cec5SDimitry Andric 268*0b57cec5SDimitry Andric // extractvalue instruction expects a const idx. 269*0b57cec5SDimitry Andric if (isa<const InsertValueInst>(Instr) && OpIdx > 1) 270*0b57cec5SDimitry Andric return false; 271*0b57cec5SDimitry Andric 272*0b57cec5SDimitry Andric if (isa<const AllocaInst>(Instr) && OpIdx > 0) 273*0b57cec5SDimitry Andric return false; 274*0b57cec5SDimitry Andric 275*0b57cec5SDimitry Andric // Alignment argument must be constant. 276*0b57cec5SDimitry Andric if (isa<const LoadInst>(Instr) && OpIdx > 0) 277*0b57cec5SDimitry Andric return false; 278*0b57cec5SDimitry Andric 279*0b57cec5SDimitry Andric // Alignment argument must be constant. 280*0b57cec5SDimitry Andric if (isa<const StoreInst>(Instr) && OpIdx > 1) 281*0b57cec5SDimitry Andric return false; 282*0b57cec5SDimitry Andric 283*0b57cec5SDimitry Andric // Index must be constant. 284*0b57cec5SDimitry Andric if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0) 285*0b57cec5SDimitry Andric return false; 286*0b57cec5SDimitry Andric 287*0b57cec5SDimitry Andric // Personality function and filters must be constant. 288*0b57cec5SDimitry Andric // Give up on that instruction. 289*0b57cec5SDimitry Andric if (isa<const LandingPadInst>(Instr)) 290*0b57cec5SDimitry Andric return false; 291*0b57cec5SDimitry Andric 292*0b57cec5SDimitry Andric // Switch instruction expects constants to compare to. 293*0b57cec5SDimitry Andric if (isa<const SwitchInst>(Instr)) 294*0b57cec5SDimitry Andric return false; 295*0b57cec5SDimitry Andric 296*0b57cec5SDimitry Andric // Expected address must be a constant. 297*0b57cec5SDimitry Andric if (isa<const IndirectBrInst>(Instr)) 298*0b57cec5SDimitry Andric return false; 299*0b57cec5SDimitry Andric 300*0b57cec5SDimitry Andric // Do not mess with intrinsics. 301*0b57cec5SDimitry Andric if (isa<const IntrinsicInst>(Instr)) 302*0b57cec5SDimitry Andric return false; 303*0b57cec5SDimitry Andric 304*0b57cec5SDimitry Andric // Do not mess with inline asm. 305*0b57cec5SDimitry Andric const CallInst *CI = dyn_cast<const CallInst>(Instr); 306*0b57cec5SDimitry Andric return !(CI && isa<const InlineAsm>(CI->getCalledValue())); 307*0b57cec5SDimitry Andric } 308*0b57cec5SDimitry Andric 309*0b57cec5SDimitry Andric /// Check if the given Cst should be converted into 310*0b57cec5SDimitry Andric /// a load of a global variable initialized with Cst. 311*0b57cec5SDimitry Andric /// A constant should be converted if it is likely that the materialization of 312*0b57cec5SDimitry Andric /// the constant will be tricky. Thus, we give up on zero or undef values. 313*0b57cec5SDimitry Andric /// 314*0b57cec5SDimitry Andric /// \todo Currently, accept only vector related types. 315*0b57cec5SDimitry Andric /// Also we give up on all simple vector type to keep the existing 316*0b57cec5SDimitry Andric /// behavior. Otherwise, we should push here all the check of the lowering of 317*0b57cec5SDimitry Andric /// BUILD_VECTOR. By giving up, we lose the potential benefit of merging 318*0b57cec5SDimitry Andric /// constant via global merge and the fact that the same constant is stored 319*0b57cec5SDimitry Andric /// only once with this method (versus, as many function that uses the constant 320*0b57cec5SDimitry Andric /// for the regular approach, even for float). 321*0b57cec5SDimitry Andric /// Again, the simplest solution would be to promote every 322*0b57cec5SDimitry Andric /// constant and rematerialize them when they are actually cheap to create. 323*0b57cec5SDimitry Andric static bool shouldConvertImpl(const Constant *Cst) { 324*0b57cec5SDimitry Andric if (isa<const UndefValue>(Cst)) 325*0b57cec5SDimitry Andric return false; 326*0b57cec5SDimitry Andric 327*0b57cec5SDimitry Andric // FIXME: In some cases, it may be interesting to promote in memory 328*0b57cec5SDimitry Andric // a zero initialized constant. 329*0b57cec5SDimitry Andric // E.g., when the type of Cst require more instructions than the 330*0b57cec5SDimitry Andric // adrp/add/load sequence or when this sequence can be shared by several 331*0b57cec5SDimitry Andric // instances of Cst. 332*0b57cec5SDimitry Andric // Ideally, we could promote this into a global and rematerialize the constant 333*0b57cec5SDimitry Andric // when it was a bad idea. 334*0b57cec5SDimitry Andric if (Cst->isZeroValue()) 335*0b57cec5SDimitry Andric return false; 336*0b57cec5SDimitry Andric 337*0b57cec5SDimitry Andric if (Stress) 338*0b57cec5SDimitry Andric return true; 339*0b57cec5SDimitry Andric 340*0b57cec5SDimitry Andric // FIXME: see function \todo 341*0b57cec5SDimitry Andric if (Cst->getType()->isVectorTy()) 342*0b57cec5SDimitry Andric return false; 343*0b57cec5SDimitry Andric return isConstantUsingVectorTy(Cst->getType()); 344*0b57cec5SDimitry Andric } 345*0b57cec5SDimitry Andric 346*0b57cec5SDimitry Andric static bool 347*0b57cec5SDimitry Andric shouldConvert(Constant &C, 348*0b57cec5SDimitry Andric AArch64PromoteConstant::PromotionCacheTy &PromotionCache) { 349*0b57cec5SDimitry Andric auto Converted = PromotionCache.insert( 350*0b57cec5SDimitry Andric std::make_pair(&C, AArch64PromoteConstant::PromotedConstant())); 351*0b57cec5SDimitry Andric if (Converted.second) 352*0b57cec5SDimitry Andric Converted.first->second.ShouldConvert = shouldConvertImpl(&C); 353*0b57cec5SDimitry Andric return Converted.first->second.ShouldConvert; 354*0b57cec5SDimitry Andric } 355*0b57cec5SDimitry Andric 356*0b57cec5SDimitry Andric Instruction *AArch64PromoteConstant::findInsertionPoint(Instruction &User, 357*0b57cec5SDimitry Andric unsigned OpNo) { 358*0b57cec5SDimitry Andric // If this user is a phi, the insertion point is in the related 359*0b57cec5SDimitry Andric // incoming basic block. 360*0b57cec5SDimitry Andric if (PHINode *PhiInst = dyn_cast<PHINode>(&User)) 361*0b57cec5SDimitry Andric return PhiInst->getIncomingBlock(OpNo)->getTerminator(); 362*0b57cec5SDimitry Andric 363*0b57cec5SDimitry Andric return &User; 364*0b57cec5SDimitry Andric } 365*0b57cec5SDimitry Andric 366*0b57cec5SDimitry Andric bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Instruction *User, 367*0b57cec5SDimitry Andric unsigned OpNo, 368*0b57cec5SDimitry Andric InsertionPoints &InsertPts) { 369*0b57cec5SDimitry Andric DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( 370*0b57cec5SDimitry Andric *NewPt->getParent()->getParent()).getDomTree(); 371*0b57cec5SDimitry Andric 372*0b57cec5SDimitry Andric // Traverse all the existing insertion points and check if one is dominating 373*0b57cec5SDimitry Andric // NewPt. If it is, remember that. 374*0b57cec5SDimitry Andric for (auto &IPI : InsertPts) { 375*0b57cec5SDimitry Andric if (NewPt == IPI.first || DT.dominates(IPI.first, NewPt) || 376*0b57cec5SDimitry Andric // When IPI.first is a terminator instruction, DT may think that 377*0b57cec5SDimitry Andric // the result is defined on the edge. 378*0b57cec5SDimitry Andric // Here we are testing the insertion point, not the definition. 379*0b57cec5SDimitry Andric (IPI.first->getParent() != NewPt->getParent() && 380*0b57cec5SDimitry Andric DT.dominates(IPI.first->getParent(), NewPt->getParent()))) { 381*0b57cec5SDimitry Andric // No need to insert this point. Just record the dominated use. 382*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Insertion point dominated by:\n"); 383*0b57cec5SDimitry Andric LLVM_DEBUG(IPI.first->print(dbgs())); 384*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 385*0b57cec5SDimitry Andric IPI.second.emplace_back(User, OpNo); 386*0b57cec5SDimitry Andric return true; 387*0b57cec5SDimitry Andric } 388*0b57cec5SDimitry Andric } 389*0b57cec5SDimitry Andric return false; 390*0b57cec5SDimitry Andric } 391*0b57cec5SDimitry Andric 392*0b57cec5SDimitry Andric bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, Instruction *User, 393*0b57cec5SDimitry Andric unsigned OpNo, 394*0b57cec5SDimitry Andric InsertionPoints &InsertPts) { 395*0b57cec5SDimitry Andric DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( 396*0b57cec5SDimitry Andric *NewPt->getParent()->getParent()).getDomTree(); 397*0b57cec5SDimitry Andric BasicBlock *NewBB = NewPt->getParent(); 398*0b57cec5SDimitry Andric 399*0b57cec5SDimitry Andric // Traverse all the existing insertion point and check if one is dominated by 400*0b57cec5SDimitry Andric // NewPt and thus useless or can be combined with NewPt into a common 401*0b57cec5SDimitry Andric // dominator. 402*0b57cec5SDimitry Andric for (InsertionPoints::iterator IPI = InsertPts.begin(), 403*0b57cec5SDimitry Andric EndIPI = InsertPts.end(); 404*0b57cec5SDimitry Andric IPI != EndIPI; ++IPI) { 405*0b57cec5SDimitry Andric BasicBlock *CurBB = IPI->first->getParent(); 406*0b57cec5SDimitry Andric if (NewBB == CurBB) { 407*0b57cec5SDimitry Andric // Instructions are in the same block. 408*0b57cec5SDimitry Andric // By construction, NewPt is dominating the other. 409*0b57cec5SDimitry Andric // Indeed, isDominated returned false with the exact same arguments. 410*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Merge insertion point with:\n"); 411*0b57cec5SDimitry Andric LLVM_DEBUG(IPI->first->print(dbgs())); 412*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\nat considered insertion point.\n"); 413*0b57cec5SDimitry Andric appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); 414*0b57cec5SDimitry Andric return true; 415*0b57cec5SDimitry Andric } 416*0b57cec5SDimitry Andric 417*0b57cec5SDimitry Andric // Look for a common dominator 418*0b57cec5SDimitry Andric BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB); 419*0b57cec5SDimitry Andric // If none exists, we cannot merge these two points. 420*0b57cec5SDimitry Andric if (!CommonDominator) 421*0b57cec5SDimitry Andric continue; 422*0b57cec5SDimitry Andric 423*0b57cec5SDimitry Andric if (CommonDominator != NewBB) { 424*0b57cec5SDimitry Andric // By construction, the CommonDominator cannot be CurBB. 425*0b57cec5SDimitry Andric assert(CommonDominator != CurBB && 426*0b57cec5SDimitry Andric "Instruction has not been rejected during isDominated check!"); 427*0b57cec5SDimitry Andric // Take the last instruction of the CommonDominator as insertion point 428*0b57cec5SDimitry Andric NewPt = CommonDominator->getTerminator(); 429*0b57cec5SDimitry Andric } 430*0b57cec5SDimitry Andric // else, CommonDominator is the block of NewBB, hence NewBB is the last 431*0b57cec5SDimitry Andric // possible insertion point in that block. 432*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Merge insertion point with:\n"); 433*0b57cec5SDimitry Andric LLVM_DEBUG(IPI->first->print(dbgs())); 434*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 435*0b57cec5SDimitry Andric LLVM_DEBUG(NewPt->print(dbgs())); 436*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 437*0b57cec5SDimitry Andric appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); 438*0b57cec5SDimitry Andric return true; 439*0b57cec5SDimitry Andric } 440*0b57cec5SDimitry Andric return false; 441*0b57cec5SDimitry Andric } 442*0b57cec5SDimitry Andric 443*0b57cec5SDimitry Andric void AArch64PromoteConstant::computeInsertionPoint( 444*0b57cec5SDimitry Andric Instruction *User, unsigned OpNo, InsertionPoints &InsertPts) { 445*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Considered use, opidx " << OpNo << ":\n"); 446*0b57cec5SDimitry Andric LLVM_DEBUG(User->print(dbgs())); 447*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 448*0b57cec5SDimitry Andric 449*0b57cec5SDimitry Andric Instruction *InsertionPoint = findInsertionPoint(*User, OpNo); 450*0b57cec5SDimitry Andric 451*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Considered insertion point:\n"); 452*0b57cec5SDimitry Andric LLVM_DEBUG(InsertionPoint->print(dbgs())); 453*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 454*0b57cec5SDimitry Andric 455*0b57cec5SDimitry Andric if (isDominated(InsertionPoint, User, OpNo, InsertPts)) 456*0b57cec5SDimitry Andric return; 457*0b57cec5SDimitry Andric // This insertion point is useful, check if we can merge some insertion 458*0b57cec5SDimitry Andric // point in a common dominator or if NewPt dominates an existing one. 459*0b57cec5SDimitry Andric if (tryAndMerge(InsertionPoint, User, OpNo, InsertPts)) 460*0b57cec5SDimitry Andric return; 461*0b57cec5SDimitry Andric 462*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Keep considered insertion point\n"); 463*0b57cec5SDimitry Andric 464*0b57cec5SDimitry Andric // It is definitely useful by its own 465*0b57cec5SDimitry Andric InsertPts[InsertionPoint].emplace_back(User, OpNo); 466*0b57cec5SDimitry Andric } 467*0b57cec5SDimitry Andric 468*0b57cec5SDimitry Andric static void ensurePromotedGV(Function &F, Constant &C, 469*0b57cec5SDimitry Andric AArch64PromoteConstant::PromotedConstant &PC) { 470*0b57cec5SDimitry Andric assert(PC.ShouldConvert && 471*0b57cec5SDimitry Andric "Expected that we should convert this to a global"); 472*0b57cec5SDimitry Andric if (PC.GV) 473*0b57cec5SDimitry Andric return; 474*0b57cec5SDimitry Andric PC.GV = new GlobalVariable( 475*0b57cec5SDimitry Andric *F.getParent(), C.getType(), true, GlobalValue::InternalLinkage, nullptr, 476*0b57cec5SDimitry Andric "_PromotedConst", nullptr, GlobalVariable::NotThreadLocal); 477*0b57cec5SDimitry Andric PC.GV->setInitializer(&C); 478*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Global replacement: "); 479*0b57cec5SDimitry Andric LLVM_DEBUG(PC.GV->print(dbgs())); 480*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 481*0b57cec5SDimitry Andric ++NumPromoted; 482*0b57cec5SDimitry Andric } 483*0b57cec5SDimitry Andric 484*0b57cec5SDimitry Andric void AArch64PromoteConstant::insertDefinitions(Function &F, 485*0b57cec5SDimitry Andric GlobalVariable &PromotedGV, 486*0b57cec5SDimitry Andric InsertionPoints &InsertPts) { 487*0b57cec5SDimitry Andric #ifndef NDEBUG 488*0b57cec5SDimitry Andric // Do more checking for debug purposes. 489*0b57cec5SDimitry Andric DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); 490*0b57cec5SDimitry Andric #endif 491*0b57cec5SDimitry Andric assert(!InsertPts.empty() && "Empty uses does not need a definition"); 492*0b57cec5SDimitry Andric 493*0b57cec5SDimitry Andric for (const auto &IPI : InsertPts) { 494*0b57cec5SDimitry Andric // Create the load of the global variable. 495*0b57cec5SDimitry Andric IRBuilder<> Builder(IPI.first); 496*0b57cec5SDimitry Andric LoadInst *LoadedCst = 497*0b57cec5SDimitry Andric Builder.CreateLoad(PromotedGV.getValueType(), &PromotedGV); 498*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "**********\n"); 499*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "New def: "); 500*0b57cec5SDimitry Andric LLVM_DEBUG(LoadedCst->print(dbgs())); 501*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 502*0b57cec5SDimitry Andric 503*0b57cec5SDimitry Andric // Update the dominated uses. 504*0b57cec5SDimitry Andric for (auto Use : IPI.second) { 505*0b57cec5SDimitry Andric #ifndef NDEBUG 506*0b57cec5SDimitry Andric assert(DT.dominates(LoadedCst, 507*0b57cec5SDimitry Andric findInsertionPoint(*Use.first, Use.second)) && 508*0b57cec5SDimitry Andric "Inserted definition does not dominate all its uses!"); 509*0b57cec5SDimitry Andric #endif 510*0b57cec5SDimitry Andric LLVM_DEBUG({ 511*0b57cec5SDimitry Andric dbgs() << "Use to update " << Use.second << ":"; 512*0b57cec5SDimitry Andric Use.first->print(dbgs()); 513*0b57cec5SDimitry Andric dbgs() << '\n'; 514*0b57cec5SDimitry Andric }); 515*0b57cec5SDimitry Andric Use.first->setOperand(Use.second, LoadedCst); 516*0b57cec5SDimitry Andric ++NumPromotedUses; 517*0b57cec5SDimitry Andric } 518*0b57cec5SDimitry Andric } 519*0b57cec5SDimitry Andric } 520*0b57cec5SDimitry Andric 521*0b57cec5SDimitry Andric void AArch64PromoteConstant::promoteConstants( 522*0b57cec5SDimitry Andric Function &F, SmallVectorImpl<UpdateRecord> &Updates, 523*0b57cec5SDimitry Andric PromotionCacheTy &PromotionCache) { 524*0b57cec5SDimitry Andric // Promote the constants. 525*0b57cec5SDimitry Andric for (auto U = Updates.begin(), E = Updates.end(); U != E;) { 526*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "** Compute insertion points **\n"); 527*0b57cec5SDimitry Andric auto First = U; 528*0b57cec5SDimitry Andric Constant *C = First->C; 529*0b57cec5SDimitry Andric InsertionPoints InsertPts; 530*0b57cec5SDimitry Andric do { 531*0b57cec5SDimitry Andric computeInsertionPoint(U->User, U->Op, InsertPts); 532*0b57cec5SDimitry Andric } while (++U != E && U->C == C); 533*0b57cec5SDimitry Andric 534*0b57cec5SDimitry Andric auto &Promotion = PromotionCache[C]; 535*0b57cec5SDimitry Andric ensurePromotedGV(F, *C, Promotion); 536*0b57cec5SDimitry Andric insertDefinitions(F, *Promotion.GV, InsertPts); 537*0b57cec5SDimitry Andric } 538*0b57cec5SDimitry Andric } 539*0b57cec5SDimitry Andric 540*0b57cec5SDimitry Andric bool AArch64PromoteConstant::runOnFunction(Function &F, 541*0b57cec5SDimitry Andric PromotionCacheTy &PromotionCache) { 542*0b57cec5SDimitry Andric // Look for instructions using constant vector. Promote that constant to a 543*0b57cec5SDimitry Andric // global variable. Create as few loads of this variable as possible and 544*0b57cec5SDimitry Andric // update the uses accordingly. 545*0b57cec5SDimitry Andric SmallVector<UpdateRecord, 64> Updates; 546*0b57cec5SDimitry Andric for (Instruction &I : instructions(&F)) { 547*0b57cec5SDimitry Andric // Traverse the operand, looking for constant vectors. Replace them by a 548*0b57cec5SDimitry Andric // load of a global variable of constant vector type. 549*0b57cec5SDimitry Andric for (Use &U : I.operands()) { 550*0b57cec5SDimitry Andric Constant *Cst = dyn_cast<Constant>(U); 551*0b57cec5SDimitry Andric // There is no point in promoting global values as they are already 552*0b57cec5SDimitry Andric // global. Do not promote constant expressions either, as they may 553*0b57cec5SDimitry Andric // require some code expansion. 554*0b57cec5SDimitry Andric if (!Cst || isa<GlobalValue>(Cst) || isa<ConstantExpr>(Cst)) 555*0b57cec5SDimitry Andric continue; 556*0b57cec5SDimitry Andric 557*0b57cec5SDimitry Andric // Check if this constant is worth promoting. 558*0b57cec5SDimitry Andric if (!shouldConvert(*Cst, PromotionCache)) 559*0b57cec5SDimitry Andric continue; 560*0b57cec5SDimitry Andric 561*0b57cec5SDimitry Andric // Check if this use should be promoted. 562*0b57cec5SDimitry Andric unsigned OpNo = &U - I.op_begin(); 563*0b57cec5SDimitry Andric if (!shouldConvertUse(Cst, &I, OpNo)) 564*0b57cec5SDimitry Andric continue; 565*0b57cec5SDimitry Andric 566*0b57cec5SDimitry Andric Updates.emplace_back(Cst, &I, OpNo); 567*0b57cec5SDimitry Andric } 568*0b57cec5SDimitry Andric } 569*0b57cec5SDimitry Andric 570*0b57cec5SDimitry Andric if (Updates.empty()) 571*0b57cec5SDimitry Andric return false; 572*0b57cec5SDimitry Andric 573*0b57cec5SDimitry Andric promoteConstants(F, Updates, PromotionCache); 574*0b57cec5SDimitry Andric return true; 575*0b57cec5SDimitry Andric } 576