10b57cec5SDimitry Andric //==- AArch64PromoteConstant.cpp - Promote constant to global for AArch64 --==// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements the AArch64PromoteConstant pass which promotes constants 100b57cec5SDimitry Andric // to global variables when this is likely to be more efficient. Currently only 110b57cec5SDimitry Andric // types related to constant vector (i.e., constant vector, array of constant 120b57cec5SDimitry Andric // vectors, constant structure with a constant vector field, etc.) are promoted 130b57cec5SDimitry Andric // to global variables. Constant vectors are likely to be lowered in target 140b57cec5SDimitry Andric // constant pool during instruction selection already; therefore, the access 150b57cec5SDimitry Andric // will remain the same (memory load), but the structure types are not split 160b57cec5SDimitry Andric // into different constant pool accesses for each field. A bonus side effect is 170b57cec5SDimitry Andric // that created globals may be merged by the global merge pass. 180b57cec5SDimitry Andric // 190b57cec5SDimitry Andric // FIXME: This pass may be useful for other targets too. 200b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric #include "AArch64.h" 230b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 240b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 250b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 260b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 270b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 280b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 290b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 300b57cec5SDimitry Andric #include "llvm/IR/Function.h" 310b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 320b57cec5SDimitry Andric #include "llvm/IR/GlobalVariable.h" 330b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 340b57cec5SDimitry Andric #include "llvm/IR/InlineAsm.h" 350b57cec5SDimitry Andric #include "llvm/IR/InstIterator.h" 360b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 370b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 380b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 390b57cec5SDimitry Andric #include "llvm/IR/Module.h" 400b57cec5SDimitry Andric #include "llvm/IR/Type.h" 41*480093f4SDimitry Andric #include "llvm/InitializePasses.h" 420b57cec5SDimitry Andric #include "llvm/Pass.h" 430b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 440b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 450b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 460b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 470b57cec5SDimitry Andric #include <algorithm> 480b57cec5SDimitry Andric #include <cassert> 490b57cec5SDimitry Andric #include <utility> 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric using namespace llvm; 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-promote-const" 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric // Stress testing mode - disable heuristics. 560b57cec5SDimitry Andric static cl::opt<bool> Stress("aarch64-stress-promote-const", cl::Hidden, 570b57cec5SDimitry Andric cl::desc("Promote all vector constants")); 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric STATISTIC(NumPromoted, "Number of promoted constants"); 600b57cec5SDimitry Andric STATISTIC(NumPromotedUses, "Number of promoted constants uses"); 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 630b57cec5SDimitry Andric // AArch64PromoteConstant 640b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric namespace { 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric /// Promotes interesting constant into global variables. 690b57cec5SDimitry Andric /// The motivating example is: 700b57cec5SDimitry Andric /// static const uint16_t TableA[32] = { 710b57cec5SDimitry Andric /// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768, 720b57cec5SDimitry Andric /// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215, 730b57cec5SDimitry Andric /// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846, 740b57cec5SDimitry Andric /// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725, 750b57cec5SDimitry Andric /// }; 760b57cec5SDimitry Andric /// 770b57cec5SDimitry Andric /// uint8x16x4_t LoadStatic(void) { 780b57cec5SDimitry Andric /// uint8x16x4_t ret; 790b57cec5SDimitry Andric /// ret.val[0] = vld1q_u16(TableA + 0); 800b57cec5SDimitry Andric /// ret.val[1] = vld1q_u16(TableA + 8); 810b57cec5SDimitry Andric /// ret.val[2] = vld1q_u16(TableA + 16); 820b57cec5SDimitry Andric /// ret.val[3] = vld1q_u16(TableA + 24); 830b57cec5SDimitry Andric /// return ret; 840b57cec5SDimitry Andric /// } 850b57cec5SDimitry Andric /// 860b57cec5SDimitry Andric /// The constants in this example are folded into the uses. Thus, 4 different 870b57cec5SDimitry Andric /// constants are created. 880b57cec5SDimitry Andric /// 890b57cec5SDimitry Andric /// As their type is vector the cheapest way to create them is to load them 900b57cec5SDimitry Andric /// for the memory. 910b57cec5SDimitry Andric /// 920b57cec5SDimitry Andric /// Therefore the final assembly final has 4 different loads. With this pass 930b57cec5SDimitry Andric /// enabled, only one load is issued for the constants. 940b57cec5SDimitry Andric class AArch64PromoteConstant : public ModulePass { 950b57cec5SDimitry Andric public: 960b57cec5SDimitry Andric struct PromotedConstant { 970b57cec5SDimitry Andric bool ShouldConvert = false; 980b57cec5SDimitry Andric GlobalVariable *GV = nullptr; 990b57cec5SDimitry Andric }; 1000b57cec5SDimitry Andric using PromotionCacheTy = SmallDenseMap<Constant *, PromotedConstant, 16>; 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric struct UpdateRecord { 1030b57cec5SDimitry Andric Constant *C; 1040b57cec5SDimitry Andric Instruction *User; 1050b57cec5SDimitry Andric unsigned Op; 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric UpdateRecord(Constant *C, Instruction *User, unsigned Op) 1080b57cec5SDimitry Andric : C(C), User(User), Op(Op) {} 1090b57cec5SDimitry Andric }; 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric static char ID; 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric AArch64PromoteConstant() : ModulePass(ID) { 1140b57cec5SDimitry Andric initializeAArch64PromoteConstantPass(*PassRegistry::getPassRegistry()); 1150b57cec5SDimitry Andric } 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric StringRef getPassName() const override { return "AArch64 Promote Constant"; } 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric /// Iterate over the functions and promote the interesting constants into 1200b57cec5SDimitry Andric /// global variables with module scope. 1210b57cec5SDimitry Andric bool runOnModule(Module &M) override { 1220b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << getPassName() << '\n'); 1230b57cec5SDimitry Andric if (skipModule(M)) 1240b57cec5SDimitry Andric return false; 1250b57cec5SDimitry Andric bool Changed = false; 1260b57cec5SDimitry Andric PromotionCacheTy PromotionCache; 1270b57cec5SDimitry Andric for (auto &MF : M) { 1280b57cec5SDimitry Andric Changed |= runOnFunction(MF, PromotionCache); 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric return Changed; 1310b57cec5SDimitry Andric } 1320b57cec5SDimitry Andric 1330b57cec5SDimitry Andric private: 1340b57cec5SDimitry Andric /// Look for interesting constants used within the given function. 1350b57cec5SDimitry Andric /// Promote them into global variables, load these global variables within 1360b57cec5SDimitry Andric /// the related function, so that the number of inserted load is minimal. 1370b57cec5SDimitry Andric bool runOnFunction(Function &F, PromotionCacheTy &PromotionCache); 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric // This transformation requires dominator info 1400b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1410b57cec5SDimitry Andric AU.setPreservesCFG(); 1420b57cec5SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 1430b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 1440b57cec5SDimitry Andric } 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric /// Type to store a list of Uses. 1470b57cec5SDimitry Andric using Uses = SmallVector<std::pair<Instruction *, unsigned>, 4>; 1480b57cec5SDimitry Andric /// Map an insertion point to all the uses it dominates. 1490b57cec5SDimitry Andric using InsertionPoints = DenseMap<Instruction *, Uses>; 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric /// Find the closest point that dominates the given Use. 1520b57cec5SDimitry Andric Instruction *findInsertionPoint(Instruction &User, unsigned OpNo); 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric /// Check if the given insertion point is dominated by an existing 1550b57cec5SDimitry Andric /// insertion point. 1560b57cec5SDimitry Andric /// If true, the given use is added to the list of dominated uses for 1570b57cec5SDimitry Andric /// the related existing point. 1580b57cec5SDimitry Andric /// \param NewPt the insertion point to be checked 1590b57cec5SDimitry Andric /// \param User the user of the constant 1600b57cec5SDimitry Andric /// \param OpNo the operand number of the use 1610b57cec5SDimitry Andric /// \param InsertPts existing insertion points 1620b57cec5SDimitry Andric /// \pre NewPt and all instruction in InsertPts belong to the same function 1630b57cec5SDimitry Andric /// \return true if one of the insertion point in InsertPts dominates NewPt, 1640b57cec5SDimitry Andric /// false otherwise 1650b57cec5SDimitry Andric bool isDominated(Instruction *NewPt, Instruction *User, unsigned OpNo, 1660b57cec5SDimitry Andric InsertionPoints &InsertPts); 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric /// Check if the given insertion point can be merged with an existing 1690b57cec5SDimitry Andric /// insertion point in a common dominator. 1700b57cec5SDimitry Andric /// If true, the given use is added to the list of the created insertion 1710b57cec5SDimitry Andric /// point. 1720b57cec5SDimitry Andric /// \param NewPt the insertion point to be checked 1730b57cec5SDimitry Andric /// \param User the user of the constant 1740b57cec5SDimitry Andric /// \param OpNo the operand number of the use 1750b57cec5SDimitry Andric /// \param InsertPts existing insertion points 1760b57cec5SDimitry Andric /// \pre NewPt and all instruction in InsertPts belong to the same function 1770b57cec5SDimitry Andric /// \pre isDominated returns false for the exact same parameters. 1780b57cec5SDimitry Andric /// \return true if it exists an insertion point in InsertPts that could 1790b57cec5SDimitry Andric /// have been merged with NewPt in a common dominator, 1800b57cec5SDimitry Andric /// false otherwise 1810b57cec5SDimitry Andric bool tryAndMerge(Instruction *NewPt, Instruction *User, unsigned OpNo, 1820b57cec5SDimitry Andric InsertionPoints &InsertPts); 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric /// Compute the minimal insertion points to dominates all the interesting 1850b57cec5SDimitry Andric /// uses of value. 1860b57cec5SDimitry Andric /// Insertion points are group per function and each insertion point 1870b57cec5SDimitry Andric /// contains a list of all the uses it dominates within the related function 1880b57cec5SDimitry Andric /// \param User the user of the constant 1890b57cec5SDimitry Andric /// \param OpNo the operand number of the constant 1900b57cec5SDimitry Andric /// \param[out] InsertPts output storage of the analysis 1910b57cec5SDimitry Andric void computeInsertionPoint(Instruction *User, unsigned OpNo, 1920b57cec5SDimitry Andric InsertionPoints &InsertPts); 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric /// Insert a definition of a new global variable at each point contained in 1950b57cec5SDimitry Andric /// InsPtsPerFunc and update the related uses (also contained in 1960b57cec5SDimitry Andric /// InsPtsPerFunc). 1970b57cec5SDimitry Andric void insertDefinitions(Function &F, GlobalVariable &GV, 1980b57cec5SDimitry Andric InsertionPoints &InsertPts); 1990b57cec5SDimitry Andric 2000b57cec5SDimitry Andric /// Do the constant promotion indicated by the Updates records, keeping track 2010b57cec5SDimitry Andric /// of globals in PromotionCache. 2020b57cec5SDimitry Andric void promoteConstants(Function &F, SmallVectorImpl<UpdateRecord> &Updates, 2030b57cec5SDimitry Andric PromotionCacheTy &PromotionCache); 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric /// Transfer the list of dominated uses of IPI to NewPt in InsertPts. 2060b57cec5SDimitry Andric /// Append Use to this list and delete the entry of IPI in InsertPts. 2070b57cec5SDimitry Andric static void appendAndTransferDominatedUses(Instruction *NewPt, 2080b57cec5SDimitry Andric Instruction *User, unsigned OpNo, 2090b57cec5SDimitry Andric InsertionPoints::iterator &IPI, 2100b57cec5SDimitry Andric InsertionPoints &InsertPts) { 2110b57cec5SDimitry Andric // Record the dominated use. 2120b57cec5SDimitry Andric IPI->second.emplace_back(User, OpNo); 2130b57cec5SDimitry Andric // Transfer the dominated uses of IPI to NewPt 2140b57cec5SDimitry Andric // Inserting into the DenseMap may invalidate existing iterator. 2150b57cec5SDimitry Andric // Keep a copy of the key to find the iterator to erase. Keep a copy of the 2160b57cec5SDimitry Andric // value so that we don't have to dereference IPI->second. 2170b57cec5SDimitry Andric Instruction *OldInstr = IPI->first; 2180b57cec5SDimitry Andric Uses OldUses = std::move(IPI->second); 2190b57cec5SDimitry Andric InsertPts[NewPt] = std::move(OldUses); 2200b57cec5SDimitry Andric // Erase IPI. 2210b57cec5SDimitry Andric InsertPts.erase(OldInstr); 2220b57cec5SDimitry Andric } 2230b57cec5SDimitry Andric }; 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric } // end anonymous namespace 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric char AArch64PromoteConstant::ID = 0; 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const", 2300b57cec5SDimitry Andric "AArch64 Promote Constant Pass", false, false) 2310b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 2320b57cec5SDimitry Andric INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const", 2330b57cec5SDimitry Andric "AArch64 Promote Constant Pass", false, false) 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric ModulePass *llvm::createAArch64PromoteConstantPass() { 2360b57cec5SDimitry Andric return new AArch64PromoteConstant(); 2370b57cec5SDimitry Andric } 2380b57cec5SDimitry Andric 2390b57cec5SDimitry Andric /// Check if the given type uses a vector type. 2400b57cec5SDimitry Andric static bool isConstantUsingVectorTy(const Type *CstTy) { 2410b57cec5SDimitry Andric if (CstTy->isVectorTy()) 2420b57cec5SDimitry Andric return true; 2430b57cec5SDimitry Andric if (CstTy->isStructTy()) { 2440b57cec5SDimitry Andric for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements(); 2450b57cec5SDimitry Andric EltIdx < EndEltIdx; ++EltIdx) 2460b57cec5SDimitry Andric if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx))) 2470b57cec5SDimitry Andric return true; 2480b57cec5SDimitry Andric } else if (CstTy->isArrayTy()) 2490b57cec5SDimitry Andric return isConstantUsingVectorTy(CstTy->getArrayElementType()); 2500b57cec5SDimitry Andric return false; 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric 2530b57cec5SDimitry Andric /// Check if the given use (Instruction + OpIdx) of Cst should be converted into 2540b57cec5SDimitry Andric /// a load of a global variable initialized with Cst. 2550b57cec5SDimitry Andric /// A use should be converted if it is legal to do so. 2560b57cec5SDimitry Andric /// For instance, it is not legal to turn the mask operand of a shuffle vector 2570b57cec5SDimitry Andric /// into a load of a global variable. 2580b57cec5SDimitry Andric static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr, 2590b57cec5SDimitry Andric unsigned OpIdx) { 2600b57cec5SDimitry Andric // shufflevector instruction expects a const for the mask argument, i.e., the 2610b57cec5SDimitry Andric // third argument. Do not promote this use in that case. 2620b57cec5SDimitry Andric if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2) 2630b57cec5SDimitry Andric return false; 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric // extractvalue instruction expects a const idx. 2660b57cec5SDimitry Andric if (isa<const ExtractValueInst>(Instr) && OpIdx > 0) 2670b57cec5SDimitry Andric return false; 2680b57cec5SDimitry Andric 2690b57cec5SDimitry Andric // extractvalue instruction expects a const idx. 2700b57cec5SDimitry Andric if (isa<const InsertValueInst>(Instr) && OpIdx > 1) 2710b57cec5SDimitry Andric return false; 2720b57cec5SDimitry Andric 2730b57cec5SDimitry Andric if (isa<const AllocaInst>(Instr) && OpIdx > 0) 2740b57cec5SDimitry Andric return false; 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric // Alignment argument must be constant. 2770b57cec5SDimitry Andric if (isa<const LoadInst>(Instr) && OpIdx > 0) 2780b57cec5SDimitry Andric return false; 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric // Alignment argument must be constant. 2810b57cec5SDimitry Andric if (isa<const StoreInst>(Instr) && OpIdx > 1) 2820b57cec5SDimitry Andric return false; 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric // Index must be constant. 2850b57cec5SDimitry Andric if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0) 2860b57cec5SDimitry Andric return false; 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric // Personality function and filters must be constant. 2890b57cec5SDimitry Andric // Give up on that instruction. 2900b57cec5SDimitry Andric if (isa<const LandingPadInst>(Instr)) 2910b57cec5SDimitry Andric return false; 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric // Switch instruction expects constants to compare to. 2940b57cec5SDimitry Andric if (isa<const SwitchInst>(Instr)) 2950b57cec5SDimitry Andric return false; 2960b57cec5SDimitry Andric 2970b57cec5SDimitry Andric // Expected address must be a constant. 2980b57cec5SDimitry Andric if (isa<const IndirectBrInst>(Instr)) 2990b57cec5SDimitry Andric return false; 3000b57cec5SDimitry Andric 3010b57cec5SDimitry Andric // Do not mess with intrinsics. 3020b57cec5SDimitry Andric if (isa<const IntrinsicInst>(Instr)) 3030b57cec5SDimitry Andric return false; 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric // Do not mess with inline asm. 3060b57cec5SDimitry Andric const CallInst *CI = dyn_cast<const CallInst>(Instr); 3070b57cec5SDimitry Andric return !(CI && isa<const InlineAsm>(CI->getCalledValue())); 3080b57cec5SDimitry Andric } 3090b57cec5SDimitry Andric 3100b57cec5SDimitry Andric /// Check if the given Cst should be converted into 3110b57cec5SDimitry Andric /// a load of a global variable initialized with Cst. 3120b57cec5SDimitry Andric /// A constant should be converted if it is likely that the materialization of 3130b57cec5SDimitry Andric /// the constant will be tricky. Thus, we give up on zero or undef values. 3140b57cec5SDimitry Andric /// 3150b57cec5SDimitry Andric /// \todo Currently, accept only vector related types. 3160b57cec5SDimitry Andric /// Also we give up on all simple vector type to keep the existing 3170b57cec5SDimitry Andric /// behavior. Otherwise, we should push here all the check of the lowering of 3180b57cec5SDimitry Andric /// BUILD_VECTOR. By giving up, we lose the potential benefit of merging 3190b57cec5SDimitry Andric /// constant via global merge and the fact that the same constant is stored 3200b57cec5SDimitry Andric /// only once with this method (versus, as many function that uses the constant 3210b57cec5SDimitry Andric /// for the regular approach, even for float). 3220b57cec5SDimitry Andric /// Again, the simplest solution would be to promote every 3230b57cec5SDimitry Andric /// constant and rematerialize them when they are actually cheap to create. 3240b57cec5SDimitry Andric static bool shouldConvertImpl(const Constant *Cst) { 3250b57cec5SDimitry Andric if (isa<const UndefValue>(Cst)) 3260b57cec5SDimitry Andric return false; 3270b57cec5SDimitry Andric 3280b57cec5SDimitry Andric // FIXME: In some cases, it may be interesting to promote in memory 3290b57cec5SDimitry Andric // a zero initialized constant. 3300b57cec5SDimitry Andric // E.g., when the type of Cst require more instructions than the 3310b57cec5SDimitry Andric // adrp/add/load sequence or when this sequence can be shared by several 3320b57cec5SDimitry Andric // instances of Cst. 3330b57cec5SDimitry Andric // Ideally, we could promote this into a global and rematerialize the constant 3340b57cec5SDimitry Andric // when it was a bad idea. 3350b57cec5SDimitry Andric if (Cst->isZeroValue()) 3360b57cec5SDimitry Andric return false; 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric if (Stress) 3390b57cec5SDimitry Andric return true; 3400b57cec5SDimitry Andric 3410b57cec5SDimitry Andric // FIXME: see function \todo 3420b57cec5SDimitry Andric if (Cst->getType()->isVectorTy()) 3430b57cec5SDimitry Andric return false; 3440b57cec5SDimitry Andric return isConstantUsingVectorTy(Cst->getType()); 3450b57cec5SDimitry Andric } 3460b57cec5SDimitry Andric 3470b57cec5SDimitry Andric static bool 3480b57cec5SDimitry Andric shouldConvert(Constant &C, 3490b57cec5SDimitry Andric AArch64PromoteConstant::PromotionCacheTy &PromotionCache) { 3500b57cec5SDimitry Andric auto Converted = PromotionCache.insert( 3510b57cec5SDimitry Andric std::make_pair(&C, AArch64PromoteConstant::PromotedConstant())); 3520b57cec5SDimitry Andric if (Converted.second) 3530b57cec5SDimitry Andric Converted.first->second.ShouldConvert = shouldConvertImpl(&C); 3540b57cec5SDimitry Andric return Converted.first->second.ShouldConvert; 3550b57cec5SDimitry Andric } 3560b57cec5SDimitry Andric 3570b57cec5SDimitry Andric Instruction *AArch64PromoteConstant::findInsertionPoint(Instruction &User, 3580b57cec5SDimitry Andric unsigned OpNo) { 3590b57cec5SDimitry Andric // If this user is a phi, the insertion point is in the related 3600b57cec5SDimitry Andric // incoming basic block. 3610b57cec5SDimitry Andric if (PHINode *PhiInst = dyn_cast<PHINode>(&User)) 3620b57cec5SDimitry Andric return PhiInst->getIncomingBlock(OpNo)->getTerminator(); 3630b57cec5SDimitry Andric 3640b57cec5SDimitry Andric return &User; 3650b57cec5SDimitry Andric } 3660b57cec5SDimitry Andric 3670b57cec5SDimitry Andric bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Instruction *User, 3680b57cec5SDimitry Andric unsigned OpNo, 3690b57cec5SDimitry Andric InsertionPoints &InsertPts) { 3700b57cec5SDimitry Andric DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( 3710b57cec5SDimitry Andric *NewPt->getParent()->getParent()).getDomTree(); 3720b57cec5SDimitry Andric 3730b57cec5SDimitry Andric // Traverse all the existing insertion points and check if one is dominating 3740b57cec5SDimitry Andric // NewPt. If it is, remember that. 3750b57cec5SDimitry Andric for (auto &IPI : InsertPts) { 3760b57cec5SDimitry Andric if (NewPt == IPI.first || DT.dominates(IPI.first, NewPt) || 3770b57cec5SDimitry Andric // When IPI.first is a terminator instruction, DT may think that 3780b57cec5SDimitry Andric // the result is defined on the edge. 3790b57cec5SDimitry Andric // Here we are testing the insertion point, not the definition. 3800b57cec5SDimitry Andric (IPI.first->getParent() != NewPt->getParent() && 3810b57cec5SDimitry Andric DT.dominates(IPI.first->getParent(), NewPt->getParent()))) { 3820b57cec5SDimitry Andric // No need to insert this point. Just record the dominated use. 3830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Insertion point dominated by:\n"); 3840b57cec5SDimitry Andric LLVM_DEBUG(IPI.first->print(dbgs())); 3850b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 3860b57cec5SDimitry Andric IPI.second.emplace_back(User, OpNo); 3870b57cec5SDimitry Andric return true; 3880b57cec5SDimitry Andric } 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric return false; 3910b57cec5SDimitry Andric } 3920b57cec5SDimitry Andric 3930b57cec5SDimitry Andric bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, Instruction *User, 3940b57cec5SDimitry Andric unsigned OpNo, 3950b57cec5SDimitry Andric InsertionPoints &InsertPts) { 3960b57cec5SDimitry Andric DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( 3970b57cec5SDimitry Andric *NewPt->getParent()->getParent()).getDomTree(); 3980b57cec5SDimitry Andric BasicBlock *NewBB = NewPt->getParent(); 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric // Traverse all the existing insertion point and check if one is dominated by 4010b57cec5SDimitry Andric // NewPt and thus useless or can be combined with NewPt into a common 4020b57cec5SDimitry Andric // dominator. 4030b57cec5SDimitry Andric for (InsertionPoints::iterator IPI = InsertPts.begin(), 4040b57cec5SDimitry Andric EndIPI = InsertPts.end(); 4050b57cec5SDimitry Andric IPI != EndIPI; ++IPI) { 4060b57cec5SDimitry Andric BasicBlock *CurBB = IPI->first->getParent(); 4070b57cec5SDimitry Andric if (NewBB == CurBB) { 4080b57cec5SDimitry Andric // Instructions are in the same block. 4090b57cec5SDimitry Andric // By construction, NewPt is dominating the other. 4100b57cec5SDimitry Andric // Indeed, isDominated returned false with the exact same arguments. 4110b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Merge insertion point with:\n"); 4120b57cec5SDimitry Andric LLVM_DEBUG(IPI->first->print(dbgs())); 4130b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\nat considered insertion point.\n"); 4140b57cec5SDimitry Andric appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); 4150b57cec5SDimitry Andric return true; 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric // Look for a common dominator 4190b57cec5SDimitry Andric BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB); 4200b57cec5SDimitry Andric // If none exists, we cannot merge these two points. 4210b57cec5SDimitry Andric if (!CommonDominator) 4220b57cec5SDimitry Andric continue; 4230b57cec5SDimitry Andric 4240b57cec5SDimitry Andric if (CommonDominator != NewBB) { 4250b57cec5SDimitry Andric // By construction, the CommonDominator cannot be CurBB. 4260b57cec5SDimitry Andric assert(CommonDominator != CurBB && 4270b57cec5SDimitry Andric "Instruction has not been rejected during isDominated check!"); 4280b57cec5SDimitry Andric // Take the last instruction of the CommonDominator as insertion point 4290b57cec5SDimitry Andric NewPt = CommonDominator->getTerminator(); 4300b57cec5SDimitry Andric } 4310b57cec5SDimitry Andric // else, CommonDominator is the block of NewBB, hence NewBB is the last 4320b57cec5SDimitry Andric // possible insertion point in that block. 4330b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Merge insertion point with:\n"); 4340b57cec5SDimitry Andric LLVM_DEBUG(IPI->first->print(dbgs())); 4350b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 4360b57cec5SDimitry Andric LLVM_DEBUG(NewPt->print(dbgs())); 4370b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 4380b57cec5SDimitry Andric appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); 4390b57cec5SDimitry Andric return true; 4400b57cec5SDimitry Andric } 4410b57cec5SDimitry Andric return false; 4420b57cec5SDimitry Andric } 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric void AArch64PromoteConstant::computeInsertionPoint( 4450b57cec5SDimitry Andric Instruction *User, unsigned OpNo, InsertionPoints &InsertPts) { 4460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Considered use, opidx " << OpNo << ":\n"); 4470b57cec5SDimitry Andric LLVM_DEBUG(User->print(dbgs())); 4480b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 4490b57cec5SDimitry Andric 4500b57cec5SDimitry Andric Instruction *InsertionPoint = findInsertionPoint(*User, OpNo); 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Considered insertion point:\n"); 4530b57cec5SDimitry Andric LLVM_DEBUG(InsertionPoint->print(dbgs())); 4540b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 4550b57cec5SDimitry Andric 4560b57cec5SDimitry Andric if (isDominated(InsertionPoint, User, OpNo, InsertPts)) 4570b57cec5SDimitry Andric return; 4580b57cec5SDimitry Andric // This insertion point is useful, check if we can merge some insertion 4590b57cec5SDimitry Andric // point in a common dominator or if NewPt dominates an existing one. 4600b57cec5SDimitry Andric if (tryAndMerge(InsertionPoint, User, OpNo, InsertPts)) 4610b57cec5SDimitry Andric return; 4620b57cec5SDimitry Andric 4630b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Keep considered insertion point\n"); 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric // It is definitely useful by its own 4660b57cec5SDimitry Andric InsertPts[InsertionPoint].emplace_back(User, OpNo); 4670b57cec5SDimitry Andric } 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric static void ensurePromotedGV(Function &F, Constant &C, 4700b57cec5SDimitry Andric AArch64PromoteConstant::PromotedConstant &PC) { 4710b57cec5SDimitry Andric assert(PC.ShouldConvert && 4720b57cec5SDimitry Andric "Expected that we should convert this to a global"); 4730b57cec5SDimitry Andric if (PC.GV) 4740b57cec5SDimitry Andric return; 4750b57cec5SDimitry Andric PC.GV = new GlobalVariable( 4760b57cec5SDimitry Andric *F.getParent(), C.getType(), true, GlobalValue::InternalLinkage, nullptr, 4770b57cec5SDimitry Andric "_PromotedConst", nullptr, GlobalVariable::NotThreadLocal); 4780b57cec5SDimitry Andric PC.GV->setInitializer(&C); 4790b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Global replacement: "); 4800b57cec5SDimitry Andric LLVM_DEBUG(PC.GV->print(dbgs())); 4810b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 4820b57cec5SDimitry Andric ++NumPromoted; 4830b57cec5SDimitry Andric } 4840b57cec5SDimitry Andric 4850b57cec5SDimitry Andric void AArch64PromoteConstant::insertDefinitions(Function &F, 4860b57cec5SDimitry Andric GlobalVariable &PromotedGV, 4870b57cec5SDimitry Andric InsertionPoints &InsertPts) { 4880b57cec5SDimitry Andric #ifndef NDEBUG 4890b57cec5SDimitry Andric // Do more checking for debug purposes. 4900b57cec5SDimitry Andric DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); 4910b57cec5SDimitry Andric #endif 4920b57cec5SDimitry Andric assert(!InsertPts.empty() && "Empty uses does not need a definition"); 4930b57cec5SDimitry Andric 4940b57cec5SDimitry Andric for (const auto &IPI : InsertPts) { 4950b57cec5SDimitry Andric // Create the load of the global variable. 4960b57cec5SDimitry Andric IRBuilder<> Builder(IPI.first); 4970b57cec5SDimitry Andric LoadInst *LoadedCst = 4980b57cec5SDimitry Andric Builder.CreateLoad(PromotedGV.getValueType(), &PromotedGV); 4990b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "**********\n"); 5000b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "New def: "); 5010b57cec5SDimitry Andric LLVM_DEBUG(LoadedCst->print(dbgs())); 5020b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << '\n'); 5030b57cec5SDimitry Andric 5040b57cec5SDimitry Andric // Update the dominated uses. 5050b57cec5SDimitry Andric for (auto Use : IPI.second) { 5060b57cec5SDimitry Andric #ifndef NDEBUG 5070b57cec5SDimitry Andric assert(DT.dominates(LoadedCst, 5080b57cec5SDimitry Andric findInsertionPoint(*Use.first, Use.second)) && 5090b57cec5SDimitry Andric "Inserted definition does not dominate all its uses!"); 5100b57cec5SDimitry Andric #endif 5110b57cec5SDimitry Andric LLVM_DEBUG({ 5120b57cec5SDimitry Andric dbgs() << "Use to update " << Use.second << ":"; 5130b57cec5SDimitry Andric Use.first->print(dbgs()); 5140b57cec5SDimitry Andric dbgs() << '\n'; 5150b57cec5SDimitry Andric }); 5160b57cec5SDimitry Andric Use.first->setOperand(Use.second, LoadedCst); 5170b57cec5SDimitry Andric ++NumPromotedUses; 5180b57cec5SDimitry Andric } 5190b57cec5SDimitry Andric } 5200b57cec5SDimitry Andric } 5210b57cec5SDimitry Andric 5220b57cec5SDimitry Andric void AArch64PromoteConstant::promoteConstants( 5230b57cec5SDimitry Andric Function &F, SmallVectorImpl<UpdateRecord> &Updates, 5240b57cec5SDimitry Andric PromotionCacheTy &PromotionCache) { 5250b57cec5SDimitry Andric // Promote the constants. 5260b57cec5SDimitry Andric for (auto U = Updates.begin(), E = Updates.end(); U != E;) { 5270b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "** Compute insertion points **\n"); 5280b57cec5SDimitry Andric auto First = U; 5290b57cec5SDimitry Andric Constant *C = First->C; 5300b57cec5SDimitry Andric InsertionPoints InsertPts; 5310b57cec5SDimitry Andric do { 5320b57cec5SDimitry Andric computeInsertionPoint(U->User, U->Op, InsertPts); 5330b57cec5SDimitry Andric } while (++U != E && U->C == C); 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric auto &Promotion = PromotionCache[C]; 5360b57cec5SDimitry Andric ensurePromotedGV(F, *C, Promotion); 5370b57cec5SDimitry Andric insertDefinitions(F, *Promotion.GV, InsertPts); 5380b57cec5SDimitry Andric } 5390b57cec5SDimitry Andric } 5400b57cec5SDimitry Andric 5410b57cec5SDimitry Andric bool AArch64PromoteConstant::runOnFunction(Function &F, 5420b57cec5SDimitry Andric PromotionCacheTy &PromotionCache) { 5430b57cec5SDimitry Andric // Look for instructions using constant vector. Promote that constant to a 5440b57cec5SDimitry Andric // global variable. Create as few loads of this variable as possible and 5450b57cec5SDimitry Andric // update the uses accordingly. 5460b57cec5SDimitry Andric SmallVector<UpdateRecord, 64> Updates; 5470b57cec5SDimitry Andric for (Instruction &I : instructions(&F)) { 5480b57cec5SDimitry Andric // Traverse the operand, looking for constant vectors. Replace them by a 5490b57cec5SDimitry Andric // load of a global variable of constant vector type. 5500b57cec5SDimitry Andric for (Use &U : I.operands()) { 5510b57cec5SDimitry Andric Constant *Cst = dyn_cast<Constant>(U); 5520b57cec5SDimitry Andric // There is no point in promoting global values as they are already 5530b57cec5SDimitry Andric // global. Do not promote constant expressions either, as they may 5540b57cec5SDimitry Andric // require some code expansion. 5550b57cec5SDimitry Andric if (!Cst || isa<GlobalValue>(Cst) || isa<ConstantExpr>(Cst)) 5560b57cec5SDimitry Andric continue; 5570b57cec5SDimitry Andric 5580b57cec5SDimitry Andric // Check if this constant is worth promoting. 5590b57cec5SDimitry Andric if (!shouldConvert(*Cst, PromotionCache)) 5600b57cec5SDimitry Andric continue; 5610b57cec5SDimitry Andric 5620b57cec5SDimitry Andric // Check if this use should be promoted. 5630b57cec5SDimitry Andric unsigned OpNo = &U - I.op_begin(); 5640b57cec5SDimitry Andric if (!shouldConvertUse(Cst, &I, OpNo)) 5650b57cec5SDimitry Andric continue; 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric Updates.emplace_back(Cst, &I, OpNo); 5680b57cec5SDimitry Andric } 5690b57cec5SDimitry Andric } 5700b57cec5SDimitry Andric 5710b57cec5SDimitry Andric if (Updates.empty()) 5720b57cec5SDimitry Andric return false; 5730b57cec5SDimitry Andric 5740b57cec5SDimitry Andric promoteConstants(F, Updates, PromotionCache); 5750b57cec5SDimitry Andric return true; 5760b57cec5SDimitry Andric } 577