//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include using namespace llvm; using namespace PatternMatch; #define DEBUG_TYPE "tti" static cl::opt EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::Hidden, cl::desc("Recognize reduction patterns.")); namespace { /// No-op implementation of the TTI interface using the utility base /// classes. /// /// This is used when no target specific information is available. struct NoTTIImpl : TargetTransformInfoImplCRTPBase { explicit NoTTIImpl(const DataLayout &DL) : TargetTransformInfoImplCRTPBase(DL) {} }; } // namespace bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) { // If the loop has irreducible control flow, it can not be converted to // Hardware loop. LoopBlocksRPO RPOT(L); RPOT.perform(&LI); if (containsIrreducibleCFG(RPOT, LI)) return false; return true; } IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) : II(&I), RetTy(I.getType()), IID(I.getIntrinsicID()) { FunctionType *FTy = I.getCalledFunction()->getFunctionType(); ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); Arguments.insert(Arguments.begin(), I.arg_begin(), I.arg_end()); if (auto *FPMO = dyn_cast(&I)) FMF = FPMO->getFastMathFlags(); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI) : II(dyn_cast(&CI)), RetTy(CI.getType()), IID(Id) { if (const auto *FPMO = dyn_cast(&CI)) FMF = FPMO->getFastMathFlags(); FunctionType *FTy = CI.getCalledFunction()->getFunctionType(); ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, unsigned Factor) : RetTy(CI.getType()), IID(Id), VF(Factor) { if (auto *FPMO = dyn_cast(&CI)) FMF = FPMO->getFastMathFlags(); Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); FunctionType *FTy = CI.getCalledFunction()->getFunctionType(); ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, unsigned Factor, unsigned ScalarCost) : RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) { if (const auto *FPMO = dyn_cast(&CI)) FMF = FPMO->getFastMathFlags(); Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); FunctionType *FTy = CI.getCalledFunction()->getFunctionType(); ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef Tys, FastMathFlags Flags) : RetTy(RTy), IID(Id), FMF(Flags) { ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef Tys, FastMathFlags Flags, unsigned ScalarCost) : RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) { ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef Tys, FastMathFlags Flags, unsigned ScalarCost, const IntrinsicInst *I) : II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) { ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef Tys) : RetTy(RTy), IID(Id) { ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty, ArrayRef Args) : RetTy(Ty), IID(Id) { Arguments.insert(Arguments.begin(), Args.begin(), Args.end()); ParamTys.reserve(Arguments.size()); for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) ParamTys.push_back(Arguments[Idx]->getType()); } bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop, bool ForceHardwareLoopPHI) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); for (BasicBlock *BB : ExitingBlocks) { // If we pass the updated counter back through a phi, we need to know // which latch the updated value will be coming from. if (!L->isLoopLatch(BB)) { if (ForceHardwareLoopPHI || CounterInReg) continue; } const SCEV *EC = SE.getExitCount(L, BB); if (isa(EC)) continue; if (const SCEVConstant *ConstEC = dyn_cast(EC)) { if (ConstEC->getValue()->isZero()) continue; } else if (!SE.isLoopInvariant(EC, L)) continue; if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth()) continue; // If this exiting block is contained in a nested loop, it is not eligible // for insertion of the branch-and-decrement since the inner loop would // end up messing up the value in the CTR. if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop) continue; // We now have a loop-invariant count of loop iterations (which is not the // constant zero) for which we know that this loop will not exit via this // existing block. // We need to make sure that this block will run on every loop iteration. // For this to be true, we must dominate all blocks with backedges. Such // blocks are in-loop predecessors to the header block. bool NotAlways = false; for (BasicBlock *Pred : predecessors(L->getHeader())) { if (!L->contains(Pred)) continue; if (!DT.dominates(BB, Pred)) { NotAlways = true; break; } } if (NotAlways) continue; // Make sure this blocks ends with a conditional branch. Instruction *TI = BB->getTerminator(); if (!TI) continue; if (BranchInst *BI = dyn_cast(TI)) { if (!BI->isConditional()) continue; ExitBranch = BI; } else continue; // Note that this block may not be the loop latch block, even if the loop // has a latch block. ExitBlock = BB; ExitCount = EC; break; } if (!ExitBlock) return false; return true; } TargetTransformInfo::TargetTransformInfo(const DataLayout &DL) : TTIImpl(new Model(NoTTIImpl(DL))) {} TargetTransformInfo::~TargetTransformInfo() {} TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg) : TTIImpl(std::move(Arg.TTIImpl)) {} TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) { TTIImpl = std::move(RHS.TTIImpl); return *this; } unsigned TargetTransformInfo::getInliningThresholdMultiplier() const { return TTIImpl->getInliningThresholdMultiplier(); } int TargetTransformInfo::getInlinerVectorBonusPercent() const { return TTIImpl->getInlinerVectorBonusPercent(); } int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef Operands, TTI::TargetCostKind CostKind) const { return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind); } unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters( const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const { return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); } int TargetTransformInfo::getUserCost(const User *U, ArrayRef Operands, enum TargetCostKind CostKind) const { int Cost = TTIImpl->getUserCost(U, Operands, CostKind); assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) && "TTI should not produce negative costs!"); return Cost; } bool TargetTransformInfo::hasBranchDivergence() const { return TTIImpl->hasBranchDivergence(); } bool TargetTransformInfo::useGPUDivergenceAnalysis() const { return TTIImpl->useGPUDivergenceAnalysis(); } bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { return TTIImpl->isSourceOfDivergence(V); } bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const { return TTIImpl->isAlwaysUniform(V); } unsigned TargetTransformInfo::getFlatAddressSpace() const { return TTIImpl->getFlatAddressSpace(); } bool TargetTransformInfo::collectFlatAddressOperands( SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const { return TTIImpl->collectFlatAddressOperands(OpIndexes, IID); } bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS); } Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace( IntrinsicInst *II, Value *OldV, Value *NewV) const { return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); } bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } bool TargetTransformInfo::isHardwareLoopProfitable( Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const { return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); } bool TargetTransformInfo::preferPredicateOverEpilogue( Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) const { return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); } bool TargetTransformInfo::emitGetActiveLaneMask() const { return TTIImpl->emitGetActiveLaneMask(); } void TargetTransformInfo::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const { return TTIImpl->getUnrollingPreferences(L, SE, UP); } void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const { return TTIImpl->getPeelingPreferences(L, SE, PP); } bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { return TTIImpl->isLegalAddImmediate(Imm); } bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { return TTIImpl->isLegalICmpImmediate(Imm); } bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I) const { return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace, I); } bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { return TTIImpl->isLSRCostLess(C1, C2); } bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const { return TTIImpl->isProfitableLSRChainElement(I); } bool TargetTransformInfo::canMacroFuseCmp() const { return TTIImpl->canMacroFuseCmp(); } bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const { return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); } bool TargetTransformInfo::shouldFavorPostInc() const { return TTIImpl->shouldFavorPostInc(); } bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const { return TTIImpl->shouldFavorBackedgeIndex(L); } bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedStore(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedLoad(DataType, Alignment); } bool TargetTransformInfo::isLegalNTStore(Type *DataType, Align Alignment) const { return TTIImpl->isLegalNTStore(DataType, Alignment); } bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const { return TTIImpl->isLegalNTLoad(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedGather(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedGather(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedScatter(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const { return TTIImpl->isLegalMaskedCompressStore(DataType); } bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const { return TTIImpl->isLegalMaskedExpandLoad(DataType); } bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const { return TTIImpl->hasDivRemOp(DataType, IsSigned); } bool TargetTransformInfo::hasVolatileVariant(Instruction *I, unsigned AddrSpace) const { return TTIImpl->hasVolatileVariant(I, AddrSpace); } bool TargetTransformInfo::prefersVectorizedAddressing() const { return TTIImpl->prefersVectorizedAddressing(); } int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const { int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } bool TargetTransformInfo::LSRWithInstrQueries() const { return TTIImpl->LSRWithInstrQueries(); } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { return TTIImpl->isTruncateFree(Ty1, Ty2); } bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const { return TTIImpl->isProfitableToHoist(I); } bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); } bool TargetTransformInfo::isTypeLegal(Type *Ty) const { return TTIImpl->isTypeLegal(Ty); } bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } bool TargetTransformInfo::shouldBuildLookupTablesForConstant( Constant *C) const { return TTIImpl->shouldBuildLookupTablesForConstant(C); } bool TargetTransformInfo::useColdCCForColdCall(Function &F) const { return TTIImpl->useColdCCForColdCall(F); } unsigned TargetTransformInfo::getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) const { return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); } unsigned TargetTransformInfo::getOperandsScalarizationOverhead( ArrayRef Args, unsigned VF) const { return TTIImpl->getOperandsScalarizationOverhead(Args, VF); } bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const { return TTIImpl->supportsEfficientVectorElementLoadStore(); } bool TargetTransformInfo::enableAggressiveInterleaving( bool LoopHasReductions) const { return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } TargetTransformInfo::MemCmpExpansionOptions TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp); } bool TargetTransformInfo::enableInterleavedAccessVectorization() const { return TTIImpl->enableInterleavedAccessVectorization(); } bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const { return TTIImpl->enableMaskedInterleavedAccessVectorization(); } bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const { return TTIImpl->isFPVectorizationPotentiallyUnsafe(); } bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, unsigned Alignment, bool *Fast) const { return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, Alignment, Fast); } TargetTransformInfo::PopcntSupportKind TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { return TTIImpl->getPopcntSupport(IntTyWidthInBit); } bool TargetTransformInfo::haveFastSqrt(Type *Ty) const { return TTIImpl->haveFastSqrt(Ty); } bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty); } int TargetTransformInfo::getFPOpCost(Type *Ty) const { int Cost = TTIImpl->getFPOpCost(Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const { return TTIImpl->getNumberOfRegisters(ClassID); } unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const { return TTIImpl->getRegisterClassForType(Vector, Ty); } const char *TargetTransformInfo::getRegisterClassName(unsigned ClassID) const { return TTIImpl->getRegisterClassName(ClassID); } unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return TTIImpl->getRegisterBitWidth(Vector); } unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const { return TTIImpl->getMinVectorRegisterBitWidth(); } bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const { return TTIImpl->shouldMaximizeVectorBandwidth(OptSize); } unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const { return TTIImpl->getMinimumVF(ElemWidth); } bool TargetTransformInfo::shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { return TTIImpl->shouldConsiderAddressTypePromotion( I, AllowPromotionWithoutCommonHeader); } unsigned TargetTransformInfo::getCacheLineSize() const { return TTIImpl->getCacheLineSize(); } llvm::Optional TargetTransformInfo::getCacheSize(CacheLevel Level) const { return TTIImpl->getCacheSize(Level); } llvm::Optional TargetTransformInfo::getCacheAssociativity(CacheLevel Level) const { return TTIImpl->getCacheAssociativity(Level); } unsigned TargetTransformInfo::getPrefetchDistance() const { return TTIImpl->getPrefetchDistance(); } unsigned TargetTransformInfo::getMinPrefetchStride( unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const { return TTIImpl->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, NumPrefetches, HasCall); } unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const { return TTIImpl->getMaxPrefetchIterationsAhead(); } bool TargetTransformInfo::enableWritePrefetching() const { return TTIImpl->enableWritePrefetching(); } unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { return TTIImpl->getMaxInterleaveFactor(VF); } TargetTransformInfo::OperandValueKind TargetTransformInfo::getOperandInfo(const Value *V, OperandValueProperties &OpProps) { OperandValueKind OpInfo = OK_AnyValue; OpProps = OP_None; if (const auto *CI = dyn_cast(V)) { if (CI->getValue().isPowerOf2()) OpProps = OP_PowerOf2; return OK_UniformConstantValue; } // A broadcast shuffle creates a uniform value. // TODO: Add support for non-zero index broadcasts. // TODO: Add support for different source vector width. if (const auto *ShuffleInst = dyn_cast(V)) if (ShuffleInst->isZeroEltSplat()) OpInfo = OK_UniformValue; const Value *Splat = getSplatValue(V); // Check for a splat of a constant or for a non uniform vector of constants // and check if the constant(s) are all powers of two. if (isa(V) || isa(V)) { OpInfo = OK_NonUniformConstantValue; if (Splat) { OpInfo = OK_UniformConstantValue; if (auto *CI = dyn_cast(Splat)) if (CI->getValue().isPowerOf2()) OpProps = OP_PowerOf2; } else if (const auto *CDS = dyn_cast(V)) { OpProps = OP_PowerOf2; for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { if (auto *CI = dyn_cast(CDS->getElementAsConstant(I))) if (CI->getValue().isPowerOf2()) continue; OpProps = OP_None; break; } } } // Check for a splat of a uniform value. This is not loop aware, so return // true only for the obviously uniform cases (argument, globalvalue) if (Splat && (isa(Splat) || isa(Splat))) OpInfo = OK_UniformValue; return OpInfo; } int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) const { int Cost = TTIImpl->getArithmeticInstrCost( Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty, int Index, VectorType *SubTp) const { int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const { int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMaskedMemoryOpCost( unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getGatherScatterOpCost( unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const { int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const { int Cost = TTIImpl->getInterleavedMemoryOpCost( Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getIntrinsicInstrCost(ICA, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { return TTIImpl->getNumberOfParts(Tp); } int TargetTransformInfo::getAddressComputationCost(Type *Tp, ScalarEvolution *SE, const SCEV *Ptr) const { int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMemcpyCost(const Instruction *I) const { int Cost = TTIImpl->getMemcpyCost(I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMinMaxReductionCost( VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef Tys) const { return TTIImpl->getCostOfKeepingLiveOverCall(Tys); } bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const { return TTIImpl->getTgtMemIntrinsic(Inst, Info); } unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const { return TTIImpl->getAtomicMemIntrinsicMaxElementSize(); } Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( IntrinsicInst *Inst, Type *ExpectedType) const { return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } Type *TargetTransformInfo::getMemcpyLoopLoweringType( LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const { return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, DestAddrSpace, SrcAlign, DestAlign); } void TargetTransformInfo::getMemcpyLoopResidualLoweringType( SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const { TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign, DestAlign); } bool TargetTransformInfo::areInlineCompatible(const Function *Caller, const Function *Callee) const { return TTIImpl->areInlineCompatible(Caller, Callee); } bool TargetTransformInfo::areFunctionArgsABICompatible( const Function *Caller, const Function *Callee, SmallPtrSetImpl &Args) const { return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args); } bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const { return TTIImpl->isIndexedLoadLegal(Mode, Ty); } bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const { return TTIImpl->isIndexedStoreLegal(Mode, Ty); } unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const { return TTIImpl->getLoadStoreVecRegBitWidth(AS); } bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const { return TTIImpl->isLegalToVectorizeLoad(LI); } bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const { return TTIImpl->isLegalToVectorizeStore(SI); } bool TargetTransformInfo::isLegalToVectorizeLoadChain( unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const { return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace); } bool TargetTransformInfo::isLegalToVectorizeStoreChain( unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const { return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, AddrSpace); } unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const { return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); } unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const { return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); } bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags Flags) const { return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags); } bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { return TTIImpl->shouldExpandReduction(II); } unsigned TargetTransformInfo::getGISelRematGlobalCost() const { return TTIImpl->getGISelRematGlobalCost(); } int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { return TTIImpl->getInstructionLatency(I); } static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, unsigned Level) { // We don't need a shuffle if we just want to have element 0 in position 0 of // the vector. if (!SI && Level == 0 && IsLeft) return true; else if (!SI) return false; SmallVector Mask(SI->getType()->getNumElements(), -1); // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether // we look at the left or right side. for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) Mask[i] = val; ArrayRef ActualMask = SI->getShuffleMask(); return Mask == ActualMask; } static Optional getReductionData(Instruction *I) { Value *L, *R; if (m_BinOp(m_Value(L), m_Value(R)).match(I)) return TTI::ReductionData(TTI::RK_Arithmetic, I->getOpcode(), L, R); if (auto *SI = dyn_cast(I)) { if (m_SMin(m_Value(L), m_Value(R)).match(SI) || m_SMax(m_Value(L), m_Value(R)).match(SI) || m_OrdFMin(m_Value(L), m_Value(R)).match(SI) || m_OrdFMax(m_Value(L), m_Value(R)).match(SI) || m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast(SI->getCondition()); return TTI::ReductionData(TTI::RK_MinMax, CI->getOpcode(), L, R); } if (m_UMin(m_Value(L), m_Value(R)).match(SI) || m_UMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast(SI->getCondition()); return TTI::ReductionData(TTI::RK_UnsignedMinMax, CI->getOpcode(), L, R); } } return llvm::None; } static TTI::ReductionKind matchPairwiseReductionAtLevel(Instruction *I, unsigned Level, unsigned NumLevels) { // Match one level of pairwise operations. // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 if (!I) return TTI::RK_None; assert(I->getType()->isVectorTy() && "Expecting a vector type"); Optional RD = getReductionData(I); if (!RD) return TTI::RK_None; ShuffleVectorInst *LS = dyn_cast(RD->LHS); if (!LS && Level) return TTI::RK_None; ShuffleVectorInst *RS = dyn_cast(RD->RHS); if (!RS && Level) return TTI::RK_None; // On level 0 we can omit one shufflevector instruction. if (!Level && !RS && !LS) return TTI::RK_None; // Shuffle inputs must match. Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; Value *NextLevelOp = nullptr; if (NextLevelOpR && NextLevelOpL) { // If we have two shuffles their operands must match. if (NextLevelOpL != NextLevelOpR) return TTI::RK_None; NextLevelOp = NextLevelOpL; } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { // On the first level we can omit the shufflevector <0, undef,...>. So the // input to the other shufflevector <1, undef> must match with one of the // inputs to the current binary operation. // Example: // %NextLevelOpL = shufflevector %R, <1, undef ...> // %BinOp = fadd %NextLevelOpL, %R if (NextLevelOpL && NextLevelOpL != RD->RHS) return TTI::RK_None; else if (NextLevelOpR && NextLevelOpR != RD->LHS) return TTI::RK_None; NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS; } else return TTI::RK_None; // Check that the next levels binary operation exists and matches with the // current one. if (Level + 1 != NumLevels) { if (!isa(NextLevelOp)) return TTI::RK_None; Optional NextLevelRD = getReductionData(cast(NextLevelOp)); if (!NextLevelRD || !RD->hasSameData(*NextLevelRD)) return TTI::RK_None; } // Shuffle mask for pairwise operation must match. if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) { if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level)) return TTI::RK_None; } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) { if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level)) return TTI::RK_None; } else { return TTI::RK_None; } if (++Level == NumLevels) return RD->Kind; // Match next level. return matchPairwiseReductionAtLevel(dyn_cast(NextLevelOp), Level, NumLevels); } TTI::ReductionKind TTI::matchPairwiseReduction( const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty) { if (!EnableReduxCost) return TTI::RK_None; // Need to extract the first element. ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); unsigned Idx = ~0u; if (CI) Idx = CI->getZExtValue(); if (Idx != 0) return TTI::RK_None; auto *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); if (!RdxStart) return TTI::RK_None; Optional RD = getReductionData(RdxStart); if (!RD) return TTI::RK_None; auto *VecTy = cast(RdxStart->getType()); unsigned NumVecElems = VecTy->getNumElements(); if (!isPowerOf2_32(NumVecElems)) return TTI::RK_None; // We look for a sequence of shuffle,shuffle,add triples like the following // that builds a pairwise reduction tree. // // (X0, X1, X2, X3) // (X0 + X1, X2 + X3, undef, undef) // ((X0 + X1) + (X2 + X3), undef, undef, undef) // // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, // <4 x i32> // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, // <4 x i32> // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 // %r = extractelement <4 x float> %bin.rdx8, i32 0 if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) == TTI::RK_None) return TTI::RK_None; Opcode = RD->Opcode; Ty = VecTy; return RD->Kind; } static std::pair getShuffleAndOtherOprd(Value *L, Value *R) { ShuffleVectorInst *S = nullptr; if ((S = dyn_cast(L))) return std::make_pair(R, S); S = dyn_cast(R); return std::make_pair(L, S); } TTI::ReductionKind TTI::matchVectorSplittingReduction( const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty) { if (!EnableReduxCost) return TTI::RK_None; // Need to extract the first element. ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); unsigned Idx = ~0u; if (CI) Idx = CI->getZExtValue(); if (Idx != 0) return TTI::RK_None; auto *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); if (!RdxStart) return TTI::RK_None; Optional RD = getReductionData(RdxStart); if (!RD) return TTI::RK_None; auto *VecTy = cast(ReduxRoot->getOperand(0)->getType()); unsigned NumVecElems = VecTy->getNumElements(); if (!isPowerOf2_32(NumVecElems)) return TTI::RK_None; // We look for a sequence of shuffles and adds like the following matching one // fadd, shuffle vector pair at a time. // // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, // <4 x i32> // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 // %r = extractelement <4 x float> %bin.rdx8, i32 0 unsigned MaskStart = 1; Instruction *RdxOp = RdxStart; SmallVector ShuffleMask(NumVecElems, 0); unsigned NumVecElemsRemain = NumVecElems; while (NumVecElemsRemain - 1) { // Check for the right reduction operation. if (!RdxOp) return TTI::RK_None; Optional RDLevel = getReductionData(RdxOp); if (!RDLevel || !RDLevel->hasSameData(*RD)) return TTI::RK_None; Value *NextRdxOp; ShuffleVectorInst *Shuffle; std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS); // Check the current reduction operation and the shuffle use the same value. if (Shuffle == nullptr) return TTI::RK_None; if (Shuffle->getOperand(0) != NextRdxOp) return TTI::RK_None; // Check that shuffle masks matches. for (unsigned j = 0; j != MaskStart; ++j) ShuffleMask[j] = MaskStart + j; // Fill the rest of the mask with -1 for undef. std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); ArrayRef Mask = Shuffle->getShuffleMask(); if (ShuffleMask != Mask) return TTI::RK_None; RdxOp = dyn_cast(NextRdxOp); NumVecElemsRemain /= 2; MaskStart *= 2; } Opcode = RD->Opcode; Ty = VecTy; return RD->Kind; } int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; switch (I->getOpcode()) { case Instruction::GetElementPtr: case Instruction::Ret: case Instruction::PHI: case Instruction::Br: case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::FNeg: case Instruction::Select: case Instruction::ICmp: case Instruction::FCmp: case Instruction::Store: case Instruction::Load: case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: case Instruction::AddrSpaceCast: case Instruction::ExtractElement: case Instruction::InsertElement: case Instruction::ExtractValue: case Instruction::ShuffleVector: case Instruction::Call: return getUserCost(I, CostKind); default: // We don't have any information on this instruction. return -1; } } TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} TargetIRAnalysis::TargetIRAnalysis( std::function TTICallback) : TTICallback(std::move(TTICallback)) {} TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F, FunctionAnalysisManager &) { return TTICallback(F); } AnalysisKey TargetIRAnalysis::Key; TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) { return Result(F.getParent()->getDataLayout()); } // Register the basic pass. INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti", "Target Transform Information", false, true) char TargetTransformInfoWrapperPass::ID = 0; void TargetTransformInfoWrapperPass::anchor() {} TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass() : ImmutablePass(ID) { initializeTargetTransformInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass( TargetIRAnalysis TIRA) : ImmutablePass(ID), TIRA(std::move(TIRA)) { initializeTargetTransformInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) { FunctionAnalysisManager DummyFAM; TTI = TIRA.run(F, DummyFAM); return *TTI; } ImmutablePass * llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) { return new TargetTransformInfoWrapperPass(std::move(TIRA)); }