//===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// Hexagon target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#include "HexagonTargetTransformInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"

using namespace llvm;

#define DEBUG_TYPE "hexagontti"

static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
    cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));

static cl::opt<bool> EnableV68FloatAutoHVX(
    "force-hvx-float", cl::Hidden,
    cl::desc("Enable auto-vectorization of floating point types on v68."));

static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
    cl::init(true), cl::Hidden,
    cl::desc("Control lookup table emission on Hexagon target"));

static cl::opt<bool> HexagonMaskedVMem("hexagon-masked-vmem", cl::init(true),
    cl::Hidden, cl::desc("Enable masked loads/stores for HVX"));

// Constant "cost factor" to make floating point operations more expensive
// in terms of vectorization cost. This isn't the best way, but it should
// do. Ultimately, the cost should use cycles.
static const unsigned FloatFactor = 4;

bool HexagonTTIImpl::useHVX() const {
  return ST.useHVXOps() && HexagonAutoHVX;
}

bool HexagonTTIImpl::isHVXVectorType(Type *Ty) const {
  auto *VecTy = dyn_cast<VectorType>(Ty);
  if (!VecTy)
    return false;
  if (!ST.isTypeForHVX(VecTy))
    return false;
  if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy())
    return true;
  return ST.useHVXV68Ops() && EnableV68FloatAutoHVX;
}

unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
    return VTy->getNumElements();
  assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
         "Expecting scalar type");
  return 1;
}

TargetTransformInfo::PopcntSupportKind
HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
  // Return fast hardware support as every input < 64 bits will be promoted
  // to 64 bits.
  return TargetTransformInfo::PSK_FastHardware;
}

// The Hexagon target can unroll loops with run-time trip counts.
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP,
                                             OptimizationRemarkEmitter *ORE) {
  UP.Runtime = UP.Partial = true;
}

void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
  // Only try to peel innermost loops with small runtime trip counts.
  if (L && L->isInnermost() && canPeel(L) &&
      SE.getSmallConstantTripCount(L) == 0 &&
      SE.getSmallConstantMaxTripCount(L) > 0 &&
      SE.getSmallConstantMaxTripCount(L) <= 5) {
    PP.PeelCount = 2;
  }
}
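// Hexagon provides post-incremented load and store instructions, which loop
// strength reduction can exploit when forming address recurrences, so report
// post-indexed as the preferred addressing mode.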
TTI::AddressingModeKind
HexagonTTIImpl::getPreferredAddressingMode(const Loop *L,
                                           ScalarEvolution *SE) const {
  return TTI::AMK_PostIndexed;
}

/// --- Vector TTI begin ---

unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
  if (Vector)
    return useHVX() ? 32 : 0;
  return 32;
}

unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
  return useHVX() ? 2 : 1;
}

TypeSize
HexagonTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(32);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(getMinVectorRegisterBitWidth());
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
  return useHVX() ? ST.getVectorLength() * 8 : 32;
}

ElementCount HexagonTTIImpl::getMinimumVF(unsigned ElemWidth,
                                          bool IsScalable) const {
  assert(!IsScalable && "Scalable VFs are not supported for Hexagon");
  return ElementCount::getFixed((8 * ST.getVectorLength()) / ElemWidth);
}

InstructionCost HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
                                                 ArrayRef<Type *> Tys,
                                                 TTI::TargetCostKind CostKind) {
  return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind);
}

InstructionCost
HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                      TTI::TargetCostKind CostKind) {
  if (ICA.getID() == Intrinsic::bswap) {
    std::pair<InstructionCost, MVT> LT =
        getTypeLegalizationCost(ICA.getReturnType());
    return LT.first + 2;
  }
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

InstructionCost HexagonTTIImpl::getAddressComputationCost(Type *Tp,
                                                          ScalarEvolution *SE,
                                                          const SCEV *S) {
  return 0;
}
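// Reciprocal-throughput cost of a load or store. An HVX load whose width is
// a multiple of the vector register width costs one unit per register; an
// HVX load that is not is modeled as a series of alignment-sized loads plus
// the work of assembling the vector from them. Non-HVX FP vector loads are
// penalized by FloatFactor, and under-aligned accesses by the number of
// pieces needed to compose the value.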
InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                                MaybeAlign Alignment,
                                                unsigned AddressSpace,
                                                TTI::TargetCostKind CostKind,
                                                TTI::OperandValueInfo OpInfo,
                                                const Instruction *I) {
  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return 1;

  if (Opcode == Instruction::Store)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, OpInfo, I);

  if (Src->isVectorTy()) {
    VectorType *VecTy = cast<VectorType>(Src);
    unsigned VecWidth = VecTy->getPrimitiveSizeInBits().getFixedValue();
    if (isHVXVectorType(VecTy)) {
      unsigned RegWidth =
          getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
              .getFixedValue();
      assert(RegWidth && "Non-zero vector register width expected");
      // Cost of HVX loads.
      if (VecWidth % RegWidth == 0)
        return VecWidth / RegWidth;
      // Cost of constructing HVX vector from scalar loads.
      const Align RegAlign(RegWidth / 8);
      if (!Alignment || *Alignment > RegAlign)
        Alignment = RegAlign;
      assert(Alignment);
      unsigned AlignWidth = 8 * Alignment->value();
      unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
      return 3 * NumLoads;
    }

    // Non-HVX vectors.
    // Add extra cost for floating point types.
    unsigned Cost =
        VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;

    // At this point unspecified alignment is considered as Align(1).
    const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8));
    unsigned AlignWidth = 8 * BoundAlignment.value();
    unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
    if (Alignment == Align(4) || Alignment == Align(8))
      return Cost * NumLoads;
    // Loads of less than 32 bits will need extra inserts to compose a vector.
    assert(BoundAlignment <= Align(8));
    unsigned LogA = Log2(BoundAlignment);
    return (3 - LogA) * Cost * NumLoads;
  }

  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
                                OpInfo, I);
}

InstructionCost
HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                      Align Alignment, unsigned AddressSpace,
                                      TTI::TargetCostKind CostKind) {
  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                      CostKind);
}

InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
                                               ArrayRef<int> Mask,
                                               TTI::TargetCostKind CostKind,
                                               int Index, Type *SubTp,
                                               ArrayRef<const Value *> Args,
                                               const Instruction *CxtI) {
  return 1;
}

InstructionCost HexagonTTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
}

InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);
  return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
                         CostKind);
}

InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                                   Type *CondTy,
                                                   CmpInst::Predicate VecPred,
                                                   TTI::TargetCostKind CostKind,
                                                   const Instruction *I) {
  if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
    if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy())
      return InstructionCost::getMax();
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    if (Opcode == Instruction::FCmp)
      return LT.first + FloatFactor * getTypeNumElements(ValTy);
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
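// Floating point vector arithmetic that cannot be carried out in HVX
// registers would have to be scalarized, so return the maximum cost to keep
// the vectorizers away from such types; HVX FP vectors pay FloatFactor per
// element on top of the type legalization cost.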
InstructionCost HexagonTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
  // TODO: Handle more cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Args, CxtI);

  if (Ty->isVectorTy()) {
    if (!isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy())
      return InstructionCost::getMax();
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
    if (LT.second.isFloatingPoint())
      return LT.first + FloatFactor * getTypeNumElements(Ty);
  }
  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                       Args, CxtI);
}
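// Casts to or from floating point are charged FloatFactor per FP element on
// top of the type legalization cost; casts involving FP vector types that
// HVX cannot hold get the maximum cost, and all other casts are considered
// cheap.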
InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
                                                 Type *SrcTy,
                                                 TTI::CastContextHint CCH,
                                                 TTI::TargetCostKind CostKind,
                                                 const Instruction *I) {
  auto isNonHVXFP = [this](Type *Ty) {
    return Ty->isVectorTy() && !isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy();
  };
  if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy))
    return InstructionCost::getMax();

  if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
    unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
    unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;

    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcTy);
    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(DstTy);
    InstructionCost Cost =
        std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
    // TODO: Allow non-throughput costs that aren't binary.
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost == 0 ? 0 : 1;
    return Cost;
  }
  return 1;
}

InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                   TTI::TargetCostKind CostKind,
                                                   unsigned Index, Value *Op0,
                                                   Value *Op1) {
  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
                                   : Val;
  if (Opcode == Instruction::InsertElement) {
    // Need two rotations for non-zero index.
    unsigned Cost = (Index != 0) ? 2 : 0;
    if (ElemTy->isIntegerTy(32))
      return Cost;
    // If it's not a 32-bit value, there will need to be an extract.
    return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, CostKind,
                                     Index, Op0, Op1);
  }

  if (Opcode == Instruction::ExtractElement)
    return 2;

  return 1;
}

bool HexagonTTIImpl::isLegalMaskedStore(Type *DataType, Align /*Alignment*/) {
  // This function is called from scalarize-masked-mem-intrin, which runs
  // in pre-isel. Use ST directly instead of calling isHVXVectorType.
  return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
}

bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/) {
  // This function is called from scalarize-masked-mem-intrin, which runs
  // in pre-isel. Use ST directly instead of calling isHVXVectorType.
  return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
}

/// --- Vector TTI end ---

unsigned HexagonTTIImpl::getPrefetchDistance() const {
  return ST.getL1PrefetchDistance();
}

unsigned HexagonTTIImpl::getCacheLineSize() const {
  return ST.getL1CacheLineSize();
}

InstructionCost
HexagonTTIImpl::getInstructionCost(const User *U,
                                   ArrayRef<const Value *> Operands,
                                   TTI::TargetCostKind CostKind) {
  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
    if (!CI->isIntegerCast())
      return false;
    // Only extensions from an integer type shorter than 32-bit to i32
    // can be folded into the load.
    const DataLayout &DL = getDataLayout();
    unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
    unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
    if (DBW != 32 || SBW >= DBW)
      return false;

    const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
    // Technically, this code could allow multiple uses of the load, and
    // check if all the uses are the same extension operation, but this
    // should be sufficient for most cases.
    return LI && LI->hasOneUse();
  };

  if (const CastInst *CI = dyn_cast<const CastInst>(U))
    if (isCastFoldedIntoLoad(CI))
      return TargetTransformInfo::TCC_Free;
  return BaseT::getInstructionCost(U, Operands, CostKind);
}

bool HexagonTTIImpl::shouldBuildLookupTables() const {
  return EmitLookupTables;
}