//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyTargetTransformInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

#define DEBUG_TYPE "wasmtti"

TargetTransformInfo::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  return TargetTransformInfo::PSK_FastHardware;
}

unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  unsigned Result = BaseT::getNumberOfRegisters(ClassID);

  // For SIMD, use at least 16 registers, as a rough guess.
  bool Vector = (ClassID == 1);
  if (Vector)
    Result = std::max(Result, 16u);

  return Result;
}

TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
    TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(64);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI) {

  InstructionCost Cost =
      BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
          Opcode, Ty, CostKind, Op1Info, Op2Info);

  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
    switch (Opcode) {
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
      // SIMD128's shifts currently only accept a scalar shift count. For each
      // element, we'll need to extract, op, insert. The following is a rough
      // approximation.
      if (!Op2Info.isUniform())
        Cost =
            cast<FixedVectorType>(VTy)->getNumElements() *
            (TargetTransformInfo::TCC_Basic +
             getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
             TargetTransformInfo::TCC_Basic);
      break;
    }
  }
  return Cost;
}

InstructionCost
WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       TTI::TargetCostKind CostKind,
                                       unsigned Index, Value *Op0, Value *Op1) {
  InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost(
      Opcode, Val, CostKind, Index, Op0, Op1);

  // SIMD128's insert/extract currently only take constant indices.
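  // An Index of -1u is the TTI convention for a lane index that is not known
  // at compile time. Such an access cannot be encoded as a lane immediate and
  // typically has to be lowered through a stack temporary, so charge a large
  // flat penalty on top of the base cost.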
  if (Index == -1u)
    return Cost + 25 * TargetTransformInfo::TCC_Expensive;

  return Cost;
}

TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle(
    const IntrinsicInst *II) const {

  switch (II->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vector_reduce_fadd:
    return TTI::ReductionShuffle::Pairwise;
  }
  return TTI::ReductionShuffle::SplitHalf;
}

bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
                                             const Function *Callee) const {
  // Allow inlining only when the Callee has a subset of the Caller's
  // features. In principle, we should be able to inline regardless of any
  // features because WebAssembly supports features at module granularity, not
  // function granularity, but without this restriction it would be possible
  // for a module to "forget" about features if all the functions that used
  // them were inlined.
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  return (CallerBits & CalleeBits) == CalleeBits;
}

void WebAssemblyTTIImpl::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
    OptimizationRemarkEmitter *ORE) const {
  // Scan the loop: don't unroll loops with calls. This is a standard approach
  // for most (all?) targets.
  for (BasicBlock *BB : L->blocks())
    for (Instruction &I : *BB)
      if (isa<CallInst>(I) || isa<InvokeInst>(I))
        if (const Function *F = cast<CallBase>(I).getCalledFunction())
          if (isLoweredToCall(F))
            return;

  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
  // the various microarchitectures that use the BasicTTI implementation and
  // has been selected through heuristics across multiple cores and runtimes.
  UP.Partial = UP.Runtime = UP.UpperBound = true;
  UP.PartialThreshold = 30;

  // Avoid unrolling when optimizing for size.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;

  // Set the number of instructions optimized away when a "back edge" becomes
  // a "fall through" to the default value of 2.
  UP.BEInsns = 2;
}

bool WebAssemblyTTIImpl::supportsTailCalls() const {
  return getST()->hasTailCall();
}