//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyTargetTransformInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

#define DEBUG_TYPE "wasmtti"

TargetTransformInfo::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  return TargetTransformInfo::PSK_FastHardware;
}

unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  unsigned Result = BaseT::getNumberOfRegisters(ClassID);

  // For SIMD, use at least 16 registers, as a rough guess.
  bool Vector = (ClassID == 1);
  if (Vector)
    Result = std::max(Result, 16u);

  return Result;
}

TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
    TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(64);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
    TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
    const Instruction *CxtI) {

  InstructionCost Cost =
      BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
          Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);

  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
    switch (Opcode) {
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
      // SIMD128's shifts currently only accept a scalar shift count. For each
      // element, we'll need to extract, op, insert. The following is a rough
      // approximation.
      if (Opd2Info != TTI::OK_UniformValue &&
          Opd2Info != TTI::OK_UniformConstantValue)
        Cost =
            cast<FixedVectorType>(VTy)->getNumElements() *
            (TargetTransformInfo::TCC_Basic +
             getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
             TargetTransformInfo::TCC_Basic);
      break;
    }
  }
  return Cost;
}

InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode,
                                                        Type *Val,
                                                        unsigned Index) {
  InstructionCost Cost =
      BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index);

  // SIMD128's insert/extract currently only take constant indices.
  if (Index == -1u)
    return Cost + 25 * TargetTransformInfo::TCC_Expensive;

  return Cost;
}

bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
                                             const Function *Callee) const {
  // Allow inlining only when the Callee has a subset of the Caller's
  // features. In principle, we should be able to inline regardless of any
  // features because WebAssembly supports features at module granularity, not
  // function granularity, but without this restriction it would be possible
  // for a module to "forget" about features if all the functions that used
  // them were inlined.
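  //
  // For example (purely illustrative): if the caller's subtarget was built
  // with {simd128, bulk-memory} and the callee only requires {simd128}, then
  // (CallerBits & CalleeBits) == CalleeBits holds and inlining is allowed.
  // If the callee additionally required a feature the caller lacks, the
  // subset check below would fail and we would refuse to inline.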
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  return (CallerBits & CalleeBits) == CalleeBits;
}

void WebAssemblyTTIImpl::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
    OptimizationRemarkEmitter *ORE) const {
  // Scan the loop: don't unroll loops with calls. This is a standard approach
  // for most (all?) targets.
  for (BasicBlock *BB : L->blocks())
    for (Instruction &I : *BB)
      if (isa<CallInst>(I) || isa<InvokeInst>(I))
        if (const Function *F = cast<CallBase>(I).getCalledFunction())
          if (isLoweredToCall(F))
            return;

  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
  // the various microarchitectures that use the BasicTTI implementation and
  // has been selected through heuristics across multiple cores and runtimes.
  UP.Partial = UP.Runtime = UP.UpperBound = true;
  UP.PartialThreshold = 30;

  // Avoid unrolling when optimizing for size.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;

  // Set number of instructions optimized when "back edge"
  // becomes "fall through" to default value of 2.
  UP.BEInsns = 2;
}