1 //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // This file implements a TargetTransformInfo analysis pass specific to the 11 // R600 target machine. It uses the target's detailed information to provide 12 // more precise answers to certain TTI queries, while letting the target 13 // independent and default TTI implementations handle the rest. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "R600TargetTransformInfo.h" 18 #include "AMDGPU.h" 19 #include "AMDGPUTargetMachine.h" 20 #include "R600Subtarget.h" 21 22 using namespace llvm; 23 24 #define DEBUG_TYPE "R600tti" 25 26 R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F) 27 : BaseT(TM, F.getDataLayout()), 28 ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))), 29 TLI(ST->getTargetLowering()), CommonTTI(TM, F) {} 30 31 unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { 32 return 4 * 128; // XXX - 4 channels. Should these count as vector instead? 33 } 34 35 unsigned R600TTIImpl::getNumberOfRegisters(unsigned ClassID) const { 36 bool Vec = ClassID == 1; 37 return getHardwareNumberOfRegisters(Vec); 38 } 39 40 TypeSize 41 R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { 42 return TypeSize::getFixed(32); 43 } 44 45 unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; } 46 47 unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { 48 if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS || 49 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) 50 return 128; 51 if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || 52 AddrSpace == AMDGPUAS::REGION_ADDRESS) 53 return 64; 54 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) 55 return 32; 56 57 if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS || 58 AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || 59 (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 && 60 AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15))) 61 return 128; 62 llvm_unreachable("unhandled address space"); 63 } 64 65 bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, 66 Align Alignment, 67 unsigned AddrSpace) const { 68 // We allow vectorization of flat stores, even though we may need to decompose 69 // them later if they may access private memory. We don't have enough context 70 // here, and legalization can handle it. 71 return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS); 72 } 73 74 bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, 75 Align Alignment, 76 unsigned AddrSpace) const { 77 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); 78 } 79 80 bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, 81 Align Alignment, 82 unsigned AddrSpace) const { 83 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); 84 } 85 86 unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) const { 87 // Disable unrolling if the loop is not vectorized. 88 // TODO: Enable this again. 89 if (VF.isScalar()) 90 return 1; 91 92 return 8; 93 } 94 95 InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode, 96 TTI::TargetCostKind CostKind, 97 const Instruction *I) const { 98 if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) 99 return Opcode == Instruction::PHI ? 0 : 1; 100 101 // XXX - For some reason this isn't called for switch. 102 switch (Opcode) { 103 case Instruction::Br: 104 case Instruction::Ret: 105 return 10; 106 default: 107 return BaseT::getCFInstrCost(Opcode, CostKind, I); 108 } 109 } 110 111 InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, 112 TTI::TargetCostKind CostKind, 113 unsigned Index, 114 const Value *Op0, 115 const Value *Op1) const { 116 switch (Opcode) { 117 case Instruction::ExtractElement: 118 case Instruction::InsertElement: { 119 unsigned EltSize = 120 DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType()); 121 if (EltSize < 32) { 122 return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, 123 Op1); 124 } 125 126 // Extracts are just reads of a subregister, so are free. Inserts are 127 // considered free because we don't want to have any cost for scalarizing 128 // operations, and we don't have to copy into a different register class. 129 130 // Dynamic indexing isn't free and is best avoided. 131 return Index == ~0u ? 2 : 0; 132 } 133 default: 134 return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1); 135 } 136 } 137 138 void R600TTIImpl::getUnrollingPreferences( 139 Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, 140 OptimizationRemarkEmitter *ORE) const { 141 CommonTTI.getUnrollingPreferences(L, SE, UP, ORE); 142 } 143 144 void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, 145 TTI::PeelingPreferences &PP) const { 146 CommonTTI.getPeelingPreferences(L, SE, PP); 147 } 148