1 //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // This file implements a TargetTransformInfo analysis pass specific to the 11 // R600 target machine. It uses the target's detailed information to provide 12 // more precise answers to certain TTI queries, while letting the target 13 // independent and default TTI implementations handle the rest. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "R600TargetTransformInfo.h" 18 #include "AMDGPU.h" 19 #include "AMDGPUTargetMachine.h" 20 #include "R600Subtarget.h" 21 22 using namespace llvm; 23 24 #define DEBUG_TYPE "R600tti" 25 26 R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F) 27 : BaseT(TM, F.getDataLayout()), 28 ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))), 29 TLI(ST->getTargetLowering()), CommonTTI(TM, F) {} 30 31 unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { 32 return 4 * 128; // XXX - 4 channels. Should these count as vector instead? 33 } 34 35 unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const { 36 return getHardwareNumberOfRegisters(Vec); 37 } 38 39 TypeSize 40 R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { 41 return TypeSize::getFixed(32); 42 } 43 44 unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; } 45 46 unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { 47 if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS || 48 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) 49 return 128; 50 if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || 51 AddrSpace == AMDGPUAS::REGION_ADDRESS) 52 return 64; 53 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) 54 return 32; 55 56 if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS || 57 AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || 58 (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 && 59 AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15))) 60 return 128; 61 llvm_unreachable("unhandled address space"); 62 } 63 64 bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, 65 Align Alignment, 66 unsigned AddrSpace) const { 67 // We allow vectorization of flat stores, even though we may need to decompose 68 // them later if they may access private memory. We don't have enough context 69 // here, and legalization can handle it. 70 return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS); 71 } 72 73 bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, 74 Align Alignment, 75 unsigned AddrSpace) const { 76 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); 77 } 78 79 bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, 80 Align Alignment, 81 unsigned AddrSpace) const { 82 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); 83 } 84 85 unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) { 86 // Disable unrolling if the loop is not vectorized. 87 // TODO: Enable this again. 88 if (VF.isScalar()) 89 return 1; 90 91 return 8; 92 } 93 94 InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode, 95 TTI::TargetCostKind CostKind, 96 const Instruction *I) { 97 if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) 98 return Opcode == Instruction::PHI ? 0 : 1; 99 100 // XXX - For some reason this isn't called for switch. 101 switch (Opcode) { 102 case Instruction::Br: 103 case Instruction::Ret: 104 return 10; 105 default: 106 return BaseT::getCFInstrCost(Opcode, CostKind, I); 107 } 108 } 109 110 InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, 111 TTI::TargetCostKind CostKind, 112 unsigned Index, Value *Op0, 113 Value *Op1) { 114 switch (Opcode) { 115 case Instruction::ExtractElement: 116 case Instruction::InsertElement: { 117 unsigned EltSize = 118 DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType()); 119 if (EltSize < 32) { 120 return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, 121 Op1); 122 } 123 124 // Extracts are just reads of a subregister, so are free. Inserts are 125 // considered free because we don't want to have any cost for scalarizing 126 // operations, and we don't have to copy into a different register class. 127 128 // Dynamic indexing isn't free and is best avoided. 129 return Index == ~0u ? 2 : 0; 130 } 131 default: 132 return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1); 133 } 134 } 135 136 void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, 137 TTI::UnrollingPreferences &UP, 138 OptimizationRemarkEmitter *ORE) { 139 CommonTTI.getUnrollingPreferences(L, SE, UP, ORE); 140 } 141 142 void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, 143 TTI::PeelingPreferences &PP) { 144 CommonTTI.getPeelingPreferences(L, SE, PP); 145 } 146