1 //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // This file implements a TargetTransformInfo analysis pass specific to the
11 // R600 target machine. It uses the target's detailed information to provide
12 // more precise answers to certain TTI queries, while letting the target
13 // independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16
17 #include "R600TargetTransformInfo.h"
18 #include "AMDGPU.h"
19 #include "AMDGPUTargetMachine.h"
20 #include "R600Subtarget.h"
21
22 using namespace llvm;
23
24 #define DEBUG_TYPE "R600tti"
25
R600TTIImpl(const AMDGPUTargetMachine * TM,const Function & F)26 R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
27 : BaseT(TM, F.getDataLayout()),
28 ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
29 TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
30
getHardwareNumberOfRegisters(bool Vec) const31 unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
32 return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
33 }
34
getNumberOfRegisters(bool Vec) const35 unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
36 return getHardwareNumberOfRegisters(Vec);
37 }
38
39 TypeSize
getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const40 R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
41 return TypeSize::getFixed(32);
42 }
43
getMinVectorRegisterBitWidth() const44 unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
45
getLoadStoreVecRegBitWidth(unsigned AddrSpace) const46 unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
47 if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
48 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
49 return 128;
50 if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
51 AddrSpace == AMDGPUAS::REGION_ADDRESS)
52 return 64;
53 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
54 return 32;
55
56 if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
57 AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
58 (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
59 AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
60 return 128;
61 llvm_unreachable("unhandled address space");
62 }
63
isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,Align Alignment,unsigned AddrSpace) const64 bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
65 Align Alignment,
66 unsigned AddrSpace) const {
67 // We allow vectorization of flat stores, even though we may need to decompose
68 // them later if they may access private memory. We don't have enough context
69 // here, and legalization can handle it.
70 return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
71 }
72
isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,Align Alignment,unsigned AddrSpace) const73 bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
74 Align Alignment,
75 unsigned AddrSpace) const {
76 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
77 }
78
isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,Align Alignment,unsigned AddrSpace) const79 bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
80 Align Alignment,
81 unsigned AddrSpace) const {
82 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
83 }
84
getMaxInterleaveFactor(ElementCount VF)85 unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
86 // Disable unrolling if the loop is not vectorized.
87 // TODO: Enable this again.
88 if (VF.isScalar())
89 return 1;
90
91 return 8;
92 }
93
getCFInstrCost(unsigned Opcode,TTI::TargetCostKind CostKind,const Instruction * I)94 InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
95 TTI::TargetCostKind CostKind,
96 const Instruction *I) {
97 if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
98 return Opcode == Instruction::PHI ? 0 : 1;
99
100 // XXX - For some reason this isn't called for switch.
101 switch (Opcode) {
102 case Instruction::Br:
103 case Instruction::Ret:
104 return 10;
105 default:
106 return BaseT::getCFInstrCost(Opcode, CostKind, I);
107 }
108 }
109
getVectorInstrCost(unsigned Opcode,Type * ValTy,TTI::TargetCostKind CostKind,unsigned Index,Value * Op0,Value * Op1)110 InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
111 TTI::TargetCostKind CostKind,
112 unsigned Index, Value *Op0,
113 Value *Op1) {
114 switch (Opcode) {
115 case Instruction::ExtractElement:
116 case Instruction::InsertElement: {
117 unsigned EltSize =
118 DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
119 if (EltSize < 32) {
120 return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0,
121 Op1);
122 }
123
124 // Extracts are just reads of a subregister, so are free. Inserts are
125 // considered free because we don't want to have any cost for scalarizing
126 // operations, and we don't have to copy into a different register class.
127
128 // Dynamic indexing isn't free and is best avoided.
129 return Index == ~0u ? 2 : 0;
130 }
131 default:
132 return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);
133 }
134 }
135
getUnrollingPreferences(Loop * L,ScalarEvolution & SE,TTI::UnrollingPreferences & UP,OptimizationRemarkEmitter * ORE)136 void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
137 TTI::UnrollingPreferences &UP,
138 OptimizationRemarkEmitter *ORE) {
139 CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
140 }
141
getPeelingPreferences(Loop * L,ScalarEvolution & SE,TTI::PeelingPreferences & PP)142 void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
143 TTI::PeelingPreferences &PP) {
144 CommonTTI.getPeelingPreferences(L, SE, PP);
145 }
146