xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp (revision 454322c08b8aa181939c8d920472f03cfd591032)
1 //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // This file implements a TargetTransformInfo analysis pass specific to the
11 // R600 target machine. It uses the target's detailed information to provide
12 // more precise answers to certain TTI queries, while letting the target
13 // independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "R600TargetTransformInfo.h"
18 #include "AMDGPU.h"
19 #include "AMDGPUTargetMachine.h"
20 #include "R600Subtarget.h"
21 
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "R600tti"
25 
// Construct the R600 TTI implementation for function \p F: initialize the
// base TTI with the target's data layout, then cache the R600 subtarget,
// its lowering info, and the shared AMDGPU TTI used for unrolling/peeling.
R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getDataLayout()),
      ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
30 
31 unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
32   return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
33 }
34 
35 unsigned R600TTIImpl::getNumberOfRegisters(unsigned ClassID) const {
36   bool Vec = ClassID == 1;
37   return getHardwareNumberOfRegisters(Vec);
38 }
39 
40 TypeSize
41 R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
42   return TypeSize::getFixed(32);
43 }
44 
45 unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
46 
47 unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
48   if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
49       AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
50     return 128;
51   if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
52       AddrSpace == AMDGPUAS::REGION_ADDRESS)
53     return 64;
54   if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
55     return 32;
56 
57   if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
58        AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
59        (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
60         AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
61     return 128;
62   llvm_unreachable("unhandled address space");
63 }
64 
65 bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
66                                              Align Alignment,
67                                              unsigned AddrSpace) const {
68   // We allow vectorization of flat stores, even though we may need to decompose
69   // them later if they may access private memory. We don't have enough context
70   // here, and legalization can handle it.
71   return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
72 }
73 
74 bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
75                                               Align Alignment,
76                                               unsigned AddrSpace) const {
77   return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
78 }
79 
80 bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
81                                                Align Alignment,
82                                                unsigned AddrSpace) const {
83   return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
84 }
85 
86 unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) const {
87   // Disable unrolling if the loop is not vectorized.
88   // TODO: Enable this again.
89   if (VF.isScalar())
90     return 1;
91 
92   return 8;
93 }
94 
95 InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
96                                             TTI::TargetCostKind CostKind,
97                                             const Instruction *I) const {
98   if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
99     return Opcode == Instruction::PHI ? 0 : 1;
100 
101   // XXX - For some reason this isn't called for switch.
102   switch (Opcode) {
103   case Instruction::Br:
104   case Instruction::Ret:
105     return 10;
106   default:
107     return BaseT::getCFInstrCost(Opcode, CostKind, I);
108   }
109 }
110 
111 InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
112                                                 TTI::TargetCostKind CostKind,
113                                                 unsigned Index,
114                                                 const Value *Op0,
115                                                 const Value *Op1) const {
116   switch (Opcode) {
117   case Instruction::ExtractElement:
118   case Instruction::InsertElement: {
119     unsigned EltSize =
120         DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
121     if (EltSize < 32) {
122       return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0,
123                                        Op1);
124     }
125 
126     // Extracts are just reads of a subregister, so are free. Inserts are
127     // considered free because we don't want to have any cost for scalarizing
128     // operations, and we don't have to copy into a different register class.
129 
130     // Dynamic indexing isn't free and is best avoided.
131     return Index == ~0u ? 2 : 0;
132   }
133   default:
134     return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);
135   }
136 }
137 
138 void R600TTIImpl::getUnrollingPreferences(
139     Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
140     OptimizationRemarkEmitter *ORE) const {
141   CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
142 }
143 
144 void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
145                                         TTI::PeelingPreferences &PP) const {
146   CommonTTI.getPeelingPreferences(L, SE, PP);
147 }
148