xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // This file implements a TargetTransformInfo analysis pass specific to the
11 // R600 target machine. It uses the target's detailed information to provide
12 // more precise answers to certain TTI queries, while letting the target
13 // independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "R600TargetTransformInfo.h"
18 #include "AMDGPU.h"
19 #include "AMDGPUTargetMachine.h"
20 #include "R600Subtarget.h"
21 
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "R600tti"
25 
26 R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
27     : BaseT(TM, F.getParent()->getDataLayout()),
28       ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
29       TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
30 
31 unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
32   return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
33 }
34 
35 unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
36   return getHardwareNumberOfRegisters(Vec);
37 }
38 
39 TypeSize
40 R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
41   return TypeSize::getFixed(32);
42 }
43 
44 unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
45 
46 unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
47   if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
48       AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
49     return 128;
50   if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
51       AddrSpace == AMDGPUAS::REGION_ADDRESS)
52     return 64;
53   if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
54     return 32;
55 
56   if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
57        AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
58        (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
59         AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
60     return 128;
61   llvm_unreachable("unhandled address space");
62 }
63 
64 bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
65                                              Align Alignment,
66                                              unsigned AddrSpace) const {
67   // We allow vectorization of flat stores, even though we may need to decompose
68   // them later if they may access private memory. We don't have enough context
69   // here, and legalization can handle it.
70   return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
71 }
72 
73 bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
74                                               Align Alignment,
75                                               unsigned AddrSpace) const {
76   return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
77 }
78 
79 bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
80                                                Align Alignment,
81                                                unsigned AddrSpace) const {
82   return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
83 }
84 
85 unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
86   // Disable unrolling if the loop is not vectorized.
87   // TODO: Enable this again.
88   if (VF == 1)
89     return 1;
90 
91   return 8;
92 }
93 
94 InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
95                                             TTI::TargetCostKind CostKind,
96                                             const Instruction *I) {
97   if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
98     return Opcode == Instruction::PHI ? 0 : 1;
99 
100   // XXX - For some reason this isn't called for switch.
101   switch (Opcode) {
102   case Instruction::Br:
103   case Instruction::Ret:
104     return 10;
105   default:
106     return BaseT::getCFInstrCost(Opcode, CostKind, I);
107   }
108 }
109 
110 InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
111                                                 unsigned Index) {
112   switch (Opcode) {
113   case Instruction::ExtractElement:
114   case Instruction::InsertElement: {
115     unsigned EltSize =
116         DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
117     if (EltSize < 32) {
118       return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
119     }
120 
121     // Extracts are just reads of a subregister, so are free. Inserts are
122     // considered free because we don't want to have any cost for scalarizing
123     // operations, and we don't have to copy into a different register class.
124 
125     // Dynamic indexing isn't free and is best avoided.
126     return Index == ~0u ? 2 : 0;
127   }
128   default:
129     return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
130   }
131 }
132 
133 void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
134                                           TTI::UnrollingPreferences &UP,
135                                           OptimizationRemarkEmitter *ORE) {
136   CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
137 }
138 
139 void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
140                                         TTI::PeelingPreferences &PP) {
141   CommonTTI.getPeelingPreferences(L, SE, PP);
142 }
143