//===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// R600 target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#include "R600TargetTransformInfo.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "R600Subtarget.h"

using namespace llvm;

#define DEBUG_TYPE "R600tti"

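// The R600 cost model answers most queries itself; CommonTTI holds the
// generic AMDGPU implementation and is used where the heuristics are shared
// (currently the loop unrolling and peeling preferences below).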
R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getDataLayout()),
      ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}

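// The R600 register file is organized as 128 four-channel (vec4) registers;
// counting each 32-bit channel separately gives 4 * 128 usable registers.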
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
  return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}

unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
  return getHardwareNumberOfRegisters(Vec);
}

TypeSize
R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  return TypeSize::getFixed(32);
}

unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }

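// Widest load/store the vectorizers should form for each address space:
// global, constant, and the parameter/constant-buffer spaces support full
// 128-bit (vec4) accesses, local/region memory is limited to 64 bits, and
// private accesses stay at 32 bits.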
unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
  if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
    return 128;
  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
      AddrSpace == AMDGPUAS::REGION_ADDRESS)
    return 64;
  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
    return 32;

  if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
       AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
       (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
        AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
    return 128;
  llvm_unreachable("unhandled address space");
}

bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                             Align Alignment,
                                             unsigned AddrSpace) const {
  // We allow vectorization of flat stores, even though we may need to decompose
  // them later if they may access private memory. We don't have enough context
  // here, and legalization can handle it.
  return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
}

bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                              Align Alignment,
                                              unsigned AddrSpace) const {
  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}

bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                               Align Alignment,
                                               unsigned AddrSpace) const {
  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}

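// Maximum interleave factor reported to the loop vectorizer and unroller;
// scalar (non-vectorized) loops get a factor of 1, which disables unrolling
// for them.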
unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
  // Disable unrolling if the loop is not vectorized.
  // TODO: Enable this again.
  if (VF.isScalar())
    return 1;

  return 8;
}

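// Control-flow costs: for code-size and size-and-latency queries PHIs are
// free and other control flow costs 1; otherwise branches and returns are
// charged a flat heuristic cost of 10.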
InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
                                            TTI::TargetCostKind CostKind,
                                            const Instruction *I) {
  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
    return Opcode == Instruction::PHI ? 0 : 1;

  // XXX - For some reason this isn't called for switch.
  switch (Opcode) {
  case Instruction::Br:
  case Instruction::Ret:
    return 10;
  default:
    return BaseT::getCFInstrCost(Opcode, CostKind, I);
  }
}

InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                                TTI::TargetCostKind CostKind,
                                                unsigned Index, Value *Op0,
                                                Value *Op1) {
  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::InsertElement: {
    unsigned EltSize =
        DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
    if (EltSize < 32) {
      return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0,
                                       Op1);
    }

    // Extracts are just reads of a subregister, so are free. Inserts are
    // considered free because we don't want to have any cost for scalarizing
    // operations, and we don't have to copy into a different register class.

    // Dynamic indexing isn't free and is best avoided.
    return Index == ~0u ? 2 : 0;
  }
  default:
    return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);
  }
}

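// Unrolling and peeling preferences are not R600-specific; defer to the
// shared AMDGPU heuristics.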
void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP,
                                          OptimizationRemarkEmitter *ORE) {
  CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
}

void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                        TTI::PeelingPreferences &PP) {
  CommonTTI.getPeelingPreferences(L, SE, PP);
}