1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
37 #define DEBUG_TYPE "amdgpu-subtarget"
46 "amdgpu-enable-power-sched",
51 "amdgpu-vgpr-index-mode",
52 cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
55 static cl::opt<bool> UseAA("amdgpu-use-aa-in-codegen",
59 static cl::opt<unsigned> NSAThreshold("amdgpu-nsa-threshold",
68 // Determine default and user-specified characteristics in initializeSubtargetDependencies()
74   // Similarly, we want enable-prt-strict-null to be on by default and not to in initializeSubtargetDependencies()
77 SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,"); in initializeSubtargetDependencies()
81 FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,"; in initializeSubtargetDependencies()
83 FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS in initializeSubtargetDependencies()
88 FullFS += "-wavefrontsize16,"; in initializeSubtargetDependencies()
90 FullFS += "-wavefrontsize32,"; in initializeSubtargetDependencies()
92 FullFS += "-wavefrontsize64,"; in initializeSubtargetDependencies()
100   // generation features are enabled (e.g. for -mcpu=''). HSA OS defaults to in initializeSubtargetDependencies()
119   // Targets must support 64-bit offsets for MUBUF instructions and/or in initializeSubtargetDependencies()
120 // support flat operations, otherwise they cannot access a 64-bit global in initializeSubtargetDependencies()
123 // Unless +-flat-for-global is specified, turn on FlatForGlobal for targets in initializeSubtargetDependencies()
127 if (!hasAddr64() && !FS.contains("flat-for-global") && !FlatForGlobal) { in initializeSubtargetDependencies()
131 // Unless +-flat-for-global is specified, use MUBUF instructions for global in initializeSubtargetDependencies()
133 if (!hasFlat() && !FS.contains("flat-for-global") && FlatForGlobal) { in initializeSubtargetDependencies()
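A minimal sketch (not the real initializeSubtargetDependencies(); names are made up) of the feature-string convention used above: defaults are appended first and the user's -mattr string last, so explicit user settings override the defaults when the comma-separated list is parsed.

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/StringRef.h"

    static llvm::SmallString<256> buildFeatureString(llvm::StringRef UserFS,
                                                     bool DefaultFlatForGlobal) {
      llvm::SmallString<256> FullFS("+promote-alloca,+load-store-opt,");
      // Default choice first; later "+feature"/"-feature" entries win.
      FullFS += DefaultFlatForGlobal ? "+flat-for-global," : "-flat-for-global,";
      FullFS += UserFS; // e.g. the contents of -mattr=...
      return FullFS;
    }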
194 : // clang-format off in GCNSubtarget()
203 // clang-format on in GCNSubtarget()
320 // On gfx10, all 16-bit instructions preserve the high bits. in zeroesHigh16BitsOfDest()
330   // In gfx9, the preferred handling of the unused high 16 bits changed. Most in zeroesHigh16BitsOfDest()
341 // Returns the maximum per-workgroup LDS allocation size (in bytes) that still
350 std::max(1u, (WorkGroupSize + WaveSize - 1) / WaveSize); in getMaxLocalMemSizeWithWaveCount()
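Worked example of the rounding above, with hypothetical numbers: the wave count is a ceiling division of the workgroup size by the wavefront size, clamped to at least one wave.

    #include <algorithm>

    // A 256-work-item group on a wave64 target occupies ceil(256 / 64) = 4 waves;
    // std::max guards the degenerate zero-size case.
    constexpr unsigned WorkGroupSize = 256, WaveSize = 64;
    static_assert(std::max(1u, (WorkGroupSize + WaveSize - 1) / WaveSize) == 4,
                  "4 waves per workgroup");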
363 // size, and the per-workgroup LDS allocation size.
406 return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction()); in getOccupancyWithLocalMemSize()
432 F, "amdgpu-flat-work-group-size", Default); in getFlatWorkGroupSizes()
454   // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum in getEffectiveWavesPerEU()
485 AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", Default, true); in getWavesPerEU()
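Both limits are read from string function attributes parsed as "min,max" integer pairs (the second value of "amdgpu-waves-per-eu" may be omitted). A front end or a test can request them explicitly; a sketch with illustrative values only:

    #include "llvm/IR/Function.h"

    void requestLaunchBounds(llvm::Function &F) { // hypothetical helper
      F.addFnAttr("amdgpu-flat-work-group-size", "1,256"); // min,max work items
      F.addFnAttr("amdgpu-waves-per-eu", "2,4");           // min,max waves per EU
    }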
489 static unsigned getReqdWorkGroupSize(const Function &Kernel, unsigned Dim) { in getReqdWorkGroupSize() argument
491 if (Node && Node->getNumOperands() == 3) in getReqdWorkGroupSize()
492 return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue(); in getReqdWorkGroupSize()
504 return ReqdSize - 1; in getMaxWorkitemID()
505 return getFlatWorkGroupSizes(Kernel).second - 1; in getMaxWorkitemID()
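getReqdWorkGroupSize() reads the OpenCL-style !reqd_work_group_size function metadata, a node of three i32 constants (X, Y, Z). A standalone sketch of the same lookup, using the "unknown" sentinel the file checks against:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Metadata.h"
    #include <limits>

    // Assumes the kernel carries metadata such as:
    //   !reqd_work_group_size !0    with    !0 = !{i32 64, i32 1, i32 1}
    static unsigned readReqdWorkGroupSize(const llvm::Function &Kernel, unsigned Dim) {
      llvm::MDNode *Node = Kernel.getMetadata("reqd_work_group_size");
      if (Node && Node->getNumOperands() == 3 && Dim < 3)
        return llvm::mdconst::extract<llvm::ConstantInt>(Node->getOperand(Dim))
            ->getZExtValue();
      return std::numeric_limits<unsigned>::max(); // "unknown"
    }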
518 Function *Kernel = I->getParent()->getParent(); in makeLIDRangeMetadata()
525 const Function *F = CI->getCalledFunction(); in makeLIDRangeMetadata()
527 unsigned Dim = UINT_MAX; in makeLIDRangeMetadata() local
528 switch (F->getIntrinsicID()) { in makeLIDRangeMetadata()
534 Dim = 0; in makeLIDRangeMetadata()
541 Dim = 1; in makeLIDRangeMetadata()
548 Dim = 2; in makeLIDRangeMetadata()
554 if (Dim <= 3) { in makeLIDRangeMetadata()
555 unsigned ReqdSize = getReqdWorkGroupSize(*Kernel, Dim); in makeLIDRangeMetadata()
576 CI->addRangeRetAttr(Range); in makeLIDRangeMetadata()
578 MDBuilder MDB(I->getContext()); in makeLIDRangeMetadata()
580 I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange); in makeLIDRangeMetadata()
590 if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr")) in getImplicitArgNumBytes()
600 return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes", in getImplicitArgNumBytes()
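The byte count comes from a string attribute parsed as an integer, and is skipped entirely when "amdgpu-no-implicitarg-ptr" promises the pointer is unused. A sketch of how a front end might set it (the value 256 is only an assumed example):

    #include "llvm/IR/Function.h"

    void setImplicitArgBytes(llvm::Function &F) { // hypothetical helper
      // Reserve space after the explicit kernel arguments for the implicit
      // arguments the runtime appends (grid sizes, queue pointer, ...).
      F.addFnAttr("amdgpu-implicitarg-num-bytes", "256");
    }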
682 return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16_e64) != -1; in hasMadF16()
752 // "amdgpu-num-sgpr" attribute. in getBaseMaxNumSGPRs()
753 if (F.hasFnAttribute("amdgpu-num-sgpr")) { in getBaseMaxNumSGPRs()
755 F.getFnAttributeAsParsedInteger("amdgpu-num-sgpr", MaxNumSGPRs); in getBaseMaxNumSGPRs()
765 // of reserved special registers in total. Theoretically you could re-use in getBaseMaxNumSGPRs()
787 return std::min(MaxNumSGPRs - ReservedNumSGPRs, MaxAddressableNumSGPRs); in getBaseMaxNumSGPRs()
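The final budget is the requested count minus whatever the target reserves, capped by what is architecturally addressable. With made-up numbers:

    #include <algorithm>

    constexpr unsigned MaxNumSGPRs = 112;            // hypothetical "amdgpu-num-sgpr" request
    constexpr unsigned ReservedNumSGPRs = 6;         // hypothetical reserved registers
    constexpr unsigned MaxAddressableNumSGPRs = 102; // hypothetical architectural cap
    static_assert(std::min(MaxNumSGPRs - ReservedNumSGPRs, MaxAddressableNumSGPRs) == 102,
                  "min(112 - 6, 102) == 102 usable SGPRs");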
834 // "amdgpu-num-vgpr" attribute. in getBaseMaxNumVGPRs()
835 if (F.hasFnAttribute("amdgpu-num-vgpr")) { in getBaseMaxNumVGPRs()
837 F.getFnAttributeAsParsedInteger("amdgpu-num-vgpr", MaxNumVGPRs); in getBaseMaxNumVGPRs()
871 !Def->isInstr() || !Use->isInstr()) in adjustSchedDependency()
874 MachineInstr *DefI = Def->getInstr(); in adjustSchedDependency()
875 MachineInstr *UseI = Use->getInstr(); in adjustSchedDependency()
877 if (DefI->isBundle()) { in adjustSchedDependency()
880 MachineBasicBlock::const_instr_iterator I(DefI->getIterator()); in adjustSchedDependency()
881 MachineBasicBlock::const_instr_iterator E(DefI->getParent()->instr_end()); in adjustSchedDependency()
883 for (++I; I != E && I->isBundledWithPred(); ++I) { in adjustSchedDependency()
884 if (I->modifiesRegister(Reg, TRI)) in adjustSchedDependency()
887 --Lat; in adjustSchedDependency()
890 } else if (UseI->isBundle()) { in adjustSchedDependency()
893 MachineBasicBlock::const_instr_iterator I(UseI->getIterator()); in adjustSchedDependency()
894 MachineBasicBlock::const_instr_iterator E(UseI->getParent()->instr_end()); in adjustSchedDependency()
896 for (++I; I != E && I->isBundledWithPred() && Lat; ++I) { in adjustSchedDependency()
897 if (I->readsRegister(Reg, TRI)) in adjustSchedDependency()
899 --Lat; in adjustSchedDependency()
921 const MachineInstr *MI = SU->getInstr(); in isSALU()
922 return MI && TII->isSALU(*MI) && !MI->isTerminator(); in isSALU()
926 const MachineInstr *MI = SU->getInstr(); in isVALU()
927 return MI && TII->isVALU(*MI); in isVALU()
937 while (!Worklist.empty() && MaxChain-- > 0) { in linkSALUChain()
942     LLVM_DEBUG(dbgs() << "Inserting edge from\n"; DAG->dumpNode(*From); in linkSALUChain()
943 dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n'); in linkSALUChain()
945 if (SU != From && From != &DAG->ExitSU && DAG->canAddEdge(SU, From)) in linkSALUChain()
946 if (DAG->addEdge(SU, SDep(From, SDep::Artificial))) in linkSALUChain()
949 for (SDep &SI : From->Succs) { in linkSALUChain()
951 if (SUv != From && SU != &DAG->ExitSU && isVALU(SUv) && in linkSALUChain()
952 DAG->canAddEdge(SUv, SU)) in linkSALUChain()
953 DAG->addEdge(SUv, SDep(SU, SDep::Artificial)); in linkSALUChain()
956 for (SDep &SI : SU->Succs) { in linkSALUChain()
967 const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>(); in apply()
971 const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel(); in apply()
972 if (!TSchedModel || DAG->SUnits.empty()) in apply()
979 auto LastSALU = DAG->SUnits.begin(); in apply()
980 auto E = DAG->SUnits.end(); in apply()
982 for (SUnit &SU : DAG->SUnits) { in apply()
984 if (!TII->isMAI(MAI) || in apply()
989 unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1; in apply()
991 LLVM_DEBUG(dbgs() << "Found MFMA: "; DAG->dumpNode(SU); in apply()
1001 if (&SU == &DAG->ExitSU || &SU == &*LastSALU || !isSALU(&*LastSALU) || in apply()
1002 !DAG->canAddEdge(&*LastSALU, &SU)) in apply()
1005 Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited); in apply()
1031 "amdgpu-nsa-threshold", -1); in getNSAThreshold()
1059 const bool HasCalls = F.hasFnAttribute("amdgpu-calls"); in GCNUserSGPRUsageInfo()
1062 const bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects"); in GCNUserSGPRUsageInfo()
1074 if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr")) in GCNUserSGPRUsageInfo()
1078 if (!F.hasFnAttribute("amdgpu-no-queue-ptr")) in GCNUserSGPRUsageInfo()
1081 if (!F.hasFnAttribute("amdgpu-no-dispatch-id")) in GCNUserSGPRUsageInfo()
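These "amdgpu-no-*" attributes are normally inferred (for example by the AMDGPU attributor) when the corresponding intrinsic provably is not used; each one that is present frees the user SGPRs that would otherwise hold that pointer. Setting them by hand, purely as an illustration:

    #include "llvm/IR/Function.h"

    void dropDispatchAndQueuePtr(llvm::Function &F) { // hypothetical helper
      // Promise the function never needs the dispatch or queue pointer.
      F.addFnAttr("amdgpu-no-dispatch-ptr");
      F.addFnAttr("amdgpu-no-queue-ptr");
    }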
1127 return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs; in getNumFreeUserSGPRs()
1132 return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3); in getMaxNumWorkGroups()
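The attribute is read back as a vector of three integers, one per grid dimension. Assuming the comma-separated string form used by the other integer-list attributes above (illustrative values only):

    #include "llvm/IR/Function.h"

    void limitWorkGroups(llvm::Function &F) { // hypothetical helper
      F.addFnAttr("amdgpu-max-num-workgroups", "16,8,1"); // assumed X,Y,Z limits
    }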