Lines Matching +full:wait +full:- +full:state
1 //===- AMDGPUInsertDelayAlu.cpp - Insert s_delay_alu instructions ---------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
22 #define DEBUG_TYPE "amdgpu-insert-delay-alu"
44 // These instruction types wait for VA_VDST==0 before issuing. in instructionWaitsForVALU()
74 // regunit. In straight-line code there will only be one such instruction, but
76 // to represent the union of the worst-case delays of each type.
78 // One larger than the maximum number of (non-TRANS) VALU instructions we
90 // If it was written by a (non-TRANS) VALU, remember how many clock cycles
91 // are left until it completes, and how many other (non-TRANS) VALU we have
101 // Also remember how many other (non-TRANS) VALU we have seen since it was
103 // non-TRANS VALU, this is used to decide whether to encode a wait for just
127 // Guard against pseudo-instructions like SI_CALL which are marked as in DelayInfo()
143 // worst-case delays of each type.
154 // when issuing a (non-TRANS) VALU, else 0. IsTRANS should be 1 when issuing
167 VALUCycles -= Cycles; in advance()
180 TRANSCycles -= Cycles; in advance()
189 SALUCycles -= Cycles; in advance()
224 It->second.merge(KV.second); in merge()
234 if (I->second.advance(Type, Cycles)) in advance()
252 return A->first < B->first; in dump()
255 dbgs() << " " << printRegUnit(I->first, TRI); in dump()
256 I->second.dump(); in dump()
263 // The saved delay state at the end of each basic block.
271 // Wait for a TRANS instruction. in emitDelayAlu()
275 // Wait for a VALU instruction (if it's more recent than any TRANS in emitDelayAlu()
285 // Wait for an SALU instruction. in emitDelayAlu()
298 // Don't emit the s_delay_alu instruction if there's nothing to wait for. in emitDelayAlu()
302 // If we only need to wait for one instruction, try encoding it in the last in emitDelayAlu()
309 if (!I->isBundle() && !I->isMetaInstruction()) in emitDelayAlu()
313 MachineOperand &Op = LastDelayAlu->getOperand(0); in emitDelayAlu()
325 BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_DELAY_ALU)).addImm(Imm); in emitDelayAlu()
332 DelayState State; in runOnMachineBasicBlock() local
334 State.merge(BlockState[Pred]); in runOnMachineBasicBlock()
336 LLVM_DEBUG(dbgs() << " State at start of " << printMBBReference(MBB) in runOnMachineBasicBlock()
338 State.dump(TRI);); in runOnMachineBasicBlock()
360 State = DelayState(); in runOnMachineBasicBlock()
371 for (MCRegUnit Unit : TRI->regunits(Op.getReg())) { in runOnMachineBasicBlock()
372 auto It = State.find(Unit); in runOnMachineBasicBlock()
373 if (It != State.end()) { in runOnMachineBasicBlock()
374 Delay.merge(It->second); in runOnMachineBasicBlock()
375 State.erase(Unit); in runOnMachineBasicBlock()
381 // TODO: For VALU->SALU delays should we use s_delay_alu or s_nop or in runOnMachineBasicBlock()
392 for (MCRegUnit Unit : TRI->regunits(Op.getReg())) in runOnMachineBasicBlock()
393 State[Unit] = DelayInfo(Type, Latency); in runOnMachineBasicBlock()
404 State.advance(Type, Cycles); in runOnMachineBasicBlock()
406 LLVM_DEBUG(dbgs() << " State after " << MI; State.dump(TRI);); in runOnMachineBasicBlock()
410 assert(State == BlockState[&MBB] && in runOnMachineBasicBlock()
411 "Basic block state should not have changed on final pass!"); in runOnMachineBasicBlock()
412 } else if (State != BlockState[&MBB]) { in runOnMachineBasicBlock()
413 BlockState[&MBB] = std::move(State); in runOnMachineBasicBlock()
435 // Calculate the delay state for each basic block, iterating until we reach in runOnMachineFunction()