Lines Matching +full:double +full:- +full:phase
1 //===-- SIModeRegister.cpp - Mode Register --------------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Note that currently it only deals with the Double Precision Floating Point
14 //===----------------------------------------------------------------------===//
23 #define DEBUG_TYPE "si-mode-register"
82 // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
86 // this block. Calculated in Phase 1.
90 // block. Calculated in Phase 2.
94 // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
97 // In Phase 1 we record the first instruction that has a mode requirement,
98 // which is used in Phase 3 if we need to insert a mode change.
118 // point double precision rounding mode.
163 // double precision setting.
166 if (TII->usesFPDPRounding(MI) || in getInstructionMode()
173 // f16 interpolation instructions need double precision round to zero in getInstructionMode()
178 if (TII->getSubtarget().hasTrue16BitInsts()) { in getInstructionMode()
181 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64)); in getInstructionMode()
185 B.add(Src0); // re-add src0 operand in getInstructionMode()
189 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32)); in getInstructionMode()
195 if (TII->getSubtarget().hasTrue16BitInsts()) { in getInstructionMode()
198 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64)); in getInstructionMode()
202 B.add(Src0); // re-add src0 operand in getInstructionMode()
206 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32)); in getInstructionMode()
227 unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1); in insertSetreg()
229 BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32)) in insertSetreg()
234 InstrMode.Mask &= ~(((1 << Width) - 1) << Offset); in insertSetreg()
238 // In Phase 1 we iterate through the instructions of the block and for each
241 // - update the Change status, which tracks the changes to the Mode register
243 // - if this instruction's requirements are compatible with the current setting
245 // - if it isn't compatible and an InsertionPoint isn't set, then we set the
248 // - if it isn't compatible and InsertionPoint is set we insert a setreg before
250 // entry requirements in which case the insertion is deferred until Phase 3
253 // - if this is a setreg instruction we treat it as an incompatible instruction.
254 // This is sub-optimal but avoids some nasty corner cases, and is expected to
256 // - on exit we have set the Require, Change, and initial Exit modes.
263 // Phase 3. It is set to false once we have set FirstInsertionPoint, or when in processBlockPhase1()
277 unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm(); in processBlockPhase1()
287 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change)); in processBlockPhase1()
295 unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm(); in processBlockPhase1()
301 NewInfo->Change = NewInfo->Change.merge(Setreg); in processBlockPhase1()
303 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask); in processBlockPhase1()
305 } else if (!NewInfo->Change.isCompatible(InstrMode)) { in processBlockPhase1()
312 if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) { in processBlockPhase1()
315 // the insertion of the setreg to Phase 3 where we know whether or in processBlockPhase1()
317 NewInfo->FirstInsertionPoint = InsertionPoint; in processBlockPhase1()
318 NewInfo->Require = NewInfo->Change; in processBlockPhase1()
322 IPChange.delta(NewInfo->Change)); in processBlockPhase1()
323 IPChange = NewInfo->Change; in processBlockPhase1()
328 NewInfo->Change = NewInfo->Change.merge(InstrMode); in processBlockPhase1()
330 // No InsertionPoint is currently set - this is either the first in in processBlockPhase1()
333 IPChange = NewInfo->Change; in processBlockPhase1()
334 NewInfo->Change = NewInfo->Change.merge(InstrMode); in processBlockPhase1()
341 NewInfo->FirstInsertionPoint = InsertionPoint; in processBlockPhase1()
342 NewInfo->Require = NewInfo->Change; in processBlockPhase1()
345 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change)); in processBlockPhase1()
347 NewInfo->Exit = NewInfo->Change; in processBlockPhase1()
351 // In Phase 2 we revisit each block and calculate the common Mode register
362 BlockInfo[ThisBlock]->Pred = DefaultStatus; in processBlockPhase2()
368 // added by explicit SETREG instructions or the initial default value - in processBlockPhase2()
379 BlockInfo[ThisBlock]->Pred = DefaultStatus; in processBlockPhase2()
381 } else if (BlockInfo[PredBlock]->ExitSet) { in processBlockPhase2()
382 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; in processBlockPhase2()
389 unsigned PredBlock = Pred->getNumber(); in processBlockPhase2()
390 if (BlockInfo[PredBlock]->ExitSet) { in processBlockPhase2()
391 if (BlockInfo[ThisBlock]->ExitSet) { in processBlockPhase2()
392 BlockInfo[ThisBlock]->Pred = in processBlockPhase2()
393 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit); in processBlockPhase2()
395 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; in processBlockPhase2()
403 BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change); in processBlockPhase2()
404 if (BlockInfo[ThisBlock]->Exit != TmpStatus) { in processBlockPhase2()
405 BlockInfo[ThisBlock]->Exit = TmpStatus; in processBlockPhase2()
411 BlockInfo[ThisBlock]->ExitSet = ExitSet; in processBlockPhase2()
416 // In Phase 3 we revisit each block and if it has an insertion point defined we
422 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) { in processBlockPhase3()
424 BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require); in processBlockPhase3()
425 if (BlockInfo[ThisBlock]->FirstInsertionPoint) in processBlockPhase3()
426 insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta); in processBlockPhase3()
433 // Constrained FP intrinsics are used to support non-default rounding modes. in runOnMachineFunction()
436 // a non-default rounding mode for non-strictfp functions. But it should not in runOnMachineFunction()
447 // Phase 1 - determine the initial mode required by each block, and add setreg in runOnMachineFunction()
452 // Phase 2 - determine the exit mode from each block. We add all blocks to the in runOnMachineFunction()
453 // list here, but will also add any that need to be revisited during Phase 2 in runOnMachineFunction()
462 // Phase 3 - add an initial setreg to each block where the required entry mode in runOnMachineFunction()