Lines Matching +full:s +full:- +full:mode
1 //===-- SIModeRegister.cpp - Mode Register --------------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// This pass inserts changes to the Mode register settings as required.
11 /// rounding mode setting, but is intended to be generic enough to be easily
14 //===----------------------------------------------------------------------===//
23 #define DEBUG_TYPE "si-mode-register"
25 STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
30 // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
33 unsigned Mode = 0; member
37 Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) { in Status()
38 Mode &= Mask; in Status()
43 Status merge(const Status &S) const { in merge()
44 return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask))); in merge()
47 // merge an unknown value by using the unknown value's mask to remove bits
50 return Status(Mask & ~newMask, Mode & ~newMask); in mergeUnknown()
53 // intersect two Status values to produce a mode and mask that is a subset
55 Status intersect(const Status &S) const { in intersect()
56 unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode); in intersect()
57 unsigned NewMode = (Mode & NewMask); in intersect()
61 // produce the delta required to change the Mode to the required Mode
62 Status delta(const Status &S) const { in delta()
63 return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode); in delta()
66 bool operator==(const Status &S) const { in operator ==()
67 return (Mask == S.Mask) && (Mode == S.Mode); in operator ==()
70 bool operator!=(const Status &S) const { return !(*this == S); } in operator !=()
72 bool isCompatible(Status &S) { in isCompatible()
73 return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode); in isCompatible()
76 bool isCombinable(Status &S) { return !(Mask & S.Mask) || isCompatible(S); } in isCombinable()
81 // The Status that represents the mode register settings required by the
85 // The Status that represents the net changes to the Mode register made by
89 // The Status that represents the mode register settings on exit from this
93 // The Status that represents the intersection of exit Mode register settings
97 // In Phase 1 we record the first instruction that has a mode requirement,
98 // which is used in Phase 3 if we need to insert a mode change.
117 // The default mode register setting currently only caters for the floating
118 // point double precision rounding mode.
119 // We currently assume the default rounding mode is Round to Nearest
120 // NOTE: this should come from a per function rounding mode setting once such
152 "Insert required mode register values", false, false)
160 // Determine the Mode register setting required for this instruction.
161 // Instructions which don't use the Mode register return a null Status.
166 if (TII->usesFPDPRounding(MI) || in getInstructionMode()
178 if (TII->getSubtarget().hasTrue16BitInsts()) { in getInstructionMode()
181 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64)); in getInstructionMode()
185 B.add(Src0); // re-add src0 operand in getInstructionMode()
189 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32)); in getInstructionMode()
195 if (TII->getSubtarget().hasTrue16BitInsts()) { in getInstructionMode()
198 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64)); in getInstructionMode()
202 B.add(Src0); // re-add src0 operand in getInstructionMode()
206 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32)); in getInstructionMode()
217 // Insert a setreg instruction to update the Mode register.
219 // the value of disjoint parts of the Mode register when we don't know the
227 unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1); in insertSetreg()
229 BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32)) in insertSetreg()
234 InstrMode.Mask &= ~(((1 << Width) - 1) << Offset); in insertSetreg()
239 // instruction we get its mode usage. If the instruction uses the Mode register
241 // - update the Change status, which tracks the changes to the Mode register
243 // - if this instruction's requirements are compatible with the current setting
244 // of the Mode register we merge the modes
245 // - if it isn't compatible and an InsertionPoint isn't set, then we set the
247 // mode
248 // - if it isn't compatible and InsertionPoint is set we insert a setreg before
249 // that instruction (unless this instruction forms part of the block's
253 // - if this is a setreg instruction we treat it as an incompatible instruction.
254 // This is sub-optimal but avoids some nasty corner cases, and is expected to
256 // - on exit we have set the Require, Change, and initial Exit modes.
274 // We preserve any explicit mode register setreg instruction we encounter, in processBlockPhase1()
277 unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm(); in processBlockPhase1()
287 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change)); in processBlockPhase1()
291 // not an immediate then we treat the modified bits of the mode register in processBlockPhase1()
295 unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm(); in processBlockPhase1()
296 unsigned Mode = (Val << Offset) & Mask; in processBlockPhase1() local
297 Status Setreg = Status(Mask, Mode); in processBlockPhase1()
301 NewInfo->Change = NewInfo->Change.merge(Setreg); in processBlockPhase1()
303 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask); in processBlockPhase1()
305 } else if (!NewInfo->Change.isCompatible(InstrMode)) { in processBlockPhase1()
306 // This instruction uses the Mode register and its requirements aren't in processBlockPhase1()
307 // compatible with the current mode. in processBlockPhase1()
309 // If the required mode change cannot be included in the current in processBlockPhase1()
312 if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) { in processBlockPhase1()
317 NewInfo->FirstInsertionPoint = InsertionPoint; in processBlockPhase1()
318 NewInfo->Require = NewInfo->Change; in processBlockPhase1()
322 IPChange.delta(NewInfo->Change)); in processBlockPhase1()
323 IPChange = NewInfo->Change; in processBlockPhase1()
328 NewInfo->Change = NewInfo->Change.merge(InstrMode); in processBlockPhase1()
330 // No InsertionPoint is currently set - this is either the first in in processBlockPhase1()
333 IPChange = NewInfo->Change; in processBlockPhase1()
334 NewInfo->Change = NewInfo->Change.merge(InstrMode); in processBlockPhase1()
341 NewInfo->FirstInsertionPoint = InsertionPoint; in processBlockPhase1()
342 NewInfo->Require = NewInfo->Change; in processBlockPhase1()
345 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change)); in processBlockPhase1()
347 NewInfo->Exit = NewInfo->Change; in processBlockPhase1()
351 // In Phase 2 we revisit each block and calculate the common Mode register
362 BlockInfo[ThisBlock]->Pred = DefaultStatus; in processBlockPhase2()
367 // Mask bits (which represent the Mode bits with a known value) can only be in processBlockPhase2()
368 // added by explicit SETREG instructions or the initial default value - in processBlockPhase2()
379 BlockInfo[ThisBlock]->Pred = DefaultStatus; in processBlockPhase2()
381 } else if (BlockInfo[PredBlock]->ExitSet) { in processBlockPhase2()
382 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; in processBlockPhase2()
389 unsigned PredBlock = Pred->getNumber(); in processBlockPhase2()
390 if (BlockInfo[PredBlock]->ExitSet) { in processBlockPhase2()
391 if (BlockInfo[ThisBlock]->ExitSet) { in processBlockPhase2()
392 BlockInfo[ThisBlock]->Pred = in processBlockPhase2()
393 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit); in processBlockPhase2()
395 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; in processBlockPhase2()
403 BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change); in processBlockPhase2()
404 if (BlockInfo[ThisBlock]->Exit != TmpStatus) { in processBlockPhase2()
405 BlockInfo[ThisBlock]->Exit = TmpStatus; in processBlockPhase2()
411 BlockInfo[ThisBlock]->ExitSet = ExitSet; in processBlockPhase2()
417 // check whether the predecessor mode meets the block's entry requirements. If
418 // not we insert an appropriate setreg instruction to modify the Mode register.
422 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) { in processBlockPhase3()
424 BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require); in processBlockPhase3()
425 if (BlockInfo[ThisBlock]->FirstInsertionPoint) in processBlockPhase3()
426 insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta); in processBlockPhase3()
433 // Constrained FP intrinsics are used to support non-default rounding modes. in runOnMachineFunction()
436 // a non-default rounding mode for non-strictfp functions. But it should not in runOnMachineFunction()
447 // Phase 1 - determine the initial mode required by each block, and add setreg in runOnMachineFunction()
452 // Phase 2 - determine the exit mode from each block. We add all blocks to the in runOnMachineFunction()
462 // Phase 3 - add an initial setreg to each block where the required entry mode in runOnMachineFunction()
463 // is not satisfied by the exit mode of all its predecessors. in runOnMachineFunction()