xref: /freebsd/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp (revision 59c8e88e72633afbc47a4ace0d2170d00d51f7dc)
//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a hazard recognizer for the SystemZ scheduler.
10 //
11 // This class is used by the SystemZ scheduling strategy to maintain
12 // the state during scheduling, and provide cost functions for
13 // scheduling candidates. This includes:
14 //
15 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
16 // instructions that always begin a new group should be scheduled when
17 // the current decoder group is empty.
18 // * Processor resources usage. It is beneficial to balance the use of
19 // resources.
20 //
21 // A goal is to consider all instructions, also those outside of any
22 // scheduling region. Such instructions are "advanced" past and include
23 // single instructions before a scheduling region, branches etc.
24 //
// A block that has only one predecessor continues scheduling with that
// predecessor's state (which may be updated by emitting branches).
27 //
28 // ===---------------------------------------------------------------------===//
29 
30 #include "SystemZHazardRecognizer.h"
31 #include "llvm/ADT/Statistic.h"
32 
33 using namespace llvm;
34 
35 #define DEBUG_TYPE "machine-scheduler"
36 
37 // This is the limit of processor resource usage at which the
38 // scheduler should try to look for other instructions (not using the
39 // critical resource).
40 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
41                                    cl::desc("The OOO window for processor "
42                                             "resources during scheduling."),
43                                    cl::init(8));
44 
45 unsigned SystemZHazardRecognizer::
46 getNumDecoderSlots(SUnit *SU) const {
47   const MCSchedClassDesc *SC = getSchedClass(SU);
48   if (!SC->isValid())
49     return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
50 
51   assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
52          "Only cracked instruction can have 2 uops.");
53   assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
54          "Expanded instructions always group alone.");
55   assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
56          "Expanded instructions fill the group(s).");
57 
58   return SC->NumMicroOps;
59 }
60 
61 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
62   unsigned Idx = CurrGroupSize;
63   if (GrpCount % 2)
64     Idx += 3;
65 
66   if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
67     if (Idx == 1 || Idx == 2)
68       Idx = 3;
69     else if (Idx == 4 || Idx == 5)
70       Idx = 0;
71   }
72 
73   return Idx;
74 }
75 
76 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
77 getHazardType(SUnit *SU, int Stalls) {
78   return (fitsIntoCurrentGroup(SU) ? NoHazard : Hazard);
79 }
80 
81 void SystemZHazardRecognizer::Reset() {
82   CurrGroupSize = 0;
83   CurrGroupHas4RegOps = false;
84   clearProcResCounters();
85   GrpCount = 0;
86   LastFPdOpCycleIdx = UINT_MAX;
87   LastEmittedMI = nullptr;
88   LLVM_DEBUG(CurGroupDbg = "";);
89 }
90 
91 bool
92 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
93   const MCSchedClassDesc *SC = getSchedClass(SU);
94   if (!SC->isValid())
95     return true;
96 
97   // A cracked instruction only fits into schedule if the current
98   // group is empty.
99   if (SC->BeginGroup)
100     return (CurrGroupSize == 0);
101 
102   // An instruction with 4 register operands will not fit in last slot.
103   assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
104           "Current decoder group is already full!");
105   if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
106     return false;
107 
108   // Since a full group is handled immediately in EmitInstruction(),
109   // SU should fit into current group. NumSlots should be 1 or 0,
110   // since it is not a cracked or expanded instruction.
111   assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
112           "Expected normal instruction to fit in non-full group!");
113 
114   return true;
115 }
116 
117 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
118   const MachineFunction &MF = *MI->getParent()->getParent();
119   const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
120   const MCInstrDesc &MID = MI->getDesc();
121   unsigned Count = 0;
122   for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
123     const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
124     if (RC == nullptr)
125       continue;
126     if (OpIdx >= MID.getNumDefs() &&
127         MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
128       continue;
129     Count++;
130   }
131   return Count >= 4;
132 }
133 
134 void SystemZHazardRecognizer::nextGroup() {
135   if (CurrGroupSize == 0)
136     return;
137 
138   LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
139   LLVM_DEBUG(CurGroupDbg = "";);
140 
141   int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
142   assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
143          "Current decoder group bad.");
144 
145   // Reset counter for next group.
146   CurrGroupSize = 0;
147   CurrGroupHas4RegOps = false;
148 
149   GrpCount += ((unsigned) NumGroups);
150 
151   // Decrease counters for execution units.
152   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
153     ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
154                                    ? (ProcResourceCounters[i] - NumGroups)
155                                    : 0);
156 
157   // Clear CriticalResourceIdx if it is now below the threshold.
158   if (CriticalResourceIdx != UINT_MAX &&
159       (ProcResourceCounters[CriticalResourceIdx] <=
160        ProcResCostLim))
161     CriticalResourceIdx = UINT_MAX;
162 
163   LLVM_DEBUG(dumpState(););
164 }
165 
166 #ifndef NDEBUG // Debug output
167 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
168   OS << "SU(" << SU->NodeNum << "):";
169   OS << TII->getName(SU->getInstr()->getOpcode());
170 
171   const MCSchedClassDesc *SC = getSchedClass(SU);
172   if (!SC->isValid())
173     return;
174 
175   for (TargetSchedModel::ProcResIter
176          PI = SchedModel->getWriteProcResBegin(SC),
177          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
178     const MCProcResourceDesc &PRD =
179       *SchedModel->getProcResource(PI->ProcResourceIdx);
180     std::string FU(PRD.Name);
181     // trim e.g. Z13_FXaUnit -> FXa
182     FU = FU.substr(FU.find('_') + 1);
183     size_t Pos = FU.find("Unit");
184     if (Pos != std::string::npos)
185       FU.resize(Pos);
186     if (FU == "LS") // LSUnit -> LSU
187       FU = "LSU";
188     OS << "/" << FU;
189 
190     if (PI->Cycles > 1)
191       OS << "(" << PI->Cycles << "cyc)";
192   }
193 
194   if (SC->NumMicroOps > 1)
195     OS << "/" << SC->NumMicroOps << "uops";
196   if (SC->BeginGroup && SC->EndGroup)
197     OS << "/GroupsAlone";
198   else if (SC->BeginGroup)
199     OS << "/BeginsGroup";
200   else if (SC->EndGroup)
201     OS << "/EndsGroup";
202   if (SU->isUnbuffered)
203     OS << "/Unbuffered";
204   if (has4RegOps(SU->getInstr()))
205     OS << "/4RegOps";
206 }
207 
208 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
209   dbgs() << "++ " << Msg;
210   dbgs() << ": ";
211 
212   if (CurGroupDbg.empty())
213     dbgs() << " <empty>\n";
214   else {
215     dbgs() << "{ " << CurGroupDbg << " }";
216     dbgs() << " (" << CurrGroupSize << " decoder slot"
217            << (CurrGroupSize > 1 ? "s":"")
218            << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
219            << ")\n";
220   }
221 }
222 
223 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
224   bool any = false;
225 
226   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
227     if (ProcResourceCounters[i] > 0) {
228       any = true;
229       break;
230     }
231 
232   if (!any)
233     return;
234 
235   dbgs() << "++ | Resource counters: ";
236   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
237     if (ProcResourceCounters[i] > 0)
238       dbgs() << SchedModel->getProcResource(i)->Name
239              << ":" << ProcResourceCounters[i] << " ";
240   dbgs() << "\n";
241 
242   if (CriticalResourceIdx != UINT_MAX)
243     dbgs() << "++ | Critical resource: "
244            << SchedModel->getProcResource(CriticalResourceIdx)->Name
245            << "\n";
246 }
247 
248 void SystemZHazardRecognizer::dumpState() const {
249   dumpCurrGroup("| Current decoder group");
250   dbgs() << "++ | Current cycle index: "
251          << getCurrCycleIdx() << "\n";
252   dumpProcResourceCounters();
253   if (LastFPdOpCycleIdx != UINT_MAX)
254     dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
255 }
256 
257 #endif //NDEBUG
258 
259 void SystemZHazardRecognizer::clearProcResCounters() {
260   ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
261   CriticalResourceIdx = UINT_MAX;
262 }
263 
264 static inline bool isBranchRetTrap(MachineInstr *MI) {
265   return (MI->isBranch() || MI->isReturn() ||
266           MI->getOpcode() == SystemZ::CondTrap);
267 }
268 
269 // Update state with SU as the next scheduled unit.
270 void SystemZHazardRecognizer::
271 EmitInstruction(SUnit *SU) {
272   const MCSchedClassDesc *SC = getSchedClass(SU);
273   LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
274              dbgs() << "\n";);
275   LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
276 
277   // If scheduling an SU that must begin a new decoder group, move on
278   // to next group.
279   if (!fitsIntoCurrentGroup(SU))
280     nextGroup();
281 
282   LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);
283              if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
284 
285   LastEmittedMI = SU->getInstr();
286 
287   // After returning from a call, we don't know much about the state.
288   if (SU->isCall) {
289     LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
290     Reset();
291     LastEmittedMI = SU->getInstr();
292     return;
293   }
294 
295   // Increase counter for execution unit(s).
296   for (TargetSchedModel::ProcResIter
297          PI = SchedModel->getWriteProcResBegin(SC),
298          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
299     // Don't handle FPd together with the other resources.
300     if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
301       continue;
302     int &CurrCounter =
303       ProcResourceCounters[PI->ProcResourceIdx];
304     CurrCounter += PI->Cycles;
305     // Check if this is now the new critical resource.
306     if ((CurrCounter > ProcResCostLim) &&
307         (CriticalResourceIdx == UINT_MAX ||
308          (PI->ProcResourceIdx != CriticalResourceIdx &&
309           CurrCounter >
310           ProcResourceCounters[CriticalResourceIdx]))) {
311       LLVM_DEBUG(
312           dbgs() << "++ New critical resource: "
313                  << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
314                  << "\n";);
315       CriticalResourceIdx = PI->ProcResourceIdx;
316     }
317   }
318 
319   // Make note of an instruction that uses a blocking resource (FPd).
320   if (SU->isUnbuffered) {
321     LastFPdOpCycleIdx = getCurrCycleIdx(SU);
322     LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
323                       << "\n";);
324   }
325 
326   // Insert SU into current group by increasing number of slots used
327   // in current group.
328   CurrGroupSize += getNumDecoderSlots(SU);
329   CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
330   unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
331   assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
332          && "SU does not fit into decoder group!");
333 
334   // Check if current group is now full/ended. If so, move on to next
335   // group to be ready to evaluate more candidates.
336   if (CurrGroupSize >= GroupLim || SC->EndGroup)
337     nextGroup();
338 }
339 
340 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
341   const MCSchedClassDesc *SC = getSchedClass(SU);
342   if (!SC->isValid())
343     return 0;
344 
345   // If SU begins new group, it can either break a current group early
346   // or fit naturally if current group is empty (negative cost).
347   if (SC->BeginGroup) {
348     if (CurrGroupSize)
349       return 3 - CurrGroupSize;
350     return -1;
351   }
352 
353   // Similarly, a group-ending SU may either fit well (last in group), or
354   // end the group prematurely.
355   if (SC->EndGroup) {
356     unsigned resultingGroupSize =
357       (CurrGroupSize + getNumDecoderSlots(SU));
358     if (resultingGroupSize < 3)
359       return (3 - resultingGroupSize);
360     return -1;
361   }
362 
363   // An instruction with 4 register operands will not fit in last slot.
364   if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
365     return 1;
366 
367   // Most instructions can be placed in any decoder slot.
368   return 0;
369 }
370 
371 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
372   assert (SU->isUnbuffered);
373   // If this is the first FPd op, it should be scheduled high.
374   if (LastFPdOpCycleIdx == UINT_MAX)
375     return true;
376   // If this is not the first PFd op, it should go into the other side
377   // of the processor to use the other FPd unit there. This should
378   // generally happen if two FPd ops are placed with 2 other
379   // instructions between them (modulo 6).
380   unsigned SUCycleIdx = getCurrCycleIdx(SU);
381   if (LastFPdOpCycleIdx > SUCycleIdx)
382     return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
383   return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
384 }
385 
386 int SystemZHazardRecognizer::
387 resourcesCost(SUnit *SU) {
388   int Cost = 0;
389 
390   const MCSchedClassDesc *SC = getSchedClass(SU);
391   if (!SC->isValid())
392     return 0;
393 
394   // For a FPd op, either return min or max value as indicated by the
395   // distance to any prior FPd op.
396   if (SU->isUnbuffered)
397     Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
398   // For other instructions, give a cost to the use of the critical resource.
399   else if (CriticalResourceIdx != UINT_MAX) {
400     for (TargetSchedModel::ProcResIter
401            PI = SchedModel->getWriteProcResBegin(SC),
402            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
403       if (PI->ProcResourceIdx == CriticalResourceIdx)
404         Cost = PI->Cycles;
405   }
406 
407   return Cost;
408 }
409 
410 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
411                                               bool TakenBranch) {
412   // Make a temporary SUnit.
413   SUnit SU(MI, 0);
414 
415   // Set interesting flags.
416   SU.isCall = MI->isCall();
417 
418   const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
419   for (const MCWriteProcResEntry &PRE :
420          make_range(SchedModel->getWriteProcResBegin(SC),
421                     SchedModel->getWriteProcResEnd(SC))) {
422     switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
423     case 0:
424       SU.hasReservedResource = true;
425       break;
426     case 1:
427       SU.isUnbuffered = true;
428       break;
429     default:
430       break;
431     }
432   }
433 
434   unsigned GroupSizeBeforeEmit = CurrGroupSize;
435   EmitInstruction(&SU);
436 
437   if (!TakenBranch && isBranchRetTrap(MI)) {
438     // NT Branch on second slot ends group.
439     if (GroupSizeBeforeEmit == 1)
440       nextGroup();
441   }
442 
443   if (TakenBranch && CurrGroupSize > 0)
444     nextGroup();
445 
446   assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
447           "Scheduler: unhandled terminator!");
448 }
449 
450 void SystemZHazardRecognizer::
451 copyState(SystemZHazardRecognizer *Incoming) {
452   // Current decoder group
453   CurrGroupSize = Incoming->CurrGroupSize;
454   LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
455 
456   // Processor resources
457   ProcResourceCounters = Incoming->ProcResourceCounters;
458   CriticalResourceIdx = Incoming->CriticalResourceIdx;
459 
460   // FPd
461   LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
462   GrpCount = Incoming->GrpCount;
463 }
464