1 //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a hazard recognizer for the SystemZ scheduler. 10 // 11 // This class is used by the SystemZ scheduling strategy to maintain 12 // the state during scheduling, and provide cost functions for 13 // scheduling candidates. This includes: 14 // 15 // * Decoder grouping. A decoder group can maximally hold 3 uops, and 16 // instructions that always begin a new group should be scheduled when 17 // the current decoder group is empty. 18 // * Processor resources usage. It is beneficial to balance the use of 19 // resources. 20 // 21 // A goal is to consider all instructions, also those outside of any 22 // scheduling region. Such instructions are "advanced" past and include 23 // single instructions before a scheduling region, branches etc. 24 // 25 // A block that has only one predecessor continues scheduling with the state 26 // of it (which may be updated by emitting branches). 27 // 28 // ===---------------------------------------------------------------------===// 29 30 #include "SystemZHazardRecognizer.h" 31 #include "llvm/ADT/Statistic.h" 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "machine-scheduler" 36 37 // This is the limit of processor resource usage at which the 38 // scheduler should try to look for other instructions (not using the 39 // critical resource). 40 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, 41 cl::desc("The OOO window for processor " 42 "resources during scheduling."), 43 cl::init(8)); 44 45 unsigned SystemZHazardRecognizer:: 46 getNumDecoderSlots(SUnit *SU) const { 47 const MCSchedClassDesc *SC = getSchedClass(SU); 48 if (!SC->isValid()) 49 return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. 50 51 assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) && 52 "Only cracked instruction can have 2 uops."); 53 assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) && 54 "Expanded instructions always group alone."); 55 assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) && 56 "Expanded instructions fill the group(s)."); 57 58 return SC->NumMicroOps; 59 } 60 61 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const { 62 unsigned Idx = CurrGroupSize; 63 if (GrpCount % 2) 64 Idx += 3; 65 66 if (SU != nullptr && !fitsIntoCurrentGroup(SU)) { 67 if (Idx == 1 || Idx == 2) 68 Idx = 3; 69 else if (Idx == 4 || Idx == 5) 70 Idx = 0; 71 } 72 73 return Idx; 74 } 75 76 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: 77 getHazardType(SUnit *SU, int Stalls) { 78 return (fitsIntoCurrentGroup(SU) ? NoHazard : Hazard); 79 } 80 81 void SystemZHazardRecognizer::Reset() { 82 CurrGroupSize = 0; 83 CurrGroupHas4RegOps = false; 84 clearProcResCounters(); 85 GrpCount = 0; 86 LastFPdOpCycleIdx = UINT_MAX; 87 LastEmittedMI = nullptr; 88 LLVM_DEBUG(CurGroupDbg = "";); 89 } 90 91 bool 92 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { 93 const MCSchedClassDesc *SC = getSchedClass(SU); 94 if (!SC->isValid()) 95 return true; 96 97 // A cracked instruction only fits into schedule if the current 98 // group is empty. 99 if (SC->BeginGroup) 100 return (CurrGroupSize == 0); 101 102 // An instruction with 4 register operands will not fit in last slot. 103 assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) && 104 "Current decoder group is already full!"); 105 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) 106 return false; 107 108 // Since a full group is handled immediately in EmitInstruction(), 109 // SU should fit into current group. NumSlots should be 1 or 0, 110 // since it is not a cracked or expanded instruction. 111 assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) && 112 "Expected normal instruction to fit in non-full group!"); 113 114 return true; 115 } 116 117 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const { 118 const MachineFunction &MF = *MI->getParent()->getParent(); 119 const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); 120 const MCInstrDesc &MID = MI->getDesc(); 121 unsigned Count = 0; 122 for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) { 123 const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF); 124 if (RC == nullptr) 125 continue; 126 if (OpIdx >= MID.getNumDefs() && 127 MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) 128 continue; 129 Count++; 130 } 131 return Count >= 4; 132 } 133 134 void SystemZHazardRecognizer::nextGroup() { 135 if (CurrGroupSize == 0) 136 return; 137 138 LLVM_DEBUG(dumpCurrGroup("Completed decode group")); 139 LLVM_DEBUG(CurGroupDbg = "";); 140 141 int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1); 142 assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) && 143 "Current decoder group bad."); 144 145 // Reset counter for next group. 146 CurrGroupSize = 0; 147 CurrGroupHas4RegOps = false; 148 149 GrpCount += ((unsigned) NumGroups); 150 151 // Decrease counters for execution units. 152 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 153 ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups) 154 ? (ProcResourceCounters[i] - NumGroups) 155 : 0); 156 157 // Clear CriticalResourceIdx if it is now below the threshold. 158 if (CriticalResourceIdx != UINT_MAX && 159 (ProcResourceCounters[CriticalResourceIdx] <= 160 ProcResCostLim)) 161 CriticalResourceIdx = UINT_MAX; 162 163 LLVM_DEBUG(dumpState();); 164 } 165 166 #ifndef NDEBUG // Debug output 167 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { 168 OS << "SU(" << SU->NodeNum << "):"; 169 OS << TII->getName(SU->getInstr()->getOpcode()); 170 171 const MCSchedClassDesc *SC = getSchedClass(SU); 172 if (!SC->isValid()) 173 return; 174 175 for (TargetSchedModel::ProcResIter 176 PI = SchedModel->getWriteProcResBegin(SC), 177 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 178 const MCProcResourceDesc &PRD = 179 *SchedModel->getProcResource(PI->ProcResourceIdx); 180 std::string FU(PRD.Name); 181 // trim e.g. Z13_FXaUnit -> FXa 182 FU = FU.substr(FU.find('_') + 1); 183 size_t Pos = FU.find("Unit"); 184 if (Pos != std::string::npos) 185 FU.resize(Pos); 186 if (FU == "LS") // LSUnit -> LSU 187 FU = "LSU"; 188 OS << "/" << FU; 189 190 if (PI->Cycles > 1) 191 OS << "(" << PI->Cycles << "cyc)"; 192 } 193 194 if (SC->NumMicroOps > 1) 195 OS << "/" << SC->NumMicroOps << "uops"; 196 if (SC->BeginGroup && SC->EndGroup) 197 OS << "/GroupsAlone"; 198 else if (SC->BeginGroup) 199 OS << "/BeginsGroup"; 200 else if (SC->EndGroup) 201 OS << "/EndsGroup"; 202 if (SU->isUnbuffered) 203 OS << "/Unbuffered"; 204 if (has4RegOps(SU->getInstr())) 205 OS << "/4RegOps"; 206 } 207 208 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { 209 dbgs() << "++ " << Msg; 210 dbgs() << ": "; 211 212 if (CurGroupDbg.empty()) 213 dbgs() << " <empty>\n"; 214 else { 215 dbgs() << "{ " << CurGroupDbg << " }"; 216 dbgs() << " (" << CurrGroupSize << " decoder slot" 217 << (CurrGroupSize > 1 ? "s":"") 218 << (CurrGroupHas4RegOps ? ", 4RegOps" : "") 219 << ")\n"; 220 } 221 } 222 223 void SystemZHazardRecognizer::dumpProcResourceCounters() const { 224 bool any = false; 225 226 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 227 if (ProcResourceCounters[i] > 0) { 228 any = true; 229 break; 230 } 231 232 if (!any) 233 return; 234 235 dbgs() << "++ | Resource counters: "; 236 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 237 if (ProcResourceCounters[i] > 0) 238 dbgs() << SchedModel->getProcResource(i)->Name 239 << ":" << ProcResourceCounters[i] << " "; 240 dbgs() << "\n"; 241 242 if (CriticalResourceIdx != UINT_MAX) 243 dbgs() << "++ | Critical resource: " 244 << SchedModel->getProcResource(CriticalResourceIdx)->Name 245 << "\n"; 246 } 247 248 void SystemZHazardRecognizer::dumpState() const { 249 dumpCurrGroup("| Current decoder group"); 250 dbgs() << "++ | Current cycle index: " 251 << getCurrCycleIdx() << "\n"; 252 dumpProcResourceCounters(); 253 if (LastFPdOpCycleIdx != UINT_MAX) 254 dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n"; 255 } 256 257 #endif //NDEBUG 258 259 void SystemZHazardRecognizer::clearProcResCounters() { 260 ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0); 261 CriticalResourceIdx = UINT_MAX; 262 } 263 264 static inline bool isBranchRetTrap(MachineInstr *MI) { 265 return (MI->isBranch() || MI->isReturn() || 266 MI->getOpcode() == SystemZ::CondTrap); 267 } 268 269 // Update state with SU as the next scheduled unit. 270 void SystemZHazardRecognizer:: 271 EmitInstruction(SUnit *SU) { 272 const MCSchedClassDesc *SC = getSchedClass(SU); 273 LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs()); 274 dbgs() << "\n";); 275 LLVM_DEBUG(dumpCurrGroup("Decode group before emission");); 276 277 // If scheduling an SU that must begin a new decoder group, move on 278 // to next group. 279 if (!fitsIntoCurrentGroup(SU)) 280 nextGroup(); 281 282 LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg); 283 if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd);); 284 285 LastEmittedMI = SU->getInstr(); 286 287 // After returning from a call, we don't know much about the state. 288 if (SU->isCall) { 289 LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";); 290 Reset(); 291 LastEmittedMI = SU->getInstr(); 292 return; 293 } 294 295 // Increase counter for execution unit(s). 296 for (TargetSchedModel::ProcResIter 297 PI = SchedModel->getWriteProcResBegin(SC), 298 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 299 // Don't handle FPd together with the other resources. 300 if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1) 301 continue; 302 int &CurrCounter = 303 ProcResourceCounters[PI->ProcResourceIdx]; 304 CurrCounter += PI->Cycles; 305 // Check if this is now the new critical resource. 306 if ((CurrCounter > ProcResCostLim) && 307 (CriticalResourceIdx == UINT_MAX || 308 (PI->ProcResourceIdx != CriticalResourceIdx && 309 CurrCounter > 310 ProcResourceCounters[CriticalResourceIdx]))) { 311 LLVM_DEBUG( 312 dbgs() << "++ New critical resource: " 313 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name 314 << "\n";); 315 CriticalResourceIdx = PI->ProcResourceIdx; 316 } 317 } 318 319 // Make note of an instruction that uses a blocking resource (FPd). 320 if (SU->isUnbuffered) { 321 LastFPdOpCycleIdx = getCurrCycleIdx(SU); 322 LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx 323 << "\n";); 324 } 325 326 // Insert SU into current group by increasing number of slots used 327 // in current group. 328 CurrGroupSize += getNumDecoderSlots(SU); 329 CurrGroupHas4RegOps |= has4RegOps(SU->getInstr()); 330 unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3); 331 assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU)) 332 && "SU does not fit into decoder group!"); 333 334 // Check if current group is now full/ended. If so, move on to next 335 // group to be ready to evaluate more candidates. 336 if (CurrGroupSize >= GroupLim || SC->EndGroup) 337 nextGroup(); 338 } 339 340 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { 341 const MCSchedClassDesc *SC = getSchedClass(SU); 342 if (!SC->isValid()) 343 return 0; 344 345 // If SU begins new group, it can either break a current group early 346 // or fit naturally if current group is empty (negative cost). 347 if (SC->BeginGroup) { 348 if (CurrGroupSize) 349 return 3 - CurrGroupSize; 350 return -1; 351 } 352 353 // Similarly, a group-ending SU may either fit well (last in group), or 354 // end the group prematurely. 355 if (SC->EndGroup) { 356 unsigned resultingGroupSize = 357 (CurrGroupSize + getNumDecoderSlots(SU)); 358 if (resultingGroupSize < 3) 359 return (3 - resultingGroupSize); 360 return -1; 361 } 362 363 // An instruction with 4 register operands will not fit in last slot. 364 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) 365 return 1; 366 367 // Most instructions can be placed in any decoder slot. 368 return 0; 369 } 370 371 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const { 372 assert (SU->isUnbuffered); 373 // If this is the first FPd op, it should be scheduled high. 374 if (LastFPdOpCycleIdx == UINT_MAX) 375 return true; 376 // If this is not the first PFd op, it should go into the other side 377 // of the processor to use the other FPd unit there. This should 378 // generally happen if two FPd ops are placed with 2 other 379 // instructions between them (modulo 6). 380 unsigned SUCycleIdx = getCurrCycleIdx(SU); 381 if (LastFPdOpCycleIdx > SUCycleIdx) 382 return ((LastFPdOpCycleIdx - SUCycleIdx) == 3); 383 return ((SUCycleIdx - LastFPdOpCycleIdx) == 3); 384 } 385 386 int SystemZHazardRecognizer:: 387 resourcesCost(SUnit *SU) { 388 int Cost = 0; 389 390 const MCSchedClassDesc *SC = getSchedClass(SU); 391 if (!SC->isValid()) 392 return 0; 393 394 // For a FPd op, either return min or max value as indicated by the 395 // distance to any prior FPd op. 396 if (SU->isUnbuffered) 397 Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX); 398 // For other instructions, give a cost to the use of the critical resource. 399 else if (CriticalResourceIdx != UINT_MAX) { 400 for (TargetSchedModel::ProcResIter 401 PI = SchedModel->getWriteProcResBegin(SC), 402 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) 403 if (PI->ProcResourceIdx == CriticalResourceIdx) 404 Cost = PI->Cycles; 405 } 406 407 return Cost; 408 } 409 410 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, 411 bool TakenBranch) { 412 // Make a temporary SUnit. 413 SUnit SU(MI, 0); 414 415 // Set interesting flags. 416 SU.isCall = MI->isCall(); 417 418 const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); 419 for (const MCWriteProcResEntry &PRE : 420 make_range(SchedModel->getWriteProcResBegin(SC), 421 SchedModel->getWriteProcResEnd(SC))) { 422 switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { 423 case 0: 424 SU.hasReservedResource = true; 425 break; 426 case 1: 427 SU.isUnbuffered = true; 428 break; 429 default: 430 break; 431 } 432 } 433 434 unsigned GroupSizeBeforeEmit = CurrGroupSize; 435 EmitInstruction(&SU); 436 437 if (!TakenBranch && isBranchRetTrap(MI)) { 438 // NT Branch on second slot ends group. 439 if (GroupSizeBeforeEmit == 1) 440 nextGroup(); 441 } 442 443 if (TakenBranch && CurrGroupSize > 0) 444 nextGroup(); 445 446 assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && 447 "Scheduler: unhandled terminator!"); 448 } 449 450 void SystemZHazardRecognizer:: 451 copyState(SystemZHazardRecognizer *Incoming) { 452 // Current decoder group 453 CurrGroupSize = Incoming->CurrGroupSize; 454 LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;); 455 456 // Processor resources 457 ProcResourceCounters = Incoming->ProcResourceCounters; 458 CriticalResourceIdx = Incoming->CriticalResourceIdx; 459 460 // FPd 461 LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; 462 GrpCount = Incoming->GrpCount; 463 } 464