1 //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements hazard recognizers for scheduling on PowerPC processors. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCHazardRecognizers.h" 14 #include "PPCInstrInfo.h" 15 #include "PPCSubtarget.h" 16 #include "llvm/CodeGen/ScheduleDAG.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/ErrorHandling.h" 19 #include "llvm/Support/raw_ostream.h" 20 using namespace llvm; 21 22 #define DEBUG_TYPE "pre-RA-sched" 23 24 bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { 25 // FIXME: Move this. 26 if (isBCTRAfterSet(SU)) 27 return true; 28 29 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 30 if (!MCID) 31 return false; 32 33 if (!MCID->mayLoad()) 34 return false; 35 36 // SU is a load; for any predecessors in this dispatch group, that are stores, 37 // and with which we have an ordering dependency, return true. 38 for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { 39 const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); 40 if (!PredMCID || !PredMCID->mayStore()) 41 continue; 42 43 if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) 44 continue; 45 46 for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) 47 if (SU->Preds[i].getSUnit() == CurGroup[j]) 48 return true; 49 } 50 51 return false; 52 } 53 54 bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { 55 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 56 if (!MCID) 57 return false; 58 59 if (!MCID->isBranch()) 60 return false; 61 62 // SU is a branch; for any predecessors in this dispatch group, with which we 63 // have a data dependence and set the counter register, return true. 64 for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { 65 const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); 66 if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR) 67 continue; 68 69 if (SU->Preds[i].isCtrl()) 70 continue; 71 72 for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) 73 if (SU->Preds[i].getSUnit() == CurGroup[j]) 74 return true; 75 } 76 77 return false; 78 } 79 80 // FIXME: Remove this when we don't need this: 81 namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } } 82 83 // FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific. 84 85 bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, 86 unsigned &NSlots) { 87 // FIXME: Indirectly, this information is contained in the itinerary, and 88 // we should derive it from there instead of separately specifying it 89 // here. 90 unsigned IIC = MCID->getSchedClass(); 91 switch (IIC) { 92 default: 93 NSlots = 1; 94 break; 95 case PPC::Sched::IIC_IntDivW: 96 case PPC::Sched::IIC_IntDivD: 97 case PPC::Sched::IIC_LdStLoadUpd: 98 case PPC::Sched::IIC_LdStLDU: 99 case PPC::Sched::IIC_LdStLFDU: 100 case PPC::Sched::IIC_LdStLFDUX: 101 case PPC::Sched::IIC_LdStLHA: 102 case PPC::Sched::IIC_LdStLHAU: 103 case PPC::Sched::IIC_LdStLWA: 104 case PPC::Sched::IIC_LdStSTU: 105 case PPC::Sched::IIC_LdStSTFDU: 106 NSlots = 2; 107 break; 108 case PPC::Sched::IIC_LdStLoadUpdX: 109 case PPC::Sched::IIC_LdStLDUX: 110 case PPC::Sched::IIC_LdStLHAUX: 111 case PPC::Sched::IIC_LdStLWARX: 112 case PPC::Sched::IIC_LdStLDARX: 113 case PPC::Sched::IIC_LdStSTUX: 114 case PPC::Sched::IIC_LdStSTDCX: 115 case PPC::Sched::IIC_LdStSTWCX: 116 case PPC::Sched::IIC_BrMCRX: // mtcr 117 // FIXME: Add sync/isync (here and in the itinerary). 118 NSlots = 4; 119 break; 120 } 121 122 // FIXME: record-form instructions need a different itinerary class. 123 if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1) 124 NSlots = 2; 125 126 switch (IIC) { 127 default: 128 // All multi-slot instructions must come first. 129 return NSlots > 1; 130 case PPC::Sched::IIC_BrCR: // cr logicals 131 case PPC::Sched::IIC_SprMFCR: 132 case PPC::Sched::IIC_SprMFCRF: 133 case PPC::Sched::IIC_SprMTSPR: 134 return true; 135 } 136 } 137 138 ScheduleHazardRecognizer::HazardType 139 PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 140 if (Stalls == 0 && isLoadAfterStore(SU)) 141 return NoopHazard; 142 143 return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); 144 } 145 146 bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) { 147 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 148 unsigned NSlots; 149 if (MCID && mustComeFirst(MCID, NSlots) && CurSlots) 150 return true; 151 152 return ScoreboardHazardRecognizer::ShouldPreferAnother(SU); 153 } 154 155 unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { 156 // We only need to fill out a maximum of 5 slots here: The 6th slot could 157 // only be a second branch, and otherwise the next instruction will start a 158 // new group. 159 if (isLoadAfterStore(SU) && CurSlots < 6) { 160 unsigned Directive = 161 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective(); 162 // If we're using a special group-terminating nop, then we need only one. 163 // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready 164 if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || 165 Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9) 166 return 1; 167 168 return 5 - CurSlots; 169 } 170 171 return ScoreboardHazardRecognizer::PreEmitNoops(SU); 172 } 173 174 void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { 175 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 176 if (MCID) { 177 if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) { 178 CurGroup.clear(); 179 CurSlots = CurBranches = 0; 180 } else { 181 LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: "); 182 LLVM_DEBUG(DAG->dumpNode(*SU)); 183 184 unsigned NSlots; 185 bool MustBeFirst = mustComeFirst(MCID, NSlots); 186 187 // If this instruction must come first, but does not, then it starts a 188 // new group. 189 if (MustBeFirst && CurSlots) { 190 CurSlots = CurBranches = 0; 191 CurGroup.clear(); 192 } 193 194 CurSlots += NSlots; 195 CurGroup.push_back(SU); 196 197 if (MCID->isBranch()) 198 ++CurBranches; 199 } 200 } 201 202 return ScoreboardHazardRecognizer::EmitInstruction(SU); 203 } 204 205 void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() { 206 return ScoreboardHazardRecognizer::AdvanceCycle(); 207 } 208 209 void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() { 210 llvm_unreachable("Bottom-up scheduling not supported"); 211 } 212 213 void PPCDispatchGroupSBHazardRecognizer::Reset() { 214 CurGroup.clear(); 215 CurSlots = CurBranches = 0; 216 return ScoreboardHazardRecognizer::Reset(); 217 } 218 219 void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { 220 unsigned Directive = 221 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective(); 222 // If the group has now filled all of its slots, or if we're using a special 223 // group-terminating nop, the group is complete. 224 // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready 225 if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || 226 Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 || 227 CurSlots == 6) { 228 CurGroup.clear(); 229 CurSlots = CurBranches = 0; 230 } else { 231 CurGroup.push_back(nullptr); 232 ++CurSlots; 233 } 234 } 235 236 //===----------------------------------------------------------------------===// 237 // PowerPC 970 Hazard Recognizer 238 // 239 // This models the dispatch group formation of the PPC970 processor. Dispatch 240 // groups are bundles of up to five instructions that can contain various mixes 241 // of instructions. The PPC970 can dispatch a peak of 4 non-branch and one 242 // branch instruction per-cycle. 243 // 244 // There are a number of restrictions to dispatch group formation: some 245 // instructions can only be issued in the first slot of a dispatch group, & some 246 // instructions fill an entire dispatch group. Additionally, only branches can 247 // issue in the 5th (last) slot. 248 // 249 // Finally, there are a number of "structural" hazards on the PPC970. These 250 // conditions cause large performance penalties due to misprediction, recovery, 251 // and replay logic that has to happen. These cases include setting a CTR and 252 // branching through it in the same dispatch group, and storing to an address, 253 // then loading from the same address within a dispatch group. To avoid these 254 // conditions, we insert no-op instructions when appropriate. 255 // 256 // FIXME: This is missing some significant cases: 257 // 1. Modeling of microcoded instructions. 258 // 2. Handling of serialized operations. 259 // 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". 260 // 261 262 PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG) 263 : DAG(DAG) { 264 EndDispatchGroup(); 265 } 266 267 void PPCHazardRecognizer970::EndDispatchGroup() { 268 LLVM_DEBUG(errs() << "=== Start of dispatch group\n"); 269 NumIssued = 0; 270 271 // Structural hazard info. 272 HasCTRSet = false; 273 NumStores = 0; 274 } 275 276 277 PPCII::PPC970_Unit 278 PPCHazardRecognizer970::GetInstrType(unsigned Opcode, 279 bool &isFirst, bool &isSingle, 280 bool &isCracked, 281 bool &isLoad, bool &isStore) { 282 const MCInstrDesc &MCID = DAG.TII->get(Opcode); 283 284 isLoad = MCID.mayLoad(); 285 isStore = MCID.mayStore(); 286 287 uint64_t TSFlags = MCID.TSFlags; 288 289 isFirst = TSFlags & PPCII::PPC970_First; 290 isSingle = TSFlags & PPCII::PPC970_Single; 291 isCracked = TSFlags & PPCII::PPC970_Cracked; 292 return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask); 293 } 294 295 /// isLoadOfStoredAddress - If we have a load from the previously stored pointer 296 /// as indicated by StorePtr1/StorePtr2/StoreSize, return true. 297 bool PPCHazardRecognizer970:: 298 isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, 299 const Value *LoadValue) const { 300 for (unsigned i = 0, e = NumStores; i != e; ++i) { 301 // Handle exact and commuted addresses. 302 if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i]) 303 return true; 304 305 // Okay, we don't have an exact match, if this is an indexed offset, see if 306 // we have overlap (which happens during fp->int conversion for example). 307 if (StoreValue[i] == LoadValue) { 308 // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check 309 // to see if the load and store actually overlap. 310 if (StoreOffset[i] < LoadOffset) { 311 if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true; 312 } else { 313 if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true; 314 } 315 } 316 } 317 return false; 318 } 319 320 /// getHazardType - We return hazard for any non-branch instruction that would 321 /// terminate the dispatch group. We turn NoopHazard for any 322 /// instructions that wouldn't terminate the dispatch group that would cause a 323 /// pipeline flush. 324 ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: 325 getHazardType(SUnit *SU, int Stalls) { 326 assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); 327 328 MachineInstr *MI = SU->getInstr(); 329 330 if (MI->isDebugInstr()) 331 return NoHazard; 332 333 unsigned Opcode = MI->getOpcode(); 334 bool isFirst, isSingle, isCracked, isLoad, isStore; 335 PPCII::PPC970_Unit InstrType = 336 GetInstrType(Opcode, isFirst, isSingle, isCracked, 337 isLoad, isStore); 338 if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; 339 340 // We can only issue a PPC970_First/PPC970_Single instruction (such as 341 // crand/mtspr/etc) if this is the first cycle of the dispatch group. 342 if (NumIssued != 0 && (isFirst || isSingle)) 343 return Hazard; 344 345 // If this instruction is cracked into two ops by the decoder, we know that 346 // it is not a branch and that it cannot issue if 3 other instructions are 347 // already in the dispatch group. 348 if (isCracked && NumIssued > 2) 349 return Hazard; 350 351 switch (InstrType) { 352 default: llvm_unreachable("Unknown instruction type!"); 353 case PPCII::PPC970_FXU: 354 case PPCII::PPC970_LSU: 355 case PPCII::PPC970_FPU: 356 case PPCII::PPC970_VALU: 357 case PPCII::PPC970_VPERM: 358 // We can only issue a branch as the last instruction in a group. 359 if (NumIssued == 4) return Hazard; 360 break; 361 case PPCII::PPC970_CRU: 362 // We can only issue a CR instruction in the first two slots. 363 if (NumIssued >= 2) return Hazard; 364 break; 365 case PPCII::PPC970_BRU: 366 break; 367 } 368 369 // Do not allow MTCTR and BCTRL to be in the same dispatch group. 370 if (HasCTRSet && Opcode == PPC::BCTRL) 371 return NoopHazard; 372 373 // If this is a load following a store, make sure it's not to the same or 374 // overlapping address. 375 if (isLoad && NumStores && !MI->memoperands_empty()) { 376 MachineMemOperand *MO = *MI->memoperands_begin(); 377 if (isLoadOfStoredAddress(MO->getSize(), 378 MO->getOffset(), MO->getValue())) 379 return NoopHazard; 380 } 381 382 return NoHazard; 383 } 384 385 void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { 386 MachineInstr *MI = SU->getInstr(); 387 388 if (MI->isDebugInstr()) 389 return; 390 391 unsigned Opcode = MI->getOpcode(); 392 bool isFirst, isSingle, isCracked, isLoad, isStore; 393 PPCII::PPC970_Unit InstrType = 394 GetInstrType(Opcode, isFirst, isSingle, isCracked, 395 isLoad, isStore); 396 if (InstrType == PPCII::PPC970_Pseudo) return; 397 398 // Update structural hazard information. 399 if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true; 400 401 // Track the address stored to. 402 if (isStore && NumStores < 4 && !MI->memoperands_empty()) { 403 MachineMemOperand *MO = *MI->memoperands_begin(); 404 StoreSize[NumStores] = MO->getSize(); 405 StoreOffset[NumStores] = MO->getOffset(); 406 StoreValue[NumStores] = MO->getValue(); 407 ++NumStores; 408 } 409 410 if (InstrType == PPCII::PPC970_BRU || isSingle) 411 NumIssued = 4; // Terminate a d-group. 412 ++NumIssued; 413 414 // If this instruction is cracked into two ops by the decoder, remember that 415 // we issued two pieces. 416 if (isCracked) 417 ++NumIssued; 418 419 if (NumIssued == 5) 420 EndDispatchGroup(); 421 } 422 423 void PPCHazardRecognizer970::AdvanceCycle() { 424 assert(NumIssued < 5 && "Illegal dispatch group!"); 425 ++NumIssued; 426 if (NumIssued == 5) 427 EndDispatchGroup(); 428 } 429 430 void PPCHazardRecognizer970::Reset() { 431 EndDispatchGroup(); 432 } 433 434