//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
      FirstReturnInst(true) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource is
  // part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
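  // For example (resource names below are hypothetical): if a model defines a
  // unit PDiv with Super = P0, and a write consumes PDiv for 3cy, then
  // SuperResources[mask(P0)] accumulates those 3 cycles. The subtraction loop
  // further below then removes only (cycles(P0) - SuperResources[mask(P0)])
  // from groups containing P0, mirroring ExpandProcResource().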
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->Cycles) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->Cycles;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = countPopulation(A.first);
    unsigned popcntB = countPopulation(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
    return countPopulation(Elt.first) > 1;
  });
  unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
  uint64_t ImpliedUsesOfResourceUnits = 0;

  // Remove cycles contributed by smaller resources.
  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(countPopulation(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= PowerOf2Floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;
    if (countPopulation(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      UsedResourceGroups |= (A.first ^ NormalizedMask);

      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
      if ((NormalizedMask != AvailableMask) &&
          countPopulation(AvailableMask) == 1) {
        // At simulation time, this resource group use will decay into a simple
        // use of the resource unit identified by `AvailableMask`.
        ImpliedUsesOfResourceUnits |= AvailableMask;
        UsedResourceUnits |= AvailableMask;
      }
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (countPopulation(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

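  // Illustrative example (hypothetical resources; super-resource adjustments
  // ignored): with a sorted worklist of
  //   P0  -> 2cy  (single unit)
  //   P01 -> 3cy  (group containing P0 and P1)
  // the loop above subtracts P0's 2 cycles from P01 and increments P01's
  // NumUnits, leaving 1cy of pressure attributed to the group itself.
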
  // Look for implicit uses of processor resource units. These are resource
  // units which are indirectly consumed by resource groups, and that must be
  // always available on instruction issue.
  while (ImpliedUsesOfResourceUnits) {
    ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
    ImpliedUsesOfResourceUnits = 0;
    for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
      ResourcePlusCycles &A = Worklist[I];
      if (!A.second.size())
        continue;

      uint64_t NormalizedMask = A.first;
      assert(countPopulation(NormalizedMask) > 1);
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
      if ((NormalizedMask != AvailableMask) &&
          countPopulation(AvailableMask) != 1)
        continue;

      UsedResourceUnits |= AvailableMask;
      ImpliedUsesOfResourceUnits |= AvailableMask;
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ResourceCycles = [2, 2, 3];
  //  }
  //
  // This means:
  // Resource units HWPort0 and HWPort1 are both used for 2cy.
  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  // will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      uint64_t MaxResourceUnits = countPopulation(Mask);
      if (RPC.second.NumUnits > countPopulation(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
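  // In other words, when a write consumes a resource that has a "Super", any
  // buffered resource whose unit mask strictly contains that "Super" (i.e. a
  // group that includes it) also has its buffer marked as used by the loop
  // below.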
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tImplicitly Used Units="
           << format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  //   vld1.32  {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                               @  <MCOperand Reg:59>
  //                               @  <MCOperand Imm:0>     (!!)
  //                               @  <MCOperand Reg:67>
  //                               @  <MCOperand Imm:0>
  //                               @  <MCOperand Imm:14>
  //                               @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means, register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.OpInfo[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
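      // Writes beyond the scheduling class' NumWriteLatencyEntries carry no
      // latency information, so conservatively fall back to MaxLatency.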
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
  // Remove the optional definition.
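  // (It is counted in getNumOperands(), but it is modeled as a write by
  // populateWrites() rather than as a read.)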
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.getImplicitUses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // in CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  unsigned SchedClassID = MCDesc.getSchedClass();
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
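  // A variant class is resolved by repeatedly querying
  // resolveVariantSchedClass() until a non-variant class is reached; a result
  // of zero means that the variant could not be resolved for this MCInst.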
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID =
          STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence.",
        MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
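  // Descriptors that only depend on the opcode (i.e. non-variadic and
  // non-variant) are cached by opcode and marked as recyclable. Variant and
  // variadic descriptors may differ from one MCInst to another, so they are
  // keyed by the MCInst pointer instead.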
  bool IsVariadic = MCDesc.isVariadic();
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    Descriptors[MCI.getOpcode()] = std::move(ID);
    return *Descriptors[MCI.getOpcode()];
  }

  VariantDescriptors[&MCI] = std::move(ID);
  return *VariantDescriptors[&MCI];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
    return *Descriptors[MCI.getOpcode()];

  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
    return *VariantDescriptors[&MCI];

  return createInstrDescImpl(MCI);
}

STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This map describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm