//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca"

namespace llvm {
namespace mca {

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
      FirstReturnInst(true) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource is
  // part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->Cycles) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->Cycles;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = countPopulation(A.first);
    unsigned popcntB = countPopulation(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
    return countPopulation(Elt.first) > 1;
  });
  unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
  uint64_t ImpliedUsesOfResourceUnits = 0;

  // Remove cycles contributed by smaller resources.
  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(countPopulation(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= PowerOf2Floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;
    if (countPopulation(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      UsedResourceGroups |= (A.first ^ NormalizedMask);

      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
      if ((NormalizedMask != AvailableMask) &&
          countPopulation(AvailableMask) == 1) {
        // At simulation time, this resource group use will decay into a simple
        // use of the resource unit identified by `AvailableMask`.
        ImpliedUsesOfResourceUnits |= AvailableMask;
        UsedResourceUnits |= AvailableMask;
      }
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (countPopulation(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

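  // At this point, every Worklist entry that still had a non-zero cycle count
  // when visited has been copied into ID.Resources, and UsedResourceUnits and
  // UsedResourceGroups record which units and groups this write consumes.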
  // Look for implicit uses of processor resource units. These are resource
  // units which are indirectly consumed by resource groups, and that must
  // always be available on instruction issue.
  while (ImpliedUsesOfResourceUnits) {
    ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
    ImpliedUsesOfResourceUnits = 0;
    for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
      ResourcePlusCycles &A = Worklist[I];
      if (!A.second.size())
        continue;

      uint64_t NormalizedMask = A.first;
      assert(countPopulation(NormalizedMask) > 1);
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
      if ((NormalizedMask != AvailableMask) &&
          countPopulation(AvailableMask) != 1)
        continue;

      UsedResourceUnits |= AvailableMask;
      ImpliedUsesOfResourceUnits |= AvailableMask;
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ResourceCycles = [2, 2, 3];
  //  }
  //
  // This means:
  // Resource units HWPort0 and HWPort1 are both used for 2cy.
  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  // will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      uint64_t MaxResourceUnits = countPopulation(Mask);
      if (RPC.second.NumUnits > countPopulation(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
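  // When a sub-resource of a "Super" resource is used, every resource with a
  // non-negative buffer size whose mask strictly contains the Super's mask
  // (i.e. a buffered group containing the Super) also consumes a buffer entry.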
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tImplicitly Used Units="
           << format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

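// populateWrites() fills ID.Writes with one descriptor per register
// definition: explicit definitions first, then implicit definitions, then the
// optional definition (if present), and finally any variadic definitions.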
void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  //   vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                              @  <MCOperand Reg:59>
  //                              @  <MCOperand Imm:0>     (!!)
  //                              @  <MCOperand Reg:67>
  //                              @  <MCOperand Imm:0>
  //                              @  <MCOperand Imm:14>
  //                              @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means, register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.OpInfo[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
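  // Note that getNumOperands() also counts the optional definition (if any),
  // even though it is a write rather than a read.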
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.getImplicitUses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // into CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  unsigned SchedClassID = MCDesc.getSchedClass();
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

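  // Variant scheduling classes cannot be queried directly; they must first be
  // resolved to a concrete scheduling class for this MCInst and processor.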
  // Try to solve variant scheduling classes.
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID =
          STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence.",
        MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  ID->MayLoad = MCDesc.mayLoad();
  ID->MayStore = MCDesc.mayStore();
  ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
  ID->BeginGroup = SCDesc.BeginGroup;
  ID->EndGroup = SCDesc.EndGroup;
  ID->RetireOOO = SCDesc.RetireOOO;

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

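  // Descriptors for variadic opcodes and for variant scheduling classes depend
  // on the specific MCInst, so they are cached per instruction (in
  // VariantDescriptors) rather than per opcode.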
  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
  if (!IsVariadic && !IsVariant) {
    Descriptors[MCI.getOpcode()] = std::move(ID);
    return *Descriptors[MCI.getOpcode()];
  }

  VariantDescriptors[&MCI] = std::move(ID);
  return *VariantDescriptors[&MCI];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
    return *Descriptors[MCI.getOpcode()];

  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
    return *VariantDescriptors[&MCI];

  return createInstrDescImpl(MCI);
}

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  std::unique_ptr<Instruction> NewIS =
      std::make_unique<Instruction>(D, MCI.getOpcode());

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    NewIS->getUses().emplace_back(RD, RegID);
    ReadState &RS = NewIS->getUses().back();

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS.setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS.setIndependentFromDef();
        }
      }
    }
  }

  // Early exit if there are no writes.
  if (D.Writes.empty())
    return std::move(NewIS);

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
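  // Create a WriteState for every register definition; optional definitions
  // that are bound to NoReg are skipped.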
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    NewIS->getDefs().emplace_back(WD, RegID,
                                  /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                  /* WritesZero */ IsZeroIdiom);
    ++WriteIndex;
  }

  return std::move(NewIS);
}
} // namespace mca
} // namespace llvm