1 //===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file implements the InstrBuilder interface. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/MCA/InstrBuilder.h" 15 #include "llvm/ADT/APInt.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/MC/MCInst.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/WithColor.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #define DEBUG_TYPE "llvm-mca" 23 24 namespace llvm { 25 namespace mca { 26 27 InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, 28 const llvm::MCInstrInfo &mcii, 29 const llvm::MCRegisterInfo &mri, 30 const llvm::MCInstrAnalysis *mcia) 31 : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true), 32 FirstReturnInst(true) { 33 const MCSchedModel &SM = STI.getSchedModel(); 34 ProcResourceMasks.resize(SM.getNumProcResourceKinds()); 35 computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); 36 } 37 38 static void initializeUsedResources(InstrDesc &ID, 39 const MCSchedClassDesc &SCDesc, 40 const MCSubtargetInfo &STI, 41 ArrayRef<uint64_t> ProcResourceMasks) { 42 const MCSchedModel &SM = STI.getSchedModel(); 43 44 // Populate resources consumed. 45 using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>; 46 std::vector<ResourcePlusCycles> Worklist; 47 48 // Track cycles contributed by resources that are in a "Super" relationship. 49 // This is required if we want to correctly match the behavior of method 50 // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set 51 // of "consumed" processor resources and resource cycles, the logic in 52 // ExpandProcResource() doesn't update the number of resource cycles 53 // contributed by a "Super" resource to a group. 54 // We need to take this into account when we find that a processor resource is 55 // part of a group, and it is also used as the "Super" of other resources. 56 // This map stores the number of cycles contributed by sub-resources that are 57 // part of a "Super" resource. The key value is the "Super" resource mask ID. 58 DenseMap<uint64_t, unsigned> SuperResources; 59 60 unsigned NumProcResources = SM.getNumProcResourceKinds(); 61 APInt Buffers(NumProcResources, 0); 62 63 bool AllInOrderResources = true; 64 bool AnyDispatchHazards = false; 65 for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { 66 const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; 67 const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); 68 if (!PRE->Cycles) { 69 #ifndef NDEBUG 70 WithColor::warning() 71 << "Ignoring invalid write of zero cycles on processor resource " 72 << PR.Name << "\n"; 73 WithColor::note() << "found in scheduling class " << SCDesc.Name 74 << " (write index #" << I << ")\n"; 75 #endif 76 continue; 77 } 78 79 uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; 80 if (PR.BufferSize < 0) { 81 AllInOrderResources = false; 82 } else { 83 Buffers.setBit(getResourceStateIndex(Mask)); 84 AnyDispatchHazards |= (PR.BufferSize == 0); 85 AllInOrderResources &= (PR.BufferSize <= 1); 86 } 87 88 CycleSegment RCy(0, PRE->Cycles, false); 89 Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); 90 if (PR.SuperIdx) { 91 uint64_t Super = ProcResourceMasks[PR.SuperIdx]; 92 SuperResources[Super] += PRE->Cycles; 93 } 94 } 95 96 ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards; 97 98 // Sort elements by mask popcount, so that we prioritize resource units over 99 // resource groups, and smaller groups over larger groups. 100 sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { 101 unsigned popcntA = countPopulation(A.first); 102 unsigned popcntB = countPopulation(B.first); 103 if (popcntA < popcntB) 104 return true; 105 if (popcntA > popcntB) 106 return false; 107 return A.first < B.first; 108 }); 109 110 uint64_t UsedResourceUnits = 0; 111 uint64_t UsedResourceGroups = 0; 112 113 // Remove cycles contributed by smaller resources. 114 for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { 115 ResourcePlusCycles &A = Worklist[I]; 116 if (!A.second.size()) { 117 assert(countPopulation(A.first) > 1 && "Expected a group!"); 118 UsedResourceGroups |= PowerOf2Floor(A.first); 119 continue; 120 } 121 122 ID.Resources.emplace_back(A); 123 uint64_t NormalizedMask = A.first; 124 if (countPopulation(A.first) == 1) { 125 UsedResourceUnits |= A.first; 126 } else { 127 // Remove the leading 1 from the resource group mask. 128 NormalizedMask ^= PowerOf2Floor(NormalizedMask); 129 UsedResourceGroups |= (A.first ^ NormalizedMask); 130 } 131 132 for (unsigned J = I + 1; J < E; ++J) { 133 ResourcePlusCycles &B = Worklist[J]; 134 if ((NormalizedMask & B.first) == NormalizedMask) { 135 B.second.CS.subtract(A.second.size() - SuperResources[A.first]); 136 if (countPopulation(B.first) > 1) 137 B.second.NumUnits++; 138 } 139 } 140 } 141 142 // A SchedWrite may specify a number of cycles in which a resource group 143 // is reserved. For example (on target x86; cpu Haswell): 144 // 145 // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { 146 // let ResourceCycles = [2, 2, 3]; 147 // } 148 // 149 // This means: 150 // Resource units HWPort0 and HWPort1 are both used for 2cy. 151 // Resource group HWPort01 is the union of HWPort0 and HWPort1. 152 // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 153 // will not be usable for 2 entire cycles from instruction issue. 154 // 155 // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency 156 // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an 157 // extra delay on top of the 2 cycles latency. 158 // During those extra cycles, HWPort01 is not usable by other instructions. 159 for (ResourcePlusCycles &RPC : ID.Resources) { 160 if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) { 161 // Remove the leading 1 from the resource group mask. 162 uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); 163 uint64_t MaxResourceUnits = countPopulation(Mask); 164 if (RPC.second.NumUnits > countPopulation(Mask)) { 165 RPC.second.setReserved(); 166 RPC.second.NumUnits = MaxResourceUnits; 167 } 168 } 169 } 170 171 // Identify extra buffers that are consumed through super resources. 172 for (const std::pair<uint64_t, unsigned> &SR : SuperResources) { 173 for (unsigned I = 1, E = NumProcResources; I < E; ++I) { 174 const MCProcResourceDesc &PR = *SM.getProcResource(I); 175 if (PR.BufferSize == -1) 176 continue; 177 178 uint64_t Mask = ProcResourceMasks[I]; 179 if (Mask != SR.first && ((Mask & SR.first) == SR.first)) 180 Buffers.setBit(getResourceStateIndex(Mask)); 181 } 182 } 183 184 ID.UsedBuffers = Buffers.getZExtValue(); 185 ID.UsedProcResUnits = UsedResourceUnits; 186 ID.UsedProcResGroups = UsedResourceGroups; 187 188 LLVM_DEBUG({ 189 for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources) 190 dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", " 191 << "Reserved=" << R.second.isReserved() << ", " 192 << "#Units=" << R.second.NumUnits << ", " 193 << "cy=" << R.second.size() << '\n'; 194 uint64_t BufferIDs = ID.UsedBuffers; 195 while (BufferIDs) { 196 uint64_t Current = BufferIDs & (-BufferIDs); 197 dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n'; 198 BufferIDs ^= Current; 199 } 200 dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n'; 201 dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16) 202 << '\n'; 203 }); 204 } 205 206 static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, 207 const MCSchedClassDesc &SCDesc, 208 const MCSubtargetInfo &STI) { 209 if (MCDesc.isCall()) { 210 // We cannot estimate how long this call will take. 211 // Artificially set an arbitrarily high latency (100cy). 212 ID.MaxLatency = 100U; 213 return; 214 } 215 216 int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); 217 // If latency is unknown, then conservatively assume a MaxLatency of 100cy. 218 ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency); 219 } 220 221 static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) { 222 // Count register definitions, and skip non register operands in the process. 223 unsigned I, E; 224 unsigned NumExplicitDefs = MCDesc.getNumDefs(); 225 for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) { 226 const MCOperand &Op = MCI.getOperand(I); 227 if (Op.isReg()) 228 --NumExplicitDefs; 229 } 230 231 if (NumExplicitDefs) { 232 return make_error<InstructionError<MCInst>>( 233 "Expected more register operand definitions.", MCI); 234 } 235 236 if (MCDesc.hasOptionalDef()) { 237 // Always assume that the optional definition is the last operand. 238 const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1); 239 if (I == MCI.getNumOperands() || !Op.isReg()) { 240 std::string Message = 241 "expected a register operand for an optional definition. Instruction " 242 "has not been correctly analyzed."; 243 return make_error<InstructionError<MCInst>>(Message, MCI); 244 } 245 } 246 247 return ErrorSuccess(); 248 } 249 250 void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, 251 unsigned SchedClassID) { 252 const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); 253 const MCSchedModel &SM = STI.getSchedModel(); 254 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); 255 256 // Assumptions made by this algorithm: 257 // 1. The number of explicit and implicit register definitions in a MCInst 258 // matches the number of explicit and implicit definitions according to 259 // the opcode descriptor (MCInstrDesc). 260 // 2. Uses start at index #(MCDesc.getNumDefs()). 261 // 3. There can only be a single optional register definition, an it is 262 // either the last operand of the sequence (excluding extra operands 263 // contributed by variadic opcodes) or one of the explicit register 264 // definitions. The latter occurs for some Thumb1 instructions. 265 // 266 // These assumptions work quite well for most out-of-order in-tree targets 267 // like x86. This is mainly because the vast majority of instructions is 268 // expanded to MCInst using a straightforward lowering logic that preserves 269 // the ordering of the operands. 270 // 271 // About assumption 1. 272 // The algorithm allows non-register operands between register operand 273 // definitions. This helps to handle some special ARM instructions with 274 // implicit operand increment (-mtriple=armv7): 275 // 276 // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed 277 // @ <MCOperand Reg:59> 278 // @ <MCOperand Imm:0> (!!) 279 // @ <MCOperand Reg:67> 280 // @ <MCOperand Imm:0> 281 // @ <MCOperand Imm:14> 282 // @ <MCOperand Reg:0>> 283 // 284 // MCDesc reports: 285 // 6 explicit operands. 286 // 1 optional definition 287 // 2 explicit definitions (!!) 288 // 289 // The presence of an 'Imm' operand between the two register definitions 290 // breaks the assumption that "register definitions are always at the 291 // beginning of the operand sequence". 292 // 293 // To workaround this issue, this algorithm ignores (i.e. skips) any 294 // non-register operands between register definitions. The optional 295 // definition is still at index #(NumOperands-1). 296 // 297 // According to assumption 2. register reads start at #(NumExplicitDefs-1). 298 // That means, register R1 from the example is both read and written. 299 unsigned NumExplicitDefs = MCDesc.getNumDefs(); 300 unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs(); 301 unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; 302 unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; 303 if (MCDesc.hasOptionalDef()) 304 TotalDefs++; 305 306 unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); 307 ID.Writes.resize(TotalDefs + NumVariadicOps); 308 // Iterate over the operands list, and skip non-register operands. 309 // The first NumExplicitDefs register operands are expected to be register 310 // definitions. 311 unsigned CurrentDef = 0; 312 unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1; 313 unsigned i = 0; 314 for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { 315 const MCOperand &Op = MCI.getOperand(i); 316 if (!Op.isReg()) 317 continue; 318 319 if (MCDesc.OpInfo[CurrentDef].isOptionalDef()) { 320 OptionalDefIdx = CurrentDef++; 321 continue; 322 } 323 324 WriteDescriptor &Write = ID.Writes[CurrentDef]; 325 Write.OpIndex = i; 326 if (CurrentDef < NumWriteLatencyEntries) { 327 const MCWriteLatencyEntry &WLE = 328 *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); 329 // Conservatively default to MaxLatency. 330 Write.Latency = 331 WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); 332 Write.SClassOrWriteResourceID = WLE.WriteResourceID; 333 } else { 334 // Assign a default latency for this write. 335 Write.Latency = ID.MaxLatency; 336 Write.SClassOrWriteResourceID = 0; 337 } 338 Write.IsOptionalDef = false; 339 LLVM_DEBUG({ 340 dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex 341 << ", Latency=" << Write.Latency 342 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 343 }); 344 CurrentDef++; 345 } 346 347 assert(CurrentDef == NumExplicitDefs && 348 "Expected more register operand definitions."); 349 for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { 350 unsigned Index = NumExplicitDefs + CurrentDef; 351 WriteDescriptor &Write = ID.Writes[Index]; 352 Write.OpIndex = ~CurrentDef; 353 Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef]; 354 if (Index < NumWriteLatencyEntries) { 355 const MCWriteLatencyEntry &WLE = 356 *STI.getWriteLatencyEntry(&SCDesc, Index); 357 // Conservatively default to MaxLatency. 358 Write.Latency = 359 WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); 360 Write.SClassOrWriteResourceID = WLE.WriteResourceID; 361 } else { 362 // Assign a default latency for this write. 363 Write.Latency = ID.MaxLatency; 364 Write.SClassOrWriteResourceID = 0; 365 } 366 367 Write.IsOptionalDef = false; 368 assert(Write.RegisterID != 0 && "Expected a valid phys register!"); 369 LLVM_DEBUG({ 370 dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex 371 << ", PhysReg=" << MRI.getName(Write.RegisterID) 372 << ", Latency=" << Write.Latency 373 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 374 }); 375 } 376 377 if (MCDesc.hasOptionalDef()) { 378 WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs]; 379 Write.OpIndex = OptionalDefIdx; 380 // Assign a default latency for this write. 381 Write.Latency = ID.MaxLatency; 382 Write.SClassOrWriteResourceID = 0; 383 Write.IsOptionalDef = true; 384 LLVM_DEBUG({ 385 dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex 386 << ", Latency=" << Write.Latency 387 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 388 }); 389 } 390 391 if (!NumVariadicOps) 392 return; 393 394 // FIXME: if an instruction opcode is flagged 'mayStore', and it has no 395 // "unmodeledSideEffects', then this logic optimistically assumes that any 396 // extra register operands in the variadic sequence is not a register 397 // definition. 398 // 399 // Otherwise, we conservatively assume that any register operand from the 400 // variadic sequence is both a register read and a register write. 401 bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() && 402 !MCDesc.hasUnmodeledSideEffects(); 403 CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef(); 404 for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); 405 I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) { 406 const MCOperand &Op = MCI.getOperand(OpIndex); 407 if (!Op.isReg()) 408 continue; 409 410 WriteDescriptor &Write = ID.Writes[CurrentDef]; 411 Write.OpIndex = OpIndex; 412 // Assign a default latency for this write. 413 Write.Latency = ID.MaxLatency; 414 Write.SClassOrWriteResourceID = 0; 415 Write.IsOptionalDef = false; 416 ++CurrentDef; 417 LLVM_DEBUG({ 418 dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex 419 << ", Latency=" << Write.Latency 420 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 421 }); 422 } 423 424 ID.Writes.resize(CurrentDef); 425 } 426 427 void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, 428 unsigned SchedClassID) { 429 const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); 430 unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs(); 431 unsigned NumImplicitUses = MCDesc.getNumImplicitUses(); 432 // Remove the optional definition. 433 if (MCDesc.hasOptionalDef()) 434 --NumExplicitUses; 435 unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); 436 unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps; 437 ID.Reads.resize(TotalUses); 438 unsigned CurrentUse = 0; 439 for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses; 440 ++I, ++OpIndex) { 441 const MCOperand &Op = MCI.getOperand(OpIndex); 442 if (!Op.isReg()) 443 continue; 444 445 ReadDescriptor &Read = ID.Reads[CurrentUse]; 446 Read.OpIndex = OpIndex; 447 Read.UseIndex = I; 448 Read.SchedClassID = SchedClassID; 449 ++CurrentUse; 450 LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex 451 << ", UseIndex=" << Read.UseIndex << '\n'); 452 } 453 454 // For the purpose of ReadAdvance, implicit uses come directly after explicit 455 // uses. The "UseIndex" must be updated according to that implicit layout. 456 for (unsigned I = 0; I < NumImplicitUses; ++I) { 457 ReadDescriptor &Read = ID.Reads[CurrentUse + I]; 458 Read.OpIndex = ~I; 459 Read.UseIndex = NumExplicitUses + I; 460 Read.RegisterID = MCDesc.getImplicitUses()[I]; 461 Read.SchedClassID = SchedClassID; 462 LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex 463 << ", UseIndex=" << Read.UseIndex << ", RegisterID=" 464 << MRI.getName(Read.RegisterID) << '\n'); 465 } 466 467 CurrentUse += NumImplicitUses; 468 469 // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no 470 // "unmodeledSideEffects", then this logic optimistically assumes that any 471 // extra register operand in the variadic sequence is not a register 472 // definition. 473 bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() && 474 !MCDesc.hasUnmodeledSideEffects(); 475 for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); 476 I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) { 477 const MCOperand &Op = MCI.getOperand(OpIndex); 478 if (!Op.isReg()) 479 continue; 480 481 ReadDescriptor &Read = ID.Reads[CurrentUse]; 482 Read.OpIndex = OpIndex; 483 Read.UseIndex = NumExplicitUses + NumImplicitUses + I; 484 Read.SchedClassID = SchedClassID; 485 ++CurrentUse; 486 LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex 487 << ", UseIndex=" << Read.UseIndex << '\n'); 488 } 489 490 ID.Reads.resize(CurrentUse); 491 } 492 493 Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID, 494 const MCInst &MCI) const { 495 if (ID.NumMicroOps != 0) 496 return ErrorSuccess(); 497 498 bool UsesBuffers = ID.UsedBuffers; 499 bool UsesResources = !ID.Resources.empty(); 500 if (!UsesBuffers && !UsesResources) 501 return ErrorSuccess(); 502 503 // FIXME: see PR44797. We should revisit these checks and possibly move them 504 // in CodeGenSchedule.cpp. 505 StringRef Message = "found an inconsistent instruction that decodes to zero " 506 "opcodes and that consumes scheduler resources."; 507 return make_error<InstructionError<MCInst>>(std::string(Message), MCI); 508 } 509 510 Expected<const InstrDesc &> 511 InstrBuilder::createInstrDescImpl(const MCInst &MCI) { 512 assert(STI.getSchedModel().hasInstrSchedModel() && 513 "Itineraries are not yet supported!"); 514 515 // Obtain the instruction descriptor from the opcode. 516 unsigned short Opcode = MCI.getOpcode(); 517 const MCInstrDesc &MCDesc = MCII.get(Opcode); 518 const MCSchedModel &SM = STI.getSchedModel(); 519 520 // Then obtain the scheduling class information from the instruction. 521 unsigned SchedClassID = MCDesc.getSchedClass(); 522 bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant(); 523 524 // Try to solve variant scheduling classes. 525 if (IsVariant) { 526 unsigned CPUID = SM.getProcessorID(); 527 while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) 528 SchedClassID = 529 STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID); 530 531 if (!SchedClassID) { 532 return make_error<InstructionError<MCInst>>( 533 "unable to resolve scheduling class for write variant.", MCI); 534 } 535 } 536 537 // Check if this instruction is supported. Otherwise, report an error. 538 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); 539 if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { 540 return make_error<InstructionError<MCInst>>( 541 "found an unsupported instruction in the input assembly sequence.", 542 MCI); 543 } 544 545 LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n'); 546 LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n'); 547 548 // Create a new empty descriptor. 549 std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>(); 550 ID->NumMicroOps = SCDesc.NumMicroOps; 551 ID->SchedClassID = SchedClassID; 552 553 if (MCDesc.isCall() && FirstCallInst) { 554 // We don't correctly model calls. 555 WithColor::warning() << "found a call in the input assembly sequence.\n"; 556 WithColor::note() << "call instructions are not correctly modeled. " 557 << "Assume a latency of 100cy.\n"; 558 FirstCallInst = false; 559 } 560 561 if (MCDesc.isReturn() && FirstReturnInst) { 562 WithColor::warning() << "found a return instruction in the input" 563 << " assembly sequence.\n"; 564 WithColor::note() << "program counter updates are ignored.\n"; 565 FirstReturnInst = false; 566 } 567 568 ID->MayLoad = MCDesc.mayLoad(); 569 ID->MayStore = MCDesc.mayStore(); 570 ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); 571 ID->BeginGroup = SCDesc.BeginGroup; 572 ID->EndGroup = SCDesc.EndGroup; 573 574 initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); 575 computeMaxLatency(*ID, MCDesc, SCDesc, STI); 576 577 if (Error Err = verifyOperands(MCDesc, MCI)) 578 return std::move(Err); 579 580 populateWrites(*ID, MCI, SchedClassID); 581 populateReads(*ID, MCI, SchedClassID); 582 583 LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); 584 LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); 585 586 // Sanity check on the instruction descriptor. 587 if (Error Err = verifyInstrDesc(*ID, MCI)) 588 return std::move(Err); 589 590 // Now add the new descriptor. 591 bool IsVariadic = MCDesc.isVariadic(); 592 if (!IsVariadic && !IsVariant) { 593 Descriptors[MCI.getOpcode()] = std::move(ID); 594 return *Descriptors[MCI.getOpcode()]; 595 } 596 597 VariantDescriptors[&MCI] = std::move(ID); 598 return *VariantDescriptors[&MCI]; 599 } 600 601 Expected<const InstrDesc &> 602 InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { 603 if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) 604 return *Descriptors[MCI.getOpcode()]; 605 606 if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) 607 return *VariantDescriptors[&MCI]; 608 609 return createInstrDescImpl(MCI); 610 } 611 612 Expected<std::unique_ptr<Instruction>> 613 InstrBuilder::createInstruction(const MCInst &MCI) { 614 Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI); 615 if (!DescOrErr) 616 return DescOrErr.takeError(); 617 const InstrDesc &D = *DescOrErr; 618 std::unique_ptr<Instruction> NewIS = std::make_unique<Instruction>(D); 619 620 // Check if this is a dependency breaking instruction. 621 APInt Mask; 622 623 bool IsZeroIdiom = false; 624 bool IsDepBreaking = false; 625 if (MCIA) { 626 unsigned ProcID = STI.getSchedModel().getProcessorID(); 627 IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID); 628 IsDepBreaking = 629 IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID); 630 if (MCIA->isOptimizableRegisterMove(MCI, ProcID)) 631 NewIS->setOptimizableMove(); 632 } 633 634 // Initialize Reads first. 635 MCPhysReg RegID = 0; 636 for (const ReadDescriptor &RD : D.Reads) { 637 if (!RD.isImplicitRead()) { 638 // explicit read. 639 const MCOperand &Op = MCI.getOperand(RD.OpIndex); 640 // Skip non-register operands. 641 if (!Op.isReg()) 642 continue; 643 RegID = Op.getReg(); 644 } else { 645 // Implicit read. 646 RegID = RD.RegisterID; 647 } 648 649 // Skip invalid register operands. 650 if (!RegID) 651 continue; 652 653 // Okay, this is a register operand. Create a ReadState for it. 654 NewIS->getUses().emplace_back(RD, RegID); 655 ReadState &RS = NewIS->getUses().back(); 656 657 if (IsDepBreaking) { 658 // A mask of all zeroes means: explicit input operands are not 659 // independent. 660 if (Mask.isNullValue()) { 661 if (!RD.isImplicitRead()) 662 RS.setIndependentFromDef(); 663 } else { 664 // Check if this register operand is independent according to `Mask`. 665 // Note that Mask may not have enough bits to describe all explicit and 666 // implicit input operands. If this register operand doesn't have a 667 // corresponding bit in Mask, then conservatively assume that it is 668 // dependent. 669 if (Mask.getBitWidth() > RD.UseIndex) { 670 // Okay. This map describe register use `RD.UseIndex`. 671 if (Mask[RD.UseIndex]) 672 RS.setIndependentFromDef(); 673 } 674 } 675 } 676 } 677 678 // Early exit if there are no writes. 679 if (D.Writes.empty()) 680 return std::move(NewIS); 681 682 // Track register writes that implicitly clear the upper portion of the 683 // underlying super-registers using an APInt. 684 APInt WriteMask(D.Writes.size(), 0); 685 686 // Now query the MCInstrAnalysis object to obtain information about which 687 // register writes implicitly clear the upper portion of a super-register. 688 if (MCIA) 689 MCIA->clearsSuperRegisters(MRI, MCI, WriteMask); 690 691 // Initialize writes. 692 unsigned WriteIndex = 0; 693 for (const WriteDescriptor &WD : D.Writes) { 694 RegID = WD.isImplicitWrite() ? WD.RegisterID 695 : MCI.getOperand(WD.OpIndex).getReg(); 696 // Check if this is a optional definition that references NoReg. 697 if (WD.IsOptionalDef && !RegID) { 698 ++WriteIndex; 699 continue; 700 } 701 702 assert(RegID && "Expected a valid register ID!"); 703 NewIS->getDefs().emplace_back(WD, RegID, 704 /* ClearsSuperRegs */ WriteMask[WriteIndex], 705 /* WritesZero */ IsZeroIdiom); 706 ++WriteIndex; 707 } 708 709 return std::move(NewIS); 710 } 711 } // namespace mca 712 } // namespace llvm 713