//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca"

namespace llvm {
namespace mca {

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
      FirstReturnInst(true) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  std::vector<ResourcePlusCycles> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
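  // For example, if a sub-resource S declares U as its "Super" resource, the
  // cycles consumed on S are accumulated here under U's mask. Later, when U's
  // cycles are subtracted from the groups that contain U, those sub-resource
  // cycles are excluded from the subtraction.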
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->Cycles) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->Cycles;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = countPopulation(A.first);
    unsigned popcntB = countPopulation(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;

  // Remove cycles contributed by smaller resources.
  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(countPopulation(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= PowerOf2Floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;
    if (countPopulation(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (countPopulation(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ResourceCycles = [2, 2, 3];
  //  }
  //
  // This means:
  // Resource units HWPort0 and HWPort1 are both used for 2cy.
  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  // will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      uint64_t MaxResourceUnits = countPopulation(Mask);
      if (RPC.second.NumUnits > countPopulation(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
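  // A write to a sub-resource of a "Super" resource also occupies an entry in
  // the buffer of every buffered group that contains that "Super" resource,
  // so those buffers must be marked as used too.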
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     always the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes).
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  //   vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                              @  <MCOperand Reg:59>
  //                              @  <MCOperand Imm:0>     (!!)
  //                              @  <MCOperand Reg:67>
  //                              @  <MCOperand Imm:0>
  //                              @  <MCOperand Imm:14>
  //                              @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = MCDesc.getNumOperands() - 1;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  // FIXME: if an instruction opcode is flagged 'mayStore', and it has no
  // 'unmodeledSideEffects', then this logic optimistically assumes that any
  // extra register operands in the variadic sequence are not register
  // definitions.
  //
  // Otherwise, we conservatively assume that any register operand from the
  // variadic sequence is both a register read and a register write.
  bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
                        !MCDesc.hasUnmodeledSideEffects();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.getImplicitUses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
  // 'unmodeledSideEffects', then this logic optimistically assumes that any
  // extra register operand in the variadic sequence is a register definition
  // rather than a register use, so no read is created for it.
  bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
                        !MCDesc.hasUnmodeledSideEffects();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // to CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  unsigned SchedClassID = MCDesc.getSchedClass();
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence.",
        MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  ID->MayLoad = MCDesc.mayLoad();
  ID->MayStore = MCDesc.mayStore();
  ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
  ID->BeginGroup = SCDesc.BeginGroup;
  ID->EndGroup = SCDesc.EndGroup;

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Sanity check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
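  // Descriptors for non-variant, non-variadic opcodes depend only on the
  // opcode, so they are cached and reused by opcode. Variant and variadic
  // instructions may resolve differently depending on their operands, so
  // their descriptors are cached per MCInst instead.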
  bool IsVariadic = MCDesc.isVariadic();
  if (!IsVariadic && !IsVariant) {
    Descriptors[MCI.getOpcode()] = std::move(ID);
    return *Descriptors[MCI.getOpcode()];
  }

  VariantDescriptors[&MCI] = std::move(ID);
  return *VariantDescriptors[&MCI];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
    return *Descriptors[MCI.getOpcode()];

  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
    return *VariantDescriptors[&MCI];

  return createInstrDescImpl(MCI);
}

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  std::unique_ptr<Instruction> NewIS = std::make_unique<Instruction>(D);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    NewIS->getUses().emplace_back(RD, RegID);
    ReadState &RS = NewIS->getUses().back();

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isNullValue()) {
        if (!RD.isImplicitRead())
          RS.setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS.setIndependentFromDef();
        }
      }
    }
  }

  // Early exit if there are no writes.
  if (D.Writes.empty())
    return std::move(NewIS);

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    NewIS->getDefs().emplace_back(WD, RegID,
                                  /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                  /* WritesZero */ IsZeroIdiom);
    ++WriteIndex;
  }

  return std::move(NewIS);
}
} // namespace mca
} // namespace llvm