//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca"

namespace llvm {
namespace mca {

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
      FirstReturnInst(true) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  std::vector<ResourcePlusCycles> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
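  //
  // Illustrative example (hypothetical resources): if a write consumes 1cy of
  // unit U0 and 1cy of unit U1, and both units declare S as their "Super"
  // resource, then SuperResources[<mask of S>] becomes 2. Those 2 cycles are
  // later excluded when cycles consumed by S itself are subtracted from the
  // resource groups that contain S.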
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->Cycles) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->Cycles;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = countPopulation(A.first);
    unsigned popcntB = countPopulation(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;

  // Remove cycles contributed by smaller resources.
  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(countPopulation(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= PowerOf2Floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;
    if (countPopulation(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (countPopulation(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

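  // A worked example of the normalization above (hypothetical masks and
  // cycles): given Worklist entries { P2: 2cy, P23: 3cy }, where P23 is the
  // group made of units P2 and P3 and no "Super" bookkeeping applies, the
  // loop subtracts P2's 2 cycles from P23, leaving 1cy of residual usage on
  // the group (and P23's NumUnits is incremented).
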
  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ResourceCycles = [2, 2, 3];
  //  }
  //
  // This means:
  // Resource units HWPort0 and HWPort1 are both used for 2cy.
  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  // will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
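  //
  // Continuing that example: HWPort0 and HWPort1 are both consumed as single
  // resource units, so every unit of the HWPort01 group is in use; the loop
  // below therefore marks HWPort01 as reserved for those extra cycles.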
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      if ((Mask & UsedResourceUnits) == Mask)
        RPC.second.setReserved();
    }
  }

  // Identify extra buffers that are consumed through super resources.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. "
          "Instruction has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  // 1. The number of explicit and implicit register definitions in a MCInst
  //    matches the number of explicit and implicit definitions according to
  //    the opcode descriptor (MCInstrDesc).
  // 2. Uses start at index #(MCDesc.getNumDefs()).
  // 3. There can only be a single optional register definition, and it is
  //    always the last operand of the sequence (excluding extra operands
  //    contributed by variadic opcodes).
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  //  vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                             @  <MCOperand Reg:59>
  //                             @  <MCOperand Imm:0>     (!!)
  //                             @  <MCOperand Reg:67>
  //                             @  <MCOperand Imm:0>
  //                             @  <MCOperand Imm:14>
  //                             @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means register R1 from the example is both read and written.
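  //
  // As a simpler illustration (x86, AT&T syntax): `addl %esi, %edi` has one
  // explicit definition (%edi, operand #0) and one implicit definition
  // (EFLAGS). The explicit-definition loop below creates ID.Writes[0], and
  // the implicit-definition loop creates ID.Writes[1] for EFLAGS.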
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = MCDesc.getNumOperands() - 1;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  // FIXME: If an instruction opcode is flagged 'mayStore', and it has no
  // "unmodeledSideEffects", then this logic optimistically assumes that any
  // extra register operands in the variadic sequence are not register
  // definitions.
  //
  // Otherwise, we conservatively assume that any register operand from the
  // variadic sequence is both a register read and a register write.
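  //
  // For example (illustrative): an ARM store-multiple such as
  // `stm r0!, {r1, r2, r3}` is flagged 'mayStore' and has no unmodeled side
  // effects, so the registers in its variadic list are treated as uses only,
  // and no extra write descriptors are created for them below.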
  bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
                        !MCDesc.hasUnmodeledSideEffects();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.getImplicitUses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
  // "unmodeledSideEffects", then this logic optimistically assumes that any
  // extra register operand in the variadic sequence is a register definition,
  // and not a register read.
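  //
  // For example (illustrative): an ARM load-multiple such as
  // `ldm r0!, {r1, r2, r3}` is flagged 'mayLoad' and has no unmodeled side
  // effects; the registers in its variadic list are destinations only, so no
  // read descriptors are created for them below.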
  bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
                        !MCDesc.hasUnmodeledSideEffects();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesMemory = ID.MayLoad || ID.MayStore;
  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesMemory && !UsesBuffers && !UsesResources)
    return ErrorSuccess();

  StringRef Message;
  if (UsesMemory) {
    Message = "found an inconsistent instruction that decodes "
              "into zero opcodes and that consumes load/store "
              "unit resources.";
  } else {
    Message = "found an inconsistent instruction that decodes "
              "to zero opcodes and that consumes scheduler "
              "resources.";
  }

  return make_error<InstructionError<MCInst>>(Message, MCI);
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  unsigned SchedClassID = MCDesc.getSchedClass();
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence.",
        MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  ID->MayLoad = MCDesc.mayLoad();
  ID->MayStore = MCDesc.mayStore();
  ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
  ID->BeginGroup = SCDesc.BeginGroup;
  ID->EndGroup = SCDesc.EndGroup;

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Sanity check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
  if (!IsVariadic && !IsVariant) {
    Descriptors[MCI.getOpcode()] = std::move(ID);
    return *Descriptors[MCI.getOpcode()];
  }

  VariantDescriptors[&MCI] = std::move(ID);
  return *VariantDescriptors[&MCI];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
    return *Descriptors[MCI.getOpcode()];

  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
    return *VariantDescriptors[&MCI];

  return createInstrDescImpl(MCI);
}

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  std::unique_ptr<Instruction> NewIS = std::make_unique<Instruction>(D);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    NewIS->getUses().emplace_back(RD, RegID);
    ReadState &RS = NewIS->getUses().back();

    if (IsDepBreaking) {
      // A mask of all zeroes means that only the explicit input operands are
      // independent from their definitions.
      if (Mask.isNullValue()) {
        if (!RD.isImplicitRead())
          RS.setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. Mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS.setIndependentFromDef();
        }
      }
    }
  }

  // Early exit if there are no writes.
  if (D.Writes.empty())
    return std::move(NewIS);

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    NewIS->getDefs().emplace_back(WD, RegID,
                                  /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                  /* WritesZero */ IsZeroIdiom);
    ++WriteIndex;
  }

  return std::move(NewIS);
}
} // namespace mca
} // namespace llvm