//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca"

namespace llvm {
namespace mca {

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
      FirstReturnInst(true) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  std::vector<ResourcePlusCycles> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
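  //
  // Illustrative example (hypothetical resource names, not taken from a real
  // scheduling model): if a write consumes sub-resource P0Div for 3 cycles,
  // and P0Div names P0 as its "Super" resource, then SuperResources[mask(P0)]
  // is increased by 3. Later on, when the cycles of P0 are subtracted from the
  // groups that contain it, those 3 cycles are excluded from the subtraction
  // (see the "Remove cycles contributed by smaller resources" loop below).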
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->Cycles) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(PRE->ProcResourceIdx);
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->Cycles;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = countPopulation(A.first);
    unsigned popcntB = countPopulation(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;

  // Remove cycles contributed by smaller resources.
  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(countPopulation(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= PowerOf2Floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;
    if (countPopulation(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (countPopulation(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ResourceCycles = [2, 2, 3];
  //  }
  //
  // This means:
  //  Resource units HWPort0 and HWPort1 are both used for 2cy.
  //  Resource group HWPort01 is the union of HWPort0 and HWPort1.
  //  Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  //  will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      if ((Mask & UsedResourceUnits) == Mask)
        RPC.second.setReserved();
    }
  }

  // Identify extra buffers that are consumed through super resources.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(I);
    }
  }

  // Now set the buffers.
  if (unsigned NumBuffers = Buffers.countPopulation()) {
    ID.Buffers.resize(NumBuffers);
    for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) {
      if (Buffers[I]) {
        --NumBuffers;
        ID.Buffers[NumBuffers] = ProcResourceMasks[I];
      }
    }
  }

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    for (const uint64_t R : ID.Buffers)
      dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in an MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     always the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes).
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  //   vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                              @  <MCOperand Reg:59>
  //                              @  <MCOperand Imm:0>     (!!)
  //                              @  <MCOperand Reg:67>
  //                              @  <MCOperand Imm:0>
  //                              @  <MCOperand Imm:14>
  //                              @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition.
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = MCDesc.getNumOperands() - 1;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  // FIXME: If an instruction opcode is flagged 'mayStore', and it has no
  // 'unmodeledSideEffects', then this logic optimistically assumes that any
  // extra register operands in the variadic sequence are not register
  // definitions.
  //
  // Otherwise, we conservatively assume that any register operand from the
  // variadic sequence is both a register read and a register write.
  bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
                        !MCDesc.hasUnmodeledSideEffects();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.getImplicitUses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
  // 'unmodeledSideEffects', then this logic optimistically assumes that any
  // extra register operands in the variadic sequence are register definitions
  // only, and not register reads.
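  //
  // For example (illustrative only, not an exhaustive rule): an ARM
  // load-multiple such as "ldm r0, {r1, r2, r3}" lowers to a variadic MCInst
  // that is flagged 'mayLoad'; its variadic register operands name the loaded
  // (i.e. defined) registers, so they are deliberately not added to the Reads
  // vector here.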
  bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
                        !MCDesc.hasUnmodeledSideEffects();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesMemory = ID.MayLoad || ID.MayStore;
  bool UsesBuffers = !ID.Buffers.empty();
  bool UsesResources = !ID.Resources.empty();
  if (!UsesMemory && !UsesBuffers && !UsesResources)
    return ErrorSuccess();

  StringRef Message;
  if (UsesMemory) {
    Message = "found an inconsistent instruction that decodes "
              "into zero opcodes and that consumes load/store "
              "unit resources.";
  } else {
    Message = "found an inconsistent instruction that decodes "
              "to zero opcodes and that consumes scheduler "
              "resources.";
  }

  return make_error<InstructionError<MCInst>>(Message, MCI);
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  unsigned SchedClassID = MCDesc.getSchedClass();
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence.",
        MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  ID->MayLoad = MCDesc.mayLoad();
  ID->MayStore = MCDesc.mayStore();
  ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
  ID->BeginGroup = SCDesc.BeginGroup;
  ID->EndGroup = SCDesc.EndGroup;

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Sanity check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
  if (!IsVariadic && !IsVariant) {
    Descriptors[MCI.getOpcode()] = std::move(ID);
    return *Descriptors[MCI.getOpcode()];
  }

  VariantDescriptors[&MCI] = std::move(ID);
  return *VariantDescriptors[&MCI];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
    return *Descriptors[MCI.getOpcode()];

  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
    return *VariantDescriptors[&MCI];

  return createInstrDescImpl(MCI);
}

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  for (const ReadDescriptor &RD : D.Reads) {
    int RegID = -1;
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    assert(RegID > 0 && "Invalid register ID found!");
    NewIS->getUses().emplace_back(RD, RegID);
    ReadState &RS = NewIS->getUses().back();

    if (IsDepBreaking) {
      // A mask of all zeroes means that explicit input operands are
      // independent, while implicit input operands are not.
      if (Mask.isNullValue()) {
        if (!RD.isImplicitRead())
          RS.setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS.setIndependentFromDef();
        }
      }
    }
  }

  // Early exit if there are no writes.
  if (D.Writes.empty())
    return std::move(NewIS);

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  for (const WriteDescriptor &WD : D.Writes) {
    unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID
                                          : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    NewIS->getDefs().emplace_back(WD, RegID,
                                  /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                  /* WritesZero */ IsZeroIdiom);
    ++WriteIndex;
  }

  return std::move(NewIS);
}
} // namespace mca
} // namespace llvm
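//
// A minimal usage sketch (illustrative only; `STI`, `MCII`, `MRI`, `MCIA`,
// `Insts` and `LoweredInsts` are placeholder names owned by the driver, not
// part of this file):
//
//   mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA);
//   for (const MCInst &MCI : Insts) {
//     Expected<std::unique_ptr<mca::Instruction>> Inst =
//         IB.createInstruction(MCI);
//     if (!Inst)
//       return Inst.takeError(); // E.g. an unsupported instruction was found.
//     LoweredInsts.emplace_back(std::move(*Inst));
//   }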