//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia,
                           const mca::InstrumentManager &im, unsigned cl)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
      FirstReturnInst(true), CallLatency(cl) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource is
  // part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->ReleaseAtCycle) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->ReleaseAtCycle, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->ReleaseAtCycle;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = llvm::popcount(A.first);
    unsigned popcntB = llvm::popcount(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  uint64_t UnitsFromResourceGroups = 0;

  // Remove cycles contributed by smaller resources, and check if there
  // are partially overlapping resource groups.
  ID.HasPartiallyOverlappingGroups = false;

  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(llvm::popcount(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= llvm::bit_floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;

    if (llvm::popcount(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= llvm::bit_floor(NormalizedMask);
      if (UnitsFromResourceGroups & NormalizedMask)
        ID.HasPartiallyOverlappingGroups = true;

      UnitsFromResourceGroups |= NormalizedMask;
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (llvm::popcount(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ReleaseAtCycles = [2, 2, 3];
  //  }
  //
  // This means:
  // Resource units HWPort0 and HWPort1 are both used for 2cy.
  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  // will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
      uint64_t MaxResourceUnits = llvm::popcount(Mask);
      if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
    dbgs() << "\t\tHasPartiallyOverlappingGroups="
           << ID.HasPartiallyOverlappingGroups << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI,
                              unsigned CallLatency) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency.
    ID.MaxLatency = CallLatency;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume the MaxLatency set for
  // calls.
  ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  // vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                            @  <MCOperand Reg:59>
  //                            @  <MCOperand Imm:0>     (!!)
  //                            @  <MCOperand Reg:67>
  //                            @  <MCOperand Imm:0>
  //                            @  <MCOperand Imm:14>
  //                            @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2. register reads start at #(NumExplicitDefs-1).
  // That means, register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register or constant register
  // operands. The first NumExplicitDefs register operands are expected to be
  // register definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }
    if (MRI.isConstant(Op.getReg())) {
      CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.implicit_uses().size();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.implicit_uses()[I];
    if (MRI.isConstant(Read.RegisterID))
      continue;
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

hash_code hashMCOperand(const MCOperand &MCO) {
  hash_code TypeHash = hash_combine(MCO.isReg(), MCO.isImm(), MCO.isSFPImm(),
                                    MCO.isDFPImm(), MCO.isExpr(), MCO.isInst());
  if (MCO.isReg())
    return hash_combine(TypeHash, MCO.getReg());

  return TypeHash;
}

hash_code hashMCInst(const MCInst &MCI) {
  hash_code InstructionHash = hash_combine(MCI.getOpcode(), MCI.getFlags());
  for (unsigned I = 0; I < MCI.getNumOperands(); ++I) {
    InstructionHash =
        hash_combine(InstructionHash, hashMCOperand(MCI.getOperand(I)));
  }
  return InstructionHash;
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // in CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<unsigned> InstrBuilder::getVariantSchedClassID(const MCInst &MCI,
                                                        unsigned SchedClassID) {
  const MCSchedModel &SM = STI.getSchedModel();
  unsigned CPUID = SM.getProcessorID();
  while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
    SchedClassID =
        STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

  if (!SchedClassID) {
    return make_error<InstructionError<MCInst>>(
        "unable to resolve scheduling class for write variant.", MCI);
  }

  return SchedClassID;
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI,
                                  const SmallVector<Instrument *> &IVec) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  // Allow InstrumentManager to override and use a different SchedClassID
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    Expected<unsigned> VariantSchedClassIDOrErr =
        getVariantSchedClassID(MCI, SchedClassID);
    if (!VariantSchedClassIDOrErr) {
      return VariantSchedClassIDOrErr.takeError();
    }

    SchedClassID = *VariantSchedClassIDOrErr;
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence", MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of " << CallLatency << "cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI, CallLatency);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
    Descriptors[DKey] = std::move(ID);
    return *Descriptors[DKey];
  }

  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  assert(
      !VariantDescriptors.contains(VDKey) &&
      "Expected VariantDescriptors to not already have a value for this key.");
  VariantDescriptors[VDKey] = std::move(ID);
  return *VariantDescriptors[VDKey];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
                                   const SmallVector<Instrument *> &IVec) {
  // Cache lookup using SchedClassID from Instrumentation
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);

  auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
  if (Descriptors.find_as(DKey) != Descriptors.end())
    return *Descriptors[DKey];

  Expected<unsigned> VariantSchedClassIDOrErr =
      getVariantSchedClassID(MCI, SchedClassID);
  if (!VariantSchedClassIDOrErr) {
    return VariantSchedClassIDOrErr.takeError();
  }

  SchedClassID = *VariantSchedClassIDOrErr;

  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  if (VariantDescriptors.contains(VDKey))
    return *VariantDescriptors[VDKey];

  return createInstrDescImpl(MCI, IVec);
}

STATISTIC(NumVariantInst, "Number of MCInsts that doesn't have static Desc");

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI,
                                const SmallVector<Instrument *> &IVec) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg or a write
    // to a constant register.
    if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm