xref: /freebsd/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp (revision 53071ed1c96db7f89defc99c95b0ad1031d48f45)
1 //===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the InstrBuilder interface.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/MCA/InstrBuilder.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/WithColor.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #define DEBUG_TYPE "llvm-mca"
23 
24 namespace llvm {
25 namespace mca {
26 
27 InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
28                            const llvm::MCInstrInfo &mcii,
29                            const llvm::MCRegisterInfo &mri,
30                            const llvm::MCInstrAnalysis *mcia)
31     : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
32       FirstReturnInst(true) {
33   const MCSchedModel &SM = STI.getSchedModel();
34   ProcResourceMasks.resize(SM.getNumProcResourceKinds());
35   computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
36 }
37 
/// Populates the resource-related fields of \p ID from the write-proc-resource
/// entries of scheduling class \p SCDesc.
///
/// On return the following fields of \p ID have been written:
///  - Resources: (mask, usage) pairs for every consumed resource that still has
///    cycles left after the cycles of smaller resources have been removed.
///  - Buffers: the masks of all resources whose buffer bit was set.
///  - UsedProcResUnits / UsedProcResGroups: masks of the used units and groups.
///  - MustIssueImmediately.
///
/// \param ID the descriptor being initialized.
/// \param SCDesc the (already resolved) scheduling class of the instruction.
/// \param STI used to reach the scheduling model and the write entries.
/// \param ProcResourceMasks one mask per processor resource; it is indexed by
///        processor resource index throughout this function.
static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  std::vector<ResourcePlusCycles> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource is
  // part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
  DenseMap<uint64_t, unsigned> SuperResources;

  // One bit per processor resource; a set bit means that the buffer of that
  // resource ends up in ID.Buffers.
  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    // Entries with zero cycles are skipped. The diagnostic is only emitted in
    // builds where NDEBUG is not defined.
    if (!PRE->Cycles) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    // A negative BufferSize clears AllInOrderResources. Otherwise the resource
    // buffer is recorded, a BufferSize of zero counts as a dispatch hazard, and
    // only buffer sizes of 0 or 1 keep AllInOrderResources set.
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(PRE->ProcResourceIdx);
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    // Accumulate the cycles of this resource under the mask of its "Super"
    // resource (a SuperIdx of zero is treated as "no super resource").
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->Cycles;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  // Ties on popcount are broken by the numeric value of the mask.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = countPopulation(A.first);
    unsigned popcntB = countPopulation(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;

  // Remove cycles contributed by smaller resources.
  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    // An entry left with no cycles is not added to ID.Resources. The assert
    // expects such an entry to be a group; its leading mask bit is still
    // recorded in UsedResourceGroups.
    if (!A.second.size()) {
      assert(countPopulation(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= PowerOf2Floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;
    if (countPopulation(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      // (A.first ^ NormalizedMask) is the leading bit that was just removed.
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    // For every later (i.e. not smaller) entry B whose mask contains all the
    // bits of NormalizedMask, subtract the cycles of A minus the cycles that
    // were recorded in SuperResources under A's mask.
    // NOTE: DenseMap::operator[] default-inserts a zero entry when A.first is
    // not a key yet. Those entries are also visited by the "extra buffers"
    // loop further down.
    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (countPopulation(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ResourceCycles = [2, 2, 3];
  //  }
  //
  // This means:
  // Resource units HWPort0 and HWPort1 are both used for 2cy.
  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  // will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  //
  // In code: a group is marked as reserved when every unit bit of its mask
  // (i.e. the mask without its leading bit) is in UsedResourceUnits.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      if ((Mask & UsedResourceUnits) == Mask)
        RPC.second.setReserved();
    }
  }

  // Identify extra buffers that are consumed through super resources.
  // For each key of SuperResources, set the buffer bit of every other resource
  // (index 0 is skipped) whose BufferSize is not -1 and whose mask is a strict
  // superset of that key.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(I);
    }
  }

  // Now set the buffers.
  // ID.Buffers is filled from its last slot towards its first one, so the mask
  // of the lowest resource index lands in the last element.
  if (unsigned NumBuffers = Buffers.countPopulation()) {
    ID.Buffers.resize(NumBuffers);
    for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) {
      if (Buffers[I]) {
        --NumBuffers;
        ID.Buffers[NumBuffers] = ProcResourceMasks[I];
      }
    }
  }

  // Debug-only dump of everything computed above.
  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    for (const uint64_t R : ID.Buffers)
      dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
  });
}
208 
209 static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
210                               const MCSchedClassDesc &SCDesc,
211                               const MCSubtargetInfo &STI) {
212   if (MCDesc.isCall()) {
213     // We cannot estimate how long this call will take.
214     // Artificially set an arbitrarily high latency (100cy).
215     ID.MaxLatency = 100U;
216     return;
217   }
218 
219   int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
220   // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
221   ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
222 }
223 
224 static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
225   // Count register definitions, and skip non register operands in the process.
226   unsigned I, E;
227   unsigned NumExplicitDefs = MCDesc.getNumDefs();
228   for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
229     const MCOperand &Op = MCI.getOperand(I);
230     if (Op.isReg())
231       --NumExplicitDefs;
232   }
233 
234   if (NumExplicitDefs) {
235     return make_error<InstructionError<MCInst>>(
236         "Expected more register operand definitions.", MCI);
237   }
238 
239   if (MCDesc.hasOptionalDef()) {
240     // Always assume that the optional definition is the last operand.
241     const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
242     if (I == MCI.getNumOperands() || !Op.isReg()) {
243       std::string Message =
244           "expected a register operand for an optional definition. Instruction "
245           "has not been correctly analyzed.";
246       return make_error<InstructionError<MCInst>>(Message, MCI);
247     }
248   }
249 
250   return ErrorSuccess();
251 }
252 
253 void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
254                                   unsigned SchedClassID) {
255   const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
256   const MCSchedModel &SM = STI.getSchedModel();
257   const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
258 
259   // Assumptions made by this algorithm:
260   //  1. The number of explicit and implicit register definitions in a MCInst
261   //     matches the number of explicit and implicit definitions according to
262   //     the opcode descriptor (MCInstrDesc).
263   //  2. Uses start at index #(MCDesc.getNumDefs()).
264   //  3. There can only be a single optional register definition, an it is
265   //     always the last operand of the sequence (excluding extra operands
266   //     contributed by variadic opcodes).
267   //
268   // These assumptions work quite well for most out-of-order in-tree targets
269   // like x86. This is mainly because the vast majority of instructions is
270   // expanded to MCInst using a straightforward lowering logic that preserves
271   // the ordering of the operands.
272   //
273   // About assumption 1.
274   // The algorithm allows non-register operands between register operand
275   // definitions. This helps to handle some special ARM instructions with
276   // implicit operand increment (-mtriple=armv7):
277   //
278   // vld1.32  {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
279   //                             @  <MCOperand Reg:59>
280   //                             @  <MCOperand Imm:0>     (!!)
281   //                             @  <MCOperand Reg:67>
282   //                             @  <MCOperand Imm:0>
283   //                             @  <MCOperand Imm:14>
284   //                             @  <MCOperand Reg:0>>
285   //
286   // MCDesc reports:
287   //  6 explicit operands.
288   //  1 optional definition
289   //  2 explicit definitions (!!)
290   //
291   // The presence of an 'Imm' operand between the two register definitions
292   // breaks the assumption that "register definitions are always at the
293   // beginning of the operand sequence".
294   //
295   // To workaround this issue, this algorithm ignores (i.e. skips) any
296   // non-register operands between register definitions.  The optional
297   // definition is still at index #(NumOperands-1).
298   //
299   // According to assumption 2. register reads start at #(NumExplicitDefs-1).
300   // That means, register R1 from the example is both read and written.
301   unsigned NumExplicitDefs = MCDesc.getNumDefs();
302   unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
303   unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
304   unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
305   if (MCDesc.hasOptionalDef())
306     TotalDefs++;
307 
308   unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
309   ID.Writes.resize(TotalDefs + NumVariadicOps);
310   // Iterate over the operands list, and skip non-register operands.
311   // The first NumExplictDefs register operands are expected to be register
312   // definitions.
313   unsigned CurrentDef = 0;
314   unsigned i = 0;
315   for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
316     const MCOperand &Op = MCI.getOperand(i);
317     if (!Op.isReg())
318       continue;
319 
320     WriteDescriptor &Write = ID.Writes[CurrentDef];
321     Write.OpIndex = i;
322     if (CurrentDef < NumWriteLatencyEntries) {
323       const MCWriteLatencyEntry &WLE =
324           *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
325       // Conservatively default to MaxLatency.
326       Write.Latency =
327           WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
328       Write.SClassOrWriteResourceID = WLE.WriteResourceID;
329     } else {
330       // Assign a default latency for this write.
331       Write.Latency = ID.MaxLatency;
332       Write.SClassOrWriteResourceID = 0;
333     }
334     Write.IsOptionalDef = false;
335     LLVM_DEBUG({
336       dbgs() << "\t\t[Def]    OpIdx=" << Write.OpIndex
337              << ", Latency=" << Write.Latency
338              << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
339     });
340     CurrentDef++;
341   }
342 
343   assert(CurrentDef == NumExplicitDefs &&
344          "Expected more register operand definitions.");
345   for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
346     unsigned Index = NumExplicitDefs + CurrentDef;
347     WriteDescriptor &Write = ID.Writes[Index];
348     Write.OpIndex = ~CurrentDef;
349     Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
350     if (Index < NumWriteLatencyEntries) {
351       const MCWriteLatencyEntry &WLE =
352           *STI.getWriteLatencyEntry(&SCDesc, Index);
353       // Conservatively default to MaxLatency.
354       Write.Latency =
355           WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
356       Write.SClassOrWriteResourceID = WLE.WriteResourceID;
357     } else {
358       // Assign a default latency for this write.
359       Write.Latency = ID.MaxLatency;
360       Write.SClassOrWriteResourceID = 0;
361     }
362 
363     Write.IsOptionalDef = false;
364     assert(Write.RegisterID != 0 && "Expected a valid phys register!");
365     LLVM_DEBUG({
366       dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
367              << ", PhysReg=" << MRI.getName(Write.RegisterID)
368              << ", Latency=" << Write.Latency
369              << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
370     });
371   }
372 
373   if (MCDesc.hasOptionalDef()) {
374     WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
375     Write.OpIndex = MCDesc.getNumOperands() - 1;
376     // Assign a default latency for this write.
377     Write.Latency = ID.MaxLatency;
378     Write.SClassOrWriteResourceID = 0;
379     Write.IsOptionalDef = true;
380     LLVM_DEBUG({
381       dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
382              << ", Latency=" << Write.Latency
383              << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
384     });
385   }
386 
387   if (!NumVariadicOps)
388     return;
389 
390   // FIXME: if an instruction opcode is flagged 'mayStore', and it has no
391   // "unmodeledSideEffects', then this logic optimistically assumes that any
392   // extra register operands in the variadic sequence is not a register
393   // definition.
394   //
395   // Otherwise, we conservatively assume that any register operand from the
396   // variadic sequence is both a register read and a register write.
397   bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
398                         !MCDesc.hasUnmodeledSideEffects();
399   CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
400   for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
401        I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
402     const MCOperand &Op = MCI.getOperand(OpIndex);
403     if (!Op.isReg())
404       continue;
405 
406     WriteDescriptor &Write = ID.Writes[CurrentDef];
407     Write.OpIndex = OpIndex;
408     // Assign a default latency for this write.
409     Write.Latency = ID.MaxLatency;
410     Write.SClassOrWriteResourceID = 0;
411     Write.IsOptionalDef = false;
412     ++CurrentDef;
413     LLVM_DEBUG({
414       dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
415              << ", Latency=" << Write.Latency
416              << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
417     });
418   }
419 
420   ID.Writes.resize(CurrentDef);
421 }
422 
423 void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
424                                  unsigned SchedClassID) {
425   const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
426   unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
427   unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
428   // Remove the optional definition.
429   if (MCDesc.hasOptionalDef())
430     --NumExplicitUses;
431   unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
432   unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
433   ID.Reads.resize(TotalUses);
434   unsigned CurrentUse = 0;
435   for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
436        ++I, ++OpIndex) {
437     const MCOperand &Op = MCI.getOperand(OpIndex);
438     if (!Op.isReg())
439       continue;
440 
441     ReadDescriptor &Read = ID.Reads[CurrentUse];
442     Read.OpIndex = OpIndex;
443     Read.UseIndex = I;
444     Read.SchedClassID = SchedClassID;
445     ++CurrentUse;
446     LLVM_DEBUG(dbgs() << "\t\t[Use]    OpIdx=" << Read.OpIndex
447                       << ", UseIndex=" << Read.UseIndex << '\n');
448   }
449 
450   // For the purpose of ReadAdvance, implicit uses come directly after explicit
451   // uses. The "UseIndex" must be updated according to that implicit layout.
452   for (unsigned I = 0; I < NumImplicitUses; ++I) {
453     ReadDescriptor &Read = ID.Reads[CurrentUse + I];
454     Read.OpIndex = ~I;
455     Read.UseIndex = NumExplicitUses + I;
456     Read.RegisterID = MCDesc.getImplicitUses()[I];
457     Read.SchedClassID = SchedClassID;
458     LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
459                       << ", UseIndex=" << Read.UseIndex << ", RegisterID="
460                       << MRI.getName(Read.RegisterID) << '\n');
461   }
462 
463   CurrentUse += NumImplicitUses;
464 
465   // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
466   // "unmodeledSideEffects", then this logic optimistically assumes that any
467   // extra register operands in the variadic sequence are not register
468   // definition.
469 
470   bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
471                         !MCDesc.hasUnmodeledSideEffects();
472   for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
473        I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
474     const MCOperand &Op = MCI.getOperand(OpIndex);
475     if (!Op.isReg())
476       continue;
477 
478     ReadDescriptor &Read = ID.Reads[CurrentUse];
479     Read.OpIndex = OpIndex;
480     Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
481     Read.SchedClassID = SchedClassID;
482     ++CurrentUse;
483     LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
484                       << ", UseIndex=" << Read.UseIndex << '\n');
485   }
486 
487   ID.Reads.resize(CurrentUse);
488 }
489 
490 Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
491                                     const MCInst &MCI) const {
492   if (ID.NumMicroOps != 0)
493     return ErrorSuccess();
494 
495   bool UsesMemory = ID.MayLoad || ID.MayStore;
496   bool UsesBuffers = !ID.Buffers.empty();
497   bool UsesResources = !ID.Resources.empty();
498   if (!UsesMemory && !UsesBuffers && !UsesResources)
499     return ErrorSuccess();
500 
501   StringRef Message;
502   if (UsesMemory) {
503     Message = "found an inconsistent instruction that decodes "
504               "into zero opcodes and that consumes load/store "
505               "unit resources.";
506   } else {
507     Message = "found an inconsistent instruction that decodes "
508               "to zero opcodes and that consumes scheduler "
509               "resources.";
510   }
511 
512   return make_error<InstructionError<MCInst>>(Message, MCI);
513 }
514 
515 Expected<const InstrDesc &>
516 InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
517   assert(STI.getSchedModel().hasInstrSchedModel() &&
518          "Itineraries are not yet supported!");
519 
520   // Obtain the instruction descriptor from the opcode.
521   unsigned short Opcode = MCI.getOpcode();
522   const MCInstrDesc &MCDesc = MCII.get(Opcode);
523   const MCSchedModel &SM = STI.getSchedModel();
524 
525   // Then obtain the scheduling class information from the instruction.
526   unsigned SchedClassID = MCDesc.getSchedClass();
527   bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
528 
529   // Try to solve variant scheduling classes.
530   if (IsVariant) {
531     unsigned CPUID = SM.getProcessorID();
532     while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
533       SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);
534 
535     if (!SchedClassID) {
536       return make_error<InstructionError<MCInst>>(
537           "unable to resolve scheduling class for write variant.", MCI);
538     }
539   }
540 
541   // Check if this instruction is supported. Otherwise, report an error.
542   const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
543   if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
544     return make_error<InstructionError<MCInst>>(
545         "found an unsupported instruction in the input assembly sequence.",
546         MCI);
547   }
548 
549   LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
550   LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
551 
552   // Create a new empty descriptor.
553   std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
554   ID->NumMicroOps = SCDesc.NumMicroOps;
555   ID->SchedClassID = SchedClassID;
556 
557   if (MCDesc.isCall() && FirstCallInst) {
558     // We don't correctly model calls.
559     WithColor::warning() << "found a call in the input assembly sequence.\n";
560     WithColor::note() << "call instructions are not correctly modeled. "
561                       << "Assume a latency of 100cy.\n";
562     FirstCallInst = false;
563   }
564 
565   if (MCDesc.isReturn() && FirstReturnInst) {
566     WithColor::warning() << "found a return instruction in the input"
567                          << " assembly sequence.\n";
568     WithColor::note() << "program counter updates are ignored.\n";
569     FirstReturnInst = false;
570   }
571 
572   ID->MayLoad = MCDesc.mayLoad();
573   ID->MayStore = MCDesc.mayStore();
574   ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
575   ID->BeginGroup = SCDesc.BeginGroup;
576   ID->EndGroup = SCDesc.EndGroup;
577 
578   initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
579   computeMaxLatency(*ID, MCDesc, SCDesc, STI);
580 
581   if (Error Err = verifyOperands(MCDesc, MCI))
582     return std::move(Err);
583 
584   populateWrites(*ID, MCI, SchedClassID);
585   populateReads(*ID, MCI, SchedClassID);
586 
587   LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
588   LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
589 
590   // Sanity check on the instruction descriptor.
591   if (Error Err = verifyInstrDesc(*ID, MCI))
592     return std::move(Err);
593 
594   // Now add the new descriptor.
595   bool IsVariadic = MCDesc.isVariadic();
596   if (!IsVariadic && !IsVariant) {
597     Descriptors[MCI.getOpcode()] = std::move(ID);
598     return *Descriptors[MCI.getOpcode()];
599   }
600 
601   VariantDescriptors[&MCI] = std::move(ID);
602   return *VariantDescriptors[&MCI];
603 }
604 
605 Expected<const InstrDesc &>
606 InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
607   if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
608     return *Descriptors[MCI.getOpcode()];
609 
610   if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
611     return *VariantDescriptors[&MCI];
612 
613   return createInstrDescImpl(MCI);
614 }
615 
616 Expected<std::unique_ptr<Instruction>>
617 InstrBuilder::createInstruction(const MCInst &MCI) {
618   Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
619   if (!DescOrErr)
620     return DescOrErr.takeError();
621   const InstrDesc &D = *DescOrErr;
622   std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);
623 
624   // Check if this is a dependency breaking instruction.
625   APInt Mask;
626 
627   bool IsZeroIdiom = false;
628   bool IsDepBreaking = false;
629   if (MCIA) {
630     unsigned ProcID = STI.getSchedModel().getProcessorID();
631     IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
632     IsDepBreaking =
633         IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
634     if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
635       NewIS->setOptimizableMove();
636   }
637 
638   // Initialize Reads first.
639   for (const ReadDescriptor &RD : D.Reads) {
640     int RegID = -1;
641     if (!RD.isImplicitRead()) {
642       // explicit read.
643       const MCOperand &Op = MCI.getOperand(RD.OpIndex);
644       // Skip non-register operands.
645       if (!Op.isReg())
646         continue;
647       RegID = Op.getReg();
648     } else {
649       // Implicit read.
650       RegID = RD.RegisterID;
651     }
652 
653     // Skip invalid register operands.
654     if (!RegID)
655       continue;
656 
657     // Okay, this is a register operand. Create a ReadState for it.
658     assert(RegID > 0 && "Invalid register ID found!");
659     NewIS->getUses().emplace_back(RD, RegID);
660     ReadState &RS = NewIS->getUses().back();
661 
662     if (IsDepBreaking) {
663       // A mask of all zeroes means: explicit input operands are not
664       // independent.
665       if (Mask.isNullValue()) {
666         if (!RD.isImplicitRead())
667           RS.setIndependentFromDef();
668       } else {
669         // Check if this register operand is independent according to `Mask`.
670         // Note that Mask may not have enough bits to describe all explicit and
671         // implicit input operands. If this register operand doesn't have a
672         // corresponding bit in Mask, then conservatively assume that it is
673         // dependent.
674         if (Mask.getBitWidth() > RD.UseIndex) {
675           // Okay. This map describe register use `RD.UseIndex`.
676           if (Mask[RD.UseIndex])
677             RS.setIndependentFromDef();
678         }
679       }
680     }
681   }
682 
683   // Early exit if there are no writes.
684   if (D.Writes.empty())
685     return std::move(NewIS);
686 
687   // Track register writes that implicitly clear the upper portion of the
688   // underlying super-registers using an APInt.
689   APInt WriteMask(D.Writes.size(), 0);
690 
691   // Now query the MCInstrAnalysis object to obtain information about which
692   // register writes implicitly clear the upper portion of a super-register.
693   if (MCIA)
694     MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);
695 
696   // Initialize writes.
697   unsigned WriteIndex = 0;
698   for (const WriteDescriptor &WD : D.Writes) {
699     unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID
700                                           : MCI.getOperand(WD.OpIndex).getReg();
701     // Check if this is a optional definition that references NoReg.
702     if (WD.IsOptionalDef && !RegID) {
703       ++WriteIndex;
704       continue;
705     }
706 
707     assert(RegID && "Expected a valid register ID!");
708     NewIS->getDefs().emplace_back(WD, RegID,
709                                   /* ClearsSuperRegs */ WriteMask[WriteIndex],
710                                   /* WritesZero */ IsZeroIdiom);
711     ++WriteIndex;
712   }
713 
714   return std::move(NewIS);
715 }
716 } // namespace mca
717 } // namespace llvm
718