xref: /freebsd/contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp (revision be092bcde96bdcfde9013d60e442cca023bfbd1b)
1  //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // CodeEmitterGen uses the descriptions of instructions and their fields to
10  // construct an automated code emitter: a function that, given an MCInst,
11  // yields the instruction's encoding (uint64_t, or APInt if over 64 bits).
12  //
13  //===----------------------------------------------------------------------===//
14  
15  #include "CodeGenInstruction.h"
16  #include "CodeGenTarget.h"
17  #include "SubtargetFeatureInfo.h"
18  #include "Types.h"
19  #include "VarLenCodeEmitterGen.h"
20  #include "llvm/ADT/APInt.h"
21  #include "llvm/ADT/ArrayRef.h"
22  #include "llvm/ADT/StringExtras.h"
23  #include "llvm/Support/Casting.h"
24  #include "llvm/Support/raw_ostream.h"
25  #include "llvm/TableGen/Error.h"
26  #include "llvm/TableGen/Record.h"
27  #include "llvm/TableGen/TableGenBackend.h"
28  #include <cstdint>
29  #include <map>
30  #include <set>
31  #include <string>
32  #include <utility>
33  #include <vector>
34  
35  using namespace llvm;
36  
37  namespace {
38  
39  class CodeEmitterGen {
40    RecordKeeper &Records;
41  
42  public:
43    CodeEmitterGen(RecordKeeper &R) : Records(R) {}
44  
45    void run(raw_ostream &o);
46  
47  private:
48    int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
49    std::string getInstructionCase(Record *R, CodeGenTarget &Target);
50    std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
51                                              CodeGenTarget &Target);
52    bool addCodeToMergeInOperand(Record *R, BitsInit *BI,
53                                 const std::string &VarName, unsigned &NumberedOp,
54                                 std::set<unsigned> &NamedOpIndices,
55                                 std::string &Case, CodeGenTarget &Target);
56  
57    void emitInstructionBaseValues(
58        raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
59        CodeGenTarget &Target, int HwMode = -1);
60    unsigned BitWidth;
61    bool UseAPInt;
62  };
63  
64  // If the VarBitInit at position 'bit' matches the specified variable then
65  // return the variable bit position.  Otherwise return -1.
66  int CodeEmitterGen::getVariableBit(const std::string &VarName,
67                                     BitsInit *BI, int bit) {
68    if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
69      if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
70        if (VI->getName() == VarName)
71          return VBI->getBitNum();
72    } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
73      if (VI->getName() == VarName)
74        return 0;
75    }
76  
77    return -1;
78  }
79  
80  // Returns true if it succeeds, false if an error.
81  bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
82                                               const std::string &VarName,
83                                               unsigned &NumberedOp,
84                                               std::set<unsigned> &NamedOpIndices,
85                                               std::string &Case,
86                                               CodeGenTarget &Target) {
87    CodeGenInstruction &CGI = Target.getInstruction(R);
88  
89    // Determine if VarName actually contributes to the Inst encoding.
90    int bit = BI->getNumBits()-1;
91  
92    // Scan for a bit that this contributed to.
93    for (; bit >= 0; ) {
94      if (getVariableBit(VarName, BI, bit) != -1)
95        break;
96  
97      --bit;
98    }
99  
100    // If we found no bits, ignore this value, otherwise emit the call to get the
101    // operand encoding.
102    if (bit < 0)
103      return true;
104  
105    // If the operand matches by name, reference according to that
106    // operand number. Non-matching operands are assumed to be in
107    // order.
108    unsigned OpIdx;
109    std::pair<unsigned, unsigned> SubOp;
110    if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) {
111      OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second;
112    } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
113      // Get the machine operand number for the indicated operand.
114      OpIdx = CGI.Operands[OpIdx].MIOperandNo;
115    } else {
116      // Fall back to positional lookup. By default, we now disable positional
117      // lookup (and print an error, below), but even so, we'll do the lookup to
118      // help print a helpful diagnostic message.
119      //
120      // TODO: When we remove useDeprecatedPositionallyEncodedOperands, delete all
121      // this code, just leaving a "no operand named X in record Y" error.
122  
123      unsigned NumberOps = CGI.Operands.size();
124      /// If this operand is not supposed to be emitted by the
125      /// generated emitter, skip it.
126      while (NumberedOp < NumberOps &&
127             (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
128                (!NamedOpIndices.empty() && NamedOpIndices.count(
129                  CGI.Operands.getSubOperandNumber(NumberedOp).first)))) {
130        ++NumberedOp;
131      }
132  
133      if (NumberedOp >=
134          CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) {
135        if (!Target.getInstructionSet()->getValueAsBit(
136                "useDeprecatedPositionallyEncodedOperands")) {
137          PrintError(R, Twine("No operand named ") + VarName + " in record " +
138                            R->getName() +
139                            " (would've given 'too few operands' error with "
140                            "useDeprecatedPositionallyEncodedOperands=true)");
141        } else {
142          PrintError(R, "Too few operands in record " + R->getName() +
143                            " (no match for variable " + VarName + ")");
144        }
145        return false;
146      }
147  
148      OpIdx = NumberedOp++;
149  
150      if (!Target.getInstructionSet()->getValueAsBit(
151              "useDeprecatedPositionallyEncodedOperands")) {
152        std::pair<unsigned, unsigned> SO =
153            CGI.Operands.getSubOperandNumber(OpIdx);
154        std::string OpName = CGI.Operands[SO.first].Name;
155        PrintError(R, Twine("No operand named ") + VarName + " in record " +
156                          R->getName() + " (would've used positional operand #" +
157                          Twine(SO.first) + " ('" + OpName + "') sub-op #" +
158                          Twine(SO.second) +
159                          " with useDeprecatedPositionallyEncodedOperands=true)");
160        return false;
161      }
162    }
163  
164    if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) {
165      PrintError(R, "Operand " + VarName + " used but also marked as not emitted!");
166      return false;
167    }
168  
169    std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
170    std::string &EncoderMethodName =
171        CGI.Operands[SO.first].EncoderMethodNames[SO.second];
172  
173    if (UseAPInt)
174      Case += "      op.clearAllBits();\n";
175  
176    Case += "      // op: " + VarName + "\n";
177  
178    // If the source operand has a custom encoder, use it.
179    if (!EncoderMethodName.empty()) {
180      if (UseAPInt) {
181        Case += "      " + EncoderMethodName + "(MI, " + utostr(OpIdx);
182        Case += ", op";
183      } else {
184        Case += "      op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
185      }
186      Case += ", Fixups, STI);\n";
187    } else {
188      if (UseAPInt) {
189        Case += "      getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
190        Case += ", op, Fixups, STI";
191      } else {
192        Case += "      op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
193        Case += ", Fixups, STI";
194      }
195      Case += ");\n";
196    }
197  
198    // Precalculate the number of lits this variable contributes to in the
199    // operand. If there is a single lit (consecutive range of bits) we can use a
200    // destructive sequence on APInt that reduces memory allocations.
201    int numOperandLits = 0;
202    for (int tmpBit = bit; tmpBit >= 0;) {
203      int varBit = getVariableBit(VarName, BI, tmpBit);
204  
205      // If this bit isn't from a variable, skip it.
206      if (varBit == -1) {
207        --tmpBit;
208        continue;
209      }
210  
211      // Figure out the consecutive range of bits covered by this operand, in
212      // order to generate better encoding code.
213      int beginVarBit = varBit;
214      int N = 1;
215      for (--tmpBit; tmpBit >= 0;) {
216        varBit = getVariableBit(VarName, BI, tmpBit);
217        if (varBit == -1 || varBit != (beginVarBit - N))
218          break;
219        ++N;
220        --tmpBit;
221      }
222      ++numOperandLits;
223    }
224  
225    for (; bit >= 0; ) {
226      int varBit = getVariableBit(VarName, BI, bit);
227  
228      // If this bit isn't from a variable, skip it.
229      if (varBit == -1) {
230        --bit;
231        continue;
232      }
233  
234      // Figure out the consecutive range of bits covered by this operand, in
235      // order to generate better encoding code.
236      int beginInstBit = bit;
237      int beginVarBit = varBit;
238      int N = 1;
239      for (--bit; bit >= 0;) {
240        varBit = getVariableBit(VarName, BI, bit);
241        if (varBit == -1 || varBit != (beginVarBit - N)) break;
242        ++N;
243        --bit;
244      }
245  
246      std::string maskStr;
247      int opShift;
248  
249      unsigned loBit = beginVarBit - N + 1;
250      unsigned hiBit = loBit + N;
251      unsigned loInstBit = beginInstBit - N + 1;
252      if (UseAPInt) {
253        std::string extractStr;
254        if (N >= 64) {
255          extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " +
256                       itostr(loBit) + ")";
257          Case += "      Value.insertBits(" + extractStr + ", " +
258                  itostr(loInstBit) + ");\n";
259        } else {
260          extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) +
261                       ", " + itostr(loBit) + ")";
262          Case += "      Value.insertBits(" + extractStr + ", " +
263                  itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n";
264        }
265      } else {
266        uint64_t opMask = ~(uint64_t)0 >> (64 - N);
267        opShift = beginVarBit - N + 1;
268        opMask <<= opShift;
269        maskStr = "UINT64_C(" + utostr(opMask) + ")";
270        opShift = beginInstBit - beginVarBit;
271  
272        if (numOperandLits == 1) {
273          Case += "      op &= " + maskStr + ";\n";
274          if (opShift > 0) {
275            Case += "      op <<= " + itostr(opShift) + ";\n";
276          } else if (opShift < 0) {
277            Case += "      op >>= " + itostr(-opShift) + ";\n";
278          }
279          Case += "      Value |= op;\n";
280        } else {
281          if (opShift > 0) {
282            Case += "      Value |= (op & " + maskStr + ") << " +
283                    itostr(opShift) + ";\n";
284          } else if (opShift < 0) {
285            Case += "      Value |= (op & " + maskStr + ") >> " +
286                    itostr(-opShift) + ";\n";
287          } else {
288            Case += "      Value |= (op & " + maskStr + ");\n";
289          }
290        }
291      }
292    }
293    return true;
294  }
295  
296  std::string CodeEmitterGen::getInstructionCase(Record *R,
297                                                 CodeGenTarget &Target) {
298    std::string Case;
299    if (const RecordVal *RV = R->getValue("EncodingInfos")) {
300      if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
301        const CodeGenHwModes &HWM = Target.getHwModes();
302        EncodingInfoByHwMode EBM(DI->getDef(), HWM);
303        Case += "      switch (HwMode) {\n";
304        Case += "      default: llvm_unreachable(\"Unhandled HwMode\");\n";
305        for (auto &KV : EBM) {
306          Case += "      case " + itostr(KV.first) + ": {\n";
307          Case += getInstructionCaseForEncoding(R, KV.second, Target);
308          Case += "      break;\n";
309          Case += "      }\n";
310        }
311        Case += "      }\n";
312        return Case;
313      }
314    }
315    return getInstructionCaseForEncoding(R, R, Target);
316  }
317  
318  std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
319                                                            CodeGenTarget &Target) {
320    std::string Case;
321    BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
322    unsigned NumberedOp = 0;
323    std::set<unsigned> NamedOpIndices;
324  
325    // Collect the set of operand indices that might correspond to named
326    // operand, and skip these when assigning operands based on position.
327    if (Target.getInstructionSet()->
328         getValueAsBit("noNamedPositionallyEncodedOperands")) {
329      CodeGenInstruction &CGI = Target.getInstruction(R);
330      for (const RecordVal &RV : R->getValues()) {
331        unsigned OpIdx;
332        if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx))
333          continue;
334  
335        NamedOpIndices.insert(OpIdx);
336      }
337    }
338  
339    // Loop over all of the fields in the instruction, determining which are the
340    // operands to the instruction.
341    bool Success = true;
342    for (const RecordVal &RV : EncodingDef->getValues()) {
343      // Ignore fixed fields in the record, we're looking for values like:
344      //    bits<5> RST = { ?, ?, ?, ?, ? };
345      if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
346        continue;
347  
348      Success &=
349          addCodeToMergeInOperand(R, BI, std::string(RV.getName()), NumberedOp,
350                                  NamedOpIndices, Case, Target);
351    }
352  
353    if (!Success) {
354      // Dump the record, so we can see what's going on...
355      std::string E;
356      raw_string_ostream S(E);
357      S << "Dumping record for previous error:\n";
358      S << *R;
359      PrintNote(E);
360    }
361  
362    StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
363    if (!PostEmitter.empty()) {
364      Case += "      Value = ";
365      Case += PostEmitter;
366      Case += "(MI, Value";
367      Case += ", STI";
368      Case += ");\n";
369    }
370  
371    return Case;
372  }
373  
374  static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
375    for (unsigned I = 0; I < Bits.getNumWords(); ++I)
376      OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I])
377         << ")";
378  }
379  
380  void CodeEmitterGen::emitInstructionBaseValues(
381      raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
382      CodeGenTarget &Target, int HwMode) {
383    const CodeGenHwModes &HWM = Target.getHwModes();
384    if (HwMode == -1)
385      o << "  static const uint64_t InstBits[] = {\n";
386    else
387      o << "  static const uint64_t InstBits_" << HWM.getMode(HwMode).Name
388        << "[] = {\n";
389  
390    for (const CodeGenInstruction *CGI : NumberedInstructions) {
391      Record *R = CGI->TheDef;
392  
393      if (R->getValueAsString("Namespace") == "TargetOpcode" ||
394          R->getValueAsBit("isPseudo")) {
395        o << "    "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n";
396        continue;
397      }
398  
399      Record *EncodingDef = R;
400      if (const RecordVal *RV = R->getValue("EncodingInfos")) {
401        if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
402          EncodingInfoByHwMode EBM(DI->getDef(), HWM);
403          if (EBM.hasMode(HwMode))
404            EncodingDef = EBM.get(HwMode);
405        }
406      }
407      BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
408  
409      // Start by filling in fixed values.
410      APInt Value(BitWidth, 0);
411      for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
412        if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue())
413          Value.setBit(i);
414      }
415      o << "    ";
416      emitInstBits(o, Value);
417      o << "," << '\t' << "// " << R->getName() << "\n";
418    }
419    o << "    UINT64_C(0)\n  };\n";
420  }
421  
422  void CodeEmitterGen::run(raw_ostream &o) {
423    CodeGenTarget Target(Records);
424    std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
425  
426    // For little-endian instruction bit encodings, reverse the bit order
427    Target.reverseBitsForLittleEndianEncoding();
428  
429    ArrayRef<const CodeGenInstruction*> NumberedInstructions =
430      Target.getInstructionsByEnumValue();
431  
432    if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
433          Record *R = CGI->TheDef;
434          return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst"));
435        })) {
436      emitVarLenCodeEmitter(Records, o);
437    } else {
438      const CodeGenHwModes &HWM = Target.getHwModes();
439      // The set of HwModes used by instruction encodings.
440      std::set<unsigned> HwModes;
441      BitWidth = 0;
442      for (const CodeGenInstruction *CGI : NumberedInstructions) {
443        Record *R = CGI->TheDef;
444        if (R->getValueAsString("Namespace") == "TargetOpcode" ||
445            R->getValueAsBit("isPseudo"))
446          continue;
447  
448        if (const RecordVal *RV = R->getValue("EncodingInfos")) {
449          if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
450            EncodingInfoByHwMode EBM(DI->getDef(), HWM);
451            for (auto &KV : EBM) {
452              BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
453              BitWidth = std::max(BitWidth, BI->getNumBits());
454              HwModes.insert(KV.first);
455            }
456            continue;
457          }
458        }
459        BitsInit *BI = R->getValueAsBitsInit("Inst");
460        BitWidth = std::max(BitWidth, BI->getNumBits());
461      }
462      UseAPInt = BitWidth > 64;
463  
464      // Emit function declaration
465      if (UseAPInt) {
466        o << "void " << Target.getName()
467          << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
468          << "    SmallVectorImpl<MCFixup> &Fixups,\n"
469          << "    APInt &Inst,\n"
470          << "    APInt &Scratch,\n"
471          << "    const MCSubtargetInfo &STI) const {\n";
472      } else {
473        o << "uint64_t " << Target.getName();
474        o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
475          << "    SmallVectorImpl<MCFixup> &Fixups,\n"
476          << "    const MCSubtargetInfo &STI) const {\n";
477      }
478  
479      // Emit instruction base values
480      if (HwModes.empty()) {
481        emitInstructionBaseValues(o, NumberedInstructions, Target, -1);
482      } else {
483        for (unsigned HwMode : HwModes)
484          emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode);
485      }
486  
487      if (!HwModes.empty()) {
488        o << "  const uint64_t *InstBits;\n";
489        o << "  unsigned HwMode = STI.getHwMode();\n";
490        o << "  switch (HwMode) {\n";
491        o << "  default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
492        for (unsigned I : HwModes) {
493          o << "  case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
494            << "; break;\n";
495        }
496        o << "  };\n";
497      }
498  
499      // Map to accumulate all the cases.
500      std::map<std::string, std::vector<std::string>> CaseMap;
501  
502      // Construct all cases statement for each opcode
503      for (Record *R : Insts) {
504        if (R->getValueAsString("Namespace") == "TargetOpcode" ||
505            R->getValueAsBit("isPseudo"))
506          continue;
507        std::string InstName =
508            (R->getValueAsString("Namespace") + "::" + R->getName()).str();
509        std::string Case = getInstructionCase(R, Target);
510  
511        CaseMap[Case].push_back(std::move(InstName));
512      }
513  
514      // Emit initial function code
515      if (UseAPInt) {
516        int NumWords = APInt::getNumWords(BitWidth);
517        o << "  const unsigned opcode = MI.getOpcode();\n"
518          << "  if (Scratch.getBitWidth() != " << BitWidth << ")\n"
519          << "    Scratch = Scratch.zext(" << BitWidth << ");\n"
520          << "  Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * "
521          << NumWords << ", " << NumWords << "));\n"
522          << "  APInt &Value = Inst;\n"
523          << "  APInt &op = Scratch;\n"
524          << "  switch (opcode) {\n";
525      } else {
526        o << "  const unsigned opcode = MI.getOpcode();\n"
527          << "  uint64_t Value = InstBits[opcode];\n"
528          << "  uint64_t op = 0;\n"
529          << "  (void)op;  // suppress warning\n"
530          << "  switch (opcode) {\n";
531      }
532  
533      // Emit each case statement
534      std::map<std::string, std::vector<std::string>>::iterator IE, EE;
535      for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
536        const std::string &Case = IE->first;
537        std::vector<std::string> &InstList = IE->second;
538  
539        for (int i = 0, N = InstList.size(); i < N; i++) {
540          if (i)
541            o << "\n";
542          o << "    case " << InstList[i] << ":";
543        }
544        o << " {\n";
545        o << Case;
546        o << "      break;\n"
547          << "    }\n";
548      }
549  
550      // Default case: unhandled opcode
551      o << "  default:\n"
552        << "    std::string msg;\n"
553        << "    raw_string_ostream Msg(msg);\n"
554        << "    Msg << \"Not supported instr: \" << MI;\n"
555        << "    report_fatal_error(Msg.str().c_str());\n"
556        << "  }\n";
557      if (UseAPInt)
558        o << "  Inst = Value;\n";
559      else
560        o << "  return Value;\n";
561      o << "}\n\n";
562    }
563  }
564  
565  } // end anonymous namespace
566  
567  namespace llvm {
568  
569  void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) {
570    emitSourceFileHeader("Machine Code Emitter", OS);
571    CodeEmitterGen(RK).run(OS);
572  }
573  
574  } // end namespace llvm
575