xref: /freebsd/contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp (revision 43e29d03f416d7dda52112a29600a7c82ee1a91e)
1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// CodeEmitterGen uses the descriptions of instructions and their fields to
// construct an automated code emitter: a function that, given an MCInst,
// computes the binary encoding of the instruction. The encoding is returned as
// a uint64_t, or written into an APInt when an encoding is wider than 64 bits.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "CodeGenInstruction.h"
16 #include "CodeGenTarget.h"
17 #include "SubtargetFeatureInfo.h"
18 #include "Types.h"
19 #include "VarLenCodeEmitterGen.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/TableGen/Error.h"
26 #include "llvm/TableGen/Record.h"
27 #include "llvm/TableGen/TableGenBackend.h"
28 #include <cstdint>
29 #include <map>
30 #include <set>
31 #include <string>
32 #include <utility>
33 #include <vector>
34 
35 using namespace llvm;
36 
37 namespace {
38 
39 class CodeEmitterGen {
40   RecordKeeper &Records;
41 
42 public:
43   CodeEmitterGen(RecordKeeper &R) : Records(R) {}
44 
45   void run(raw_ostream &o);
46 
47 private:
48   int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
49   std::string getInstructionCase(Record *R, CodeGenTarget &Target);
50   std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
51                                             CodeGenTarget &Target);
52   bool addCodeToMergeInOperand(Record *R, BitsInit *BI,
53                                const std::string &VarName, unsigned &NumberedOp,
54                                std::set<unsigned> &NamedOpIndices,
55                                std::string &Case, CodeGenTarget &Target);
56 
57   void emitInstructionBaseValues(
58       raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
59       CodeGenTarget &Target, int HwMode = -1);
60   unsigned BitWidth;
61   bool UseAPInt;
62 };
63 
64 // If the VarBitInit at position 'bit' matches the specified variable then
65 // return the variable bit position.  Otherwise return -1.
66 int CodeEmitterGen::getVariableBit(const std::string &VarName,
67                                    BitsInit *BI, int bit) {
68   if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
69     if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
70       if (VI->getName() == VarName)
71         return VBI->getBitNum();
72   } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
73     if (VI->getName() == VarName)
74       return 0;
75   }
76 
77   return -1;
78 }
79 
80 // Returns true if it succeeds, false if an error.
81 bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
82                                              const std::string &VarName,
83                                              unsigned &NumberedOp,
84                                              std::set<unsigned> &NamedOpIndices,
85                                              std::string &Case,
86                                              CodeGenTarget &Target) {
87   CodeGenInstruction &CGI = Target.getInstruction(R);
88 
89   // Determine if VarName actually contributes to the Inst encoding.
90   int bit = BI->getNumBits()-1;
91 
92   // Scan for a bit that this contributed to.
93   for (; bit >= 0; ) {
94     if (getVariableBit(VarName, BI, bit) != -1)
95       break;
96 
97     --bit;
98   }
99 
100   // If we found no bits, ignore this value, otherwise emit the call to get the
101   // operand encoding.
102   if (bit < 0)
103     return true;
104 
105   // If the operand matches by name, reference according to that
106   // operand number. Non-matching operands are assumed to be in
107   // order.
108   unsigned OpIdx;
109   std::pair<unsigned, unsigned> SubOp;
110   if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) {
111     OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second;
112   } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
113     // Get the machine operand number for the indicated operand.
114     OpIdx = CGI.Operands[OpIdx].MIOperandNo;
115   } else {
116     // Fall back to positional lookup. By default, we now disable positional
117     // lookup (and print an error, below), but even so, we'll do the lookup to
118     // help print a helpful diagnostic message.
119     //
120     // TODO: When we remove useDeprecatedPositionallyEncodedOperands, delete all
121     // this code, just leaving a "no operand named X in record Y" error.
122 
123     unsigned NumberOps = CGI.Operands.size();
124     /// If this operand is not supposed to be emitted by the
125     /// generated emitter, skip it.
126     while (NumberedOp < NumberOps &&
127            (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
128               (!NamedOpIndices.empty() && NamedOpIndices.count(
129                 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) {
130       ++NumberedOp;
131     }
132 
133     if (NumberedOp >=
134         CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) {
135       if (!Target.getInstructionSet()->getValueAsBit(
136               "useDeprecatedPositionallyEncodedOperands")) {
137         PrintError(R, Twine("No operand named ") + VarName + " in record " +
138                           R->getName() +
139                           " (would've given 'too few operands' error with "
140                           "useDeprecatedPositionallyEncodedOperands=true)");
141       } else {
142         PrintError(R, "Too few operands in record " + R->getName() +
143                           " (no match for variable " + VarName + ")");
144       }
145       return false;
146     }
147 
148     OpIdx = NumberedOp++;
149 
150     if (!Target.getInstructionSet()->getValueAsBit(
151             "useDeprecatedPositionallyEncodedOperands")) {
152       std::pair<unsigned, unsigned> SO =
153           CGI.Operands.getSubOperandNumber(OpIdx);
154       std::string OpName = CGI.Operands[SO.first].Name;
155       PrintError(R, Twine("No operand named ") + VarName + " in record " +
156                         R->getName() + " (would've used positional operand #" +
157                         Twine(SO.first) + " ('" + OpName + "') sub-op #" +
158                         Twine(SO.second) +
159                         " with useDeprecatedPositionallyEncodedOperands=true)");
160       return false;
161     }
162   }
163 
164   if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) {
165     PrintError(R, "Operand " + VarName + " used but also marked as not emitted!");
166     return false;
167   }
168 
169   std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
170   std::string &EncoderMethodName =
171       CGI.Operands[SO.first].EncoderMethodNames[SO.second];
172 
173   if (UseAPInt)
174     Case += "      op.clearAllBits();\n";
175 
176   Case += "      // op: " + VarName + "\n";
177 
178   // If the source operand has a custom encoder, use it.
179   if (!EncoderMethodName.empty()) {
180     if (UseAPInt) {
181       Case += "      " + EncoderMethodName + "(MI, " + utostr(OpIdx);
182       Case += ", op";
183     } else {
184       Case += "      op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
185     }
186     Case += ", Fixups, STI);\n";
187   } else {
188     if (UseAPInt) {
189       Case += "      getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
190       Case += ", op, Fixups, STI";
191     } else {
192       Case += "      op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
193       Case += ", Fixups, STI";
194     }
195     Case += ");\n";
196   }
197 
198   // Precalculate the number of lits this variable contributes to in the
199   // operand. If there is a single lit (consecutive range of bits) we can use a
200   // destructive sequence on APInt that reduces memory allocations.
201   int numOperandLits = 0;
202   for (int tmpBit = bit; tmpBit >= 0;) {
203     int varBit = getVariableBit(VarName, BI, tmpBit);
204 
205     // If this bit isn't from a variable, skip it.
206     if (varBit == -1) {
207       --tmpBit;
208       continue;
209     }
210 
211     // Figure out the consecutive range of bits covered by this operand, in
212     // order to generate better encoding code.
213     int beginVarBit = varBit;
214     int N = 1;
215     for (--tmpBit; tmpBit >= 0;) {
216       varBit = getVariableBit(VarName, BI, tmpBit);
217       if (varBit == -1 || varBit != (beginVarBit - N))
218         break;
219       ++N;
220       --tmpBit;
221     }
222     ++numOperandLits;
223   }
224 
225   for (; bit >= 0; ) {
226     int varBit = getVariableBit(VarName, BI, bit);
227 
228     // If this bit isn't from a variable, skip it.
229     if (varBit == -1) {
230       --bit;
231       continue;
232     }
233 
234     // Figure out the consecutive range of bits covered by this operand, in
235     // order to generate better encoding code.
236     int beginInstBit = bit;
237     int beginVarBit = varBit;
238     int N = 1;
239     for (--bit; bit >= 0;) {
240       varBit = getVariableBit(VarName, BI, bit);
241       if (varBit == -1 || varBit != (beginVarBit - N)) break;
242       ++N;
243       --bit;
244     }
245 
246     std::string maskStr;
247     int opShift;
248 
249     unsigned loBit = beginVarBit - N + 1;
250     unsigned hiBit = loBit + N;
251     unsigned loInstBit = beginInstBit - N + 1;
252     if (UseAPInt) {
253       std::string extractStr;
254       if (N >= 64) {
255         extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " +
256                      itostr(loBit) + ")";
257         Case += "      Value.insertBits(" + extractStr + ", " +
258                 itostr(loInstBit) + ");\n";
259       } else {
260         extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) +
261                      ", " + itostr(loBit) + ")";
262         Case += "      Value.insertBits(" + extractStr + ", " +
263                 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n";
264       }
265     } else {
266       uint64_t opMask = ~(uint64_t)0 >> (64 - N);
267       opShift = beginVarBit - N + 1;
268       opMask <<= opShift;
269       maskStr = "UINT64_C(" + utostr(opMask) + ")";
270       opShift = beginInstBit - beginVarBit;
271 
272       if (numOperandLits == 1) {
273         Case += "      op &= " + maskStr + ";\n";
274         if (opShift > 0) {
275           Case += "      op <<= " + itostr(opShift) + ";\n";
276         } else if (opShift < 0) {
277           Case += "      op >>= " + itostr(-opShift) + ";\n";
278         }
279         Case += "      Value |= op;\n";
280       } else {
281         if (opShift > 0) {
282           Case += "      Value |= (op & " + maskStr + ") << " +
283                   itostr(opShift) + ";\n";
284         } else if (opShift < 0) {
285           Case += "      Value |= (op & " + maskStr + ") >> " +
286                   itostr(-opShift) + ";\n";
287         } else {
288           Case += "      Value |= (op & " + maskStr + ");\n";
289         }
290       }
291     }
292   }
293   return true;
294 }
295 
296 std::string CodeEmitterGen::getInstructionCase(Record *R,
297                                                CodeGenTarget &Target) {
298   std::string Case;
299   if (const RecordVal *RV = R->getValue("EncodingInfos")) {
300     if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
301       const CodeGenHwModes &HWM = Target.getHwModes();
302       EncodingInfoByHwMode EBM(DI->getDef(), HWM);
303       Case += "      switch (HwMode) {\n";
304       Case += "      default: llvm_unreachable(\"Unhandled HwMode\");\n";
305       for (auto &KV : EBM) {
306         Case += "      case " + itostr(KV.first) + ": {\n";
307         Case += getInstructionCaseForEncoding(R, KV.second, Target);
308         Case += "      break;\n";
309         Case += "      }\n";
310       }
311       Case += "      }\n";
312       return Case;
313     }
314   }
315   return getInstructionCaseForEncoding(R, R, Target);
316 }
317 
318 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
319                                                           CodeGenTarget &Target) {
320   std::string Case;
321   BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
322   unsigned NumberedOp = 0;
323   std::set<unsigned> NamedOpIndices;
324 
325   // Collect the set of operand indices that might correspond to named
326   // operand, and skip these when assigning operands based on position.
327   if (Target.getInstructionSet()->
328        getValueAsBit("noNamedPositionallyEncodedOperands")) {
329     CodeGenInstruction &CGI = Target.getInstruction(R);
330     for (const RecordVal &RV : R->getValues()) {
331       unsigned OpIdx;
332       if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx))
333         continue;
334 
335       NamedOpIndices.insert(OpIdx);
336     }
337   }
338 
339   // Loop over all of the fields in the instruction, determining which are the
340   // operands to the instruction.
341   bool Success = true;
342   for (const RecordVal &RV : EncodingDef->getValues()) {
343     // Ignore fixed fields in the record, we're looking for values like:
344     //    bits<5> RST = { ?, ?, ?, ?, ? };
345     if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
346       continue;
347 
348     Success &=
349         addCodeToMergeInOperand(R, BI, std::string(RV.getName()), NumberedOp,
350                                 NamedOpIndices, Case, Target);
351   }
352 
353   if (!Success) {
354     // Dump the record, so we can see what's going on...
355     std::string E;
356     raw_string_ostream S(E);
357     S << "Dumping record for previous error:\n";
358     S << *R;
359     PrintNote(E);
360   }
361 
362   StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
363   if (!PostEmitter.empty()) {
364     Case += "      Value = ";
365     Case += PostEmitter;
366     Case += "(MI, Value";
367     Case += ", STI";
368     Case += ");\n";
369   }
370 
371   return Case;
372 }
373 
374 static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
375   for (unsigned I = 0; I < Bits.getNumWords(); ++I)
376     OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I])
377        << ")";
378 }
379 
380 void CodeEmitterGen::emitInstructionBaseValues(
381     raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
382     CodeGenTarget &Target, int HwMode) {
383   const CodeGenHwModes &HWM = Target.getHwModes();
384   if (HwMode == -1)
385     o << "  static const uint64_t InstBits[] = {\n";
386   else
387     o << "  static const uint64_t InstBits_" << HWM.getMode(HwMode).Name
388       << "[] = {\n";
389 
390   for (const CodeGenInstruction *CGI : NumberedInstructions) {
391     Record *R = CGI->TheDef;
392 
393     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
394         R->getValueAsBit("isPseudo")) {
395       o << "    "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n";
396       continue;
397     }
398 
399     Record *EncodingDef = R;
400     if (const RecordVal *RV = R->getValue("EncodingInfos")) {
401       if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
402         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
403         if (EBM.hasMode(HwMode))
404           EncodingDef = EBM.get(HwMode);
405       }
406     }
407     BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
408 
409     // Start by filling in fixed values.
410     APInt Value(BitWidth, 0);
411     for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
412       if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue())
413         Value.setBit(i);
414     }
415     o << "    ";
416     emitInstBits(o, Value);
417     o << "," << '\t' << "// " << R->getName() << "\n";
418   }
419   o << "    UINT64_C(0)\n  };\n";
420 }
421 
422 void CodeEmitterGen::run(raw_ostream &o) {
423   CodeGenTarget Target(Records);
424   std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
425 
426   // For little-endian instruction bit encodings, reverse the bit order
427   Target.reverseBitsForLittleEndianEncoding();
428 
429   ArrayRef<const CodeGenInstruction*> NumberedInstructions =
430     Target.getInstructionsByEnumValue();
431 
432   if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
433         Record *R = CGI->TheDef;
434         return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst"));
435       })) {
436     emitVarLenCodeEmitter(Records, o);
437   } else {
438     const CodeGenHwModes &HWM = Target.getHwModes();
439     // The set of HwModes used by instruction encodings.
440     std::set<unsigned> HwModes;
441     BitWidth = 0;
442     for (const CodeGenInstruction *CGI : NumberedInstructions) {
443       Record *R = CGI->TheDef;
444       if (R->getValueAsString("Namespace") == "TargetOpcode" ||
445           R->getValueAsBit("isPseudo"))
446         continue;
447 
448       if (const RecordVal *RV = R->getValue("EncodingInfos")) {
449         if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
450           EncodingInfoByHwMode EBM(DI->getDef(), HWM);
451           for (auto &KV : EBM) {
452             BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
453             BitWidth = std::max(BitWidth, BI->getNumBits());
454             HwModes.insert(KV.first);
455           }
456           continue;
457         }
458       }
459       BitsInit *BI = R->getValueAsBitsInit("Inst");
460       BitWidth = std::max(BitWidth, BI->getNumBits());
461     }
462     UseAPInt = BitWidth > 64;
463 
464     // Emit function declaration
465     if (UseAPInt) {
466       o << "void " << Target.getName()
467         << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
468         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
469         << "    APInt &Inst,\n"
470         << "    APInt &Scratch,\n"
471         << "    const MCSubtargetInfo &STI) const {\n";
472     } else {
473       o << "uint64_t " << Target.getName();
474       o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
475         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
476         << "    const MCSubtargetInfo &STI) const {\n";
477     }
478 
479     // Emit instruction base values
480     if (HwModes.empty()) {
481       emitInstructionBaseValues(o, NumberedInstructions, Target, -1);
482     } else {
483       for (unsigned HwMode : HwModes)
484         emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode);
485     }
486 
487     if (!HwModes.empty()) {
488       o << "  const uint64_t *InstBits;\n";
489       o << "  unsigned HwMode = STI.getHwMode();\n";
490       o << "  switch (HwMode) {\n";
491       o << "  default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
492       for (unsigned I : HwModes) {
493         o << "  case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
494           << "; break;\n";
495       }
496       o << "  };\n";
497     }
498 
499     // Map to accumulate all the cases.
500     std::map<std::string, std::vector<std::string>> CaseMap;
501 
502     // Construct all cases statement for each opcode
503     for (Record *R : Insts) {
504       if (R->getValueAsString("Namespace") == "TargetOpcode" ||
505           R->getValueAsBit("isPseudo"))
506         continue;
507       std::string InstName =
508           (R->getValueAsString("Namespace") + "::" + R->getName()).str();
509       std::string Case = getInstructionCase(R, Target);
510 
511       CaseMap[Case].push_back(std::move(InstName));
512     }
513 
514     // Emit initial function code
515     if (UseAPInt) {
516       int NumWords = APInt::getNumWords(BitWidth);
517       o << "  const unsigned opcode = MI.getOpcode();\n"
518         << "  if (Scratch.getBitWidth() != " << BitWidth << ")\n"
519         << "    Scratch = Scratch.zext(" << BitWidth << ");\n"
520         << "  Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * "
521         << NumWords << ", " << NumWords << "));\n"
522         << "  APInt &Value = Inst;\n"
523         << "  APInt &op = Scratch;\n"
524         << "  switch (opcode) {\n";
525     } else {
526       o << "  const unsigned opcode = MI.getOpcode();\n"
527         << "  uint64_t Value = InstBits[opcode];\n"
528         << "  uint64_t op = 0;\n"
529         << "  (void)op;  // suppress warning\n"
530         << "  switch (opcode) {\n";
531     }
532 
533     // Emit each case statement
534     std::map<std::string, std::vector<std::string>>::iterator IE, EE;
535     for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
536       const std::string &Case = IE->first;
537       std::vector<std::string> &InstList = IE->second;
538 
539       for (int i = 0, N = InstList.size(); i < N; i++) {
540         if (i)
541           o << "\n";
542         o << "    case " << InstList[i] << ":";
543       }
544       o << " {\n";
545       o << Case;
546       o << "      break;\n"
547         << "    }\n";
548     }
549 
550     // Default case: unhandled opcode
551     o << "  default:\n"
552       << "    std::string msg;\n"
553       << "    raw_string_ostream Msg(msg);\n"
554       << "    Msg << \"Not supported instr: \" << MI;\n"
555       << "    report_fatal_error(Msg.str().c_str());\n"
556       << "  }\n";
557     if (UseAPInt)
558       o << "  Inst = Value;\n";
559     else
560       o << "  return Value;\n";
561     o << "}\n\n";
562   }
563 }
564 
565 } // end anonymous namespace
566 
567 namespace llvm {
568 
569 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) {
570   emitSourceFileHeader("Machine Code Emitter", OS);
571   CodeEmitterGen(RK).run(OS);
572 }
573 
574 } // end namespace llvm
575