1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // CodeEmitterGen uses the descriptions of instructions and their fields to 10 // construct an automated code emitter: a function that, given a MachineInstr, 11 // returns the (currently, 32-bit unsigned) value of the instruction. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "CodeGenInstruction.h" 16 #include "CodeGenTarget.h" 17 #include "SubtargetFeatureInfo.h" 18 #include "Types.h" 19 #include "VarLenCodeEmitterGen.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/StringExtras.h" 23 #include "llvm/Support/Casting.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "llvm/TableGen/Error.h" 26 #include "llvm/TableGen/Record.h" 27 #include "llvm/TableGen/TableGenBackend.h" 28 #include <cassert> 29 #include <cstdint> 30 #include <map> 31 #include <set> 32 #include <string> 33 #include <utility> 34 #include <vector> 35 36 using namespace llvm; 37 38 namespace { 39 40 class CodeEmitterGen { 41 RecordKeeper &Records; 42 43 public: 44 CodeEmitterGen(RecordKeeper &R) : Records(R) {} 45 46 void run(raw_ostream &o); 47 48 private: 49 int getVariableBit(const std::string &VarName, BitsInit *BI, int bit); 50 std::string getInstructionCase(Record *R, CodeGenTarget &Target); 51 std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 52 CodeGenTarget &Target); 53 void AddCodeToMergeInOperand(Record *R, BitsInit *BI, 54 const std::string &VarName, 55 unsigned &NumberedOp, 56 std::set<unsigned> &NamedOpIndices, 57 std::string &Case, CodeGenTarget &Target); 58 59 void emitInstructionBaseValues( 60 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 61 CodeGenTarget &Target, int HwMode = -1); 62 unsigned BitWidth; 63 bool UseAPInt; 64 }; 65 66 // If the VarBitInit at position 'bit' matches the specified variable then 67 // return the variable bit position. Otherwise return -1. 68 int CodeEmitterGen::getVariableBit(const std::string &VarName, 69 BitsInit *BI, int bit) { 70 if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) { 71 if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar())) 72 if (VI->getName() == VarName) 73 return VBI->getBitNum(); 74 } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) { 75 if (VI->getName() == VarName) 76 return 0; 77 } 78 79 return -1; 80 } 81 82 void CodeEmitterGen:: 83 AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, 84 unsigned &NumberedOp, 85 std::set<unsigned> &NamedOpIndices, 86 std::string &Case, CodeGenTarget &Target) { 87 CodeGenInstruction &CGI = Target.getInstruction(R); 88 89 // Determine if VarName actually contributes to the Inst encoding. 90 int bit = BI->getNumBits()-1; 91 92 // Scan for a bit that this contributed to. 93 for (; bit >= 0; ) { 94 if (getVariableBit(VarName, BI, bit) != -1) 95 break; 96 97 --bit; 98 } 99 100 // If we found no bits, ignore this value, otherwise emit the call to get the 101 // operand encoding. 102 if (bit < 0) return; 103 104 // If the operand matches by name, reference according to that 105 // operand number. Non-matching operands are assumed to be in 106 // order. 107 unsigned OpIdx; 108 if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) { 109 // Get the machine operand number for the indicated operand. 110 OpIdx = CGI.Operands[OpIdx].MIOperandNo; 111 assert(!CGI.Operands.isFlatOperandNotEmitted(OpIdx) && 112 "Explicitly used operand also marked as not emitted!"); 113 } else { 114 unsigned NumberOps = CGI.Operands.size(); 115 /// If this operand is not supposed to be emitted by the 116 /// generated emitter, skip it. 117 while (NumberedOp < NumberOps && 118 (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) || 119 (!NamedOpIndices.empty() && NamedOpIndices.count( 120 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) { 121 ++NumberedOp; 122 } 123 124 if (NumberedOp >= 125 CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) { 126 std::string E; 127 raw_string_ostream S(E); 128 S << "Too few operands in record " << R->getName() 129 << " (no match for variable " << VarName << "):\n"; 130 S << *R; 131 PrintFatalError(R, E); 132 } 133 134 OpIdx = NumberedOp++; 135 } 136 137 std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx); 138 std::string &EncoderMethodName = CGI.Operands[SO.first].EncoderMethodName; 139 140 if (UseAPInt) 141 Case += " op.clearAllBits();\n"; 142 143 // If the source operand has a custom encoder, use it. This will 144 // get the encoding for all of the suboperands. 145 if (!EncoderMethodName.empty()) { 146 // A custom encoder has all of the information for the 147 // sub-operands, if there are more than one, so only 148 // query the encoder once per source operand. 149 if (SO.second == 0) { 150 Case += " // op: " + VarName + "\n"; 151 if (UseAPInt) { 152 Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx); 153 Case += ", op"; 154 } else { 155 Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); 156 } 157 Case += ", Fixups, STI);\n"; 158 } 159 } else { 160 Case += " // op: " + VarName + "\n"; 161 if (UseAPInt) { 162 Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 163 Case += ", op, Fixups, STI"; 164 } else { 165 Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 166 Case += ", Fixups, STI"; 167 } 168 Case += ");\n"; 169 } 170 171 // Precalculate the number of lits this variable contributes to in the 172 // operand. If there is a single lit (consecutive range of bits) we can use a 173 // destructive sequence on APInt that reduces memory allocations. 174 int numOperandLits = 0; 175 for (int tmpBit = bit; tmpBit >= 0;) { 176 int varBit = getVariableBit(VarName, BI, tmpBit); 177 178 // If this bit isn't from a variable, skip it. 179 if (varBit == -1) { 180 --tmpBit; 181 continue; 182 } 183 184 // Figure out the consecutive range of bits covered by this operand, in 185 // order to generate better encoding code. 186 int beginVarBit = varBit; 187 int N = 1; 188 for (--tmpBit; tmpBit >= 0;) { 189 varBit = getVariableBit(VarName, BI, tmpBit); 190 if (varBit == -1 || varBit != (beginVarBit - N)) 191 break; 192 ++N; 193 --tmpBit; 194 } 195 ++numOperandLits; 196 } 197 198 for (; bit >= 0; ) { 199 int varBit = getVariableBit(VarName, BI, bit); 200 201 // If this bit isn't from a variable, skip it. 202 if (varBit == -1) { 203 --bit; 204 continue; 205 } 206 207 // Figure out the consecutive range of bits covered by this operand, in 208 // order to generate better encoding code. 209 int beginInstBit = bit; 210 int beginVarBit = varBit; 211 int N = 1; 212 for (--bit; bit >= 0;) { 213 varBit = getVariableBit(VarName, BI, bit); 214 if (varBit == -1 || varBit != (beginVarBit - N)) break; 215 ++N; 216 --bit; 217 } 218 219 std::string maskStr; 220 int opShift; 221 222 unsigned loBit = beginVarBit - N + 1; 223 unsigned hiBit = loBit + N; 224 unsigned loInstBit = beginInstBit - N + 1; 225 if (UseAPInt) { 226 std::string extractStr; 227 if (N >= 64) { 228 extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " + 229 itostr(loBit) + ")"; 230 Case += " Value.insertBits(" + extractStr + ", " + 231 itostr(loInstBit) + ");\n"; 232 } else { 233 extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) + 234 ", " + itostr(loBit) + ")"; 235 Case += " Value.insertBits(" + extractStr + ", " + 236 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n"; 237 } 238 } else { 239 uint64_t opMask = ~(uint64_t)0 >> (64 - N); 240 opShift = beginVarBit - N + 1; 241 opMask <<= opShift; 242 maskStr = "UINT64_C(" + utostr(opMask) + ")"; 243 opShift = beginInstBit - beginVarBit; 244 245 if (numOperandLits == 1) { 246 Case += " op &= " + maskStr + ";\n"; 247 if (opShift > 0) { 248 Case += " op <<= " + itostr(opShift) + ";\n"; 249 } else if (opShift < 0) { 250 Case += " op >>= " + itostr(-opShift) + ";\n"; 251 } 252 Case += " Value |= op;\n"; 253 } else { 254 if (opShift > 0) { 255 Case += " Value |= (op & " + maskStr + ") << " + 256 itostr(opShift) + ";\n"; 257 } else if (opShift < 0) { 258 Case += " Value |= (op & " + maskStr + ") >> " + 259 itostr(-opShift) + ";\n"; 260 } else { 261 Case += " Value |= (op & " + maskStr + ");\n"; 262 } 263 } 264 } 265 } 266 } 267 268 std::string CodeEmitterGen::getInstructionCase(Record *R, 269 CodeGenTarget &Target) { 270 std::string Case; 271 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 272 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 273 const CodeGenHwModes &HWM = Target.getHwModes(); 274 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 275 Case += " switch (HwMode) {\n"; 276 Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n"; 277 for (auto &KV : EBM) { 278 Case += " case " + itostr(KV.first) + ": {\n"; 279 Case += getInstructionCaseForEncoding(R, KV.second, Target); 280 Case += " break;\n"; 281 Case += " }\n"; 282 } 283 Case += " }\n"; 284 return Case; 285 } 286 } 287 return getInstructionCaseForEncoding(R, R, Target); 288 } 289 290 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 291 CodeGenTarget &Target) { 292 std::string Case; 293 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 294 unsigned NumberedOp = 0; 295 std::set<unsigned> NamedOpIndices; 296 297 // Collect the set of operand indices that might correspond to named 298 // operand, and skip these when assigning operands based on position. 299 if (Target.getInstructionSet()-> 300 getValueAsBit("noNamedPositionallyEncodedOperands")) { 301 CodeGenInstruction &CGI = Target.getInstruction(R); 302 for (const RecordVal &RV : R->getValues()) { 303 unsigned OpIdx; 304 if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx)) 305 continue; 306 307 NamedOpIndices.insert(OpIdx); 308 } 309 } 310 311 // Loop over all of the fields in the instruction, determining which are the 312 // operands to the instruction. 313 for (const RecordVal &RV : EncodingDef->getValues()) { 314 // Ignore fixed fields in the record, we're looking for values like: 315 // bits<5> RST = { ?, ?, ?, ?, ? }; 316 if (RV.isNonconcreteOK() || RV.getValue()->isComplete()) 317 continue; 318 319 AddCodeToMergeInOperand(R, BI, std::string(RV.getName()), NumberedOp, 320 NamedOpIndices, Case, Target); 321 } 322 323 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod"); 324 if (!PostEmitter.empty()) { 325 Case += " Value = "; 326 Case += PostEmitter; 327 Case += "(MI, Value"; 328 Case += ", STI"; 329 Case += ");\n"; 330 } 331 332 return Case; 333 } 334 335 static void emitInstBits(raw_ostream &OS, const APInt &Bits) { 336 for (unsigned I = 0; I < Bits.getNumWords(); ++I) 337 OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I]) 338 << ")"; 339 } 340 341 void CodeEmitterGen::emitInstructionBaseValues( 342 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 343 CodeGenTarget &Target, int HwMode) { 344 const CodeGenHwModes &HWM = Target.getHwModes(); 345 if (HwMode == -1) 346 o << " static const uint64_t InstBits[] = {\n"; 347 else 348 o << " static const uint64_t InstBits_" << HWM.getMode(HwMode).Name 349 << "[] = {\n"; 350 351 for (const CodeGenInstruction *CGI : NumberedInstructions) { 352 Record *R = CGI->TheDef; 353 354 if (R->getValueAsString("Namespace") == "TargetOpcode" || 355 R->getValueAsBit("isPseudo")) { 356 o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n"; 357 continue; 358 } 359 360 Record *EncodingDef = R; 361 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 362 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 363 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 364 if (EBM.hasMode(HwMode)) 365 EncodingDef = EBM.get(HwMode); 366 } 367 } 368 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 369 370 // Start by filling in fixed values. 371 APInt Value(BitWidth, 0); 372 for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { 373 if (BitInit *B = dyn_cast<BitInit>(BI->getBit(e - i - 1))) 374 Value |= APInt(BitWidth, (uint64_t)B->getValue()) << (e - i - 1); 375 } 376 o << " "; 377 emitInstBits(o, Value); 378 o << "," << '\t' << "// " << R->getName() << "\n"; 379 } 380 o << " UINT64_C(0)\n };\n"; 381 } 382 383 void CodeEmitterGen::run(raw_ostream &o) { 384 CodeGenTarget Target(Records); 385 std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction"); 386 387 // For little-endian instruction bit encodings, reverse the bit order 388 Target.reverseBitsForLittleEndianEncoding(); 389 390 ArrayRef<const CodeGenInstruction*> NumberedInstructions = 391 Target.getInstructionsByEnumValue(); 392 393 if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { 394 Record *R = CGI->TheDef; 395 return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst")); 396 })) { 397 emitVarLenCodeEmitter(Records, o); 398 } else { 399 const CodeGenHwModes &HWM = Target.getHwModes(); 400 // The set of HwModes used by instruction encodings. 401 std::set<unsigned> HwModes; 402 BitWidth = 0; 403 for (const CodeGenInstruction *CGI : NumberedInstructions) { 404 Record *R = CGI->TheDef; 405 if (R->getValueAsString("Namespace") == "TargetOpcode" || 406 R->getValueAsBit("isPseudo")) 407 continue; 408 409 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 410 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 411 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 412 for (auto &KV : EBM) { 413 BitsInit *BI = KV.second->getValueAsBitsInit("Inst"); 414 BitWidth = std::max(BitWidth, BI->getNumBits()); 415 HwModes.insert(KV.first); 416 } 417 continue; 418 } 419 } 420 BitsInit *BI = R->getValueAsBitsInit("Inst"); 421 BitWidth = std::max(BitWidth, BI->getNumBits()); 422 } 423 UseAPInt = BitWidth > 64; 424 425 // Emit function declaration 426 if (UseAPInt) { 427 o << "void " << Target.getName() 428 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 429 << " SmallVectorImpl<MCFixup> &Fixups,\n" 430 << " APInt &Inst,\n" 431 << " APInt &Scratch,\n" 432 << " const MCSubtargetInfo &STI) const {\n"; 433 } else { 434 o << "uint64_t " << Target.getName(); 435 o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 436 << " SmallVectorImpl<MCFixup> &Fixups,\n" 437 << " const MCSubtargetInfo &STI) const {\n"; 438 } 439 440 // Emit instruction base values 441 if (HwModes.empty()) { 442 emitInstructionBaseValues(o, NumberedInstructions, Target, -1); 443 } else { 444 for (unsigned HwMode : HwModes) 445 emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode); 446 } 447 448 if (!HwModes.empty()) { 449 o << " const uint64_t *InstBits;\n"; 450 o << " unsigned HwMode = STI.getHwMode();\n"; 451 o << " switch (HwMode) {\n"; 452 o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; 453 for (unsigned I : HwModes) { 454 o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name 455 << "; break;\n"; 456 } 457 o << " };\n"; 458 } 459 460 // Map to accumulate all the cases. 461 std::map<std::string, std::vector<std::string>> CaseMap; 462 463 // Construct all cases statement for each opcode 464 for (Record *R : Insts) { 465 if (R->getValueAsString("Namespace") == "TargetOpcode" || 466 R->getValueAsBit("isPseudo")) 467 continue; 468 std::string InstName = 469 (R->getValueAsString("Namespace") + "::" + R->getName()).str(); 470 std::string Case = getInstructionCase(R, Target); 471 472 CaseMap[Case].push_back(std::move(InstName)); 473 } 474 475 // Emit initial function code 476 if (UseAPInt) { 477 int NumWords = APInt::getNumWords(BitWidth); 478 o << " const unsigned opcode = MI.getOpcode();\n" 479 << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" 480 << " Scratch = Scratch.zext(" << BitWidth << ");\n" 481 << " Inst = APInt(" << BitWidth 482 << ", makeArrayRef(InstBits + opcode * " << NumWords << ", " << NumWords 483 << "));\n" 484 << " APInt &Value = Inst;\n" 485 << " APInt &op = Scratch;\n" 486 << " switch (opcode) {\n"; 487 } else { 488 o << " const unsigned opcode = MI.getOpcode();\n" 489 << " uint64_t Value = InstBits[opcode];\n" 490 << " uint64_t op = 0;\n" 491 << " (void)op; // suppress warning\n" 492 << " switch (opcode) {\n"; 493 } 494 495 // Emit each case statement 496 std::map<std::string, std::vector<std::string>>::iterator IE, EE; 497 for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) { 498 const std::string &Case = IE->first; 499 std::vector<std::string> &InstList = IE->second; 500 501 for (int i = 0, N = InstList.size(); i < N; i++) { 502 if (i) 503 o << "\n"; 504 o << " case " << InstList[i] << ":"; 505 } 506 o << " {\n"; 507 o << Case; 508 o << " break;\n" 509 << " }\n"; 510 } 511 512 // Default case: unhandled opcode 513 o << " default:\n" 514 << " std::string msg;\n" 515 << " raw_string_ostream Msg(msg);\n" 516 << " Msg << \"Not supported instr: \" << MI;\n" 517 << " report_fatal_error(Msg.str().c_str());\n" 518 << " }\n"; 519 if (UseAPInt) 520 o << " Inst = Value;\n"; 521 else 522 o << " return Value;\n"; 523 o << "}\n\n"; 524 } 525 } 526 527 } // end anonymous namespace 528 529 namespace llvm { 530 531 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) { 532 emitSourceFileHeader("Machine Code Emitter", OS); 533 CodeEmitterGen(RK).run(OS); 534 } 535 536 } // end namespace llvm 537