//===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// CodeEmitterGen uses the descriptions of instructions and their fields to
// construct an automated code emitter: a function called
// getBinaryCodeForInstr() that, given an MCInst, returns the value of the
// instruction - either as a uint64_t or as an APInt, depending on the
// maximum bit width of all Inst definitions.
//
// In addition, it generates another function called getOperandBitOffset()
// that, given an MCInst and an operand index, returns the minimum of indices of
// all bits that carry some portion of the respective operand. When the target's
// encodeInstruction() stores the instruction in a little-endian byte order, the
// returned value is the offset of the start of the operand in the encoded
// instruction. Other targets might need to adjust the returned value according
// to their encodeInstruction() implementation.
//
//===----------------------------------------------------------------------===//

#include "CodeGenHwModes.h"
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "InfoByHwMode.h"
#include "VarLenCodeEmitterGen.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <cstdint>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

namespace {

/// TableGen backend that emits the C++ source of a target's
/// getBinaryCodeForInstr() and getOperandBitOffset() member functions from
/// the target's instruction records.
class CodeEmitterGen {
  /// All records parsed by TableGen; the instruction definitions are looked up
  /// in here.
  RecordKeeper &Records;

public:
  CodeEmitterGen(RecordKeeper &R) : Records(R) {}

  /// Entry point of the backend: writes the generated source to \p o.
  void run(raw_ostream &o);

private:
  /// Returns the bit number of variable \p VarName stored at position \p bit
  /// of \p BI, or -1 if that position does not hold a bit of \p VarName.
  int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);

  /// Builds the two case bodies for instruction \p R. The first element of the
  /// returned pair is the body used in getBinaryCodeForInstr(), the second is
  /// the body used in getOperandBitOffset().
  std::pair<std::string, std::string>
  getInstructionCases(Record *R, CodeGenTarget &Target);

  /// Appends to \p Case and \p BitOffsetCase the code for instruction \p R
  /// using the "Inst" field of \p EncodingDef as the encoding.
  void addInstructionCasesForEncoding(Record *R, Record *EncodingDef,
                                      CodeGenTarget &Target, std::string &Case,
                                      std::string &BitOffsetCase);

  /// Appends the code that merges operand \p VarName into the encoding \p BI.
  /// Returns true on success and false after reporting an error.
  bool addCodeToMergeInOperand(Record *R, BitsInit *BI,
                               const std::string &VarName, std::string &Case,
                               std::string &BitOffsetCase,
                               CodeGenTarget &Target);

  /// Emits the table of fixed encoding bits ("InstBits", or "InstBits_<mode>"
  /// when \p HwMode is not -1), one entry per numbered instruction.
  void emitInstructionBaseValues(
      raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
      CodeGenTarget &Target, int HwMode = -1);

  /// Emits one group of `case` labels followed by the shared body for every
  /// entry of \p CaseMap (body text -> instruction names).
  void
  emitCaseMap(raw_ostream &o,
              const std::map<std::string, std::vector<std::string>> &CaseMap);

  /// Largest number of bits found in any "Inst" field; computed in run().
  unsigned BitWidth = 0u;
  /// Set in run() to `BitWidth > 64`; selects APInt-based generated code
  /// instead of uint64_t-based code.
  bool UseAPInt = false;
};

// If the VarBitInit at position 'bit' matches the specified variable then
// return the variable bit position. Otherwise return -1.
81 int CodeEmitterGen::getVariableBit(const std::string &VarName, 82 BitsInit *BI, int bit) { 83 if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) { 84 if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar())) 85 if (VI->getName() == VarName) 86 return VBI->getBitNum(); 87 } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) { 88 if (VI->getName() == VarName) 89 return 0; 90 } 91 92 return -1; 93 } 94 95 // Returns true if it succeeds, false if an error. 96 bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI, 97 const std::string &VarName, 98 std::string &Case, 99 std::string &BitOffsetCase, 100 CodeGenTarget &Target) { 101 CodeGenInstruction &CGI = Target.getInstruction(R); 102 103 // Determine if VarName actually contributes to the Inst encoding. 104 int bit = BI->getNumBits()-1; 105 106 // Scan for a bit that this contributed to. 107 for (; bit >= 0; ) { 108 if (getVariableBit(VarName, BI, bit) != -1) 109 break; 110 111 --bit; 112 } 113 114 // If we found no bits, ignore this value, otherwise emit the call to get the 115 // operand encoding. 116 if (bit < 0) 117 return true; 118 119 // If the operand matches by name, reference according to that 120 // operand number. Non-matching operands are assumed to be in 121 // order. 122 unsigned OpIdx; 123 std::pair<unsigned, unsigned> SubOp; 124 if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) { 125 OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second; 126 } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) { 127 // Get the machine operand number for the indicated operand. 
128 OpIdx = CGI.Operands[OpIdx].MIOperandNo; 129 } else { 130 PrintError(R, Twine("No operand named ") + VarName + " in record " + R->getName()); 131 return false; 132 } 133 134 if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) { 135 PrintError(R, "Operand " + VarName + " used but also marked as not emitted!"); 136 return false; 137 } 138 139 std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx); 140 std::string &EncoderMethodName = 141 CGI.Operands[SO.first].EncoderMethodNames[SO.second]; 142 143 if (UseAPInt) 144 Case += " op.clearAllBits();\n"; 145 146 Case += " // op: " + VarName + "\n"; 147 148 // If the source operand has a custom encoder, use it. 149 if (!EncoderMethodName.empty()) { 150 if (UseAPInt) { 151 Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx); 152 Case += ", op"; 153 } else { 154 Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); 155 } 156 Case += ", Fixups, STI);\n"; 157 } else { 158 if (UseAPInt) { 159 Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 160 Case += ", op, Fixups, STI"; 161 } else { 162 Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 163 Case += ", Fixups, STI"; 164 } 165 Case += ");\n"; 166 } 167 168 // Precalculate the number of lits this variable contributes to in the 169 // operand. If there is a single lit (consecutive range of bits) we can use a 170 // destructive sequence on APInt that reduces memory allocations. 171 int numOperandLits = 0; 172 for (int tmpBit = bit; tmpBit >= 0;) { 173 int varBit = getVariableBit(VarName, BI, tmpBit); 174 175 // If this bit isn't from a variable, skip it. 176 if (varBit == -1) { 177 --tmpBit; 178 continue; 179 } 180 181 // Figure out the consecutive range of bits covered by this operand, in 182 // order to generate better encoding code. 
183 int beginVarBit = varBit; 184 int N = 1; 185 for (--tmpBit; tmpBit >= 0;) { 186 varBit = getVariableBit(VarName, BI, tmpBit); 187 if (varBit == -1 || varBit != (beginVarBit - N)) 188 break; 189 ++N; 190 --tmpBit; 191 } 192 ++numOperandLits; 193 } 194 195 unsigned BitOffset = -1; 196 for (; bit >= 0; ) { 197 int varBit = getVariableBit(VarName, BI, bit); 198 199 // If this bit isn't from a variable, skip it. 200 if (varBit == -1) { 201 --bit; 202 continue; 203 } 204 205 // Figure out the consecutive range of bits covered by this operand, in 206 // order to generate better encoding code. 207 int beginInstBit = bit; 208 int beginVarBit = varBit; 209 int N = 1; 210 for (--bit; bit >= 0;) { 211 varBit = getVariableBit(VarName, BI, bit); 212 if (varBit == -1 || varBit != (beginVarBit - N)) break; 213 ++N; 214 --bit; 215 } 216 217 std::string maskStr; 218 int opShift; 219 220 unsigned loBit = beginVarBit - N + 1; 221 unsigned hiBit = loBit + N; 222 unsigned loInstBit = beginInstBit - N + 1; 223 BitOffset = loInstBit; 224 if (UseAPInt) { 225 std::string extractStr; 226 if (N >= 64) { 227 extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " + 228 itostr(loBit) + ")"; 229 Case += " Value.insertBits(" + extractStr + ", " + 230 itostr(loInstBit) + ");\n"; 231 } else { 232 extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) + 233 ", " + itostr(loBit) + ")"; 234 Case += " Value.insertBits(" + extractStr + ", " + 235 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n"; 236 } 237 } else { 238 uint64_t opMask = ~(uint64_t)0 >> (64 - N); 239 opShift = beginVarBit - N + 1; 240 opMask <<= opShift; 241 maskStr = "UINT64_C(" + utostr(opMask) + ")"; 242 opShift = beginInstBit - beginVarBit; 243 244 if (numOperandLits == 1) { 245 Case += " op &= " + maskStr + ";\n"; 246 if (opShift > 0) { 247 Case += " op <<= " + itostr(opShift) + ";\n"; 248 } else if (opShift < 0) { 249 Case += " op >>= " + itostr(-opShift) + ";\n"; 250 } 251 Case += " Value |= op;\n"; 
252 } else { 253 if (opShift > 0) { 254 Case += " Value |= (op & " + maskStr + ") << " + 255 itostr(opShift) + ";\n"; 256 } else if (opShift < 0) { 257 Case += " Value |= (op & " + maskStr + ") >> " + 258 itostr(-opShift) + ";\n"; 259 } else { 260 Case += " Value |= (op & " + maskStr + ");\n"; 261 } 262 } 263 } 264 } 265 266 if (BitOffset != (unsigned)-1) { 267 BitOffsetCase += " case " + utostr(OpIdx) + ":\n"; 268 BitOffsetCase += " // op: " + VarName + "\n"; 269 BitOffsetCase += " return " + utostr(BitOffset) + ";\n"; 270 } 271 272 return true; 273 } 274 275 std::pair<std::string, std::string> 276 CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) { 277 std::string Case, BitOffsetCase; 278 279 auto append = [&](const char *S) { 280 Case += S; 281 BitOffsetCase += S; 282 }; 283 284 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 285 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 286 const CodeGenHwModes &HWM = Target.getHwModes(); 287 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 288 append(" switch (HwMode) {\n"); 289 append(" default: llvm_unreachable(\"Unhandled HwMode\");\n"); 290 for (auto &KV : EBM) { 291 append((" case " + itostr(KV.first) + ": {\n").c_str()); 292 addInstructionCasesForEncoding(R, KV.second, Target, Case, 293 BitOffsetCase); 294 append(" break;\n"); 295 append(" }\n"); 296 } 297 append(" }\n"); 298 return std::make_pair(std::move(Case), std::move(BitOffsetCase)); 299 } 300 } 301 addInstructionCasesForEncoding(R, R, Target, Case, BitOffsetCase); 302 return std::make_pair(std::move(Case), std::move(BitOffsetCase)); 303 } 304 305 void CodeEmitterGen::addInstructionCasesForEncoding( 306 Record *R, Record *EncodingDef, CodeGenTarget &Target, std::string &Case, 307 std::string &BitOffsetCase) { 308 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 309 310 // Loop over all of the fields in the instruction, determining which are the 311 // operands to the instruction. 
312 bool Success = true; 313 size_t OrigBitOffsetCaseSize = BitOffsetCase.size(); 314 BitOffsetCase += " switch (OpNum) {\n"; 315 size_t BitOffsetCaseSizeBeforeLoop = BitOffsetCase.size(); 316 for (const RecordVal &RV : EncodingDef->getValues()) { 317 // Ignore fixed fields in the record, we're looking for values like: 318 // bits<5> RST = { ?, ?, ?, ?, ? }; 319 if (RV.isNonconcreteOK() || RV.getValue()->isComplete()) 320 continue; 321 322 Success &= addCodeToMergeInOperand(R, BI, std::string(RV.getName()), Case, 323 BitOffsetCase, Target); 324 } 325 // Avoid empty switches. 326 if (BitOffsetCase.size() == BitOffsetCaseSizeBeforeLoop) 327 BitOffsetCase.resize(OrigBitOffsetCaseSize); 328 else 329 BitOffsetCase += " }\n"; 330 331 if (!Success) { 332 // Dump the record, so we can see what's going on... 333 std::string E; 334 raw_string_ostream S(E); 335 S << "Dumping record for previous error:\n"; 336 S << *R; 337 PrintNote(E); 338 } 339 340 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod"); 341 if (!PostEmitter.empty()) { 342 Case += " Value = "; 343 Case += PostEmitter; 344 Case += "(MI, Value"; 345 Case += ", STI"; 346 Case += ");\n"; 347 } 348 } 349 350 static void emitInstBits(raw_ostream &OS, const APInt &Bits) { 351 for (unsigned I = 0; I < Bits.getNumWords(); ++I) 352 OS << ((I > 0) ? 
", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I]) 353 << ")"; 354 } 355 356 void CodeEmitterGen::emitInstructionBaseValues( 357 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 358 CodeGenTarget &Target, int HwMode) { 359 const CodeGenHwModes &HWM = Target.getHwModes(); 360 if (HwMode == -1) 361 o << " static const uint64_t InstBits[] = {\n"; 362 else 363 o << " static const uint64_t InstBits_" << HWM.getMode(HwMode).Name 364 << "[] = {\n"; 365 366 for (const CodeGenInstruction *CGI : NumberedInstructions) { 367 Record *R = CGI->TheDef; 368 369 if (R->getValueAsString("Namespace") == "TargetOpcode" || 370 R->getValueAsBit("isPseudo")) { 371 o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n"; 372 continue; 373 } 374 375 Record *EncodingDef = R; 376 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 377 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 378 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 379 if (EBM.hasMode(HwMode)) 380 EncodingDef = EBM.get(HwMode); 381 } 382 } 383 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 384 385 // Start by filling in fixed values. 
386 APInt Value(BitWidth, 0); 387 for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { 388 if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue()) 389 Value.setBit(i); 390 } 391 o << " "; 392 emitInstBits(o, Value); 393 o << "," << '\t' << "// " << R->getName() << "\n"; 394 } 395 o << " UINT64_C(0)\n };\n"; 396 } 397 398 void CodeEmitterGen::emitCaseMap( 399 raw_ostream &o, 400 const std::map<std::string, std::vector<std::string>> &CaseMap) { 401 std::map<std::string, std::vector<std::string>>::const_iterator IE, EE; 402 for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) { 403 const std::string &Case = IE->first; 404 const std::vector<std::string> &InstList = IE->second; 405 406 for (int i = 0, N = InstList.size(); i < N; i++) { 407 if (i) 408 o << "\n"; 409 o << " case " << InstList[i] << ":"; 410 } 411 o << " {\n"; 412 o << Case; 413 o << " break;\n" 414 << " }\n"; 415 } 416 } 417 418 void CodeEmitterGen::run(raw_ostream &o) { 419 emitSourceFileHeader("Machine Code Emitter", o); 420 421 CodeGenTarget Target(Records); 422 std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction"); 423 424 // For little-endian instruction bit encodings, reverse the bit order 425 Target.reverseBitsForLittleEndianEncoding(); 426 427 ArrayRef<const CodeGenInstruction*> NumberedInstructions = 428 Target.getInstructionsByEnumValue(); 429 430 if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { 431 Record *R = CGI->TheDef; 432 return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst")); 433 })) { 434 emitVarLenCodeEmitter(Records, o); 435 } else { 436 const CodeGenHwModes &HWM = Target.getHwModes(); 437 // The set of HwModes used by instruction encodings. 
438 std::set<unsigned> HwModes; 439 BitWidth = 0; 440 for (const CodeGenInstruction *CGI : NumberedInstructions) { 441 Record *R = CGI->TheDef; 442 if (R->getValueAsString("Namespace") == "TargetOpcode" || 443 R->getValueAsBit("isPseudo")) 444 continue; 445 446 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 447 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 448 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 449 for (auto &KV : EBM) { 450 BitsInit *BI = KV.second->getValueAsBitsInit("Inst"); 451 BitWidth = std::max(BitWidth, BI->getNumBits()); 452 HwModes.insert(KV.first); 453 } 454 continue; 455 } 456 } 457 BitsInit *BI = R->getValueAsBitsInit("Inst"); 458 BitWidth = std::max(BitWidth, BI->getNumBits()); 459 } 460 UseAPInt = BitWidth > 64; 461 462 // Emit function declaration 463 if (UseAPInt) { 464 o << "void " << Target.getName() 465 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 466 << " SmallVectorImpl<MCFixup> &Fixups,\n" 467 << " APInt &Inst,\n" 468 << " APInt &Scratch,\n" 469 << " const MCSubtargetInfo &STI) const {\n"; 470 } else { 471 o << "uint64_t " << Target.getName(); 472 o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 473 << " SmallVectorImpl<MCFixup> &Fixups,\n" 474 << " const MCSubtargetInfo &STI) const {\n"; 475 } 476 477 // Emit instruction base values 478 if (HwModes.empty()) { 479 emitInstructionBaseValues(o, NumberedInstructions, Target, -1); 480 } else { 481 for (unsigned HwMode : HwModes) 482 emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode); 483 } 484 485 if (!HwModes.empty()) { 486 o << " const uint64_t *InstBits;\n"; 487 o << " unsigned HwMode = STI.getHwMode();\n"; 488 o << " switch (HwMode) {\n"; 489 o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; 490 for (unsigned I : HwModes) { 491 o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name 492 << "; break;\n"; 493 } 494 o << " };\n"; 495 } 496 497 // Map to accumulate all 
the cases. 498 std::map<std::string, std::vector<std::string>> CaseMap; 499 std::map<std::string, std::vector<std::string>> BitOffsetCaseMap; 500 501 // Construct all cases statement for each opcode 502 for (Record *R : Insts) { 503 if (R->getValueAsString("Namespace") == "TargetOpcode" || 504 R->getValueAsBit("isPseudo")) 505 continue; 506 std::string InstName = 507 (R->getValueAsString("Namespace") + "::" + R->getName()).str(); 508 std::string Case, BitOffsetCase; 509 std::tie(Case, BitOffsetCase) = getInstructionCases(R, Target); 510 511 CaseMap[Case].push_back(InstName); 512 BitOffsetCaseMap[BitOffsetCase].push_back(std::move(InstName)); 513 } 514 515 // Emit initial function code 516 if (UseAPInt) { 517 int NumWords = APInt::getNumWords(BitWidth); 518 o << " const unsigned opcode = MI.getOpcode();\n" 519 << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" 520 << " Scratch = Scratch.zext(" << BitWidth << ");\n" 521 << " Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * " 522 << NumWords << ", " << NumWords << "));\n" 523 << " APInt &Value = Inst;\n" 524 << " APInt &op = Scratch;\n" 525 << " switch (opcode) {\n"; 526 } else { 527 o << " const unsigned opcode = MI.getOpcode();\n" 528 << " uint64_t Value = InstBits[opcode];\n" 529 << " uint64_t op = 0;\n" 530 << " (void)op; // suppress warning\n" 531 << " switch (opcode) {\n"; 532 } 533 534 // Emit each case statement 535 emitCaseMap(o, CaseMap); 536 537 // Default case: unhandled opcode 538 o << " default:\n" 539 << " std::string msg;\n" 540 << " raw_string_ostream Msg(msg);\n" 541 << " Msg << \"Not supported instr: \" << MI;\n" 542 << " report_fatal_error(Msg.str().c_str());\n" 543 << " }\n"; 544 if (UseAPInt) 545 o << " Inst = Value;\n"; 546 else 547 o << " return Value;\n"; 548 o << "}\n\n"; 549 550 o << "#ifdef GET_OPERAND_BIT_OFFSET\n" 551 << "#undef GET_OPERAND_BIT_OFFSET\n\n" 552 << "uint32_t " << Target.getName() 553 << "MCCodeEmitter::getOperandBitOffset(const MCInst &MI,\n" 554 << " 
unsigned OpNum,\n" 555 << " const MCSubtargetInfo &STI) const {\n" 556 << " switch (MI.getOpcode()) {\n"; 557 emitCaseMap(o, BitOffsetCaseMap); 558 o << " }\n" 559 << " std::string msg;\n" 560 << " raw_string_ostream Msg(msg);\n" 561 << " Msg << \"Not supported instr[opcode]: \" << MI << \"[\" << OpNum " 562 "<< \"]\";\n" 563 << " report_fatal_error(Msg.str().c_str());\n" 564 << "}\n\n" 565 << "#endif // GET_OPERAND_BIT_OFFSET\n\n"; 566 } 567 } 568 569 } // end anonymous namespace 570 571 static TableGen::Emitter::OptClass<CodeEmitterGen> 572 X("gen-emitter", "Generate machine code emitter"); 573