1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // CodeEmitterGen uses the descriptions of instructions and their fields to 10 // construct an automated code emitter: a function that, given a MachineInstr, 11 // returns the (currently, 32-bit unsigned) value of the instruction. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "CodeGenInstruction.h" 16 #include "CodeGenTarget.h" 17 #include "SubtargetFeatureInfo.h" 18 #include "Types.h" 19 #include "VarLenCodeEmitterGen.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/StringExtras.h" 23 #include "llvm/Support/Casting.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "llvm/TableGen/Error.h" 26 #include "llvm/TableGen/Record.h" 27 #include "llvm/TableGen/TableGenBackend.h" 28 #include <cstdint> 29 #include <map> 30 #include <set> 31 #include <string> 32 #include <utility> 33 #include <vector> 34 35 using namespace llvm; 36 37 namespace { 38 39 class CodeEmitterGen { 40 RecordKeeper &Records; 41 42 public: 43 CodeEmitterGen(RecordKeeper &R) : Records(R) {} 44 45 void run(raw_ostream &o); 46 47 private: 48 int getVariableBit(const std::string &VarName, BitsInit *BI, int bit); 49 std::string getInstructionCase(Record *R, CodeGenTarget &Target); 50 std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 51 CodeGenTarget &Target); 52 bool addCodeToMergeInOperand(Record *R, BitsInit *BI, 53 const std::string &VarName, unsigned &NumberedOp, 54 std::set<unsigned> &NamedOpIndices, 55 std::string &Case, CodeGenTarget &Target); 56 57 void emitInstructionBaseValues( 58 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 59 CodeGenTarget &Target, int HwMode = -1); 60 unsigned BitWidth; 61 bool UseAPInt; 62 }; 63 64 // If the VarBitInit at position 'bit' matches the specified variable then 65 // return the variable bit position. Otherwise return -1. 66 int CodeEmitterGen::getVariableBit(const std::string &VarName, 67 BitsInit *BI, int bit) { 68 if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) { 69 if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar())) 70 if (VI->getName() == VarName) 71 return VBI->getBitNum(); 72 } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) { 73 if (VI->getName() == VarName) 74 return 0; 75 } 76 77 return -1; 78 } 79 80 // Returns true if it succeeds, false if an error. 81 bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI, 82 const std::string &VarName, 83 unsigned &NumberedOp, 84 std::set<unsigned> &NamedOpIndices, 85 std::string &Case, 86 CodeGenTarget &Target) { 87 CodeGenInstruction &CGI = Target.getInstruction(R); 88 89 // Determine if VarName actually contributes to the Inst encoding. 90 int bit = BI->getNumBits()-1; 91 92 // Scan for a bit that this contributed to. 93 for (; bit >= 0; ) { 94 if (getVariableBit(VarName, BI, bit) != -1) 95 break; 96 97 --bit; 98 } 99 100 // If we found no bits, ignore this value, otherwise emit the call to get the 101 // operand encoding. 102 if (bit < 0) 103 return true; 104 105 // If the operand matches by name, reference according to that 106 // operand number. Non-matching operands are assumed to be in 107 // order. 108 unsigned OpIdx; 109 std::pair<unsigned, unsigned> SubOp; 110 if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) { 111 OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second; 112 } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) { 113 // Get the machine operand number for the indicated operand. 114 OpIdx = CGI.Operands[OpIdx].MIOperandNo; 115 } else { 116 // Fall back to positional lookup. By default, we now disable positional 117 // lookup (and print an error, below), but even so, we'll do the lookup to 118 // help print a helpful diagnostic message. 119 // 120 // TODO: When we remove useDeprecatedPositionallyEncodedOperands, delete all 121 // this code, just leaving a "no operand named X in record Y" error. 122 123 unsigned NumberOps = CGI.Operands.size(); 124 /// If this operand is not supposed to be emitted by the 125 /// generated emitter, skip it. 126 while (NumberedOp < NumberOps && 127 (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) || 128 (!NamedOpIndices.empty() && NamedOpIndices.count( 129 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) { 130 ++NumberedOp; 131 } 132 133 if (NumberedOp >= 134 CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) { 135 if (!Target.getInstructionSet()->getValueAsBit( 136 "useDeprecatedPositionallyEncodedOperands")) { 137 PrintError(R, Twine("No operand named ") + VarName + " in record " + 138 R->getName() + 139 " (would've given 'too few operands' error with " 140 "useDeprecatedPositionallyEncodedOperands=true)"); 141 } else { 142 PrintError(R, "Too few operands in record " + R->getName() + 143 " (no match for variable " + VarName + ")"); 144 } 145 return false; 146 } 147 148 OpIdx = NumberedOp++; 149 150 if (!Target.getInstructionSet()->getValueAsBit( 151 "useDeprecatedPositionallyEncodedOperands")) { 152 std::pair<unsigned, unsigned> SO = 153 CGI.Operands.getSubOperandNumber(OpIdx); 154 std::string OpName = CGI.Operands[SO.first].Name; 155 PrintError(R, Twine("No operand named ") + VarName + " in record " + 156 R->getName() + " (would've used positional operand #" + 157 Twine(SO.first) + " ('" + OpName + "') sub-op #" + 158 Twine(SO.second) + 159 " with useDeprecatedPositionallyEncodedOperands=true)"); 160 return false; 161 } 162 } 163 164 if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) { 165 PrintError(R, "Operand " + VarName + " used but also marked as not emitted!"); 166 return false; 167 } 168 169 std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx); 170 std::string &EncoderMethodName = 171 CGI.Operands[SO.first].EncoderMethodNames[SO.second]; 172 173 if (UseAPInt) 174 Case += " op.clearAllBits();\n"; 175 176 Case += " // op: " + VarName + "\n"; 177 178 // If the source operand has a custom encoder, use it. 179 if (!EncoderMethodName.empty()) { 180 if (UseAPInt) { 181 Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx); 182 Case += ", op"; 183 } else { 184 Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); 185 } 186 Case += ", Fixups, STI);\n"; 187 } else { 188 if (UseAPInt) { 189 Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 190 Case += ", op, Fixups, STI"; 191 } else { 192 Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 193 Case += ", Fixups, STI"; 194 } 195 Case += ");\n"; 196 } 197 198 // Precalculate the number of lits this variable contributes to in the 199 // operand. If there is a single lit (consecutive range of bits) we can use a 200 // destructive sequence on APInt that reduces memory allocations. 201 int numOperandLits = 0; 202 for (int tmpBit = bit; tmpBit >= 0;) { 203 int varBit = getVariableBit(VarName, BI, tmpBit); 204 205 // If this bit isn't from a variable, skip it. 206 if (varBit == -1) { 207 --tmpBit; 208 continue; 209 } 210 211 // Figure out the consecutive range of bits covered by this operand, in 212 // order to generate better encoding code. 213 int beginVarBit = varBit; 214 int N = 1; 215 for (--tmpBit; tmpBit >= 0;) { 216 varBit = getVariableBit(VarName, BI, tmpBit); 217 if (varBit == -1 || varBit != (beginVarBit - N)) 218 break; 219 ++N; 220 --tmpBit; 221 } 222 ++numOperandLits; 223 } 224 225 for (; bit >= 0; ) { 226 int varBit = getVariableBit(VarName, BI, bit); 227 228 // If this bit isn't from a variable, skip it. 229 if (varBit == -1) { 230 --bit; 231 continue; 232 } 233 234 // Figure out the consecutive range of bits covered by this operand, in 235 // order to generate better encoding code. 236 int beginInstBit = bit; 237 int beginVarBit = varBit; 238 int N = 1; 239 for (--bit; bit >= 0;) { 240 varBit = getVariableBit(VarName, BI, bit); 241 if (varBit == -1 || varBit != (beginVarBit - N)) break; 242 ++N; 243 --bit; 244 } 245 246 std::string maskStr; 247 int opShift; 248 249 unsigned loBit = beginVarBit - N + 1; 250 unsigned hiBit = loBit + N; 251 unsigned loInstBit = beginInstBit - N + 1; 252 if (UseAPInt) { 253 std::string extractStr; 254 if (N >= 64) { 255 extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " + 256 itostr(loBit) + ")"; 257 Case += " Value.insertBits(" + extractStr + ", " + 258 itostr(loInstBit) + ");\n"; 259 } else { 260 extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) + 261 ", " + itostr(loBit) + ")"; 262 Case += " Value.insertBits(" + extractStr + ", " + 263 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n"; 264 } 265 } else { 266 uint64_t opMask = ~(uint64_t)0 >> (64 - N); 267 opShift = beginVarBit - N + 1; 268 opMask <<= opShift; 269 maskStr = "UINT64_C(" + utostr(opMask) + ")"; 270 opShift = beginInstBit - beginVarBit; 271 272 if (numOperandLits == 1) { 273 Case += " op &= " + maskStr + ";\n"; 274 if (opShift > 0) { 275 Case += " op <<= " + itostr(opShift) + ";\n"; 276 } else if (opShift < 0) { 277 Case += " op >>= " + itostr(-opShift) + ";\n"; 278 } 279 Case += " Value |= op;\n"; 280 } else { 281 if (opShift > 0) { 282 Case += " Value |= (op & " + maskStr + ") << " + 283 itostr(opShift) + ";\n"; 284 } else if (opShift < 0) { 285 Case += " Value |= (op & " + maskStr + ") >> " + 286 itostr(-opShift) + ";\n"; 287 } else { 288 Case += " Value |= (op & " + maskStr + ");\n"; 289 } 290 } 291 } 292 } 293 return true; 294 } 295 296 std::string CodeEmitterGen::getInstructionCase(Record *R, 297 CodeGenTarget &Target) { 298 std::string Case; 299 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 300 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 301 const CodeGenHwModes &HWM = Target.getHwModes(); 302 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 303 Case += " switch (HwMode) {\n"; 304 Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n"; 305 for (auto &KV : EBM) { 306 Case += " case " + itostr(KV.first) + ": {\n"; 307 Case += getInstructionCaseForEncoding(R, KV.second, Target); 308 Case += " break;\n"; 309 Case += " }\n"; 310 } 311 Case += " }\n"; 312 return Case; 313 } 314 } 315 return getInstructionCaseForEncoding(R, R, Target); 316 } 317 318 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 319 CodeGenTarget &Target) { 320 std::string Case; 321 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 322 unsigned NumberedOp = 0; 323 std::set<unsigned> NamedOpIndices; 324 325 // Collect the set of operand indices that might correspond to named 326 // operand, and skip these when assigning operands based on position. 327 if (Target.getInstructionSet()-> 328 getValueAsBit("noNamedPositionallyEncodedOperands")) { 329 CodeGenInstruction &CGI = Target.getInstruction(R); 330 for (const RecordVal &RV : R->getValues()) { 331 unsigned OpIdx; 332 if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx)) 333 continue; 334 335 NamedOpIndices.insert(OpIdx); 336 } 337 } 338 339 // Loop over all of the fields in the instruction, determining which are the 340 // operands to the instruction. 341 bool Success = true; 342 for (const RecordVal &RV : EncodingDef->getValues()) { 343 // Ignore fixed fields in the record, we're looking for values like: 344 // bits<5> RST = { ?, ?, ?, ?, ? }; 345 if (RV.isNonconcreteOK() || RV.getValue()->isComplete()) 346 continue; 347 348 Success &= 349 addCodeToMergeInOperand(R, BI, std::string(RV.getName()), NumberedOp, 350 NamedOpIndices, Case, Target); 351 } 352 353 if (!Success) { 354 // Dump the record, so we can see what's going on... 355 std::string E; 356 raw_string_ostream S(E); 357 S << "Dumping record for previous error:\n"; 358 S << *R; 359 PrintNote(E); 360 } 361 362 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod"); 363 if (!PostEmitter.empty()) { 364 Case += " Value = "; 365 Case += PostEmitter; 366 Case += "(MI, Value"; 367 Case += ", STI"; 368 Case += ");\n"; 369 } 370 371 return Case; 372 } 373 374 static void emitInstBits(raw_ostream &OS, const APInt &Bits) { 375 for (unsigned I = 0; I < Bits.getNumWords(); ++I) 376 OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I]) 377 << ")"; 378 } 379 380 void CodeEmitterGen::emitInstructionBaseValues( 381 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 382 CodeGenTarget &Target, int HwMode) { 383 const CodeGenHwModes &HWM = Target.getHwModes(); 384 if (HwMode == -1) 385 o << " static const uint64_t InstBits[] = {\n"; 386 else 387 o << " static const uint64_t InstBits_" << HWM.getMode(HwMode).Name 388 << "[] = {\n"; 389 390 for (const CodeGenInstruction *CGI : NumberedInstructions) { 391 Record *R = CGI->TheDef; 392 393 if (R->getValueAsString("Namespace") == "TargetOpcode" || 394 R->getValueAsBit("isPseudo")) { 395 o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n"; 396 continue; 397 } 398 399 Record *EncodingDef = R; 400 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 401 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 402 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 403 if (EBM.hasMode(HwMode)) 404 EncodingDef = EBM.get(HwMode); 405 } 406 } 407 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 408 409 // Start by filling in fixed values. 410 APInt Value(BitWidth, 0); 411 for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { 412 if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue()) 413 Value.setBit(i); 414 } 415 o << " "; 416 emitInstBits(o, Value); 417 o << "," << '\t' << "// " << R->getName() << "\n"; 418 } 419 o << " UINT64_C(0)\n };\n"; 420 } 421 422 void CodeEmitterGen::run(raw_ostream &o) { 423 CodeGenTarget Target(Records); 424 std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction"); 425 426 // For little-endian instruction bit encodings, reverse the bit order 427 Target.reverseBitsForLittleEndianEncoding(); 428 429 ArrayRef<const CodeGenInstruction*> NumberedInstructions = 430 Target.getInstructionsByEnumValue(); 431 432 if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { 433 Record *R = CGI->TheDef; 434 return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst")); 435 })) { 436 emitVarLenCodeEmitter(Records, o); 437 } else { 438 const CodeGenHwModes &HWM = Target.getHwModes(); 439 // The set of HwModes used by instruction encodings. 440 std::set<unsigned> HwModes; 441 BitWidth = 0; 442 for (const CodeGenInstruction *CGI : NumberedInstructions) { 443 Record *R = CGI->TheDef; 444 if (R->getValueAsString("Namespace") == "TargetOpcode" || 445 R->getValueAsBit("isPseudo")) 446 continue; 447 448 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 449 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 450 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 451 for (auto &KV : EBM) { 452 BitsInit *BI = KV.second->getValueAsBitsInit("Inst"); 453 BitWidth = std::max(BitWidth, BI->getNumBits()); 454 HwModes.insert(KV.first); 455 } 456 continue; 457 } 458 } 459 BitsInit *BI = R->getValueAsBitsInit("Inst"); 460 BitWidth = std::max(BitWidth, BI->getNumBits()); 461 } 462 UseAPInt = BitWidth > 64; 463 464 // Emit function declaration 465 if (UseAPInt) { 466 o << "void " << Target.getName() 467 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 468 << " SmallVectorImpl<MCFixup> &Fixups,\n" 469 << " APInt &Inst,\n" 470 << " APInt &Scratch,\n" 471 << " const MCSubtargetInfo &STI) const {\n"; 472 } else { 473 o << "uint64_t " << Target.getName(); 474 o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 475 << " SmallVectorImpl<MCFixup> &Fixups,\n" 476 << " const MCSubtargetInfo &STI) const {\n"; 477 } 478 479 // Emit instruction base values 480 if (HwModes.empty()) { 481 emitInstructionBaseValues(o, NumberedInstructions, Target, -1); 482 } else { 483 for (unsigned HwMode : HwModes) 484 emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode); 485 } 486 487 if (!HwModes.empty()) { 488 o << " const uint64_t *InstBits;\n"; 489 o << " unsigned HwMode = STI.getHwMode();\n"; 490 o << " switch (HwMode) {\n"; 491 o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; 492 for (unsigned I : HwModes) { 493 o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name 494 << "; break;\n"; 495 } 496 o << " };\n"; 497 } 498 499 // Map to accumulate all the cases. 500 std::map<std::string, std::vector<std::string>> CaseMap; 501 502 // Construct all cases statement for each opcode 503 for (Record *R : Insts) { 504 if (R->getValueAsString("Namespace") == "TargetOpcode" || 505 R->getValueAsBit("isPseudo")) 506 continue; 507 std::string InstName = 508 (R->getValueAsString("Namespace") + "::" + R->getName()).str(); 509 std::string Case = getInstructionCase(R, Target); 510 511 CaseMap[Case].push_back(std::move(InstName)); 512 } 513 514 // Emit initial function code 515 if (UseAPInt) { 516 int NumWords = APInt::getNumWords(BitWidth); 517 o << " const unsigned opcode = MI.getOpcode();\n" 518 << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" 519 << " Scratch = Scratch.zext(" << BitWidth << ");\n" 520 << " Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * " 521 << NumWords << ", " << NumWords << "));\n" 522 << " APInt &Value = Inst;\n" 523 << " APInt &op = Scratch;\n" 524 << " switch (opcode) {\n"; 525 } else { 526 o << " const unsigned opcode = MI.getOpcode();\n" 527 << " uint64_t Value = InstBits[opcode];\n" 528 << " uint64_t op = 0;\n" 529 << " (void)op; // suppress warning\n" 530 << " switch (opcode) {\n"; 531 } 532 533 // Emit each case statement 534 std::map<std::string, std::vector<std::string>>::iterator IE, EE; 535 for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) { 536 const std::string &Case = IE->first; 537 std::vector<std::string> &InstList = IE->second; 538 539 for (int i = 0, N = InstList.size(); i < N; i++) { 540 if (i) 541 o << "\n"; 542 o << " case " << InstList[i] << ":"; 543 } 544 o << " {\n"; 545 o << Case; 546 o << " break;\n" 547 << " }\n"; 548 } 549 550 // Default case: unhandled opcode 551 o << " default:\n" 552 << " std::string msg;\n" 553 << " raw_string_ostream Msg(msg);\n" 554 << " Msg << \"Not supported instr: \" << MI;\n" 555 << " report_fatal_error(Msg.str().c_str());\n" 556 << " }\n"; 557 if (UseAPInt) 558 o << " Inst = Value;\n"; 559 else 560 o << " return Value;\n"; 561 o << "}\n\n"; 562 } 563 } 564 565 } // end anonymous namespace 566 567 namespace llvm { 568 569 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) { 570 emitSourceFileHeader("Machine Code Emitter", OS); 571 CodeEmitterGen(RK).run(OS); 572 } 573 574 } // end namespace llvm 575