//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a register stacking pass.
///
/// This pass reorders instructions to put register uses and defs in an order
/// such that they form single-use expression trees. Registers fitting this form
/// are then marked as "stackified", meaning references to them are replaced by
/// "push" and "pop" from the value stack.
///
/// This is primarily a code size optimization, since temporary values on the
/// value stack don't need to be named.
///
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
#include "WebAssembly.h"
#include "WebAssemblyDebugValueManager.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <iterator>
using namespace llvm;

#define DEBUG_TYPE "wasm-reg-stackify"

namespace {
class WebAssemblyRegStackify final : public MachineFunctionPass {
  // True when the pass was constructed for an optimizing build
  // (CodeGenOptLevel != None). Gates the LiveIntervals/MachineDominatorTree
  // based transformations; when false only trivial, debug-friendly
  // stackification is performed.
  bool Optimize;

  StringRef getPassName() const override {
    return "WebAssembly Register Stackify";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    // LiveIntervals and the dominator tree are only needed (and only
    // computed) when optimizing; see runOnMachineFunction.
    if (Optimize) {
      AU.addRequired<LiveIntervalsWrapperPass>();
      AU.addRequired<MachineDominatorTreeWrapperPass>();
    }
    AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreservedID(LiveVariablesID);
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

public:
  static char ID; // Pass identification, replacement for typeid
  WebAssemblyRegStackify(CodeGenOptLevel OptLevel)
      : MachineFunctionPass(ID), Optimize(OptLevel != CodeGenOptLevel::None) {}
  WebAssemblyRegStackify() : WebAssemblyRegStackify(CodeGenOptLevel::Default) {}
};
} // end anonymous namespace

char WebAssemblyRegStackify::ID = 0;
INITIALIZE_PASS(WebAssemblyRegStackify, DEBUG_TYPE,
                "Reorder instructions to use the WebAssembly value stack",
                false, false)

FunctionPass *llvm::createWebAssemblyRegStackify(CodeGenOptLevel OptLevel) {
  return new WebAssemblyRegStackify(OptLevel);
}

// Decorate the given instruction with implicit operands that enforce the
// expression stack ordering constraints for an instruction which is on
// the expression stack. The opaque VALUE_STACK register acts as an
// artificial def/use chain that prevents later passes from reordering
// stackified instructions relative to each other.
static void imposeStackOrdering(MachineInstr *MI) {
  // Write the opaque VALUE_STACK register.
  if (!MI->definesRegister(WebAssembly::VALUE_STACK, /*TRI=*/nullptr))
    MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
                                             /*isDef=*/true,
                                             /*isImp=*/true));

  // Also read the opaque VALUE_STACK register.
  if (!MI->readsRegister(WebAssembly::VALUE_STACK, /*TRI=*/nullptr))
    MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
                                             /*isDef=*/false,
                                             /*isImp=*/true));
}

// Convert an IMPLICIT_DEF instruction into an instruction which defines
// a constant zero value.
// Rewrite MI in place from an IMPLICIT_DEF into the appropriate CONST_*
// instruction producing zero for MI's register class. WebAssembly has no
// uninitialized values, so an explicit zero is materialized instead.
static void convertImplicitDefToConstZero(MachineInstr *MI,
                                          MachineRegisterInfo &MRI,
                                          const TargetInstrInfo *TII,
                                          MachineFunction &MF) {
  assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);

  const auto *RegClass = MRI.getRegClass(MI->getOperand(0).getReg());
  if (RegClass == &WebAssembly::I32RegClass) {
    MI->setDesc(TII->get(WebAssembly::CONST_I32));
    MI->addOperand(MachineOperand::CreateImm(0));
  } else if (RegClass == &WebAssembly::I64RegClass) {
    MI->setDesc(TII->get(WebAssembly::CONST_I64));
    MI->addOperand(MachineOperand::CreateImm(0));
  } else if (RegClass == &WebAssembly::F32RegClass) {
    MI->setDesc(TII->get(WebAssembly::CONST_F32));
    // Floating-point constants are carried as ConstantFP operands, so build
    // an IR-level +0.0 of the right type.
    auto *Val = cast<ConstantFP>(Constant::getNullValue(
        Type::getFloatTy(MF.getFunction().getContext())));
    MI->addOperand(MachineOperand::CreateFPImm(Val));
  } else if (RegClass == &WebAssembly::F64RegClass) {
    MI->setDesc(TII->get(WebAssembly::CONST_F64));
    auto *Val = cast<ConstantFP>(Constant::getNullValue(
        Type::getDoubleTy(MF.getFunction().getContext())));
    MI->addOperand(MachineOperand::CreateFPImm(Val));
  } else if (RegClass == &WebAssembly::V128RegClass) {
    // A zero v128 is encoded as a CONST_V128_I64x2 with two zero i64 lanes.
    MI->setDesc(TII->get(WebAssembly::CONST_V128_I64x2));
    MI->addOperand(MachineOperand::CreateImm(0));
    MI->addOperand(MachineOperand::CreateImm(0));
  } else {
    llvm_unreachable("Unexpected reg class");
  }
}

// Determine whether a call to the callee referenced by
// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
// effects. Output flags are only ever set, never cleared, so callers may
// accumulate across multiple queries.
static void queryCallee(const MachineInstr &MI, bool &Read, bool &Write,
                        bool &Effects, bool &StackPointer) {
  // All calls can use the stack pointer.
  StackPointer = true;

  const MachineOperand &MO = WebAssembly::getCalleeOp(MI);
  if (MO.isGlobal()) {
    const Constant *GV = MO.getGlobal();
    // Look through non-interposable aliases to the real callee so that
    // function attributes can be inspected.
    if (const auto *GA = dyn_cast<GlobalAlias>(GV))
      if (!GA->isInterposable())
        GV = GA->getAliasee();

    if (const auto *F = dyn_cast<Function>(GV)) {
      if (!F->doesNotThrow())
        Effects = true;
      if (F->doesNotAccessMemory())
        return;
      if (F->onlyReadsMemory()) {
        Read = true;
        return;
      }
    }
  }

  // Assume the worst.
  Write = true;
  Read = true;
  Effects = true;
}

// Determine whether MI reads memory, writes memory, has side effects,
// and/or uses the stack pointer value. As with queryCallee, the output
// flags are only set, never cleared.
static void query(const MachineInstr &MI, bool &Read, bool &Write,
                  bool &Effects, bool &StackPointer) {
  assert(!MI.isTerminator());

  if (MI.isDebugInstr() || MI.isPosition())
    return;

  // Check for loads.
  if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad())
    Read = true;

  // Check for stores.
  if (MI.mayStore()) {
    Write = true;
  } else if (MI.hasOrderedMemoryRef()) {
    switch (MI.getOpcode()) {
    case WebAssembly::DIV_S_I32:
    case WebAssembly::DIV_S_I64:
    case WebAssembly::REM_S_I32:
    case WebAssembly::REM_S_I64:
    case WebAssembly::DIV_U_I32:
    case WebAssembly::DIV_U_I64:
    case WebAssembly::REM_U_I32:
    case WebAssembly::REM_U_I64:
    case WebAssembly::I32_TRUNC_S_F32:
    case WebAssembly::I64_TRUNC_S_F32:
    case WebAssembly::I32_TRUNC_S_F64:
    case WebAssembly::I64_TRUNC_S_F64:
    case WebAssembly::I32_TRUNC_U_F32:
    case WebAssembly::I64_TRUNC_U_F32:
    case WebAssembly::I32_TRUNC_U_F64:
    case WebAssembly::I64_TRUNC_U_F64:
      // These instruction have hasUnmodeledSideEffects() returning true
      // because they trap on overflow and invalid so they can't be arbitrarily
      // moved, however hasOrderedMemoryRef() interprets this plus their lack
      // of memoperands as having a potential unknown memory reference.
      break;
    default:
      // Record volatile accesses, unless it's a call, as calls are handled
      // specially below.
      if (!MI.isCall()) {
        Write = true;
        Effects = true;
      }
      break;
    }
  }

  // Check for side effects.
  if (MI.hasUnmodeledSideEffects()) {
    switch (MI.getOpcode()) {
    case WebAssembly::DIV_S_I32:
    case WebAssembly::DIV_S_I64:
    case WebAssembly::REM_S_I32:
    case WebAssembly::REM_S_I64:
    case WebAssembly::DIV_U_I32:
    case WebAssembly::DIV_U_I64:
    case WebAssembly::REM_U_I32:
    case WebAssembly::REM_U_I64:
    case WebAssembly::I32_TRUNC_S_F32:
    case WebAssembly::I64_TRUNC_S_F32:
    case WebAssembly::I32_TRUNC_S_F64:
    case WebAssembly::I64_TRUNC_S_F64:
    case WebAssembly::I32_TRUNC_U_F32:
    case WebAssembly::I64_TRUNC_U_F32:
    case WebAssembly::I32_TRUNC_U_F64:
    case WebAssembly::I64_TRUNC_U_F64:
      // These instructions have hasUnmodeledSideEffects() returning true
      // because they trap on overflow and invalid so they can't be arbitrarily
      // moved, however in the specific case of register stackifying, it is safe
      // to move them because overflow and invalid are Undefined Behavior.
      break;
    default:
      Effects = true;
      break;
    }
  }

  // Check for writes to __stack_pointer global.
  if ((MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 ||
       MI.getOpcode() == WebAssembly::GLOBAL_SET_I64) &&
      strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
    StackPointer = true;

  // Analyze calls.
  if (MI.isCall()) {
    queryCallee(MI, Read, Write, Effects, StackPointer);
  }
}

// Test whether Def is safe and profitable to rematerialize.
static bool shouldRematerialize(const MachineInstr &Def,
                                const WebAssemblyInstrInfo *TII) {
  return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def);
}

// Identify the definition for this register at this point. This is a
// generalization of MachineRegisterInfo::getUniqueVRegDef that uses
// LiveIntervals to handle complex cases.
// Identify the definition for Reg that reaches the point just before Insert.
// Falls back to LiveIntervals when the register is not in SSA form (may
// return nullptr if no defining instruction can be identified).
static MachineInstr *getVRegDef(unsigned Reg, const MachineInstr *Insert,
                                const MachineRegisterInfo &MRI,
                                const LiveIntervals *LIS) {
  // Most registers are in SSA form here so we try a quick MRI query first.
  if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
    return Def;

  // MRI doesn't know what the Def is. Try asking LIS.
  if (LIS != nullptr) {
    SlotIndex InstIndex = LIS->getInstructionIndex(*Insert);
    if (const VNInfo *ValNo = LIS->getInterval(Reg).getVNInfoBefore(InstIndex))
      return LIS->getInstructionFromIndex(ValNo->def);
  }

  return nullptr;
}

// Test whether Reg, as defined at Def, has exactly one use. This is a
// generalization of MachineRegisterInfo::hasOneNonDBGUse that uses
// LiveIntervals to handle complex cases in optimized code.
static bool hasSingleUse(unsigned Reg, MachineRegisterInfo &MRI,
                         WebAssemblyFunctionInfo &MFI, bool Optimize,
                         MachineInstr *Def, LiveIntervals *LIS) {
  if (!Optimize) {
    // Using "hasOneUse" instead of "hasOneNonDBGUse" here because we don't
    // want to stackify DBG_VALUE operands - WASM stack locations are less
    // useful and less widely supported than WASM local locations.
    if (!MRI.hasOneUse(Reg))
      return false;
    // The frame base always has an implicit DBG use as DW_AT_frame_base.
    if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg)
      return false;
    return true;
  }

  // Most registers are in SSA form here so we try a quick MRI query first.
  if (MRI.hasOneNonDBGUse(Reg))
    return true;

  if (LIS == nullptr)
    return false;

  // Non-SSA case: count only the uses reached by the value defined at Def,
  // and require that use to kill the value (so the def/use pair is isolated).
  bool HasOne = false;
  const LiveInterval &LI = LIS->getInterval(Reg);
  const VNInfo *DefVNI =
      LI.getVNInfoAt(LIS->getInstructionIndex(*Def).getRegSlot());
  assert(DefVNI);
  for (auto &I : MRI.use_nodbg_operands(Reg)) {
    const auto &Result = LI.Query(LIS->getInstructionIndex(*I.getParent()));
    if (Result.valueIn() == DefVNI) {
      if (!Result.isKill())
        return false;
      if (HasOne)
        return false;
      HasOne = true;
    }
  }
  return HasOne;
}

// Test whether it's safe to move Def to just before Insert.
// TODO: Compute memory dependencies in a way that doesn't require always
// walking the block.
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use,
                         const MachineInstr *Insert,
                         const WebAssemblyFunctionInfo &MFI,
                         const MachineRegisterInfo &MRI, bool Optimize) {
  const MachineInstr *DefI = Def->getParent();
  const MachineInstr *UseI = Use->getParent();
  assert(DefI->getParent() == Insert->getParent());
  assert(UseI->getParent() == Insert->getParent());

  // The first def of a multivalue instruction can be stackified by moving,
  // since the later defs can always be placed into locals if necessary. Later
  // defs can only be stackified if all previous defs are already stackified
  // since ExplicitLocals will not know how to place a def in a local if a
  // subsequent def is stackified. But only one def can be stackified by moving
  // the instruction, so it must be the first one.
  //
  // TODO: This could be loosened to be the first *live* def, but care would
  // have to be taken to ensure the drops of the initial dead defs can be
  // placed. This would require checking that no previous defs are used in the
  // same instruction as subsequent defs.
  if (Def != DefI->defs().begin())
    return false;

  // If any subsequent def is used prior to the current value by the same
  // instruction in which the current value is used, we cannot
  // stackify. Stackifying in this case would require that def moving below the
  // current def in the stack, which cannot be achieved, even with locals.
  // Also ensure we don't sink the def past any other prior uses.
  for (const auto &SubsequentDef : drop_begin(DefI->defs())) {
    auto I = std::next(MachineBasicBlock::const_iterator(DefI));
    auto E = std::next(MachineBasicBlock::const_iterator(UseI));
    for (; I != E; ++I) {
      for (const auto &PriorUse : I->uses()) {
        if (&PriorUse == Use)
          break;
        if (PriorUse.isReg() && SubsequentDef.getReg() == PriorUse.getReg())
          return false;
      }
    }
  }

  // If moving is a semantic nop, it is always allowed
  const MachineBasicBlock *MBB = DefI->getParent();
  auto NextI = std::next(MachineBasicBlock::const_iterator(DefI));
  for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI)
    ;
  if (NextI == Insert)
    return true;

  // When not optimizing, we only handle the trivial case above
  // to guarantee no impact to debugging and to avoid spending
  // compile time.
  if (!Optimize)
    return false;

  // 'catch' and 'catch_all' should be the first instruction of a BB and cannot
  // move.
  if (WebAssembly::isCatch(DefI->getOpcode()))
    return false;

  // Check for register dependencies.
  SmallVector<unsigned, 4> MutableRegisters;
  for (const MachineOperand &MO : DefI->operands()) {
    if (!MO.isReg() || MO.isUndef())
      continue;
    Register Reg = MO.getReg();

    // If the register is dead here and at Insert, ignore it.
    if (MO.isDead() && Insert->definesRegister(Reg, /*TRI=*/nullptr) &&
        !Insert->readsRegister(Reg, /*TRI=*/nullptr))
      continue;

    if (Reg.isPhysical()) {
      // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
      // from moving down, and we've already checked for that.
      if (Reg == WebAssembly::ARGUMENTS)
        continue;
      // If the physical register is never modified, ignore it.
      if (!MRI.isPhysRegModified(Reg))
        continue;
      // Otherwise, it's a physical register with unknown liveness.
      return false;
    }

    // If one of the operands isn't in SSA form, it has different values at
    // different times, and we need to make sure we don't move our use across
    // a different def.
    if (!MO.isDef() && !MRI.hasOneDef(Reg))
      MutableRegisters.push_back(Reg);
  }

  bool Read = false, Write = false, Effects = false, StackPointer = false;
  query(*DefI, Read, Write, Effects, StackPointer);

  // If the instruction does not access memory and has no side effects, it has
  // no additional dependencies.
  bool HasMutableRegisters = !MutableRegisters.empty();
  if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters)
    return true;

  // Scan through the intervening instructions between DefI and Insert.
  MachineBasicBlock::const_iterator D(DefI), I(Insert);
  for (--I; I != D; --I) {
    bool InterveningRead = false;
    bool InterveningWrite = false;
    bool InterveningEffects = false;
    bool InterveningStackPointer = false;
    query(*I, InterveningRead, InterveningWrite, InterveningEffects,
          InterveningStackPointer);
    if (Effects && InterveningEffects)
      return false;
    if (Read && InterveningWrite)
      return false;
    if (Write && (InterveningRead || InterveningWrite))
      return false;
    if (StackPointer && InterveningStackPointer)
      return false;

    // A def of any non-SSA register DefI reads would change its value if
    // DefI were moved past it.
    for (unsigned Reg : MutableRegisters)
      for (const MachineOperand &MO : I->operands())
        if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
          return false;
  }

  return true;
}

/// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
static bool oneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
                                     const MachineBasicBlock &MBB,
                                     const MachineRegisterInfo &MRI,
                                     const MachineDominatorTree &MDT,
                                     LiveIntervals &LIS,
                                     WebAssemblyFunctionInfo &MFI) {
  const LiveInterval &LI = LIS.getInterval(Reg);

  const MachineInstr *OneUseInst = OneUse.getParent();
  VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));

  for (const MachineOperand &Use : MRI.use_nodbg_operands(Reg)) {
    if (&Use == &OneUse)
      continue;

    const MachineInstr *UseInst = Use.getParent();
    VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));

    // Uses of a different value number are irrelevant to this def.
    if (UseVNI != OneUseVNI)
      continue;

    if (UseInst == OneUseInst) {
      // Another use in the same instruction. We need to ensure that the one
      // selected use happens "before" it.
      // NOTE(review): the operand-pointer comparison relies on operands being
      // stored contiguously in instruction order — confirm against
      // MachineInstr's operand storage.
      if (&OneUse > &Use)
        return false;
    } else {
      // Test that the use is dominated by the one selected use.
      while (!MDT.dominates(OneUseInst, UseInst)) {
        // Actually, dominating is over-conservative. Test that the use would
        // happen after the one selected use in the stack evaluation order.
        //
        // This is needed as a consequence of using implicit local.gets for
        // uses and implicit local.sets for defs.
        if (UseInst->getDesc().getNumDefs() == 0)
          return false;
        const MachineOperand &MO = UseInst->getOperand(0);
        if (!MO.isReg())
          return false;
        Register DefReg = MO.getReg();
        if (!DefReg.isVirtual() || !MFI.isVRegStackified(DefReg))
          return false;
        assert(MRI.hasOneNonDBGUse(DefReg));
        const MachineOperand &NewUse = *MRI.use_nodbg_begin(DefReg);
        const MachineInstr *NewUseInst = NewUse.getParent();
        if (NewUseInst == OneUseInst) {
          if (&OneUse > &NewUse)
            return false;
          break;
        }
        // Climb up the chain of stackified defs and re-test dominance.
        UseInst = NewUseInst;
      }
    }
  }
  return true;
}

/// Get the appropriate tee opcode for the given register class.
static unsigned getTeeOpcode(const TargetRegisterClass *RC) {
  if (RC == &WebAssembly::I32RegClass)
    return WebAssembly::TEE_I32;
  if (RC == &WebAssembly::I64RegClass)
    return WebAssembly::TEE_I64;
  if (RC == &WebAssembly::F32RegClass)
    return WebAssembly::TEE_F32;
  if (RC == &WebAssembly::F64RegClass)
    return WebAssembly::TEE_F64;
  if (RC == &WebAssembly::V128RegClass)
    return WebAssembly::TEE_V128;
  if (RC == &WebAssembly::EXTERNREFRegClass)
    return WebAssembly::TEE_EXTERNREF;
  if (RC == &WebAssembly::FUNCREFRegClass)
    return WebAssembly::TEE_FUNCREF;
  if (RC == &WebAssembly::EXNREFRegClass)
    return WebAssembly::TEE_EXNREF;
  llvm_unreachable("Unexpected register class");
}

// Shrink LI to its uses, cleaning up LI.
// Shrink LI to its uses and, if the interval has become disconnected,
// split it into separate components so LiveIntervals stays consistent.
static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
  if (LIS.shrinkToUses(&LI)) {
    SmallVector<LiveInterval *, 4> SplitLIs;
    LIS.splitSeparateComponents(LI, SplitLIs);
  }
}

/// A single-use def in the same block with no intervening memory or register
/// dependencies; move the def down and nest it with the current instruction.
/// Returns Def, which becomes the new Insert point for the tree walk.
static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
                                      MachineInstr *Def, MachineBasicBlock &MBB,
                                      MachineInstr *Insert, LiveIntervals *LIS,
                                      WebAssemblyFunctionInfo &MFI,
                                      MachineRegisterInfo &MRI) {
  LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump());

  WebAssemblyDebugValueManager DefDIs(Def);
  DefDIs.sink(Insert);
  if (LIS != nullptr)
    LIS->handleMove(*Def);

  if (MRI.hasOneDef(Reg) && MRI.hasOneNonDBGUse(Reg)) {
    // No one else is using this register for anything so we can just stackify
    // it in place.
    MFI.stackifyVReg(MRI, Reg);
  } else {
    // The register may have unrelated uses or defs; create a new register for
    // just our one def and use so that we can stackify it.
    Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
    Op.setReg(NewReg);
    DefDIs.updateReg(NewReg);

    if (LIS != nullptr) {
      // Tell LiveIntervals about the new register.
      LIS->createAndComputeVirtRegInterval(NewReg);

      // Tell LiveIntervals about the changes to the old register.
      LiveInterval &LI = LIS->getInterval(Reg);
      LI.removeSegment(LIS->getInstructionIndex(*Def).getRegSlot(),
                       LIS->getInstructionIndex(*Op.getParent()).getRegSlot(),
                       /*RemoveDeadValNo=*/true);
    }

    MFI.stackifyVReg(MRI, NewReg);
    LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
  }

  imposeStackOrdering(Def);
  return Def;
}

// Return the closest previous non-debug instruction, or nullptr if MI is
// the first non-debug instruction of its block.
static MachineInstr *getPrevNonDebugInst(MachineInstr *MI) {
  for (auto *I = MI->getPrevNode(); I; I = I->getPrevNode())
    if (!I->isDebugInstr())
      return I;
  return nullptr;
}

/// A trivially cloneable instruction; clone it and nest the new copy with the
/// current instruction. Returns the clone, which becomes the new Insert point.
static MachineInstr *
rematerializeCheapDef(unsigned Reg, MachineOperand &Op, MachineInstr &Def,
                      MachineBasicBlock::instr_iterator Insert,
                      LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
                      MachineRegisterInfo &MRI,
                      const WebAssemblyInstrInfo *TII) {
  LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
  LLVM_DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());

  WebAssemblyDebugValueManager DefDIs(&Def);

  Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
  DefDIs.cloneSink(&*Insert, NewReg);
  Op.setReg(NewReg);
  // cloneSink placed the clone (plus any cloned DBG_VALUEs) just before
  // Insert; recover the cloned instruction itself.
  MachineInstr *Clone = getPrevNonDebugInst(&*Insert);
  assert(Clone);
  LIS.InsertMachineInstrInMaps(*Clone);
  LIS.createAndComputeVirtRegInterval(NewReg);
  MFI.stackifyVReg(MRI, NewReg);
  imposeStackOrdering(Clone);

  LLVM_DEBUG(dbgs() << " - Cloned to "; Clone->dump());

  // Shrink the interval.
  bool IsDead = MRI.use_empty(Reg);
  if (!IsDead) {
    LiveInterval &LI = LIS.getInterval(Reg);
    shrinkToUses(LI, LIS);
    IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
  }

  // If that was the last use of the original, delete the original.
  if (IsDead) {
    LLVM_DEBUG(dbgs() << " - Deleting original\n");
    SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
    LIS.removePhysRegDefAt(MCRegister::from(WebAssembly::ARGUMENTS), Idx);
    LIS.removeInterval(Reg);
    LIS.RemoveMachineInstrFromMaps(Def);
    DefDIs.removeDef();
  }

  return Clone;
}

/// A multiple-use def in the same block with no intervening memory or register
/// dependencies; move the def down, nest it with the current instruction, and
/// insert a tee to satisfy the rest of the uses. As an illustration, rewrite
/// this:
///
///    Reg = INST ...        // Def
///    INST ..., Reg, ...    // Insert
///    INST ..., Reg, ...
///    INST ..., Reg, ...
///
/// to this:
///
///    DefReg = INST ...     // Def (to become the new Insert)
///    TeeReg, Reg = TEE_... DefReg
///    INST ..., TeeReg, ... // Insert
///    INST ..., Reg, ...
///    INST ..., Reg, ...
///
/// with DefReg and TeeReg stackified. This eliminates a local.get from the
/// resulting code.
static MachineInstr *moveAndTeeForMultiUse(
    unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
    MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
    MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
  LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());

  const auto *RegClass = MRI.getRegClass(Reg);
  Register TeeReg = MRI.createVirtualRegister(RegClass);
  Register DefReg = MRI.createVirtualRegister(RegClass);

  // Move Def into place.
  WebAssemblyDebugValueManager DefDIs(Def);
  DefDIs.sink(Insert);
  LIS.handleMove(*Def);

  // Create the Tee and attach the registers.
  MachineOperand &DefMO = Def->getOperand(0);
  MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
                              TII->get(getTeeOpcode(RegClass)), TeeReg)
                          .addReg(Reg, RegState::Define)
                          .addReg(DefReg, getUndefRegState(DefMO.isDead()));
  Op.setReg(TeeReg);
  DefDIs.updateReg(DefReg);
  SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
  SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();

  // Tell LiveIntervals we moved the original vreg def from Def to Tee.
  LiveInterval &LI = LIS.getInterval(Reg);
  LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
  VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
  I->start = TeeIdx;
  ValNo->def = TeeIdx;
  shrinkToUses(LI, LIS);

  // Finish stackifying the new regs.
  LIS.createAndComputeVirtRegInterval(TeeReg);
  LIS.createAndComputeVirtRegInterval(DefReg);
  MFI.stackifyVReg(MRI, DefReg);
  MFI.stackifyVReg(MRI, TeeReg);
  imposeStackOrdering(Def);
  imposeStackOrdering(Tee);

  // Even though 'TeeReg, Reg = TEE ...', has two defs, we don't need to clone
  // DBG_VALUEs for both of them, given that the latter will cancel the former
  // anyway. Here we only clone DBG_VALUEs for TeeReg, which will be converted
  // to a local index in ExplicitLocals pass.
  DefDIs.cloneSink(Insert, TeeReg, /* CloneDef */ false);

  LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
  LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
  return Def;
}

namespace {
/// A stack for walking the tree of instructions being built, visiting the
/// MachineOperands in DFS order.
class TreeWalkerState {
  using mop_iterator = MachineInstr::mop_iterator;
  using mop_reverse_iterator = std::reverse_iterator<mop_iterator>;
  using RangeTy = iterator_range<mop_reverse_iterator>;
  // Each entry is the not-yet-visited explicit uses of one instruction,
  // iterated in reverse (matching LIFO stack evaluation order).
  SmallVector<RangeTy, 4> Worklist;

public:
  explicit TreeWalkerState(MachineInstr *Insert) {
    const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
    if (!Range.empty())
      Worklist.push_back(reverse(Range));
  }

  bool done() const { return Worklist.empty(); }

  // Pop the next operand to visit; exhausted ranges are removed eagerly so
  // the worklist never holds an empty range.
  MachineOperand &pop() {
    RangeTy &Range = Worklist.back();
    MachineOperand &Op = *Range.begin();
    Range = drop_begin(Range);
    if (Range.empty())
      Worklist.pop_back();
    assert((Worklist.empty() || !Worklist.back().empty()) &&
           "Empty ranges shouldn't remain in the worklist");
    return Op;
  }

  /// Push Instr's operands onto the stack to be visited.
  void pushOperands(MachineInstr *Instr) {
    const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
    if (!Range.empty())
      Worklist.push_back(reverse(Range));
  }

  /// Some of Instr's operands are on the top of the stack; remove them and
  /// re-insert them starting from the beginning (because we've commuted them).
  void resetTopOperands(MachineInstr *Instr) {
    assert(hasRemainingOperands(Instr) &&
           "Reseting operands should only be done when the instruction has "
           "an operand still on the stack");
    Worklist.back() = reverse(Instr->explicit_uses());
  }

  /// Test whether Instr has operands remaining to be visited at the top of
  /// the stack.
  bool hasRemainingOperands(const MachineInstr *Instr) const {
    if (Worklist.empty())
      return false;
    const RangeTy &Range = Worklist.back();
    return !Range.empty() && Range.begin()->getParent() == Instr;
  }

  /// Test whether the given register is present on the stack, indicating an
  /// operand in the tree that we haven't visited yet. Moving a definition of
  /// Reg to a point in the tree after that would change its value.
  ///
  /// This is needed as a consequence of using implicit local.gets for
  /// uses and implicit local.sets for defs.
  bool isOnStack(unsigned Reg) const {
    for (const RangeTy &Range : Worklist)
      for (const MachineOperand &MO : Range)
        if (MO.isReg() && MO.getReg() == Reg)
          return true;
    return false;
  }
};

/// State to keep track of whether commuting is in flight or whether it's been
/// tried for the current instruction and didn't work.
class CommutingState {
  /// There are effectively three states: the initial state where we haven't
  /// started commuting anything and we don't know anything yet, the tentative
  /// state where we've commuted the operands of the current instruction and are
  /// revisiting it, and the declined state where we've reverted the operands
  /// back to their original order and will no longer commute it further.
  bool TentativelyCommuting = false;
  bool Declined = false;

  /// During the tentative state, these hold the operand indices of the commuted
  /// operands.
  unsigned Operand0, Operand1;

public:
  /// Stackification for an operand was not successful due to ordering
  /// constraints. If possible, and if we haven't already tried it and declined
  /// it, commute Insert's operands and prepare to revisit it.
  void maybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
                    const WebAssemblyInstrInfo *TII) {
    if (TentativelyCommuting) {
      assert(!Declined &&
             "Don't decline commuting until you've finished trying it");
      // Commuting didn't help. Revert it.
      TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
      TentativelyCommuting = false;
      Declined = true;
    } else if (!Declined && TreeWalker.hasRemainingOperands(Insert)) {
      Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
      Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
      if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
        // Tentatively commute the operands and try again.
        TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
        TreeWalker.resetTopOperands(Insert);
        TentativelyCommuting = true;
        Declined = false;
      }
    }
  }

  /// Stackification for some operand was successful. Reset to the default
  /// state.
  void reset() {
    TentativelyCommuting = false;
    Declined = false;
  }
};
} // end anonymous namespace

bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** Register Stackifying **********\n"
                       "********** Function: "
                    << MF.getName() << '\n');

  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  // MDT and LIS are only available (and only required) when optimizing; see
  // getAnalysisUsage.
  MachineDominatorTree *MDT = nullptr;
  LiveIntervals *LIS = nullptr;
  if (Optimize) {
    MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
    LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  }

  // Walk the instructions from the bottom up. Currently we don't look past
  // block boundaries, and the blocks aren't ordered so the block visitation
  // order isn't significant, but we may want to change this in the future.
  for (MachineBasicBlock &MBB : MF) {
    // Don't use a range-based for loop, because we modify the list as we're
    // iterating over it and the end iterator may change.
    for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
      // Insert is the point that stackified defs get moved up to. It starts
      // at the current instruction and is advanced to each def pulled into
      // the tree, so it always denotes the top of the tree built so far.
      MachineInstr *Insert = &*MII;
      // Don't nest anything inside an inline asm, because we don't have
      // constraints for $push inputs.
      if (Insert->isInlineAsm())
        continue;

      // Ignore debugging intrinsics.
      if (Insert->isDebugValue())
        continue;

      // Iterate through the inputs in reverse order, since we'll be pulling
      // operands off the stack in LIFO order.
      CommutingState Commuting;
      TreeWalkerState TreeWalker(Insert);
      while (!TreeWalker.done()) {
        MachineOperand &Use = TreeWalker.pop();

        // We're only interested in explicit virtual register operands.
        if (!Use.isReg())
          continue;

        Register Reg = Use.getReg();
        assert(Use.isUse() && "explicit_uses() should only iterate over uses");
        assert(!Use.isImplicit() &&
               "explicit_uses() should only iterate over explicit operands");
        // Only virtual registers are candidates for stackification.
        if (Reg.isPhysical())
          continue;

        // Identify the definition for this register at this point. This can
        // fail (return null), in which case the use is simply left alone.
        MachineInstr *DefI = getVRegDef(Reg, Insert, MRI, LIS);
        if (!DefI)
          continue;

        // Don't nest an INLINE_ASM def into anything, because we don't have
        // constraints for $pop outputs.
        if (DefI->isInlineAsm())
          continue;

        // Argument instructions represent live-in registers and not real
        // instructions.
        if (WebAssembly::isArgument(DefI->getOpcode()))
          continue;

        MachineOperand *Def =
            DefI->findRegisterDefOperand(Reg, /*TRI=*/nullptr);
        assert(Def != nullptr);

        // Decide which strategy to take. Prefer to move a single-use value
        // over cloning it, and prefer cloning over introducing a tee.
        // For moving, we require the def to be in the same block as the use;
        // this makes things simpler (LiveIntervals' handleMove function only
        // supports intra-block moves) and it's MachineSink's job to catch all
        // the sinking opportunities anyway.
        bool SameBlock = DefI->getParent() == &MBB;
        // A def may be moved only when it lives in this block, moving it past
        // the intervening instructions is safe, and its register isn't
        // already pending on the value stack (moving the def past that point
        // would change its value -- see TreeWalkerState::isOnStack).
        bool CanMove = SameBlock &&
                       isSafeToMove(Def, &Use, Insert, MFI, MRI, Optimize) &&
                       !TreeWalker.isOnStack(Reg);
        if (CanMove && hasSingleUse(Reg, MRI, MFI, Optimize, DefI, LIS)) {
          Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI);

          // If we are removing the frame base reg completely, remove the debug
          // info as well.
          // TODO: Encode this properly as a stackified value.
          if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg) {
            assert(
                Optimize &&
                "Stackifying away frame base in unoptimized code not expected");
            MFI.clearFrameBaseVreg();
          }
        } else if (Optimize && shouldRematerialize(*DefI, TII)) {
          // Clone a cheap def at the use instead of moving it; only done when
          // optimizing, since it dereferences LIS.
          Insert = rematerializeCheapDef(Reg, Use, *DefI, Insert->getIterator(),
                                         *LIS, MFI, MRI, TII);
        } else if (Optimize && CanMove &&
                   oneUseDominatesOtherUses(Reg, Use, MBB, MRI, *MDT, *LIS,
                                            MFI)) {
          // Last resort for multi-use values: move the def and insert a tee
          // so the other uses still have a named register to read.
          Insert = moveAndTeeForMultiUse(Reg, Use, DefI, MBB, Insert, *LIS, MFI,
                                         MRI, TII);
        } else {
          // We failed to stackify the operand. If the problem was ordering
          // constraints, Commuting may be able to help.
          if (!CanMove && SameBlock)
            Commuting.maybeCommute(Insert, TreeWalker, TII);
          // Proceed to the next operand.
          continue;
        }

        // Stackifying a multivalue def may unlock in-place stackification of
        // subsequent defs. TODO: Handle the case where the consecutive uses are
        // not all in the same instruction.
        // Walk the def list of the stackified instruction and the use list of
        // its consumer in lockstep, marking matching single-use registers as
        // stackified in place; stop at the first mismatch.
        auto *SubsequentDef = Insert->defs().begin();
        auto *SubsequentUse = &Use;
        while (SubsequentDef != Insert->defs().end() &&
               SubsequentUse != Use.getParent()->uses().end()) {
          if (!SubsequentDef->isReg() || !SubsequentUse->isReg())
            break;
          Register DefReg = SubsequentDef->getReg();
          Register UseReg = SubsequentUse->getReg();
          // TODO: This single-use restriction could be relaxed by using tees
          if (DefReg != UseReg ||
              !hasSingleUse(DefReg, MRI, MFI, Optimize, nullptr, nullptr))
            break;
          MFI.stackifyVReg(MRI, DefReg);
          ++SubsequentDef;
          ++SubsequentUse;
        }

        // If the instruction we just stackified is an IMPLICIT_DEF, convert it
        // to a constant 0 so that the def is explicit, and the push/pop
        // correspondence is maintained.
        if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
          convertImplicitDefToConstZero(Insert, MRI, TII, MF);

        // We stackified an operand. Add the defining instruction's operands to
        // the worklist stack now to continue to build an ever deeper tree.
        Commuting.reset();
        TreeWalker.pushOperands(Insert);
      }

      // If we stackified any operands, skip over the tree to start looking for
      // the next instruction we can build a tree on.
      if (Insert != &*MII) {
        imposeStackOrdering(&*MII);
        // Resume the bottom-up scan just above the moved tree: point MII at
        // the (new) topmost instruction so the loop header's ++MII steps past
        // the whole tree we just built.
        MII = MachineBasicBlock::iterator(Insert).getReverse();
        Changed = true;
      }
    }
  }

  // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so
  // that it never looks like a use-before-def.
  if (Changed) {
    MF.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK);
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(WebAssembly::VALUE_STACK);
  }

#ifndef NDEBUG
  // Verify that pushes and pops are performed in LIFO order.
  SmallVector<unsigned, 0> Stack;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      // Uses pop in reverse operand order, mirroring how the tree walker
      // visited them above, so iterate the explicit uses reversed.
      for (MachineOperand &MO : reverse(MI.explicit_uses())) {
        if (!MO.isReg())
          continue;
        Register Reg = MO.getReg();
        if (MFI.isVRegStackified(Reg))
          // Note: pop_back_val() has a side effect inside the assert; that is
          // fine here because this whole loop is compiled out under NDEBUG
          // together with the assert.
          assert(Stack.pop_back_val() == Reg &&
                 "Register stack pop should be paired with a push");
      }
      // Defs push their stackified results after the uses are consumed.
      for (MachineOperand &MO : MI.defs()) {
        if (!MO.isReg())
          continue;
        Register Reg = MO.getReg();
        if (MFI.isVRegStackified(Reg))
          Stack.push_back(MO.getReg());
      }
    }
    // TODO: Generalize this code to support keeping values on the stack across
    // basic block boundaries.
    assert(Stack.empty() &&
           "Register stack pushes and pops should be balanced");
  }
#endif

  return Changed;
}