//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to preconfig the shape of physical tile registers.
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks each instruction of a basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded
/// before their users. It also checks the dependency of the shape to ensure
/// the shape is defined before ldtilecfg.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "fastpretileconfig"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");

namespace {

class X86FastPreTileConfig : public MachineFunctionPass {
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  int CfgSS = -1;
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for every tile virtual register for which it was
  /// determined that it is alive across blocks.
  BitVector MayLiveAcrossBlocks;

  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configuration.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};

} // end anonymous namespace

char X86FastPreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)

/// Return true if \p A dominates \p B within the single basic block \p MBB,
/// i.e. \p A appears before \p B in the instruction list (or \p B is the end
/// iterator).
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}

/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
  // Find the location Reg would belong...
  int SS = StackSlotForVirtReg[VirtReg];
  // Already has space allocated?
  if (SS != -1)
    return SS;

  // Allocate a new stack object for this spill location...
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  // Assign the slot.
  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}

/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg lives out of the current MBB, it must live out of the current
/// config.
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload the
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
        return true;
      }
    }
  }

  return false;
}

void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}

/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
                                 Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // We don't need shape information for the tile store, because it is
  // adjacent to the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
                           Register());
  ++NumStores;

  // TODO: update DBG_VALUEs
}

/// Insert reload instruction for \p OrigReg before \p UseMI.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold a copy into a tileload.
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for the reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase the copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}

static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  MachineOperand &MO = MI.getOperand(0);

  if (MO.isReg()) {
    Register Reg = MO.getReg();
    // FIXME: it may be used after greedy RA and the physical
    // register is not rewritten yet.
    if (Reg.isVirtual() &&
        MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
      return true;
    if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
      return true;
  }

  return false;
}

static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // The def should not be a PHI node, because we walk the MBB in reverse post
  // order.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}

// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
// s = phi [s0, bb0] [s1, bb1]
// t = tileload row, col, s
// The new instruction is inserted at the end of the phi nodes. The order
// of the original phi nodes is not preserved.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create an instruction to get the stack slot address of each incoming
  //    block.
  // 2. Create a PHI node for the stack address.
  // 3. Create PHI nodes for the shape. If one of the incoming shapes is an
  //    immediate, use the immediate and delete the PHI node.
  // 4. Create a tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of the phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the incoming tile register and its incoming MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as live out, so that it will be spilled when we visit the
    // incoming MBB. Otherwise, since the phi will be deleted, the spill
    // would be missed when visiting the incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        //        def t1
        //       /      \
        //  def t2       t3 = phi(t1, t4) <--
        //       \      /                   |
        //        t4 = phi(t2, t3) ----------
        //
        // For each of (row, column and stack address), append the phi
        // incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert the PHI to a tileload.
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to a tileload instruction. Get the stack
        // address from the tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operands of the row/column phi instructions.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register lives out of its def BB, so it will be
      // spilled.
      // Create an MI to get the spill stack slot address for the tile
      // register.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}

static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg().isVirtual() &&
      MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
    return true;
  return false;
}

void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  // Canonicalize the phi nodes first. One tile phi may depend on a previous
  // phi node. For the case below, we need to convert %t4.
  //
  // BB0:
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t3 BB0)
  // -->
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t2 BB0)
  //
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and whose def is
    // also a phi node.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If we can't find such an operand, do nothing.
    if (!InMO)
      continue;

    // The current phi node depends on a previous phi node. Break the
    // dependency.
    Register DefTileReg;
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      DefTileReg = DefMI->getOperand(I).getReg();
      InMO->setReg(DefTileReg);
      break;
    }
  }
}

void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  while (!PHIs.empty()) {
    MachineInstr *MI = PHIs.pop_back_val();
    VisitedPHIs.clear();
    convertPHI(&MBB, *MI);
  }
}

// PreTileConfig should configure the tile registers based on the basic
// block.
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;

  auto Config = [&](MachineInstr &Before) {
    if (CfgSS == -1)
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                     ST->getTileConfigAlignment(), false);
    LastTileCfg = addFrameReference(
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
    LastShapeMI = nullptr;
    Change = true;
  };
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isVirtual() &&
          MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
        return true;
    }
    return false;
  };
  for (MachineInstr &MI : reverse(MBB)) {
    // We have transformed the phi nodes before configuring the BB.
    if (MI.isPHI())
      break;
    // Don't collect the shape of a used tile; the tile should be defined
    // before the tile use. Spill and reload happen if there is only a tile
    // use after ldtilecfg, so the shape can be collected from the reload.
    // Take the code below for example: %t is reloaded before the tilestore.
    // call
    // ....
    // tilestore %r, %c, %t
    // -->
    // call
    // ldtilecfg
    // %t = tileload %r, %c
    // tilestore %r, %c, %t
    if (HasTileOperand(MRI, MI))
      HasUnconfigTile = true;
    // According to the AMX ABI, all the tile registers including the config
    // register are volatile. Callers need to save/restore the config
    // register.
    if (MI.isCall() && HasUnconfigTile) {
      MachineBasicBlock::iterator I;
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
        I = ++LastShapeMI->getIterator();
      else
        I = ++MI.getIterator();
      Config(*I);
      HasUnconfigTile = false;
      continue;
    }
    if (!isTileDef(MRI, MI))
      continue;
    //
    //---------------------------------------------------------------------
    // Don't handle the COPY instruction. If the src and dst of the COPY can
    // be in the same config, as in the case below, we just check the shape
    // of t0.
    // def row0
    // def col0
    // ldtilecfg
    // t0 = tilezero(row0, col0)
    // t1 = copy t0
    // ...
    // If the src and dst of the COPY can NOT be in the same config, as in
    // the case below, a reload is generated before the copy instruction.
    // def row0
    // def col0
    // t0 = tilezero(row0, col0)
    // spill t0
    // ...
    // def row1
    // def col1
    // ldtilecfg
    // t1 = tilezero(row1, col1)
    // reload t0
    // t1 = copy t0
    //---------------------------------------------------------------------
    //
    // If MI dominates the last shape def instruction, we need to insert
    // ldtilecfg after LastShapeMI now. The config doesn't include the
    // current MI.
    // def row0
    // def col0
    // tilezero(row0, col0) <- MI
    // def row1
    // def col1
    // ldtilecfg <- insert
    // tilezero(row1, col1)
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
      Config(*(++LastShapeMI->getIterator()));
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // If the shape is defined in the current MBB, check the domination.
    // FIXME: how about loops?
    if (RowMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = RowMI;
      else if (dominates(MBB, LastShapeMI, RowMI))
        LastShapeMI = RowMI;
    }
    if (ColMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = ColMI;
      else if (dominates(MBB, LastShapeMI, ColMI))
        LastShapeMI = ColMI;
    }
    // If a user lives out of the tilecfg, spill the tile register and reload
    // it before the user.
    Register TileReg = MI.getOperand(0).getReg();
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // No reload is needed if the user doesn't cross ldtilecfg.
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        // Reload before UseMI.
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // Don't reload for a phi instruction; we handle phi reloads
        // separately.
        // TODO: merge the reloads for the same user MBB.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }

  // Configure tile registers at the head of the MBB.
  if (HasUnconfigTile) {
    MachineInstr *Before;
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}

bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();
  CfgSS = -1;

  unsigned NumVirtRegs = MRI->getNumVirtRegs();
  // Abandon early if there is no tile register to config.
  bool HasVirtTileReg = false;
  for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
    Register VirtReg = Register::index2VirtReg(I);
    if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
      HasVirtTileReg = true;
      break;
    }
  }
  if (!HasVirtTileReg)
    return false;

  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // We will create registers during config. The "* 3" is to make sure the
  // virtual register number doesn't exceed the size of the bit vector.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  bool Change = false;
  assert(MRI->isSSA());

  // Canonicalize the phi nodes first.
  for (MachineBasicBlock &MBB : MFunc)
    canonicalizePHIs(MBB);

  // Loop over all of the basic blocks in reverse post order and insert
  // ldtilecfg for tile registers. The reverse post order facilitates PHI
  // node conversion.
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *MBB : RPOT) {
    convertPHIs(*MBB);
    Change |= configBasicBlock(*MBB);
  }

  if (Change)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Change;
}

FunctionPass *llvm::createX86FastPreTileConfigPass() {
  return new X86FastPreTileConfig();
}