//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)
/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
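/// For example, assuming AArch64_IMM::expandMOVImm chooses the usual
/// MOVZ/MOVK decomposition, a pseudo such as
///
///   MOVi64imm x0, 0x12345678abcd
///
/// would typically become
///
///   movz x0, #0xabcd
///   movk x0, #0x5678, lsl #16
///   movk x0, #0x1234, lsl #32
///
/// with any dead flag on the destination only placed on the final write.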
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode) {
    default:
      llvm_unreachable("unhandled!");
      break;
    case AArch64::ORRWri:
    case AArch64::ORRXri:
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .add(MI.getOperand(0))
                         .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                         .addImm(I->Op2));
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}
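/// Expand a CMP_SWAP_{8,16,32,64} pseudo into a load-exclusive /
/// store-exclusive compare-and-swap loop. The caller picks the matching
/// acquire load (LdarOp), release store (StlrOp) and compare (CmpOp plus
/// ExtendImm, e.g. a UXTB extend for the 8-bit variant); ZeroReg is the
/// WZR/XZR register that receives the discarded SUBS result.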
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0)
        .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
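/// Expand a CMP_SWAP_128 pseudo into an LDAXP/STLXP loop. The two 64-bit
/// halves are compared separately; the pair of CSINCs accumulates a
/// "mismatch" bit in wStatus so a single CBNZ can take the early exit when
/// the loaded value does not match the expected one.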
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     csinc wStatus, wzr, wzr, eq
  //     cmp xDestHi, xDesiredHi
  //     csinc wStatus, wStatus, wStatus, eq
  //     cbnz wStatus, .Ldone
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(DoneBB);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
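/// For the _UNDEF variants the false lanes do not need to be zeroed, so no
/// predicated MOVPRFX is required; if the destination differs from the
/// destructive operand, an unpredicated MOVPRFX is emitted to copy the
/// destructive operand into the destination first.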
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;

  unsigned DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();

  if (DType == AArch64::DestructiveBinary)
    assert(DstReg != MI.getOperand(3).getReg());

  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    LLVM_FALLTHROUGH;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

#ifndef NDEBUG
  // MOVPRFX can only be used if the destination operand is the destructive
  // operand and is not also used as any other operand, so the destructive
  // operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  }
#endif

  // Resolve the reverse opcode.
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX.
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the MOVPRFX (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is the same as Dst
    DOPIdx = 0;
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}
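/// Expand an STGloop_wback / STZGloop_wback pseudo into a loop that tags (and
/// for the STZG variant also zeroes) memory in 32-byte steps using ST2G/STZ2G
/// post-indexed stores, emitting a single leading STG/STZG when the total
/// size is not a multiple of 32 bytes.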
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}
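/// Expand a multi-register SVE spill/fill pseudo (STR_ZZXI .. LDR_ZZZZXI)
/// into N consecutive STR_ZXI / LDR_ZXI instructions, one per zsub
/// sub-register, each at the next vector-length-scaled immediate offset.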
bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(
            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}
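/// Expand a BLR_RVMARKER pseudo into the call followed by the "mov x29, x29"
/// marker (emitted as ORRXrs fp, xzr, fp), e.g.
///
///   bl _target        ; or "blr xN" for an indirect call
///   mov x29, x29
///
/// with the two instructions bundled so nothing can be scheduled in between.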
bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29,
  // x29` marker. Mark the sequence as a bundle, to avoid passes moving other
  // code in between.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  MachineOperand &CallTarget = MI.getOperand(0);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  OriginalCall->addOperand(CallTarget);

  unsigned RegMaskStartIdx = 1;
  // Skip register arguments. Those are added during ISel, but are not
  // needed for the concrete branch.
  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
    assert(MI.getOperand(RegMaskStartIdx).isReg() &&
           "should only skip register operands");
    RegMaskStartIdx++;
  }
  for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
    OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));

  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
                     .addReg(AArch64::FP, RegState::Define)
                     .addReg(AArch64::XZR)
                     .addReg(AArch64::FP)
                     .addImm(0)
                     .getInstr();
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);
  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(Marker->getIterator()));
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

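  // BSP dst, mask, tval, fval computes (mask & tval) | (~mask & fval). After
  // register allocation it is lowered to BIT when the destination was
  // allocated to the same register as fval, to BIF when it matches tval, and
  // to BSL otherwise (first copying the mask into the destination with an ORR
  // if it is not already there).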
  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny codemodel expand to LDR
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small codemodel expand into ADRP + LDR.
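      // i.e. something along the lines of:
      //   adrp xDst, :got:sym
      //   ldr  xDst, [xDst, :got_lo12:sym]
      // (ILP32 targets load into the 32-bit sub-register instead).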
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        unsigned DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(),
                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
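      // With the tag set by the MOVK below, the final expansion is
      // ADRP + MOVK (lsl #48) + ADDXri instead of the usual ADRP + ADDXri.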
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::BLR_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}