//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (const MachineOperand &MO :
       llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
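///
/// Illustrative sketch of typical output (an example only, not an exhaustive
/// list of the strategies used by AArch64_IMM::expandMOVImm; the register and
/// immediate values are made up):
///
///   MOVi64imm $x0, 0x123456789abcdef0
///     ==> movz x0, #0xdef0
///         movk x0, #0x9abc, lsl #16
///         movk x0, #0x5678, lsl #32
///         movk x0, #0x1234, lsl #48
///
///   MOVi64imm $x0, 0x00ff00ff00ff00ff    ; a valid logical immediate
///     ==> orr  x0, xzr, #0x00ff00ff00ff00ff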
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode) {
    default:
      llvm_unreachable("unhandled!");
      break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::ANDXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
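///
/// As a further illustrative sketch (not part of the original comment, and
/// the _UNDEF pseudo name is used purely for illustration): for an _UNDEF
/// variant the false lanes are "don't care", so when the register constraints
/// are not met an unpredicated MOVPRFX is enough:
///
///   FSUB_ZPZZ_UNDEF_B Z0, Pg, Z1, Z2
///     ==> MOVPRFX     Z0, Z1
///         FSUB_ZPmZ_B Z0, Pg/m, Z0, Z2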
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;

  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    [[fallthrough]];
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm  ==>  FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd  ==>  FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinary:
    DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, LSLZero, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_B;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_H;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_S;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_D;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
    // If we cannot prefix the requested instruction we'll instead emit a
    // prefixed_zeroing_mov for DestructiveBinary.
    assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
            DType == AArch64::DestructiveBinaryComm ||
            DType == AArch64::DestructiveBinaryCommWithRev) &&
           "The destructive operand should be unique");
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is the same as Dst
    DOPIdx = 0;

    // Create the additional LSL to zero the lanes when the DstReg is not
    // unique. Zeros the lanes in z0 that aren't active in p0 with the sequence
    // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
    if ((DType == AArch64::DestructiveBinary ||
         DType == AArch64::DestructiveBinaryComm ||
         DType == AArch64::DestructiveBinaryCommWithRev) &&
        !DOPRegIsUnique) {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
          .addReg(DstReg, RegState::Define)
          .add(MI.getOperand(PredIdx))
          .addReg(DstReg)
          .addImm(0);
    }
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
    assert(DOPRegIsUnique && "The destructive operand should be unique");
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(LoopBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(
            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, and
  // - another branch, to the runtime function
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  MachineOperand &RVTarget = MI.getOperand(0);
  MachineOperand &CallTarget = MI.getOperand(1);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  assert(RVTarget.isGlobal() && "invalid operand for attached call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  OriginalCall->addOperand(CallTarget);

  unsigned RegMaskStartIdx = 2;
  // Skip register arguments. Those are added during ISel, but are not
  // needed for the concrete branch.
  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
    auto MOP = MI.getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    OriginalCall->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
    RegMaskStartIdx++;
  }
  for (const MachineOperand &MO :
       llvm::drop_begin(MI.operands(), RegMaskStartIdx))
    OriginalCall->addOperand(MO);

  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
      .addReg(AArch64::FP, RegState::Define)
      .addReg(AArch64::XZR)
      .addReg(AArch64::FP)
      .addImm(0);

  auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
                     .add(RVTarget)
                     .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);

  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(RVCall->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  // Expand CALL_BTI pseudo to:
  // - a branch to the call target
  // - a BTI instruction
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.

  MachineInstr &MI = *MBBI;
  MachineOperand &CallTarget = MI.getOperand(0);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  MachineInstr *Call =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  Call->addOperand(CallTarget);
  Call->setCFIType(*MBB.getParent(), MI.getCFIType());

  MachineInstr *BTI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
          // BTI J so that setjmp can BR to this.
          .addImm(36)
          .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Call);

  MI.eraseFromParent();
  finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried),
  // so move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

MachineBasicBlock *
AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  assert((std::next(MBBI) != MBB.end() ||
          MI.getParent()->successors().begin() !=
              MI.getParent()->successors().end()) &&
         "Unexpected unreachable in block that restores ZA");

  // Compare TPIDR2_EL0 value against 0.
  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
                                .add(MI.getOperand(0));

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before RestoreZAPseudo.
  //  - SMBB contains the RestoreZAPseudo instruction only.
  //  - EndBB contains all instructions after RestoreZAPseudo.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
  Cbz.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
  MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
  for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

MachineBasicBlock *
AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // In the case of a smstart/smstop before an unreachable, just remove the
  // pseudo. Exception handling code generated by Clang may introduce
  // unreachables and it seems unnecessary to restore pstate.sm when that
  // happens. Note that this is not just an optimisation: the code below
  // expects a successor instruction/block in order to split the block at MBBI.
  if (std::next(MBBI) == MBB.end() &&
      MI.getParent()->successors().begin() ==
          MI.getParent()->successors().end()) {
    MI.eraseFromParent();
    return &MBB;
  }

  // Expand the pseudo into an smstart or smstop instruction. The pseudo has
The pseudo has the 965 // following operands: 966 // 967 // MSRpstatePseudo <za|sm|both>, <0|1>, pstate.sm, expectedval, <regmask> 968 // 969 // The pseudo is expanded into a conditional smstart/smstop, with a 970 // check if pstate.sm (register) equals the expected value, and if not, 971 // invokes the smstart/smstop. 972 // 973 // As an example, the following block contains a normal call from a 974 // streaming-compatible function: 975 // 976 // OrigBB: 977 // MSRpstatePseudo 3, 0, %0, 0, <regmask> <- Conditional SMSTOP 978 // bl @normal_callee 979 // MSRpstatePseudo 3, 1, %0, 0, <regmask> <- Conditional SMSTART 980 // 981 // ...which will be transformed into: 982 // 983 // OrigBB: 984 // TBNZx %0:gpr64, 0, SMBB 985 // b EndBB 986 // 987 // SMBB: 988 // MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP 989 // 990 // EndBB: 991 // bl @normal_callee 992 // MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART 993 // 994 DebugLoc DL = MI.getDebugLoc(); 995 996 // Create the conditional branch based on the third operand of the 997 // instruction, which tells us if we are wrapping a normal or streaming 998 // function. 999 // We test the live value of pstate.sm and toggle pstate.sm if this is not the 1000 // expected value for the callee (0 for a normal callee and 1 for a streaming 1001 // callee). 1002 auto PStateSM = MI.getOperand(2).getReg(); 1003 bool IsStreamingCallee = MI.getOperand(3).getImm(); 1004 unsigned Opc = IsStreamingCallee ? AArch64::TBZX : AArch64::TBNZX; 1005 MachineInstrBuilder Tbx = 1006 BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(PStateSM).addImm(0); 1007 1008 // Split MBB and create two new blocks: 1009 // - MBB now contains all instructions before MSRcond_pstatesvcrImm1. 1010 // - SMBB contains the MSRcond_pstatesvcrImm1 instruction only. 1011 // - EndBB contains all instructions after MSRcond_pstatesvcrImm1. 1012 MachineInstr &PrevMI = *std::prev(MBBI); 1013 MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true); 1014 MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end() 1015 ? *SMBB->successors().begin() 1016 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true); 1017 1018 // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB. 1019 Tbx.addMBB(SMBB); 1020 BuildMI(&MBB, DL, TII->get(AArch64::B)) 1021 .addMBB(EndBB); 1022 MBB.addSuccessor(EndBB); 1023 1024 // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB. 1025 MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(), 1026 TII->get(AArch64::MSRpstatesvcrImm1)); 1027 // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as 1028 // these contain the CopyFromReg for the first argument and the flag to 1029 // indicate whether the callee is streaming or normal). 1030 MIB.add(MI.getOperand(0)); 1031 MIB.add(MI.getOperand(1)); 1032 for (unsigned i = 4; i < MI.getNumOperands(); ++i) 1033 MIB.add(MI.getOperand(i)); 1034 1035 BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB); 1036 1037 MI.eraseFromParent(); 1038 return EndBB; 1039 } 1040 1041 /// If MBBI references a pseudo instruction that should be expanded here, 1042 /// do the expansion and return true. Otherwise return false. 
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

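  // Illustrative example of the shifted-register canonicalization handled
  // below (a sketch, not taken from the original comments):
  //   ADDWrr $w0, $w1, $w2  ==>  ADDWrs $w0, $w1, $w2, 0   i.e. "LSL #0"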
  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs; break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Try to create new inst without implicit operands added.
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    MIB1->setPCSections(MF, MI.getPCSections());
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    if (auto DebugNumber = MI.peekDebugInstrNum())
      NewMI->setDebugInstrNum(DebugNumber);
    MI.eraseFromParent();
    return true;
  }

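  // Illustrative example of the GOT expansion handled below (small code
  // model, non-ILP32; a sketch, not taken from the original comments, and
  // "var" is a made-up symbol):
  //   LOADgot $x0, @var
  //     ==> adrp x0, :got:var
  //         ldr  x0, [x0, :got_lo12:var]
  // The tiny code model instead emits a single literal LDR.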
  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny codemodel expand to LDR
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small codemodel expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because the
      // largest addend (and hence offset within a function) allowed for ADRP
      // is only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    [[fallthrough]];
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
      SysReg = AArch64SysReg::TPIDRRO_EL0;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::BLR_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  case AArch64::RestoreZAPseudo: {
    auto *NewMBB = expandRestoreZA(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::MSRpstatePseudo: {
    auto *NewMBB = expandCondSMToggle(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::OBSCURE_COPY: {
    if (MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
          .add(MI.getOperand(0))
          .addReg(AArch64::XZR)
          .add(MI.getOperand(1))
          .addImm(0);
    }
    MI.eraseFromParent();
    return true;
  }
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}