1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that expands pseudo instructions into target 10 // instructions to allow proper scheduling and other late optimizations. This 11 // pass should be run after register allocation but before the post-regalloc 12 // scheduling pass. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AArch64ExpandImm.h" 17 #include "AArch64InstrInfo.h" 18 #include "AArch64MachineFunctionInfo.h" 19 #include "AArch64Subtarget.h" 20 #include "MCTargetDesc/AArch64AddressingModes.h" 21 #include "Utils/AArch64BaseInfo.h" 22 #include "llvm/CodeGen/LivePhysRegs.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineConstantPool.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineInstr.h" 28 #include "llvm/CodeGen/MachineInstrBuilder.h" 29 #include "llvm/CodeGen/MachineOperand.h" 30 #include "llvm/CodeGen/TargetSubtargetInfo.h" 31 #include "llvm/IR/DebugLoc.h" 32 #include "llvm/MC/MCInstrDesc.h" 33 #include "llvm/Pass.h" 34 #include "llvm/Support/CodeGen.h" 35 #include "llvm/Support/MathExtras.h" 36 #include "llvm/Target/TargetMachine.h" 37 #include "llvm/TargetParser/Triple.h" 38 #include <cassert> 39 #include <cstdint> 40 #include <iterator> 41 #include <utility> 42 43 using namespace llvm; 44 45 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass" 46 47 namespace { 48 49 class AArch64ExpandPseudo : public MachineFunctionPass { 50 public: 51 const AArch64InstrInfo *TII; 52 53 static char ID; 54 55 AArch64ExpandPseudo() : MachineFunctionPass(ID) { 56 initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry()); 57 } 58 59 bool runOnMachineFunction(MachineFunction &Fn) override; 60 61 StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; } 62 63 private: 64 bool expandMBB(MachineBasicBlock &MBB); 65 bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 66 MachineBasicBlock::iterator &NextMBBI); 67 bool expandMultiVecPseudo(MachineBasicBlock &MBB, 68 MachineBasicBlock::iterator MBBI, 69 TargetRegisterClass ContiguousClass, 70 TargetRegisterClass StridedClass, 71 unsigned ContiguousOpc, unsigned StridedOpc); 72 bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 73 unsigned BitSize); 74 75 bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB, 76 MachineBasicBlock::iterator MBBI); 77 bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 78 unsigned LdarOp, unsigned StlrOp, unsigned CmpOp, 79 unsigned ExtendImm, unsigned ZeroReg, 80 MachineBasicBlock::iterator &NextMBBI); 81 bool expandCMP_SWAP_128(MachineBasicBlock &MBB, 82 MachineBasicBlock::iterator MBBI, 83 MachineBasicBlock::iterator &NextMBBI); 84 bool expandSetTagLoop(MachineBasicBlock &MBB, 85 MachineBasicBlock::iterator MBBI, 86 MachineBasicBlock::iterator &NextMBBI); 87 bool expandSVESpillFill(MachineBasicBlock &MBB, 88 MachineBasicBlock::iterator MBBI, unsigned Opc, 89 unsigned N); 90 bool expandCALL_RVMARKER(MachineBasicBlock &MBB, 91 
MachineBasicBlock::iterator MBBI); 92 bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); 93 bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB, 94 MachineBasicBlock::iterator MBBI); 95 MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB, 96 MachineBasicBlock::iterator MBBI); 97 MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB, 98 MachineBasicBlock::iterator MBBI); 99 }; 100 101 } // end anonymous namespace 102 103 char AArch64ExpandPseudo::ID = 0; 104 105 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo", 106 AARCH64_EXPAND_PSEUDO_NAME, false, false) 107 108 /// Transfer implicit operands on the pseudo instruction to the 109 /// instructions created from the expansion. 110 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, 111 MachineInstrBuilder &DefMI) { 112 const MCInstrDesc &Desc = OldMI.getDesc(); 113 for (const MachineOperand &MO : 114 llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) { 115 assert(MO.isReg() && MO.getReg()); 116 if (MO.isUse()) 117 UseMI.add(MO); 118 else 119 DefMI.add(MO); 120 } 121 } 122 123 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more 124 /// real move-immediate instructions to synthesize the immediate. 125 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, 126 MachineBasicBlock::iterator MBBI, 127 unsigned BitSize) { 128 MachineInstr &MI = *MBBI; 129 Register DstReg = MI.getOperand(0).getReg(); 130 uint64_t RenamableState = 131 MI.getOperand(0).isRenamable() ? RegState::Renamable : 0; 132 uint64_t Imm = MI.getOperand(1).getImm(); 133 134 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) { 135 // Useless def, and we don't want to risk creating an invalid ORR (which 136 // would really write to sp). 137 MI.eraseFromParent(); 138 return true; 139 } 140 141 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 142 AArch64_IMM::expandMOVImm(Imm, BitSize, Insn); 143 assert(Insn.size() != 0); 144 145 SmallVector<MachineInstrBuilder, 4> MIBS; 146 for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) { 147 bool LastItem = std::next(I) == E; 148 switch (I->Opcode) 149 { 150 default: llvm_unreachable("unhandled!"); break; 151 152 case AArch64::ORRWri: 153 case AArch64::ORRXri: 154 if (I->Op1 == 0) { 155 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 156 .add(MI.getOperand(0)) 157 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR) 158 .addImm(I->Op2)); 159 } else { 160 Register DstReg = MI.getOperand(0).getReg(); 161 bool DstIsDead = MI.getOperand(0).isDead(); 162 MIBS.push_back( 163 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 164 .addReg(DstReg, RegState::Define | 165 getDeadRegState(DstIsDead && LastItem) | 166 RenamableState) 167 .addReg(DstReg) 168 .addImm(I->Op2)); 169 } 170 break; 171 case AArch64::ANDXri: 172 case AArch64::EORXri: 173 if (I->Op1 == 0) { 174 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 175 .add(MI.getOperand(0)) 176 .addReg(BitSize == 32 ? 
AArch64::WZR : AArch64::XZR) 177 .addImm(I->Op2)); 178 } else { 179 Register DstReg = MI.getOperand(0).getReg(); 180 bool DstIsDead = MI.getOperand(0).isDead(); 181 MIBS.push_back( 182 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 183 .addReg(DstReg, RegState::Define | 184 getDeadRegState(DstIsDead && LastItem) | 185 RenamableState) 186 .addReg(DstReg) 187 .addImm(I->Op2)); 188 } 189 break; 190 case AArch64::MOVNWi: 191 case AArch64::MOVNXi: 192 case AArch64::MOVZWi: 193 case AArch64::MOVZXi: { 194 bool DstIsDead = MI.getOperand(0).isDead(); 195 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 196 .addReg(DstReg, RegState::Define | 197 getDeadRegState(DstIsDead && LastItem) | 198 RenamableState) 199 .addImm(I->Op1) 200 .addImm(I->Op2)); 201 } break; 202 case AArch64::MOVKWi: 203 case AArch64::MOVKXi: { 204 Register DstReg = MI.getOperand(0).getReg(); 205 bool DstIsDead = MI.getOperand(0).isDead(); 206 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 207 .addReg(DstReg, 208 RegState::Define | 209 getDeadRegState(DstIsDead && LastItem) | 210 RenamableState) 211 .addReg(DstReg) 212 .addImm(I->Op1) 213 .addImm(I->Op2)); 214 } break; 215 } 216 } 217 transferImpOps(MI, MIBS.front(), MIBS.back()); 218 MI.eraseFromParent(); 219 return true; 220 } 221 222 bool AArch64ExpandPseudo::expandCMP_SWAP( 223 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp, 224 unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg, 225 MachineBasicBlock::iterator &NextMBBI) { 226 MachineInstr &MI = *MBBI; 227 MIMetadata MIMD(MI); 228 const MachineOperand &Dest = MI.getOperand(0); 229 Register StatusReg = MI.getOperand(1).getReg(); 230 bool StatusDead = MI.getOperand(1).isDead(); 231 // Duplicating undef operands into 2 instructions does not guarantee the same 232 // value on both; However undef should be replaced by xzr anyway. 
233 assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); 234 Register AddrReg = MI.getOperand(2).getReg(); 235 Register DesiredReg = MI.getOperand(3).getReg(); 236 Register NewReg = MI.getOperand(4).getReg(); 237 238 MachineFunction *MF = MBB.getParent(); 239 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 240 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 241 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 242 243 MF->insert(++MBB.getIterator(), LoadCmpBB); 244 MF->insert(++LoadCmpBB->getIterator(), StoreBB); 245 MF->insert(++StoreBB->getIterator(), DoneBB); 246 247 // .Lloadcmp: 248 // mov wStatus, 0 249 // ldaxr xDest, [xAddr] 250 // cmp xDest, xDesired 251 // b.ne .Ldone 252 if (!StatusDead) 253 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg) 254 .addImm(0).addImm(0); 255 BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg()) 256 .addReg(AddrReg); 257 BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg) 258 .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) 259 .addReg(DesiredReg) 260 .addImm(ExtendImm); 261 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc)) 262 .addImm(AArch64CC::NE) 263 .addMBB(DoneBB) 264 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill); 265 LoadCmpBB->addSuccessor(DoneBB); 266 LoadCmpBB->addSuccessor(StoreBB); 267 268 // .Lstore: 269 // stlxr wStatus, xNew, [xAddr] 270 // cbnz wStatus, .Lloadcmp 271 BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg) 272 .addReg(NewReg) 273 .addReg(AddrReg); 274 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW)) 275 .addReg(StatusReg, getKillRegState(StatusDead)) 276 .addMBB(LoadCmpBB); 277 StoreBB->addSuccessor(LoadCmpBB); 278 StoreBB->addSuccessor(DoneBB); 279 280 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); 281 DoneBB->transferSuccessors(&MBB); 282 283 MBB.addSuccessor(LoadCmpBB); 284 285 NextMBBI = MBB.end(); 286 MI.eraseFromParent(); 287 288 // Recompute livein lists. 289 LivePhysRegs LiveRegs; 290 computeAndAddLiveIns(LiveRegs, *DoneBB); 291 computeAndAddLiveIns(LiveRegs, *StoreBB); 292 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 293 // Do an extra pass around the loop to get loop carried registers right. 294 StoreBB->clearLiveIns(); 295 computeAndAddLiveIns(LiveRegs, *StoreBB); 296 LoadCmpBB->clearLiveIns(); 297 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 298 299 return true; 300 } 301 302 bool AArch64ExpandPseudo::expandCMP_SWAP_128( 303 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 304 MachineBasicBlock::iterator &NextMBBI) { 305 MachineInstr &MI = *MBBI; 306 MIMetadata MIMD(MI); 307 MachineOperand &DestLo = MI.getOperand(0); 308 MachineOperand &DestHi = MI.getOperand(1); 309 Register StatusReg = MI.getOperand(2).getReg(); 310 bool StatusDead = MI.getOperand(2).isDead(); 311 // Duplicating undef operands into 2 instructions does not guarantee the same 312 // value on both; However undef should be replaced by xzr anyway. 
313 assert(!MI.getOperand(3).isUndef() && "cannot handle undef"); 314 Register AddrReg = MI.getOperand(3).getReg(); 315 Register DesiredLoReg = MI.getOperand(4).getReg(); 316 Register DesiredHiReg = MI.getOperand(5).getReg(); 317 Register NewLoReg = MI.getOperand(6).getReg(); 318 Register NewHiReg = MI.getOperand(7).getReg(); 319 320 unsigned LdxpOp, StxpOp; 321 322 switch (MI.getOpcode()) { 323 case AArch64::CMP_SWAP_128_MONOTONIC: 324 LdxpOp = AArch64::LDXPX; 325 StxpOp = AArch64::STXPX; 326 break; 327 case AArch64::CMP_SWAP_128_RELEASE: 328 LdxpOp = AArch64::LDXPX; 329 StxpOp = AArch64::STLXPX; 330 break; 331 case AArch64::CMP_SWAP_128_ACQUIRE: 332 LdxpOp = AArch64::LDAXPX; 333 StxpOp = AArch64::STXPX; 334 break; 335 case AArch64::CMP_SWAP_128: 336 LdxpOp = AArch64::LDAXPX; 337 StxpOp = AArch64::STLXPX; 338 break; 339 default: 340 llvm_unreachable("Unexpected opcode"); 341 } 342 343 MachineFunction *MF = MBB.getParent(); 344 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 345 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 346 auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 347 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 348 349 MF->insert(++MBB.getIterator(), LoadCmpBB); 350 MF->insert(++LoadCmpBB->getIterator(), StoreBB); 351 MF->insert(++StoreBB->getIterator(), FailBB); 352 MF->insert(++FailBB->getIterator(), DoneBB); 353 354 // .Lloadcmp: 355 // ldaxp xDestLo, xDestHi, [xAddr] 356 // cmp xDestLo, xDesiredLo 357 // sbcs xDestHi, xDesiredHi 358 // b.ne .Ldone 359 BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp)) 360 .addReg(DestLo.getReg(), RegState::Define) 361 .addReg(DestHi.getReg(), RegState::Define) 362 .addReg(AddrReg); 363 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR) 364 .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead())) 365 .addReg(DesiredLoReg) 366 .addImm(0); 367 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg) 368 .addUse(AArch64::WZR) 369 .addUse(AArch64::WZR) 370 .addImm(AArch64CC::EQ); 371 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR) 372 .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead())) 373 .addReg(DesiredHiReg) 374 .addImm(0); 375 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg) 376 .addUse(StatusReg, RegState::Kill) 377 .addUse(StatusReg, RegState::Kill) 378 .addImm(AArch64CC::EQ); 379 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW)) 380 .addUse(StatusReg, getKillRegState(StatusDead)) 381 .addMBB(FailBB); 382 LoadCmpBB->addSuccessor(FailBB); 383 LoadCmpBB->addSuccessor(StoreBB); 384 385 // .Lstore: 386 // stlxp wStatus, xNewLo, xNewHi, [xAddr] 387 // cbnz wStatus, .Lloadcmp 388 BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg) 389 .addReg(NewLoReg) 390 .addReg(NewHiReg) 391 .addReg(AddrReg); 392 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW)) 393 .addReg(StatusReg, getKillRegState(StatusDead)) 394 .addMBB(LoadCmpBB); 395 BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB); 396 StoreBB->addSuccessor(LoadCmpBB); 397 StoreBB->addSuccessor(DoneBB); 398 399 // .Lfail: 400 // stlxp wStatus, xDestLo, xDestHi, [xAddr] 401 // cbnz wStatus, .Lloadcmp 402 BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg) 403 .addReg(DestLo.getReg()) 404 .addReg(DestHi.getReg()) 405 .addReg(AddrReg); 406 BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW)) 407 .addReg(StatusReg, getKillRegState(StatusDead)) 408 .addMBB(LoadCmpBB); 409 FailBB->addSuccessor(LoadCmpBB); 410 FailBB->addSuccessor(DoneBB); 411 412 
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); 413 DoneBB->transferSuccessors(&MBB); 414 415 MBB.addSuccessor(LoadCmpBB); 416 417 NextMBBI = MBB.end(); 418 MI.eraseFromParent(); 419 420 // Recompute liveness bottom up. 421 LivePhysRegs LiveRegs; 422 computeAndAddLiveIns(LiveRegs, *DoneBB); 423 computeAndAddLiveIns(LiveRegs, *FailBB); 424 computeAndAddLiveIns(LiveRegs, *StoreBB); 425 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 426 427 // Do an extra pass in the loop to get the loop carried dependencies right. 428 FailBB->clearLiveIns(); 429 computeAndAddLiveIns(LiveRegs, *FailBB); 430 StoreBB->clearLiveIns(); 431 computeAndAddLiveIns(LiveRegs, *StoreBB); 432 LoadCmpBB->clearLiveIns(); 433 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 434 435 return true; 436 } 437 438 /// \brief Expand Pseudos to Instructions with destructive operands. 439 /// 440 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes 441 /// or for fixing relaxed register allocation conditions to comply with 442 /// the instruction's register constraints. The latter case may be cheaper 443 /// than setting the register constraints in the register allocator, 444 /// since that will insert regular MOV instructions rather than MOVPRFX. 445 /// 446 /// Example (after register allocation): 447 /// 448 /// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0 449 /// 450 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B. 451 /// * We cannot map directly to FSUB_ZPmZ_B because the register 452 /// constraints of the instruction are not met. 453 /// * Also, the _ZERO suffix specifies that the false lanes need to be zeroed. 454 /// 455 /// We first try to see if the destructive operand == result operand; 456 /// if not, we try to swap the operands, e.g. 457 /// 458 /// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1 459 /// 460 /// But because FSUB_ZPmZ is not commutative, this is semantically 461 /// different, so we need a reverse instruction: 462 /// 463 /// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1 464 /// 465 /// Then we implement the zeroing of the false lanes of Z0 by adding 466 /// a zeroing MOVPRFX instruction: 467 /// 468 /// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0 469 /// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1 470 /// 471 /// Note that this can only be done for _ZERO or _UNDEF variants where 472 /// we can guarantee the false lanes to be zeroed (by implementing this) 473 /// or that they are undef (don't care / not used), otherwise the 474 /// swapping of operands is illegal because the operation is not 475 /// (or cannot be emulated to be) fully commutative.
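///
/// As an illustrative sketch (mirroring the naming of the example above, not
/// lifted from actual compiler output): for an _UNDEF variant the false lanes
/// are don't-care, so when the destination differs from the destructive
/// operand a plain, unpredicated MOVPRFX is enough:
///
///   FSUB_ZPZZ_UNDEF_B Z0, Pg, Z1, Z2
///     ==> MOVPRFX Z0, Z1
///         FSUB_ZPmZ_B Z0, Pg/m, Z0, Z2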
476 bool AArch64ExpandPseudo::expand_DestructiveOp( 477 MachineInstr &MI, 478 MachineBasicBlock &MBB, 479 MachineBasicBlock::iterator MBBI) { 480 unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode()); 481 uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask; 482 uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask; 483 bool FalseZero = FalseLanes == AArch64::FalseLanesZero; 484 Register DstReg = MI.getOperand(0).getReg(); 485 bool DstIsDead = MI.getOperand(0).isDead(); 486 bool UseRev = false; 487 unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx; 488 489 switch (DType) { 490 case AArch64::DestructiveBinaryComm: 491 case AArch64::DestructiveBinaryCommWithRev: 492 if (DstReg == MI.getOperand(3).getReg()) { 493 // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1 494 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2); 495 UseRev = true; 496 break; 497 } 498 [[fallthrough]]; 499 case AArch64::DestructiveBinary: 500 case AArch64::DestructiveBinaryImm: 501 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3); 502 break; 503 case AArch64::DestructiveUnaryPassthru: 504 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3); 505 break; 506 case AArch64::DestructiveTernaryCommWithRev: 507 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4); 508 if (DstReg == MI.getOperand(3).getReg()) { 509 // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za 510 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2); 511 UseRev = true; 512 } else if (DstReg == MI.getOperand(4).getReg()) { 513 // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za 514 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2); 515 UseRev = true; 516 } 517 break; 518 default: 519 llvm_unreachable("Unsupported Destructive Operand type"); 520 } 521 522 // MOVPRFX can only be used if the destination operand 523 // is the destructive operand, not as any other operand, 524 // so the Destructive Operand must be unique. 525 bool DOPRegIsUnique = false; 526 switch (DType) { 527 case AArch64::DestructiveBinary: 528 DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg(); 529 break; 530 case AArch64::DestructiveBinaryComm: 531 case AArch64::DestructiveBinaryCommWithRev: 532 DOPRegIsUnique = 533 DstReg != MI.getOperand(DOPIdx).getReg() || 534 MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg(); 535 break; 536 case AArch64::DestructiveUnaryPassthru: 537 case AArch64::DestructiveBinaryImm: 538 DOPRegIsUnique = true; 539 break; 540 case AArch64::DestructiveTernaryCommWithRev: 541 DOPRegIsUnique = 542 DstReg != MI.getOperand(DOPIdx).getReg() || 543 (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() && 544 MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg()); 545 break; 546 } 547 548 // Resolve the reverse opcode 549 if (UseRev) { 550 int NewOpcode; 551 // e.g. DIV -> DIVR 552 if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1) 553 Opcode = NewOpcode; 554 // e.g. 
DIVR -> DIV 555 else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1) 556 Opcode = NewOpcode; 557 } 558 559 // Get the right MOVPRFX 560 uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode); 561 unsigned MovPrfx, LSLZero, MovPrfxZero; 562 switch (ElementSize) { 563 case AArch64::ElementSizeNone: 564 case AArch64::ElementSizeB: 565 MovPrfx = AArch64::MOVPRFX_ZZ; 566 LSLZero = AArch64::LSL_ZPmI_B; 567 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B; 568 break; 569 case AArch64::ElementSizeH: 570 MovPrfx = AArch64::MOVPRFX_ZZ; 571 LSLZero = AArch64::LSL_ZPmI_H; 572 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H; 573 break; 574 case AArch64::ElementSizeS: 575 MovPrfx = AArch64::MOVPRFX_ZZ; 576 LSLZero = AArch64::LSL_ZPmI_S; 577 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S; 578 break; 579 case AArch64::ElementSizeD: 580 MovPrfx = AArch64::MOVPRFX_ZZ; 581 LSLZero = AArch64::LSL_ZPmI_D; 582 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D; 583 break; 584 default: 585 llvm_unreachable("Unsupported ElementSize"); 586 } 587 588 // 589 // Create the destructive operation (if required) 590 // 591 MachineInstrBuilder PRFX, DOP; 592 if (FalseZero) { 593 // If we cannot prefix the requested instruction we'll instead emit a 594 // prefixed_zeroing_mov for DestructiveBinary. 595 assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary || 596 DType == AArch64::DestructiveBinaryComm || 597 DType == AArch64::DestructiveBinaryCommWithRev) && 598 "The destructive operand should be unique"); 599 assert(ElementSize != AArch64::ElementSizeNone && 600 "This instruction is unpredicated"); 601 602 // Merge source operand into destination register 603 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero)) 604 .addReg(DstReg, RegState::Define) 605 .addReg(MI.getOperand(PredIdx).getReg()) 606 .addReg(MI.getOperand(DOPIdx).getReg()); 607 608 // After the movprfx, the destructive operand is same as Dst 609 DOPIdx = 0; 610 611 // Create the additional LSL to zero the lanes when the DstReg is not 612 // unique. 
Zeros the lanes in z0 that aren't active in p0 with sequence 613 // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0; 614 if ((DType == AArch64::DestructiveBinary || 615 DType == AArch64::DestructiveBinaryComm || 616 DType == AArch64::DestructiveBinaryCommWithRev) && 617 !DOPRegIsUnique) { 618 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero)) 619 .addReg(DstReg, RegState::Define) 620 .add(MI.getOperand(PredIdx)) 621 .addReg(DstReg) 622 .addImm(0); 623 } 624 } else if (DstReg != MI.getOperand(DOPIdx).getReg()) { 625 assert(DOPRegIsUnique && "The destructive operand should be unique"); 626 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx)) 627 .addReg(DstReg, RegState::Define) 628 .addReg(MI.getOperand(DOPIdx).getReg()); 629 DOPIdx = 0; 630 } 631 632 // 633 // Create the destructive operation 634 // 635 DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode)) 636 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)); 637 638 switch (DType) { 639 case AArch64::DestructiveUnaryPassthru: 640 DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill) 641 .add(MI.getOperand(PredIdx)) 642 .add(MI.getOperand(SrcIdx)); 643 break; 644 case AArch64::DestructiveBinary: 645 case AArch64::DestructiveBinaryImm: 646 case AArch64::DestructiveBinaryComm: 647 case AArch64::DestructiveBinaryCommWithRev: 648 DOP.add(MI.getOperand(PredIdx)) 649 .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill) 650 .add(MI.getOperand(SrcIdx)); 651 break; 652 case AArch64::DestructiveTernaryCommWithRev: 653 DOP.add(MI.getOperand(PredIdx)) 654 .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill) 655 .add(MI.getOperand(SrcIdx)) 656 .add(MI.getOperand(Src2Idx)); 657 break; 658 } 659 660 if (PRFX) { 661 finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator()); 662 transferImpOps(MI, PRFX, DOP); 663 } else 664 transferImpOps(MI, DOP, DOP); 665 666 MI.eraseFromParent(); 667 return true; 668 } 669 670 bool AArch64ExpandPseudo::expandSetTagLoop( 671 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 672 MachineBasicBlock::iterator &NextMBBI) { 673 MachineInstr &MI = *MBBI; 674 DebugLoc DL = MI.getDebugLoc(); 675 Register SizeReg = MI.getOperand(0).getReg(); 676 Register AddressReg = MI.getOperand(1).getReg(); 677 678 MachineFunction *MF = MBB.getParent(); 679 680 bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback; 681 const unsigned OpCode1 = 682 ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex; 683 const unsigned OpCode2 = 684 ZeroData ? 
AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex; 685 686 unsigned Size = MI.getOperand(2).getImm(); 687 assert(Size > 0 && Size % 16 == 0); 688 if (Size % (16 * 2) != 0) { 689 BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg) 690 .addReg(AddressReg) 691 .addReg(AddressReg) 692 .addImm(1); 693 Size -= 16; 694 } 695 MachineBasicBlock::iterator I = 696 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg) 697 .addImm(Size); 698 expandMOVImm(MBB, I, 64); 699 700 auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 701 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 702 703 MF->insert(++MBB.getIterator(), LoopBB); 704 MF->insert(++LoopBB->getIterator(), DoneBB); 705 706 BuildMI(LoopBB, DL, TII->get(OpCode2)) 707 .addDef(AddressReg) 708 .addReg(AddressReg) 709 .addReg(AddressReg) 710 .addImm(2) 711 .cloneMemRefs(MI) 712 .setMIFlags(MI.getFlags()); 713 BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri)) 714 .addDef(SizeReg) 715 .addReg(SizeReg) 716 .addImm(16 * 2) 717 .addImm(0); 718 BuildMI(LoopBB, DL, TII->get(AArch64::Bcc)) 719 .addImm(AArch64CC::NE) 720 .addMBB(LoopBB) 721 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill); 722 723 LoopBB->addSuccessor(LoopBB); 724 LoopBB->addSuccessor(DoneBB); 725 726 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); 727 DoneBB->transferSuccessors(&MBB); 728 729 MBB.addSuccessor(LoopBB); 730 731 NextMBBI = MBB.end(); 732 MI.eraseFromParent(); 733 // Recompute liveness bottom up. 734 LivePhysRegs LiveRegs; 735 computeAndAddLiveIns(LiveRegs, *DoneBB); 736 computeAndAddLiveIns(LiveRegs, *LoopBB); 737 // Do an extra pass in the loop to get the loop carried dependencies right. 738 // FIXME: is this necessary? 739 LoopBB->clearLiveIns(); 740 computeAndAddLiveIns(LiveRegs, *LoopBB); 741 DoneBB->clearLiveIns(); 742 computeAndAddLiveIns(LiveRegs, *DoneBB); 743 744 return true; 745 } 746 747 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB, 748 MachineBasicBlock::iterator MBBI, 749 unsigned Opc, unsigned N) { 750 assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI || 751 Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) && 752 "Unexpected opcode"); 753 unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI) 754 ? RegState::Define 755 : 0; 756 unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) 757 ? AArch64::zsub0 758 : AArch64::psub0; 759 const TargetRegisterInfo *TRI = 760 MBB.getParent()->getSubtarget().getRegisterInfo(); 761 MachineInstr &MI = *MBBI; 762 for (unsigned Offset = 0; Offset < N; ++Offset) { 763 int ImmOffset = MI.getOperand(2).getImm() + Offset; 764 bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false; 765 assert(ImmOffset >= -256 && ImmOffset < 256 && 766 "Immediate spill offset out of range"); 767 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) 768 .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset), 769 RState) 770 .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill)) 771 .addImm(ImmOffset); 772 } 773 MI.eraseFromParent(); 774 return true; 775 } 776 777 bool AArch64ExpandPseudo::expandCALL_RVMARKER( 778 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { 779 // Expand CALL_RVMARKER pseudo to: 780 // - a branch to the call target, followed by 781 // - the special `mov x29, x29` marker, and 782 // - another branch, to the runtime function 783 // Mark the sequence as bundle, to avoid passes moving other code in between. 
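  // Illustrative shape of the result (the runtime function is whatever
  // RVTarget names, e.g. objc_retainAutoreleasedReturnValue for an ARC
  // attached call):
  //   bl <call target>
  //   mov x29, x29          (emitted below as ORRXrs fp, xzr, fp)
  //   bl <RVTarget>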
784 MachineInstr &MI = *MBBI; 785 786 MachineInstr *OriginalCall; 787 MachineOperand &RVTarget = MI.getOperand(0); 788 MachineOperand &CallTarget = MI.getOperand(1); 789 assert((CallTarget.isGlobal() || CallTarget.isReg()) && 790 "invalid operand for regular call"); 791 assert(RVTarget.isGlobal() && "invalid operand for attached call"); 792 unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR; 793 OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr(); 794 OriginalCall->addOperand(CallTarget); 795 796 unsigned RegMaskStartIdx = 2; 797 // Skip register arguments. Those are added during ISel, but are not 798 // needed for the concrete branch. 799 while (!MI.getOperand(RegMaskStartIdx).isRegMask()) { 800 auto MOP = MI.getOperand(RegMaskStartIdx); 801 assert(MOP.isReg() && "can only add register operands"); 802 OriginalCall->addOperand(MachineOperand::CreateReg( 803 MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false, 804 /*isDead=*/false, /*isUndef=*/MOP.isUndef())); 805 RegMaskStartIdx++; 806 } 807 for (const MachineOperand &MO : 808 llvm::drop_begin(MI.operands(), RegMaskStartIdx)) 809 OriginalCall->addOperand(MO); 810 811 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs)) 812 .addReg(AArch64::FP, RegState::Define) 813 .addReg(AArch64::XZR) 814 .addReg(AArch64::FP) 815 .addImm(0); 816 817 auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL)) 818 .add(RVTarget) 819 .getInstr(); 820 821 if (MI.shouldUpdateCallSiteInfo()) 822 MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall); 823 824 MI.eraseFromParent(); 825 finalizeBundle(MBB, OriginalCall->getIterator(), 826 std::next(RVCall->getIterator())); 827 return true; 828 } 829 830 bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB, 831 MachineBasicBlock::iterator MBBI) { 832 // Expand CALL_BTI pseudo to: 833 // - a branch to the call target 834 // - a BTI instruction 835 // Mark the sequence as a bundle, to avoid passes moving other code in 836 // between. 837 838 MachineInstr &MI = *MBBI; 839 MachineOperand &CallTarget = MI.getOperand(0); 840 assert((CallTarget.isGlobal() || CallTarget.isReg()) && 841 "invalid operand for regular call"); 842 unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR; 843 MachineInstr *Call = 844 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr(); 845 Call->addOperand(CallTarget); 846 Call->setCFIType(*MBB.getParent(), MI.getCFIType()); 847 Call->copyImplicitOps(*MBB.getParent(), MI); 848 849 MachineInstr *BTI = 850 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT)) 851 // BTI J so that setjmp can BR to this.
852 .addImm(36) 853 .getInstr(); 854 855 if (MI.shouldUpdateCallSiteInfo()) 856 MBB.getParent()->moveCallSiteInfo(&MI, Call); 857 858 MI.eraseFromParent(); 859 finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator())); 860 return true; 861 } 862 863 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext( 864 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { 865 Register CtxReg = MBBI->getOperand(0).getReg(); 866 Register BaseReg = MBBI->getOperand(1).getReg(); 867 int Offset = MBBI->getOperand(2).getImm(); 868 DebugLoc DL(MBBI->getDebugLoc()); 869 auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>(); 870 871 if (STI.getTargetTriple().getArchName() != "arm64e") { 872 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui)) 873 .addUse(CtxReg) 874 .addUse(BaseReg) 875 .addImm(Offset / 8) 876 .setMIFlag(MachineInstr::FrameSetup); 877 MBBI->eraseFromParent(); 878 return true; 879 } 880 881 // We need to sign the context in an address-discriminated way. 0xc31a is a 882 // fixed random value, chosen as part of the ABI. 883 // add x16, xBase, #Offset 884 // movk x16, #0xc31a, lsl #48 885 // mov x17, x22/xzr 886 // pacdb x17, x16 887 // str x17, [xBase, #Offset] 888 unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri; 889 BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16) 890 .addUse(BaseReg) 891 .addImm(abs(Offset)) 892 .addImm(0) 893 .setMIFlag(MachineInstr::FrameSetup); 894 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16) 895 .addUse(AArch64::X16) 896 .addImm(0xc31a) 897 .addImm(48) 898 .setMIFlag(MachineInstr::FrameSetup); 899 // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so 900 // move it somewhere before signing. 901 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17) 902 .addUse(AArch64::XZR) 903 .addUse(CtxReg) 904 .addImm(0) 905 .setMIFlag(MachineInstr::FrameSetup); 906 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17) 907 .addUse(AArch64::X17) 908 .addUse(AArch64::X16) 909 .setMIFlag(MachineInstr::FrameSetup); 910 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui)) 911 .addUse(AArch64::X17) 912 .addUse(BaseReg) 913 .addImm(Offset / 8) 914 .setMIFlag(MachineInstr::FrameSetup); 915 916 MBBI->eraseFromParent(); 917 return true; 918 } 919 920 MachineBasicBlock * 921 AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB, 922 MachineBasicBlock::iterator MBBI) { 923 MachineInstr &MI = *MBBI; 924 assert((std::next(MBBI) != MBB.end() || 925 MI.getParent()->successors().begin() != 926 MI.getParent()->successors().end()) && 927 "Unexpected unreachable in block that restores ZA"); 928 929 // Compare TPIDR2_EL0 value against 0. 930 DebugLoc DL = MI.getDebugLoc(); 931 MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX)) 932 .add(MI.getOperand(0)); 933 934 // Split MBB and create two new blocks: 935 // - MBB now contains all instructions before RestoreZAPseudo. 936 // - SMBB contains the RestoreZAPseudo instruction only. 937 // - EndBB contains all instructions after RestoreZAPseudo. 938 MachineInstr &PrevMI = *std::prev(MBBI); 939 MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true); 940 MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end() 941 ? *SMBB->successors().begin() 942 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true); 943 944 // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
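  // The resulting control flow is roughly (illustrative; the callee is the
  // restore routine carried by the pseudo, e.g. __arm_tpidr2_restore):
  //   MBB:   cbz <tpidr2 value>, SMBB
  //          b EndBB
  //   SMBB:  bl <restore routine>
  //          b EndBB
  //   EndBB: ...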
945 Cbz.addMBB(SMBB); 946 BuildMI(&MBB, DL, TII->get(AArch64::B)) 947 .addMBB(EndBB); 948 MBB.addSuccessor(EndBB); 949 950 // Replace the pseudo with a call (BL). 951 MachineInstrBuilder MIB = 952 BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL)); 953 MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit); 954 for (unsigned I = 2; I < MI.getNumOperands(); ++I) 955 MIB.add(MI.getOperand(I)); 956 BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB); 957 958 MI.eraseFromParent(); 959 return EndBB; 960 } 961 962 MachineBasicBlock * 963 AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB, 964 MachineBasicBlock::iterator MBBI) { 965 MachineInstr &MI = *MBBI; 966 // In the case of a smstart/smstop before an unreachable, just remove the pseudo. 967 // Exception handling code generated by Clang may introduce unreachables and it 968 // seems unnecessary to restore pstate.sm when that happens. Note that this is 969 // not just an optimisation: the code below expects a successor instruction/block 970 // in order to split the block at MBBI. 971 if (std::next(MBBI) == MBB.end() && 972 MI.getParent()->successors().begin() == 973 MI.getParent()->successors().end()) { 974 MI.eraseFromParent(); 975 return &MBB; 976 } 977 978 // Expand the pseudo into smstart or smstop instruction. The pseudo has the 979 // following operands: 980 // 981 // MSRpstatePseudo <za|sm|both>, <0|1>, pstate.sm, expectedval, <regmask> 982 // 983 // The pseudo is expanded into a conditional smstart/smstop, with a 984 // check if pstate.sm (register) equals the expected value, and if not, 985 // invokes the smstart/smstop. 986 // 987 // As an example, the following block contains a normal call from a 988 // streaming-compatible function: 989 // 990 // OrigBB: 991 // MSRpstatePseudo 3, 0, %0, 0, <regmask> <- Conditional SMSTOP 992 // bl @normal_callee 993 // MSRpstatePseudo 3, 1, %0, 0, <regmask> <- Conditional SMSTART 994 // 995 // ...which will be transformed into: 996 // 997 // OrigBB: 998 // TBNZx %0:gpr64, 0, SMBB 999 // b EndBB 1000 // 1001 // SMBB: 1002 // MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP 1003 // 1004 // EndBB: 1005 // bl @normal_callee 1006 // MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART 1007 // 1008 DebugLoc DL = MI.getDebugLoc(); 1009 1010 // Create the conditional branch based on the third operand of the 1011 // instruction, which tells us if we are wrapping a normal or streaming 1012 // function. 1013 // We test the live value of pstate.sm and toggle pstate.sm if this is not the 1014 // expected value for the callee (0 for a normal callee and 1 for a streaming 1015 // callee). 1016 auto PStateSM = MI.getOperand(2).getReg(); 1017 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); 1018 unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32); 1019 bool IsStreamingCallee = MI.getOperand(3).getImm(); 1020 unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW; 1021 MachineInstrBuilder Tbx = 1022 BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0); 1023 1024 // Split MBB and create two new blocks: 1025 // - MBB now contains all instructions before MSRcond_pstatesvcrImm1. 1026 // - SMBB contains the MSRcond_pstatesvcrImm1 instruction only. 1027 // - EndBB contains all instructions after MSRcond_pstatesvcrImm1. 1028 MachineInstr &PrevMI = *std::prev(MBBI); 1029 MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true); 1030 MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end() 1031 ?
*SMBB->successors().begin() 1032 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true); 1033 1034 // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB. 1035 Tbx.addMBB(SMBB); 1036 BuildMI(&MBB, DL, TII->get(AArch64::B)) 1037 .addMBB(EndBB); 1038 MBB.addSuccessor(EndBB); 1039 1040 // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB. 1041 MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(), 1042 TII->get(AArch64::MSRpstatesvcrImm1)); 1043 // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as 1044 // these contain the CopyFromReg for the first argument and the flag to 1045 // indicate whether the callee is streaming or normal). 1046 MIB.add(MI.getOperand(0)); 1047 MIB.add(MI.getOperand(1)); 1048 for (unsigned i = 4; i < MI.getNumOperands(); ++i) 1049 MIB.add(MI.getOperand(i)); 1050 1051 BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB); 1052 1053 MI.eraseFromParent(); 1054 return EndBB; 1055 } 1056 1057 bool AArch64ExpandPseudo::expandMultiVecPseudo( 1058 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 1059 TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass, 1060 unsigned ContiguousOp, unsigned StridedOpc) { 1061 MachineInstr &MI = *MBBI; 1062 Register Tuple = MI.getOperand(0).getReg(); 1063 1064 auto ContiguousRange = ContiguousClass.getRegisters(); 1065 auto StridedRange = StridedClass.getRegisters(); 1066 unsigned Opc; 1067 if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) { 1068 Opc = ContiguousOp; 1069 } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) { 1070 Opc = StridedOpc; 1071 } else 1072 llvm_unreachable("Cannot expand Multi-Vector pseudo"); 1073 1074 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) 1075 .add(MI.getOperand(0)) 1076 .add(MI.getOperand(1)) 1077 .add(MI.getOperand(2)) 1078 .add(MI.getOperand(3)); 1079 transferImpOps(MI, MIB, MIB); 1080 MI.eraseFromParent(); 1081 return true; 1082 } 1083 1084 /// If MBBI references a pseudo instruction that should be expanded here, 1085 /// do the expansion and return true. Otherwise return false. 1086 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, 1087 MachineBasicBlock::iterator MBBI, 1088 MachineBasicBlock::iterator &NextMBBI) { 1089 MachineInstr &MI = *MBBI; 1090 unsigned Opcode = MI.getOpcode(); 1091 1092 // Check if we can expand the destructive op 1093 int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode()); 1094 if (OrigInstr != -1) { 1095 auto &Orig = TII->get(OrigInstr); 1096 if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) != 1097 AArch64::NotDestructive) { 1098 return expand_DestructiveOp(MI, MBB, MBBI); 1099 } 1100 } 1101 1102 switch (Opcode) { 1103 default: 1104 break; 1105 1106 case AArch64::BSPv8i8: 1107 case AArch64::BSPv16i8: { 1108 Register DstReg = MI.getOperand(0).getReg(); 1109 if (DstReg == MI.getOperand(3).getReg()) { 1110 // Expand to BIT 1111 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1112 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8 1113 : AArch64::BITv16i8)) 1114 .add(MI.getOperand(0)) 1115 .add(MI.getOperand(3)) 1116 .add(MI.getOperand(2)) 1117 .add(MI.getOperand(1)); 1118 } else if (DstReg == MI.getOperand(2).getReg()) { 1119 // Expand to BIF 1120 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1121 TII->get(Opcode == AArch64::BSPv8i8 ? 
AArch64::BIFv8i8 1122 : AArch64::BIFv16i8)) 1123 .add(MI.getOperand(0)) 1124 .add(MI.getOperand(2)) 1125 .add(MI.getOperand(3)) 1126 .add(MI.getOperand(1)); 1127 } else { 1128 // Expand to BSL, use additional move if required 1129 if (DstReg == MI.getOperand(1).getReg()) { 1130 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1131 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8 1132 : AArch64::BSLv16i8)) 1133 .add(MI.getOperand(0)) 1134 .add(MI.getOperand(1)) 1135 .add(MI.getOperand(2)) 1136 .add(MI.getOperand(3)); 1137 } else { 1138 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1139 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8 1140 : AArch64::ORRv16i8)) 1141 .addReg(DstReg, 1142 RegState::Define | 1143 getRenamableRegState(MI.getOperand(0).isRenamable())) 1144 .add(MI.getOperand(1)) 1145 .add(MI.getOperand(1)); 1146 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1147 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8 1148 : AArch64::BSLv16i8)) 1149 .add(MI.getOperand(0)) 1150 .addReg(DstReg, 1151 RegState::Kill | 1152 getRenamableRegState(MI.getOperand(0).isRenamable())) 1153 .add(MI.getOperand(2)) 1154 .add(MI.getOperand(3)); 1155 } 1156 } 1157 MI.eraseFromParent(); 1158 return true; 1159 } 1160 1161 case AArch64::ADDWrr: 1162 case AArch64::SUBWrr: 1163 case AArch64::ADDXrr: 1164 case AArch64::SUBXrr: 1165 case AArch64::ADDSWrr: 1166 case AArch64::SUBSWrr: 1167 case AArch64::ADDSXrr: 1168 case AArch64::SUBSXrr: 1169 case AArch64::ANDWrr: 1170 case AArch64::ANDXrr: 1171 case AArch64::BICWrr: 1172 case AArch64::BICXrr: 1173 case AArch64::ANDSWrr: 1174 case AArch64::ANDSXrr: 1175 case AArch64::BICSWrr: 1176 case AArch64::BICSXrr: 1177 case AArch64::EONWrr: 1178 case AArch64::EONXrr: 1179 case AArch64::EORWrr: 1180 case AArch64::EORXrr: 1181 case AArch64::ORNWrr: 1182 case AArch64::ORNXrr: 1183 case AArch64::ORRWrr: 1184 case AArch64::ORRXrr: { 1185 unsigned Opcode; 1186 switch (MI.getOpcode()) { 1187 default: 1188 return false; 1189 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break; 1190 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break; 1191 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break; 1192 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break; 1193 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break; 1194 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break; 1195 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break; 1196 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break; 1197 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break; 1198 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break; 1199 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break; 1200 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break; 1201 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break; 1202 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break; 1203 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break; 1204 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break; 1205 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break; 1206 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break; 1207 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break; 1208 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break; 1209 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break; 1210 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break; 1211 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break; 1212 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break; 1213 } 1214 MachineFunction &MF = *MBB.getParent(); 1215 // Try to create new inst without implicit operands added. 
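    // For instance (illustrative): ADDWrr $w0, $w1, $w2 is rewritten as
    // ADDWrs $w0, $w1, $w2 with an LSL #0 shifter operand appended.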
1216 MachineInstr *NewMI = MF.CreateMachineInstr( 1217 TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true); 1218 MBB.insert(MBBI, NewMI); 1219 MachineInstrBuilder MIB1(MF, NewMI); 1220 MIB1->setPCSections(MF, MI.getPCSections()); 1221 MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define) 1222 .add(MI.getOperand(1)) 1223 .add(MI.getOperand(2)) 1224 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 1225 transferImpOps(MI, MIB1, MIB1); 1226 if (auto DebugNumber = MI.peekDebugInstrNum()) 1227 NewMI->setDebugInstrNum(DebugNumber); 1228 MI.eraseFromParent(); 1229 return true; 1230 } 1231 1232 case AArch64::LOADgot: { 1233 MachineFunction *MF = MBB.getParent(); 1234 Register DstReg = MI.getOperand(0).getReg(); 1235 const MachineOperand &MO1 = MI.getOperand(1); 1236 unsigned Flags = MO1.getTargetFlags(); 1237 1238 if (MF->getTarget().getCodeModel() == CodeModel::Tiny) { 1239 // Tiny codemodel expand to LDR 1240 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 1241 TII->get(AArch64::LDRXl), DstReg); 1242 1243 if (MO1.isGlobal()) { 1244 MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags); 1245 } else if (MO1.isSymbol()) { 1246 MIB.addExternalSymbol(MO1.getSymbolName(), Flags); 1247 } else { 1248 assert(MO1.isCPI() && 1249 "Only expect globals, externalsymbols, or constant pools"); 1250 MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags); 1251 } 1252 } else { 1253 // Small codemodel expand into ADRP + LDR. 1254 MachineFunction &MF = *MI.getParent()->getParent(); 1255 DebugLoc DL = MI.getDebugLoc(); 1256 MachineInstrBuilder MIB1 = 1257 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg); 1258 1259 MachineInstrBuilder MIB2; 1260 if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) { 1261 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); 1262 unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32); 1263 unsigned DstFlags = MI.getOperand(0).getTargetFlags(); 1264 MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui)) 1265 .addDef(Reg32) 1266 .addReg(DstReg, RegState::Kill) 1267 .addReg(DstReg, DstFlags | RegState::Implicit); 1268 } else { 1269 Register DstReg = MI.getOperand(0).getReg(); 1270 MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui)) 1271 .add(MI.getOperand(0)) 1272 .addUse(DstReg, RegState::Kill); 1273 } 1274 1275 if (MO1.isGlobal()) { 1276 MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE); 1277 MIB2.addGlobalAddress(MO1.getGlobal(), 0, 1278 Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 1279 } else if (MO1.isSymbol()) { 1280 MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE); 1281 MIB2.addExternalSymbol(MO1.getSymbolName(), Flags | 1282 AArch64II::MO_PAGEOFF | 1283 AArch64II::MO_NC); 1284 } else { 1285 assert(MO1.isCPI() && 1286 "Only expect globals, externalsymbols, or constant pools"); 1287 MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), 1288 Flags | AArch64II::MO_PAGE); 1289 MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), 1290 Flags | AArch64II::MO_PAGEOFF | 1291 AArch64II::MO_NC); 1292 } 1293 1294 transferImpOps(MI, MIB1, MIB2); 1295 } 1296 MI.eraseFromParent(); 1297 return true; 1298 } 1299 case AArch64::MOVaddrBA: { 1300 MachineFunction &MF = *MI.getParent()->getParent(); 1301 if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) { 1302 // blockaddress expressions have to come from a constant pool because the 1303 // largest addend (and hence offset within a function) allowed for ADRP is 1304 // only 8MB. 
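      // The emitted sequence is roughly (illustrative, label name hypothetical):
      //   adrp xD, lCPI<n>@PAGE
      //   ldr  xD, [xD, lCPI<n>@PAGEOFF]
      // where lCPI<n> is the constant-pool slot created below for the
      // blockaddress.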
1305 const BlockAddress *BA = MI.getOperand(1).getBlockAddress(); 1306 assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset"); 1307 1308 MachineConstantPool *MCP = MF.getConstantPool(); 1309 unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8)); 1310 1311 Register DstReg = MI.getOperand(0).getReg(); 1312 auto MIB1 = 1313 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) 1314 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE); 1315 auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), 1316 TII->get(AArch64::LDRXui), DstReg) 1317 .addUse(DstReg) 1318 .addConstantPoolIndex( 1319 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 1320 transferImpOps(MI, MIB1, MIB2); 1321 MI.eraseFromParent(); 1322 return true; 1323 } 1324 } 1325 [[fallthrough]]; 1326 case AArch64::MOVaddr: 1327 case AArch64::MOVaddrJT: 1328 case AArch64::MOVaddrCP: 1329 case AArch64::MOVaddrTLS: 1330 case AArch64::MOVaddrEXT: { 1331 // Expand into ADRP + ADD. 1332 Register DstReg = MI.getOperand(0).getReg(); 1333 assert(DstReg != AArch64::XZR); 1334 MachineInstrBuilder MIB1 = 1335 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) 1336 .add(MI.getOperand(1)); 1337 1338 if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) { 1339 // MO_TAGGED on the page indicates a tagged address. Set the tag now. 1340 // We do so by creating a MOVK that sets bits 48-63 of the register to 1341 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in 1342 // the small code model so we can assume a binary size of <= 4GB, which 1343 // makes the untagged PC relative offset positive. The binary must also be 1344 // loaded into address range [0, 2^48). Both of these properties need to 1345 // be ensured at runtime when using tagged addresses. 
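      // Roughly (illustrative), the full MOVaddr expansion then becomes a
      // three-instruction sequence: adrp xD, sym; a movk that inserts the
      // PC-relative G3 chunk of (sym + 0x100000000) into bits 48-63; and
      // add xD, xD, :lo12:sym.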
1346 auto Tag = MI.getOperand(1); 1347 Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3); 1348 Tag.setOffset(0x100000000); 1349 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg) 1350 .addReg(DstReg) 1351 .add(Tag) 1352 .addImm(48); 1353 } 1354 1355 MachineInstrBuilder MIB2 = 1356 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) 1357 .add(MI.getOperand(0)) 1358 .addReg(DstReg) 1359 .add(MI.getOperand(2)) 1360 .addImm(0); 1361 1362 transferImpOps(MI, MIB1, MIB2); 1363 MI.eraseFromParent(); 1364 return true; 1365 } 1366 case AArch64::ADDlowTLS: 1367 // Produce a plain ADD 1368 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) 1369 .add(MI.getOperand(0)) 1370 .add(MI.getOperand(1)) 1371 .add(MI.getOperand(2)) 1372 .addImm(0); 1373 MI.eraseFromParent(); 1374 return true; 1375 1376 case AArch64::MOVbaseTLS: { 1377 Register DstReg = MI.getOperand(0).getReg(); 1378 auto SysReg = AArch64SysReg::TPIDR_EL0; 1379 MachineFunction *MF = MBB.getParent(); 1380 if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP()) 1381 SysReg = AArch64SysReg::TPIDR_EL3; 1382 else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP()) 1383 SysReg = AArch64SysReg::TPIDR_EL2; 1384 else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP()) 1385 SysReg = AArch64SysReg::TPIDR_EL1; 1386 else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP()) 1387 SysReg = AArch64SysReg::TPIDRRO_EL0; 1388 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg) 1389 .addImm(SysReg); 1390 MI.eraseFromParent(); 1391 return true; 1392 } 1393 1394 case AArch64::MOVi32imm: 1395 return expandMOVImm(MBB, MBBI, 32); 1396 case AArch64::MOVi64imm: 1397 return expandMOVImm(MBB, MBBI, 64); 1398 case AArch64::RET_ReallyLR: { 1399 // Hiding the LR use with RET_ReallyLR may lead to extra kills in the 1400 // function and missing live-ins. We are fine in practice because callee 1401 // saved register handling ensures the register value is restored before 1402 // RET, but we need the undef flag here to appease the MachineVerifier 1403 // liveness checks. 
1404 MachineInstrBuilder MIB = 1405 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) 1406 .addReg(AArch64::LR, RegState::Undef); 1407 transferImpOps(MI, MIB, MIB); 1408 MI.eraseFromParent(); 1409 return true; 1410 } 1411 case AArch64::CMP_SWAP_8: 1412 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB, 1413 AArch64::SUBSWrx, 1414 AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0), 1415 AArch64::WZR, NextMBBI); 1416 case AArch64::CMP_SWAP_16: 1417 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH, 1418 AArch64::SUBSWrx, 1419 AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0), 1420 AArch64::WZR, NextMBBI); 1421 case AArch64::CMP_SWAP_32: 1422 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW, 1423 AArch64::SUBSWrs, 1424 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0), 1425 AArch64::WZR, NextMBBI); 1426 case AArch64::CMP_SWAP_64: 1427 return expandCMP_SWAP(MBB, MBBI, 1428 AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs, 1429 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0), 1430 AArch64::XZR, NextMBBI); 1431 case AArch64::CMP_SWAP_128: 1432 case AArch64::CMP_SWAP_128_RELEASE: 1433 case AArch64::CMP_SWAP_128_ACQUIRE: 1434 case AArch64::CMP_SWAP_128_MONOTONIC: 1435 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI); 1436 1437 case AArch64::AESMCrrTied: 1438 case AArch64::AESIMCrrTied: { 1439 MachineInstrBuilder MIB = 1440 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1441 TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr : 1442 AArch64::AESIMCrr)) 1443 .add(MI.getOperand(0)) 1444 .add(MI.getOperand(1)); 1445 transferImpOps(MI, MIB, MIB); 1446 MI.eraseFromParent(); 1447 return true; 1448 } 1449 case AArch64::IRGstack: { 1450 MachineFunction &MF = *MBB.getParent(); 1451 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 1452 const AArch64FrameLowering *TFI = 1453 MF.getSubtarget<AArch64Subtarget>().getFrameLowering(); 1454 1455 // IRG does not allow immediate offset. getTaggedBasePointerOffset should 1456 // almost always point to SP-after-prologue; if not, emit a longer 1457 // instruction sequence. 1458 int BaseOffset = -AFI->getTaggedBasePointerOffset(); 1459 Register FrameReg; 1460 StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference( 1461 MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg, 1462 /*PreferFP=*/false, 1463 /*ForSimm=*/true); 1464 Register SrcReg = FrameReg; 1465 if (FrameRegOffset) { 1466 // Use output register as temporary. 1467 SrcReg = MI.getOperand(0).getReg(); 1468 emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg, 1469 FrameRegOffset, TII); 1470 } 1471 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG)) 1472 .add(MI.getOperand(0)) 1473 .addUse(SrcReg) 1474 .add(MI.getOperand(2)); 1475 MI.eraseFromParent(); 1476 return true; 1477 } 1478 case AArch64::TAGPstack: { 1479 int64_t Offset = MI.getOperand(2).getImm(); 1480 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1481 TII->get(Offset >= 0 ? 
AArch64::ADDG : AArch64::SUBG)) 1482 .add(MI.getOperand(0)) 1483 .add(MI.getOperand(1)) 1484 .addImm(std::abs(Offset)) 1485 .add(MI.getOperand(4)); 1486 MI.eraseFromParent(); 1487 return true; 1488 } 1489 case AArch64::STGloop_wback: 1490 case AArch64::STZGloop_wback: 1491 return expandSetTagLoop(MBB, MBBI, NextMBBI); 1492 case AArch64::STGloop: 1493 case AArch64::STZGloop: 1494 report_fatal_error( 1495 "Non-writeback variants of STGloop / STZGloop should not " 1496 "survive past PrologEpilogInserter."); 1497 case AArch64::STR_ZZZZXI: 1498 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4); 1499 case AArch64::STR_ZZZXI: 1500 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3); 1501 case AArch64::STR_ZZXI: 1502 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2); 1503 case AArch64::STR_PPXI: 1504 return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2); 1505 case AArch64::LDR_ZZZZXI: 1506 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4); 1507 case AArch64::LDR_ZZZXI: 1508 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3); 1509 case AArch64::LDR_ZZXI: 1510 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2); 1511 case AArch64::LDR_PPXI: 1512 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2); 1513 case AArch64::BLR_RVMARKER: 1514 return expandCALL_RVMARKER(MBB, MBBI); 1515 case AArch64::BLR_BTI: 1516 return expandCALL_BTI(MBB, MBBI); 1517 case AArch64::StoreSwiftAsyncContext: 1518 return expandStoreSwiftAsyncContext(MBB, MBBI); 1519 case AArch64::RestoreZAPseudo: { 1520 auto *NewMBB = expandRestoreZA(MBB, MBBI); 1521 if (NewMBB != &MBB) 1522 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. 1523 return true; 1524 } 1525 case AArch64::MSRpstatePseudo: { 1526 auto *NewMBB = expandCondSMToggle(MBB, MBBI); 1527 if (NewMBB != &MBB) 1528 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. 
  case AArch64::LD1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
  case AArch64::LD1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
  case AArch64::LD1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
  case AArch64::LD1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
  case AArch64::LDNT1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
  case AArch64::LDNT1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
  case AArch64::LDNT1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
  case AArch64::LDNT1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
  case AArch64::LD1B_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
                                AArch64::LD1B_2Z_STRIDED);
  case AArch64::LD1H_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
                                AArch64::LD1H_2Z_STRIDED);
  case AArch64::LD1W_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
                                AArch64::LD1W_2Z_STRIDED);
  case AArch64::LD1D_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
                                AArch64::LD1D_2Z_STRIDED);
  case AArch64::LDNT1B_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
  case AArch64::LDNT1H_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
  case AArch64::LDNT1W_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
  case AArch64::LDNT1D_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
  case AArch64::LD1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
  case AArch64::LD1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
  case AArch64::LD1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
  case AArch64::LD1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
  case AArch64::LDNT1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
  case AArch64::LDNT1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
  case AArch64::LDNT1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
  case AArch64::LDNT1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
  case AArch64::LD1B_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
                                AArch64::LD1B_4Z_STRIDED);
  case AArch64::LD1H_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
                                AArch64::LD1H_4Z_STRIDED);
  case AArch64::LD1W_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
                                AArch64::LD1W_4Z_STRIDED);
  case AArch64::LD1D_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
                                AArch64::LD1D_4Z_STRIDED);
  case AArch64::LDNT1B_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
  case AArch64::LDNT1H_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
  case AArch64::LDNT1W_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
  case AArch64::LDNT1D_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}