1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that expands pseudo instructions into target 10 // instructions to allow proper scheduling and other late optimizations. This 11 // pass should be run after register allocation but before the post-regalloc 12 // scheduling pass. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AArch64ExpandImm.h" 17 #include "AArch64InstrInfo.h" 18 #include "AArch64MachineFunctionInfo.h" 19 #include "AArch64Subtarget.h" 20 #include "MCTargetDesc/AArch64AddressingModes.h" 21 #include "Utils/AArch64BaseInfo.h" 22 #include "llvm/CodeGen/LivePhysRegs.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineConstantPool.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineInstr.h" 28 #include "llvm/CodeGen/MachineInstrBuilder.h" 29 #include "llvm/CodeGen/MachineOperand.h" 30 #include "llvm/CodeGen/TargetSubtargetInfo.h" 31 #include "llvm/IR/DebugLoc.h" 32 #include "llvm/MC/MCInstrDesc.h" 33 #include "llvm/Pass.h" 34 #include "llvm/Support/CodeGen.h" 35 #include "llvm/Support/MathExtras.h" 36 #include "llvm/Target/TargetMachine.h" 37 #include "llvm/TargetParser/Triple.h" 38 #include <cassert> 39 #include <cstdint> 40 #include <iterator> 41 #include <utility> 42 43 using namespace llvm; 44 45 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass" 46 47 namespace { 48 49 class AArch64ExpandPseudo : public MachineFunctionPass { 50 public: 51 const AArch64InstrInfo *TII; 52 53 static char ID; 54 55 AArch64ExpandPseudo() : MachineFunctionPass(ID) { 56 initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry()); 57 } 58 59 bool runOnMachineFunction(MachineFunction &Fn) override; 60 61 StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; } 62 63 private: 64 bool expandMBB(MachineBasicBlock &MBB); 65 bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 66 MachineBasicBlock::iterator &NextMBBI); 67 bool expandMultiVecPseudo(MachineBasicBlock &MBB, 68 MachineBasicBlock::iterator MBBI, 69 TargetRegisterClass ContiguousClass, 70 TargetRegisterClass StridedClass, 71 unsigned ContiguousOpc, unsigned StridedOpc); 72 bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 73 unsigned BitSize); 74 75 bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB, 76 MachineBasicBlock::iterator MBBI); 77 bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 78 unsigned LdarOp, unsigned StlrOp, unsigned CmpOp, 79 unsigned ExtendImm, unsigned ZeroReg, 80 MachineBasicBlock::iterator &NextMBBI); 81 bool expandCMP_SWAP_128(MachineBasicBlock &MBB, 82 MachineBasicBlock::iterator MBBI, 83 MachineBasicBlock::iterator &NextMBBI); 84 bool expandSetTagLoop(MachineBasicBlock &MBB, 85 MachineBasicBlock::iterator MBBI, 86 MachineBasicBlock::iterator &NextMBBI); 87 bool expandSVESpillFill(MachineBasicBlock &MBB, 88 MachineBasicBlock::iterator MBBI, unsigned Opc, 89 unsigned N); 90 bool expandCALL_RVMARKER(MachineBasicBlock &MBB, 91 MachineBasicBlock::iterator MBBI); 92 bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); 93 bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB, 94 MachineBasicBlock::iterator MBBI); 95 MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB, 96 MachineBasicBlock::iterator MBBI); 97 MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB, 98 MachineBasicBlock::iterator MBBI); 99 }; 100 101 } // end anonymous namespace 102 103 char AArch64ExpandPseudo::ID = 0; 104 105 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo", 106 AARCH64_EXPAND_PSEUDO_NAME, false, false) 107 108 /// Transfer implicit operands on the pseudo instruction to the 109 /// instructions created from the expansion. 110 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, 111 MachineInstrBuilder &DefMI) { 112 const MCInstrDesc &Desc = OldMI.getDesc(); 113 for (const MachineOperand &MO : 114 llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) { 115 assert(MO.isReg() && MO.getReg()); 116 if (MO.isUse()) 117 UseMI.add(MO); 118 else 119 DefMI.add(MO); 120 } 121 } 122 123 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more 124 /// real move-immediate instructions to synthesize the immediate. 125 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, 126 MachineBasicBlock::iterator MBBI, 127 unsigned BitSize) { 128 MachineInstr &MI = *MBBI; 129 Register DstReg = MI.getOperand(0).getReg(); 130 uint64_t RenamableState = 131 MI.getOperand(0).isRenamable() ? RegState::Renamable : 0; 132 uint64_t Imm = MI.getOperand(1).getImm(); 133 134 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) { 135 // Useless def, and we don't want to risk creating an invalid ORR (which 136 // would really write to sp). 137 MI.eraseFromParent(); 138 return true; 139 } 140 141 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 142 AArch64_IMM::expandMOVImm(Imm, BitSize, Insn); 143 assert(Insn.size() != 0); 144 145 SmallVector<MachineInstrBuilder, 4> MIBS; 146 for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) { 147 bool LastItem = std::next(I) == E; 148 switch (I->Opcode) 149 { 150 default: llvm_unreachable("unhandled!"); break; 151 152 case AArch64::ORRWri: 153 case AArch64::ORRXri: 154 if (I->Op1 == 0) { 155 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 156 .add(MI.getOperand(0)) 157 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR) 158 .addImm(I->Op2)); 159 } else { 160 Register DstReg = MI.getOperand(0).getReg(); 161 bool DstIsDead = MI.getOperand(0).isDead(); 162 MIBS.push_back( 163 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 164 .addReg(DstReg, RegState::Define | 165 getDeadRegState(DstIsDead && LastItem) | 166 RenamableState) 167 .addReg(DstReg) 168 .addImm(I->Op2)); 169 } 170 break; 171 case AArch64::ANDXri: 172 case AArch64::EORXri: 173 if (I->Op1 == 0) { 174 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 175 .add(MI.getOperand(0)) 176 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR) 177 .addImm(I->Op2)); 178 } else { 179 Register DstReg = MI.getOperand(0).getReg(); 180 bool DstIsDead = MI.getOperand(0).isDead(); 181 MIBS.push_back( 182 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 183 .addReg(DstReg, RegState::Define | 184 getDeadRegState(DstIsDead && LastItem) | 185 RenamableState) 186 .addReg(DstReg) 187 .addImm(I->Op2)); 188 } 189 break; 190 case AArch64::MOVNWi: 191 case AArch64::MOVNXi: 192 case AArch64::MOVZWi: 193 case AArch64::MOVZXi: { 194 bool DstIsDead = MI.getOperand(0).isDead(); 195 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 196 .addReg(DstReg, RegState::Define | 197 getDeadRegState(DstIsDead && LastItem) | 198 RenamableState) 199 .addImm(I->Op1) 200 .addImm(I->Op2)); 201 } break; 202 case AArch64::MOVKWi: 203 case AArch64::MOVKXi: { 204 Register DstReg = MI.getOperand(0).getReg(); 205 bool DstIsDead = MI.getOperand(0).isDead(); 206 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) 207 .addReg(DstReg, 208 RegState::Define | 209 getDeadRegState(DstIsDead && LastItem) | 210 RenamableState) 211 .addReg(DstReg) 212 .addImm(I->Op1) 213 .addImm(I->Op2)); 214 } break; 215 } 216 } 217 transferImpOps(MI, MIBS.front(), MIBS.back()); 218 MI.eraseFromParent(); 219 return true; 220 } 221 222 bool AArch64ExpandPseudo::expandCMP_SWAP( 223 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp, 224 unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg, 225 MachineBasicBlock::iterator &NextMBBI) { 226 MachineInstr &MI = *MBBI; 227 MIMetadata MIMD(MI); 228 const MachineOperand &Dest = MI.getOperand(0); 229 Register StatusReg = MI.getOperand(1).getReg(); 230 bool StatusDead = MI.getOperand(1).isDead(); 231 // Duplicating undef operands into 2 instructions does not guarantee the same 232 // value on both; However undef should be replaced by xzr anyway. 233 assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); 234 Register AddrReg = MI.getOperand(2).getReg(); 235 Register DesiredReg = MI.getOperand(3).getReg(); 236 Register NewReg = MI.getOperand(4).getReg(); 237 238 MachineFunction *MF = MBB.getParent(); 239 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 240 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 241 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 242 243 MF->insert(++MBB.getIterator(), LoadCmpBB); 244 MF->insert(++LoadCmpBB->getIterator(), StoreBB); 245 MF->insert(++StoreBB->getIterator(), DoneBB); 246 247 // .Lloadcmp: 248 // mov wStatus, 0 249 // ldaxr xDest, [xAddr] 250 // cmp xDest, xDesired 251 // b.ne .Ldone 252 if (!StatusDead) 253 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg) 254 .addImm(0).addImm(0); 255 BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg()) 256 .addReg(AddrReg); 257 BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg) 258 .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) 259 .addReg(DesiredReg) 260 .addImm(ExtendImm); 261 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc)) 262 .addImm(AArch64CC::NE) 263 .addMBB(DoneBB) 264 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill); 265 LoadCmpBB->addSuccessor(DoneBB); 266 LoadCmpBB->addSuccessor(StoreBB); 267 268 // .Lstore: 269 // stlxr wStatus, xNew, [xAddr] 270 // cbnz wStatus, .Lloadcmp 271 BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg) 272 .addReg(NewReg) 273 .addReg(AddrReg); 274 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW)) 275 .addReg(StatusReg, getKillRegState(StatusDead)) 276 .addMBB(LoadCmpBB); 277 StoreBB->addSuccessor(LoadCmpBB); 278 StoreBB->addSuccessor(DoneBB); 279 280 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); 281 DoneBB->transferSuccessors(&MBB); 282 283 MBB.addSuccessor(LoadCmpBB); 284 285 NextMBBI = MBB.end(); 286 MI.eraseFromParent(); 287 288 // Recompute livein lists. 289 LivePhysRegs LiveRegs; 290 computeAndAddLiveIns(LiveRegs, *DoneBB); 291 computeAndAddLiveIns(LiveRegs, *StoreBB); 292 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 293 // Do an extra pass around the loop to get loop carried registers right. 294 StoreBB->clearLiveIns(); 295 computeAndAddLiveIns(LiveRegs, *StoreBB); 296 LoadCmpBB->clearLiveIns(); 297 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 298 299 return true; 300 } 301 302 bool AArch64ExpandPseudo::expandCMP_SWAP_128( 303 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 304 MachineBasicBlock::iterator &NextMBBI) { 305 MachineInstr &MI = *MBBI; 306 MIMetadata MIMD(MI); 307 MachineOperand &DestLo = MI.getOperand(0); 308 MachineOperand &DestHi = MI.getOperand(1); 309 Register StatusReg = MI.getOperand(2).getReg(); 310 bool StatusDead = MI.getOperand(2).isDead(); 311 // Duplicating undef operands into 2 instructions does not guarantee the same 312 // value on both; However undef should be replaced by xzr anyway. 313 assert(!MI.getOperand(3).isUndef() && "cannot handle undef"); 314 Register AddrReg = MI.getOperand(3).getReg(); 315 Register DesiredLoReg = MI.getOperand(4).getReg(); 316 Register DesiredHiReg = MI.getOperand(5).getReg(); 317 Register NewLoReg = MI.getOperand(6).getReg(); 318 Register NewHiReg = MI.getOperand(7).getReg(); 319 320 unsigned LdxpOp, StxpOp; 321 322 switch (MI.getOpcode()) { 323 case AArch64::CMP_SWAP_128_MONOTONIC: 324 LdxpOp = AArch64::LDXPX; 325 StxpOp = AArch64::STXPX; 326 break; 327 case AArch64::CMP_SWAP_128_RELEASE: 328 LdxpOp = AArch64::LDXPX; 329 StxpOp = AArch64::STLXPX; 330 break; 331 case AArch64::CMP_SWAP_128_ACQUIRE: 332 LdxpOp = AArch64::LDAXPX; 333 StxpOp = AArch64::STXPX; 334 break; 335 case AArch64::CMP_SWAP_128: 336 LdxpOp = AArch64::LDAXPX; 337 StxpOp = AArch64::STLXPX; 338 break; 339 default: 340 llvm_unreachable("Unexpected opcode"); 341 } 342 343 MachineFunction *MF = MBB.getParent(); 344 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 345 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 346 auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 347 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 348 349 MF->insert(++MBB.getIterator(), LoadCmpBB); 350 MF->insert(++LoadCmpBB->getIterator(), StoreBB); 351 MF->insert(++StoreBB->getIterator(), FailBB); 352 MF->insert(++FailBB->getIterator(), DoneBB); 353 354 // .Lloadcmp: 355 // ldaxp xDestLo, xDestHi, [xAddr] 356 // cmp xDestLo, xDesiredLo 357 // sbcs xDestHi, xDesiredHi 358 // b.ne .Ldone 359 BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp)) 360 .addReg(DestLo.getReg(), RegState::Define) 361 .addReg(DestHi.getReg(), RegState::Define) 362 .addReg(AddrReg); 363 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR) 364 .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead())) 365 .addReg(DesiredLoReg) 366 .addImm(0); 367 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg) 368 .addUse(AArch64::WZR) 369 .addUse(AArch64::WZR) 370 .addImm(AArch64CC::EQ); 371 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR) 372 .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead())) 373 .addReg(DesiredHiReg) 374 .addImm(0); 375 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg) 376 .addUse(StatusReg, RegState::Kill) 377 .addUse(StatusReg, RegState::Kill) 378 .addImm(AArch64CC::EQ); 379 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW)) 380 .addUse(StatusReg, getKillRegState(StatusDead)) 381 .addMBB(FailBB); 382 LoadCmpBB->addSuccessor(FailBB); 383 LoadCmpBB->addSuccessor(StoreBB); 384 385 // .Lstore: 386 // stlxp wStatus, xNewLo, xNewHi, [xAddr] 387 // cbnz wStatus, .Lloadcmp 388 BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg) 389 .addReg(NewLoReg) 390 .addReg(NewHiReg) 391 .addReg(AddrReg); 392 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW)) 393 .addReg(StatusReg, getKillRegState(StatusDead)) 394 .addMBB(LoadCmpBB); 395 BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB); 396 StoreBB->addSuccessor(LoadCmpBB); 397 StoreBB->addSuccessor(DoneBB); 398 399 // .Lfail: 400 // stlxp wStatus, xDestLo, xDestHi, [xAddr] 401 // cbnz wStatus, .Lloadcmp 402 BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg) 403 .addReg(DestLo.getReg()) 404 .addReg(DestHi.getReg()) 405 .addReg(AddrReg); 406 BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW)) 407 .addReg(StatusReg, getKillRegState(StatusDead)) 408 .addMBB(LoadCmpBB); 409 FailBB->addSuccessor(LoadCmpBB); 410 FailBB->addSuccessor(DoneBB); 411 412 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); 413 DoneBB->transferSuccessors(&MBB); 414 415 MBB.addSuccessor(LoadCmpBB); 416 417 NextMBBI = MBB.end(); 418 MI.eraseFromParent(); 419 420 // Recompute liveness bottom up. 421 LivePhysRegs LiveRegs; 422 computeAndAddLiveIns(LiveRegs, *DoneBB); 423 computeAndAddLiveIns(LiveRegs, *FailBB); 424 computeAndAddLiveIns(LiveRegs, *StoreBB); 425 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 426 427 // Do an extra pass in the loop to get the loop carried dependencies right. 428 FailBB->clearLiveIns(); 429 computeAndAddLiveIns(LiveRegs, *FailBB); 430 StoreBB->clearLiveIns(); 431 computeAndAddLiveIns(LiveRegs, *StoreBB); 432 LoadCmpBB->clearLiveIns(); 433 computeAndAddLiveIns(LiveRegs, *LoadCmpBB); 434 435 return true; 436 } 437 438 /// \brief Expand Pseudos to Instructions with destructive operands. 439 /// 440 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes 441 /// or for fixing relaxed register allocation conditions to comply with 442 /// the instructions register constraints. The latter case may be cheaper 443 /// than setting the register constraints in the register allocator, 444 /// since that will insert regular MOV instructions rather than MOVPRFX. 445 /// 446 /// Example (after register allocation): 447 /// 448 /// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0 449 /// 450 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B. 451 /// * We cannot map directly to FSUB_ZPmZ_B because the register 452 /// constraints of the instruction are not met. 453 /// * Also the _ZERO specifies the false lanes need to be zeroed. 454 /// 455 /// We first try to see if the destructive operand == result operand, 456 /// if not, we try to swap the operands, e.g. 457 /// 458 /// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1 459 /// 460 /// But because FSUB_ZPmZ is not commutative, this is semantically 461 /// different, so we need a reverse instruction: 462 /// 463 /// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1 464 /// 465 /// Then we implement the zeroing of the false lanes of Z0 by adding 466 /// a zeroing MOVPRFX instruction: 467 /// 468 /// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0 469 /// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1 470 /// 471 /// Note that this can only be done for _ZERO or _UNDEF variants where 472 /// we can guarantee the false lanes to be zeroed (by implementing this) 473 /// or that they are undef (don't care / not used), otherwise the 474 /// swapping of operands is illegal because the operation is not 475 /// (or cannot be emulated to be) fully commutative. 476 bool AArch64ExpandPseudo::expand_DestructiveOp( 477 MachineInstr &MI, 478 MachineBasicBlock &MBB, 479 MachineBasicBlock::iterator MBBI) { 480 unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode()); 481 uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask; 482 uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask; 483 bool FalseZero = FalseLanes == AArch64::FalseLanesZero; 484 Register DstReg = MI.getOperand(0).getReg(); 485 bool DstIsDead = MI.getOperand(0).isDead(); 486 bool UseRev = false; 487 unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx; 488 489 switch (DType) { 490 case AArch64::DestructiveBinaryComm: 491 case AArch64::DestructiveBinaryCommWithRev: 492 if (DstReg == MI.getOperand(3).getReg()) { 493 // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1 494 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2); 495 UseRev = true; 496 break; 497 } 498 [[fallthrough]]; 499 case AArch64::DestructiveBinary: 500 case AArch64::DestructiveBinaryImm: 501 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3); 502 break; 503 case AArch64::DestructiveUnaryPassthru: 504 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3); 505 break; 506 case AArch64::DestructiveTernaryCommWithRev: 507 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4); 508 if (DstReg == MI.getOperand(3).getReg()) { 509 // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za 510 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2); 511 UseRev = true; 512 } else if (DstReg == MI.getOperand(4).getReg()) { 513 // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za 514 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2); 515 UseRev = true; 516 } 517 break; 518 default: 519 llvm_unreachable("Unsupported Destructive Operand type"); 520 } 521 522 // MOVPRFX can only be used if the destination operand 523 // is the destructive operand, not as any other operand, 524 // so the Destructive Operand must be unique. 525 bool DOPRegIsUnique = false; 526 switch (DType) { 527 case AArch64::DestructiveBinary: 528 DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg(); 529 break; 530 case AArch64::DestructiveBinaryComm: 531 case AArch64::DestructiveBinaryCommWithRev: 532 DOPRegIsUnique = 533 DstReg != MI.getOperand(DOPIdx).getReg() || 534 MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg(); 535 break; 536 case AArch64::DestructiveUnaryPassthru: 537 case AArch64::DestructiveBinaryImm: 538 DOPRegIsUnique = true; 539 break; 540 case AArch64::DestructiveTernaryCommWithRev: 541 DOPRegIsUnique = 542 DstReg != MI.getOperand(DOPIdx).getReg() || 543 (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() && 544 MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg()); 545 break; 546 } 547 548 // Resolve the reverse opcode 549 if (UseRev) { 550 int NewOpcode; 551 // e.g. DIV -> DIVR 552 if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1) 553 Opcode = NewOpcode; 554 // e.g. DIVR -> DIV 555 else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1) 556 Opcode = NewOpcode; 557 } 558 559 // Get the right MOVPRFX 560 uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode); 561 unsigned MovPrfx, LSLZero, MovPrfxZero; 562 switch (ElementSize) { 563 case AArch64::ElementSizeNone: 564 case AArch64::ElementSizeB: 565 MovPrfx = AArch64::MOVPRFX_ZZ; 566 LSLZero = AArch64::LSL_ZPmI_B; 567 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B; 568 break; 569 case AArch64::ElementSizeH: 570 MovPrfx = AArch64::MOVPRFX_ZZ; 571 LSLZero = AArch64::LSL_ZPmI_H; 572 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H; 573 break; 574 case AArch64::ElementSizeS: 575 MovPrfx = AArch64::MOVPRFX_ZZ; 576 LSLZero = AArch64::LSL_ZPmI_S; 577 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S; 578 break; 579 case AArch64::ElementSizeD: 580 MovPrfx = AArch64::MOVPRFX_ZZ; 581 LSLZero = AArch64::LSL_ZPmI_D; 582 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D; 583 break; 584 default: 585 llvm_unreachable("Unsupported ElementSize"); 586 } 587 588 // 589 // Create the destructive operation (if required) 590 // 591 MachineInstrBuilder PRFX, DOP; 592 if (FalseZero) { 593 // If we cannot prefix the requested instruction we'll instead emit a 594 // prefixed_zeroing_mov for DestructiveBinary. 595 assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary || 596 DType == AArch64::DestructiveBinaryComm || 597 DType == AArch64::DestructiveBinaryCommWithRev) && 598 "The destructive operand should be unique"); 599 assert(ElementSize != AArch64::ElementSizeNone && 600 "This instruction is unpredicated"); 601 602 // Merge source operand into destination register 603 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero)) 604 .addReg(DstReg, RegState::Define) 605 .addReg(MI.getOperand(PredIdx).getReg()) 606 .addReg(MI.getOperand(DOPIdx).getReg()); 607 608 // After the movprfx, the destructive operand is same as Dst 609 DOPIdx = 0; 610 611 // Create the additional LSL to zero the lanes when the DstReg is not 612 // unique. Zeros the lanes in z0 that aren't active in p0 with sequence 613 // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0; 614 if ((DType == AArch64::DestructiveBinary || 615 DType == AArch64::DestructiveBinaryComm || 616 DType == AArch64::DestructiveBinaryCommWithRev) && 617 !DOPRegIsUnique) { 618 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero)) 619 .addReg(DstReg, RegState::Define) 620 .add(MI.getOperand(PredIdx)) 621 .addReg(DstReg) 622 .addImm(0); 623 } 624 } else if (DstReg != MI.getOperand(DOPIdx).getReg()) { 625 assert(DOPRegIsUnique && "The destructive operand should be unique"); 626 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx)) 627 .addReg(DstReg, RegState::Define) 628 .addReg(MI.getOperand(DOPIdx).getReg()); 629 DOPIdx = 0; 630 } 631 632 // 633 // Create the destructive operation 634 // 635 DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode)) 636 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)); 637 638 switch (DType) { 639 case AArch64::DestructiveUnaryPassthru: 640 DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill) 641 .add(MI.getOperand(PredIdx)) 642 .add(MI.getOperand(SrcIdx)); 643 break; 644 case AArch64::DestructiveBinary: 645 case AArch64::DestructiveBinaryImm: 646 case AArch64::DestructiveBinaryComm: 647 case AArch64::DestructiveBinaryCommWithRev: 648 DOP.add(MI.getOperand(PredIdx)) 649 .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill) 650 .add(MI.getOperand(SrcIdx)); 651 break; 652 case AArch64::DestructiveTernaryCommWithRev: 653 DOP.add(MI.getOperand(PredIdx)) 654 .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill) 655 .add(MI.getOperand(SrcIdx)) 656 .add(MI.getOperand(Src2Idx)); 657 break; 658 } 659 660 if (PRFX) { 661 finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator()); 662 transferImpOps(MI, PRFX, DOP); 663 } else 664 transferImpOps(MI, DOP, DOP); 665 666 MI.eraseFromParent(); 667 return true; 668 } 669 670 bool AArch64ExpandPseudo::expandSetTagLoop( 671 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 672 MachineBasicBlock::iterator &NextMBBI) { 673 MachineInstr &MI = *MBBI; 674 DebugLoc DL = MI.getDebugLoc(); 675 Register SizeReg = MI.getOperand(0).getReg(); 676 Register AddressReg = MI.getOperand(1).getReg(); 677 678 MachineFunction *MF = MBB.getParent(); 679 680 bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback; 681 const unsigned OpCode1 = 682 ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex; 683 const unsigned OpCode2 = 684 ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex; 685 686 unsigned Size = MI.getOperand(2).getImm(); 687 assert(Size > 0 && Size % 16 == 0); 688 if (Size % (16 * 2) != 0) { 689 BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg) 690 .addReg(AddressReg) 691 .addReg(AddressReg) 692 .addImm(1); 693 Size -= 16; 694 } 695 MachineBasicBlock::iterator I = 696 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg) 697 .addImm(Size); 698 expandMOVImm(MBB, I, 64); 699 700 auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 701 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 702 703 MF->insert(++MBB.getIterator(), LoopBB); 704 MF->insert(++LoopBB->getIterator(), DoneBB); 705 706 BuildMI(LoopBB, DL, TII->get(OpCode2)) 707 .addDef(AddressReg) 708 .addReg(AddressReg) 709 .addReg(AddressReg) 710 .addImm(2) 711 .cloneMemRefs(MI) 712 .setMIFlags(MI.getFlags()); 713 BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri)) 714 .addDef(SizeReg) 715 .addReg(SizeReg) 716 .addImm(16 * 2) 717 .addImm(0); 718 BuildMI(LoopBB, DL, TII->get(AArch64::Bcc)) 719 .addImm(AArch64CC::NE) 720 .addMBB(LoopBB) 721 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill); 722 723 LoopBB->addSuccessor(LoopBB); 724 LoopBB->addSuccessor(DoneBB); 725 726 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); 727 DoneBB->transferSuccessors(&MBB); 728 729 MBB.addSuccessor(LoopBB); 730 731 NextMBBI = MBB.end(); 732 MI.eraseFromParent(); 733 // Recompute liveness bottom up. 734 LivePhysRegs LiveRegs; 735 computeAndAddLiveIns(LiveRegs, *DoneBB); 736 computeAndAddLiveIns(LiveRegs, *LoopBB); 737 // Do an extra pass in the loop to get the loop carried dependencies right. 738 // FIXME: is this necessary? 739 LoopBB->clearLiveIns(); 740 computeAndAddLiveIns(LiveRegs, *LoopBB); 741 DoneBB->clearLiveIns(); 742 computeAndAddLiveIns(LiveRegs, *DoneBB); 743 744 return true; 745 } 746 747 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB, 748 MachineBasicBlock::iterator MBBI, 749 unsigned Opc, unsigned N) { 750 assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI || 751 Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) && 752 "Unexpected opcode"); 753 unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI) 754 ? RegState::Define 755 : 0; 756 unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) 757 ? AArch64::zsub0 758 : AArch64::psub0; 759 const TargetRegisterInfo *TRI = 760 MBB.getParent()->getSubtarget().getRegisterInfo(); 761 MachineInstr &MI = *MBBI; 762 for (unsigned Offset = 0; Offset < N; ++Offset) { 763 int ImmOffset = MI.getOperand(2).getImm() + Offset; 764 bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false; 765 assert(ImmOffset >= -256 && ImmOffset < 256 && 766 "Immediate spill offset out of range"); 767 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) 768 .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset), 769 RState) 770 .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill)) 771 .addImm(ImmOffset); 772 } 773 MI.eraseFromParent(); 774 return true; 775 } 776 777 bool AArch64ExpandPseudo::expandCALL_RVMARKER( 778 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { 779 // Expand CALL_RVMARKER pseudo to: 780 // - a branch to the call target, followed by 781 // - the special `mov x29, x29` marker, and 782 // - another branch, to the runtime function 783 // Mark the sequence as bundle, to avoid passes moving other code in between. 784 MachineInstr &MI = *MBBI; 785 786 MachineInstr *OriginalCall; 787 MachineOperand &RVTarget = MI.getOperand(0); 788 MachineOperand &CallTarget = MI.getOperand(1); 789 assert((CallTarget.isGlobal() || CallTarget.isReg()) && 790 "invalid operand for regular call"); 791 assert(RVTarget.isGlobal() && "invalid operand for attached call"); 792 unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR; 793 OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr(); 794 OriginalCall->addOperand(CallTarget); 795 796 unsigned RegMaskStartIdx = 2; 797 // Skip register arguments. Those are added during ISel, but are not 798 // needed for the concrete branch. 799 while (!MI.getOperand(RegMaskStartIdx).isRegMask()) { 800 auto MOP = MI.getOperand(RegMaskStartIdx); 801 assert(MOP.isReg() && "can only add register operands"); 802 OriginalCall->addOperand(MachineOperand::CreateReg( 803 MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false, 804 /*isDead=*/false, /*isUndef=*/MOP.isUndef())); 805 RegMaskStartIdx++; 806 } 807 for (const MachineOperand &MO : 808 llvm::drop_begin(MI.operands(), RegMaskStartIdx)) 809 OriginalCall->addOperand(MO); 810 811 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs)) 812 .addReg(AArch64::FP, RegState::Define) 813 .addReg(AArch64::XZR) 814 .addReg(AArch64::FP) 815 .addImm(0); 816 817 auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL)) 818 .add(RVTarget) 819 .getInstr(); 820 821 if (MI.shouldUpdateCallSiteInfo()) 822 MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall); 823 824 MI.eraseFromParent(); 825 finalizeBundle(MBB, OriginalCall->getIterator(), 826 std::next(RVCall->getIterator())); 827 return true; 828 } 829 830 bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB, 831 MachineBasicBlock::iterator MBBI) { 832 // Expand CALL_BTI pseudo to: 833 // - a branch to the call target 834 // - a BTI instruction 835 // Mark the sequence as a bundle, to avoid passes moving other code in 836 // between. 837 838 MachineInstr &MI = *MBBI; 839 MachineOperand &CallTarget = MI.getOperand(0); 840 assert((CallTarget.isGlobal() || CallTarget.isReg()) && 841 "invalid operand for regular call"); 842 unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR; 843 MachineInstr *Call = 844 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr(); 845 Call->addOperand(CallTarget); 846 847 // 1 because we already added the branch target above. 848 unsigned RegMaskStartIdx = 1; 849 // The branch is BL <target>, so we cannot attach the arguments of the called 850 // function to it. Those must be added as implicitly used by the branch. 851 while (!MI.getOperand(RegMaskStartIdx).isRegMask()) { 852 auto MOP = MI.getOperand(RegMaskStartIdx); 853 assert(MOP.isReg() && "can only add register operands"); 854 Call->addOperand(MachineOperand::CreateReg( 855 MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false, 856 /*isDead=*/false, /*isUndef=*/MOP.isUndef())); 857 RegMaskStartIdx++; 858 } 859 for (const MachineOperand &MO : 860 llvm::drop_begin(MI.operands(), RegMaskStartIdx)) 861 Call->addOperand(MO); 862 863 Call->setCFIType(*MBB.getParent(), MI.getCFIType()); 864 865 MachineInstr *BTI = 866 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT)) 867 // BTI J so that setjmp can to BR to this. 868 .addImm(36) 869 .getInstr(); 870 871 if (MI.shouldUpdateCallSiteInfo()) 872 MBB.getParent()->moveCallSiteInfo(&MI, Call); 873 874 MI.eraseFromParent(); 875 finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator())); 876 return true; 877 } 878 879 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext( 880 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { 881 Register CtxReg = MBBI->getOperand(0).getReg(); 882 Register BaseReg = MBBI->getOperand(1).getReg(); 883 int Offset = MBBI->getOperand(2).getImm(); 884 DebugLoc DL(MBBI->getDebugLoc()); 885 auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>(); 886 887 if (STI.getTargetTriple().getArchName() != "arm64e") { 888 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui)) 889 .addUse(CtxReg) 890 .addUse(BaseReg) 891 .addImm(Offset / 8) 892 .setMIFlag(MachineInstr::FrameSetup); 893 MBBI->eraseFromParent(); 894 return true; 895 } 896 897 // We need to sign the context in an address-discriminated way. 0xc31a is a 898 // fixed random value, chosen as part of the ABI. 899 // add x16, xBase, #Offset 900 // movk x16, #0xc31a, lsl #48 901 // mov x17, x22/xzr 902 // pacdb x17, x16 903 // str x17, [xBase, #Offset] 904 unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri; 905 BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16) 906 .addUse(BaseReg) 907 .addImm(abs(Offset)) 908 .addImm(0) 909 .setMIFlag(MachineInstr::FrameSetup); 910 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16) 911 .addUse(AArch64::X16) 912 .addImm(0xc31a) 913 .addImm(48) 914 .setMIFlag(MachineInstr::FrameSetup); 915 // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so 916 // move it somewhere before signing. 917 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17) 918 .addUse(AArch64::XZR) 919 .addUse(CtxReg) 920 .addImm(0) 921 .setMIFlag(MachineInstr::FrameSetup); 922 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17) 923 .addUse(AArch64::X17) 924 .addUse(AArch64::X16) 925 .setMIFlag(MachineInstr::FrameSetup); 926 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui)) 927 .addUse(AArch64::X17) 928 .addUse(BaseReg) 929 .addImm(Offset / 8) 930 .setMIFlag(MachineInstr::FrameSetup); 931 932 MBBI->eraseFromParent(); 933 return true; 934 } 935 936 MachineBasicBlock * 937 AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB, 938 MachineBasicBlock::iterator MBBI) { 939 MachineInstr &MI = *MBBI; 940 assert((std::next(MBBI) != MBB.end() || 941 MI.getParent()->successors().begin() != 942 MI.getParent()->successors().end()) && 943 "Unexpected unreachable in block that restores ZA"); 944 945 // Compare TPIDR2_EL0 value against 0. 946 DebugLoc DL = MI.getDebugLoc(); 947 MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX)) 948 .add(MI.getOperand(0)); 949 950 // Split MBB and create two new blocks: 951 // - MBB now contains all instructions before RestoreZAPseudo. 952 // - SMBB contains the RestoreZAPseudo instruction only. 953 // - EndBB contains all instructions after RestoreZAPseudo. 954 MachineInstr &PrevMI = *std::prev(MBBI); 955 MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true); 956 MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end() 957 ? *SMBB->successors().begin() 958 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true); 959 960 // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB. 961 Cbz.addMBB(SMBB); 962 BuildMI(&MBB, DL, TII->get(AArch64::B)) 963 .addMBB(EndBB); 964 MBB.addSuccessor(EndBB); 965 966 // Replace the pseudo with a call (BL). 967 MachineInstrBuilder MIB = 968 BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL)); 969 MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit); 970 for (unsigned I = 2; I < MI.getNumOperands(); ++I) 971 MIB.add(MI.getOperand(I)); 972 BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB); 973 974 MI.eraseFromParent(); 975 return EndBB; 976 } 977 978 MachineBasicBlock * 979 AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB, 980 MachineBasicBlock::iterator MBBI) { 981 MachineInstr &MI = *MBBI; 982 // In the case of a smstart/smstop before a unreachable, just remove the pseudo. 983 // Exception handling code generated by Clang may introduce unreachables and it 984 // seems unnecessary to restore pstate.sm when that happens. Note that it is 985 // not just an optimisation, the code below expects a successor instruction/block 986 // in order to split the block at MBBI. 987 if (std::next(MBBI) == MBB.end() && 988 MI.getParent()->successors().begin() == 989 MI.getParent()->successors().end()) { 990 MI.eraseFromParent(); 991 return &MBB; 992 } 993 994 // Expand the pseudo into smstart or smstop instruction. The pseudo has the 995 // following operands: 996 // 997 // MSRpstatePseudo <za|sm|both>, <0|1>, pstate.sm, expectedval, <regmask> 998 // 999 // The pseudo is expanded into a conditional smstart/smstop, with a 1000 // check if pstate.sm (register) equals the expected value, and if not, 1001 // invokes the smstart/smstop. 1002 // 1003 // As an example, the following block contains a normal call from a 1004 // streaming-compatible function: 1005 // 1006 // OrigBB: 1007 // MSRpstatePseudo 3, 0, %0, 0, <regmask> <- Conditional SMSTOP 1008 // bl @normal_callee 1009 // MSRpstatePseudo 3, 1, %0, 0, <regmask> <- Conditional SMSTART 1010 // 1011 // ...which will be transformed into: 1012 // 1013 // OrigBB: 1014 // TBNZx %0:gpr64, 0, SMBB 1015 // b EndBB 1016 // 1017 // SMBB: 1018 // MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP 1019 // 1020 // EndBB: 1021 // bl @normal_callee 1022 // MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART 1023 // 1024 DebugLoc DL = MI.getDebugLoc(); 1025 1026 // Create the conditional branch based on the third operand of the 1027 // instruction, which tells us if we are wrapping a normal or streaming 1028 // function. 1029 // We test the live value of pstate.sm and toggle pstate.sm if this is not the 1030 // expected value for the callee (0 for a normal callee and 1 for a streaming 1031 // callee). 1032 auto PStateSM = MI.getOperand(2).getReg(); 1033 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); 1034 unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32); 1035 bool IsStreamingCallee = MI.getOperand(3).getImm(); 1036 unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW; 1037 MachineInstrBuilder Tbx = 1038 BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0); 1039 1040 // Split MBB and create two new blocks: 1041 // - MBB now contains all instructions before MSRcond_pstatesvcrImm1. 1042 // - SMBB contains the MSRcond_pstatesvcrImm1 instruction only. 1043 // - EndBB contains all instructions after MSRcond_pstatesvcrImm1. 1044 MachineInstr &PrevMI = *std::prev(MBBI); 1045 MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true); 1046 MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end() 1047 ? *SMBB->successors().begin() 1048 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true); 1049 1050 // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB. 1051 Tbx.addMBB(SMBB); 1052 BuildMI(&MBB, DL, TII->get(AArch64::B)) 1053 .addMBB(EndBB); 1054 MBB.addSuccessor(EndBB); 1055 1056 // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB. 1057 MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(), 1058 TII->get(AArch64::MSRpstatesvcrImm1)); 1059 // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as 1060 // these contain the CopyFromReg for the first argument and the flag to 1061 // indicate whether the callee is streaming or normal). 1062 MIB.add(MI.getOperand(0)); 1063 MIB.add(MI.getOperand(1)); 1064 for (unsigned i = 4; i < MI.getNumOperands(); ++i) 1065 MIB.add(MI.getOperand(i)); 1066 1067 BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB); 1068 1069 MI.eraseFromParent(); 1070 return EndBB; 1071 } 1072 1073 bool AArch64ExpandPseudo::expandMultiVecPseudo( 1074 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 1075 TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass, 1076 unsigned ContiguousOp, unsigned StridedOpc) { 1077 MachineInstr &MI = *MBBI; 1078 Register Tuple = MI.getOperand(0).getReg(); 1079 1080 auto ContiguousRange = ContiguousClass.getRegisters(); 1081 auto StridedRange = StridedClass.getRegisters(); 1082 unsigned Opc; 1083 if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) { 1084 Opc = ContiguousOp; 1085 } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) { 1086 Opc = StridedOpc; 1087 } else 1088 llvm_unreachable("Cannot expand Multi-Vector pseudo"); 1089 1090 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) 1091 .add(MI.getOperand(0)) 1092 .add(MI.getOperand(1)) 1093 .add(MI.getOperand(2)) 1094 .add(MI.getOperand(3)); 1095 transferImpOps(MI, MIB, MIB); 1096 MI.eraseFromParent(); 1097 return true; 1098 } 1099 1100 /// If MBBI references a pseudo instruction that should be expanded here, 1101 /// do the expansion and return true. Otherwise return false. 1102 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, 1103 MachineBasicBlock::iterator MBBI, 1104 MachineBasicBlock::iterator &NextMBBI) { 1105 MachineInstr &MI = *MBBI; 1106 unsigned Opcode = MI.getOpcode(); 1107 1108 // Check if we can expand the destructive op 1109 int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode()); 1110 if (OrigInstr != -1) { 1111 auto &Orig = TII->get(OrigInstr); 1112 if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) != 1113 AArch64::NotDestructive) { 1114 return expand_DestructiveOp(MI, MBB, MBBI); 1115 } 1116 } 1117 1118 switch (Opcode) { 1119 default: 1120 break; 1121 1122 case AArch64::BSPv8i8: 1123 case AArch64::BSPv16i8: { 1124 Register DstReg = MI.getOperand(0).getReg(); 1125 if (DstReg == MI.getOperand(3).getReg()) { 1126 // Expand to BIT 1127 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1128 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8 1129 : AArch64::BITv16i8)) 1130 .add(MI.getOperand(0)) 1131 .add(MI.getOperand(3)) 1132 .add(MI.getOperand(2)) 1133 .add(MI.getOperand(1)); 1134 } else if (DstReg == MI.getOperand(2).getReg()) { 1135 // Expand to BIF 1136 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1137 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8 1138 : AArch64::BIFv16i8)) 1139 .add(MI.getOperand(0)) 1140 .add(MI.getOperand(2)) 1141 .add(MI.getOperand(3)) 1142 .add(MI.getOperand(1)); 1143 } else { 1144 // Expand to BSL, use additional move if required 1145 if (DstReg == MI.getOperand(1).getReg()) { 1146 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1147 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8 1148 : AArch64::BSLv16i8)) 1149 .add(MI.getOperand(0)) 1150 .add(MI.getOperand(1)) 1151 .add(MI.getOperand(2)) 1152 .add(MI.getOperand(3)); 1153 } else { 1154 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1155 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8 1156 : AArch64::ORRv16i8)) 1157 .addReg(DstReg, 1158 RegState::Define | 1159 getRenamableRegState(MI.getOperand(0).isRenamable())) 1160 .add(MI.getOperand(1)) 1161 .add(MI.getOperand(1)); 1162 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1163 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8 1164 : AArch64::BSLv16i8)) 1165 .add(MI.getOperand(0)) 1166 .addReg(DstReg, 1167 RegState::Kill | 1168 getRenamableRegState(MI.getOperand(0).isRenamable())) 1169 .add(MI.getOperand(2)) 1170 .add(MI.getOperand(3)); 1171 } 1172 } 1173 MI.eraseFromParent(); 1174 return true; 1175 } 1176 1177 case AArch64::ADDWrr: 1178 case AArch64::SUBWrr: 1179 case AArch64::ADDXrr: 1180 case AArch64::SUBXrr: 1181 case AArch64::ADDSWrr: 1182 case AArch64::SUBSWrr: 1183 case AArch64::ADDSXrr: 1184 case AArch64::SUBSXrr: 1185 case AArch64::ANDWrr: 1186 case AArch64::ANDXrr: 1187 case AArch64::BICWrr: 1188 case AArch64::BICXrr: 1189 case AArch64::ANDSWrr: 1190 case AArch64::ANDSXrr: 1191 case AArch64::BICSWrr: 1192 case AArch64::BICSXrr: 1193 case AArch64::EONWrr: 1194 case AArch64::EONXrr: 1195 case AArch64::EORWrr: 1196 case AArch64::EORXrr: 1197 case AArch64::ORNWrr: 1198 case AArch64::ORNXrr: 1199 case AArch64::ORRWrr: 1200 case AArch64::ORRXrr: { 1201 unsigned Opcode; 1202 switch (MI.getOpcode()) { 1203 default: 1204 return false; 1205 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break; 1206 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break; 1207 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break; 1208 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break; 1209 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break; 1210 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break; 1211 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break; 1212 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break; 1213 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break; 1214 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break; 1215 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break; 1216 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break; 1217 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break; 1218 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break; 1219 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break; 1220 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break; 1221 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break; 1222 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break; 1223 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break; 1224 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break; 1225 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break; 1226 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break; 1227 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break; 1228 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break; 1229 } 1230 MachineFunction &MF = *MBB.getParent(); 1231 // Try to create new inst without implicit operands added. 1232 MachineInstr *NewMI = MF.CreateMachineInstr( 1233 TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true); 1234 MBB.insert(MBBI, NewMI); 1235 MachineInstrBuilder MIB1(MF, NewMI); 1236 MIB1->setPCSections(MF, MI.getPCSections()); 1237 MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define) 1238 .add(MI.getOperand(1)) 1239 .add(MI.getOperand(2)) 1240 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 1241 transferImpOps(MI, MIB1, MIB1); 1242 if (auto DebugNumber = MI.peekDebugInstrNum()) 1243 NewMI->setDebugInstrNum(DebugNumber); 1244 MI.eraseFromParent(); 1245 return true; 1246 } 1247 1248 case AArch64::LOADgot: { 1249 MachineFunction *MF = MBB.getParent(); 1250 Register DstReg = MI.getOperand(0).getReg(); 1251 const MachineOperand &MO1 = MI.getOperand(1); 1252 unsigned Flags = MO1.getTargetFlags(); 1253 1254 if (MF->getTarget().getCodeModel() == CodeModel::Tiny) { 1255 // Tiny codemodel expand to LDR 1256 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 1257 TII->get(AArch64::LDRXl), DstReg); 1258 1259 if (MO1.isGlobal()) { 1260 MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags); 1261 } else if (MO1.isSymbol()) { 1262 MIB.addExternalSymbol(MO1.getSymbolName(), Flags); 1263 } else { 1264 assert(MO1.isCPI() && 1265 "Only expect globals, externalsymbols, or constant pools"); 1266 MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags); 1267 } 1268 } else { 1269 // Small codemodel expand into ADRP + LDR. 1270 MachineFunction &MF = *MI.getParent()->getParent(); 1271 DebugLoc DL = MI.getDebugLoc(); 1272 MachineInstrBuilder MIB1 = 1273 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg); 1274 1275 MachineInstrBuilder MIB2; 1276 if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) { 1277 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); 1278 unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32); 1279 unsigned DstFlags = MI.getOperand(0).getTargetFlags(); 1280 MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui)) 1281 .addDef(Reg32) 1282 .addReg(DstReg, RegState::Kill) 1283 .addReg(DstReg, DstFlags | RegState::Implicit); 1284 } else { 1285 Register DstReg = MI.getOperand(0).getReg(); 1286 MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui)) 1287 .add(MI.getOperand(0)) 1288 .addUse(DstReg, RegState::Kill); 1289 } 1290 1291 if (MO1.isGlobal()) { 1292 MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE); 1293 MIB2.addGlobalAddress(MO1.getGlobal(), 0, 1294 Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 1295 } else if (MO1.isSymbol()) { 1296 MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE); 1297 MIB2.addExternalSymbol(MO1.getSymbolName(), Flags | 1298 AArch64II::MO_PAGEOFF | 1299 AArch64II::MO_NC); 1300 } else { 1301 assert(MO1.isCPI() && 1302 "Only expect globals, externalsymbols, or constant pools"); 1303 MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), 1304 Flags | AArch64II::MO_PAGE); 1305 MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), 1306 Flags | AArch64II::MO_PAGEOFF | 1307 AArch64II::MO_NC); 1308 } 1309 1310 transferImpOps(MI, MIB1, MIB2); 1311 } 1312 MI.eraseFromParent(); 1313 return true; 1314 } 1315 case AArch64::MOVaddrBA: { 1316 MachineFunction &MF = *MI.getParent()->getParent(); 1317 if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) { 1318 // blockaddress expressions have to come from a constant pool because the 1319 // largest addend (and hence offset within a function) allowed for ADRP is 1320 // only 8MB. 1321 const BlockAddress *BA = MI.getOperand(1).getBlockAddress(); 1322 assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset"); 1323 1324 MachineConstantPool *MCP = MF.getConstantPool(); 1325 unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8)); 1326 1327 Register DstReg = MI.getOperand(0).getReg(); 1328 auto MIB1 = 1329 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) 1330 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE); 1331 auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), 1332 TII->get(AArch64::LDRXui), DstReg) 1333 .addUse(DstReg) 1334 .addConstantPoolIndex( 1335 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 1336 transferImpOps(MI, MIB1, MIB2); 1337 MI.eraseFromParent(); 1338 return true; 1339 } 1340 } 1341 [[fallthrough]]; 1342 case AArch64::MOVaddr: 1343 case AArch64::MOVaddrJT: 1344 case AArch64::MOVaddrCP: 1345 case AArch64::MOVaddrTLS: 1346 case AArch64::MOVaddrEXT: { 1347 // Expand into ADRP + ADD. 1348 Register DstReg = MI.getOperand(0).getReg(); 1349 assert(DstReg != AArch64::XZR); 1350 MachineInstrBuilder MIB1 = 1351 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) 1352 .add(MI.getOperand(1)); 1353 1354 if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) { 1355 // MO_TAGGED on the page indicates a tagged address. Set the tag now. 1356 // We do so by creating a MOVK that sets bits 48-63 of the register to 1357 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in 1358 // the small code model so we can assume a binary size of <= 4GB, which 1359 // makes the untagged PC relative offset positive. The binary must also be 1360 // loaded into address range [0, 2^48). Both of these properties need to 1361 // be ensured at runtime when using tagged addresses. 1362 auto Tag = MI.getOperand(1); 1363 Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3); 1364 Tag.setOffset(0x100000000); 1365 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg) 1366 .addReg(DstReg) 1367 .add(Tag) 1368 .addImm(48); 1369 } 1370 1371 MachineInstrBuilder MIB2 = 1372 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) 1373 .add(MI.getOperand(0)) 1374 .addReg(DstReg) 1375 .add(MI.getOperand(2)) 1376 .addImm(0); 1377 1378 transferImpOps(MI, MIB1, MIB2); 1379 MI.eraseFromParent(); 1380 return true; 1381 } 1382 case AArch64::ADDlowTLS: 1383 // Produce a plain ADD 1384 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) 1385 .add(MI.getOperand(0)) 1386 .add(MI.getOperand(1)) 1387 .add(MI.getOperand(2)) 1388 .addImm(0); 1389 MI.eraseFromParent(); 1390 return true; 1391 1392 case AArch64::MOVbaseTLS: { 1393 Register DstReg = MI.getOperand(0).getReg(); 1394 auto SysReg = AArch64SysReg::TPIDR_EL0; 1395 MachineFunction *MF = MBB.getParent(); 1396 if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP()) 1397 SysReg = AArch64SysReg::TPIDR_EL3; 1398 else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP()) 1399 SysReg = AArch64SysReg::TPIDR_EL2; 1400 else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP()) 1401 SysReg = AArch64SysReg::TPIDR_EL1; 1402 else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP()) 1403 SysReg = AArch64SysReg::TPIDRRO_EL0; 1404 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg) 1405 .addImm(SysReg); 1406 MI.eraseFromParent(); 1407 return true; 1408 } 1409 1410 case AArch64::MOVi32imm: 1411 return expandMOVImm(MBB, MBBI, 32); 1412 case AArch64::MOVi64imm: 1413 return expandMOVImm(MBB, MBBI, 64); 1414 case AArch64::RET_ReallyLR: { 1415 // Hiding the LR use with RET_ReallyLR may lead to extra kills in the 1416 // function and missing live-ins. We are fine in practice because callee 1417 // saved register handling ensures the register value is restored before 1418 // RET, but we need the undef flag here to appease the MachineVerifier 1419 // liveness checks. 1420 MachineInstrBuilder MIB = 1421 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) 1422 .addReg(AArch64::LR, RegState::Undef); 1423 transferImpOps(MI, MIB, MIB); 1424 MI.eraseFromParent(); 1425 return true; 1426 } 1427 case AArch64::CMP_SWAP_8: 1428 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB, 1429 AArch64::SUBSWrx, 1430 AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0), 1431 AArch64::WZR, NextMBBI); 1432 case AArch64::CMP_SWAP_16: 1433 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH, 1434 AArch64::SUBSWrx, 1435 AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0), 1436 AArch64::WZR, NextMBBI); 1437 case AArch64::CMP_SWAP_32: 1438 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW, 1439 AArch64::SUBSWrs, 1440 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0), 1441 AArch64::WZR, NextMBBI); 1442 case AArch64::CMP_SWAP_64: 1443 return expandCMP_SWAP(MBB, MBBI, 1444 AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs, 1445 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0), 1446 AArch64::XZR, NextMBBI); 1447 case AArch64::CMP_SWAP_128: 1448 case AArch64::CMP_SWAP_128_RELEASE: 1449 case AArch64::CMP_SWAP_128_ACQUIRE: 1450 case AArch64::CMP_SWAP_128_MONOTONIC: 1451 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI); 1452 1453 case AArch64::AESMCrrTied: 1454 case AArch64::AESIMCrrTied: { 1455 MachineInstrBuilder MIB = 1456 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1457 TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr : 1458 AArch64::AESIMCrr)) 1459 .add(MI.getOperand(0)) 1460 .add(MI.getOperand(1)); 1461 transferImpOps(MI, MIB, MIB); 1462 MI.eraseFromParent(); 1463 return true; 1464 } 1465 case AArch64::IRGstack: { 1466 MachineFunction &MF = *MBB.getParent(); 1467 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 1468 const AArch64FrameLowering *TFI = 1469 MF.getSubtarget<AArch64Subtarget>().getFrameLowering(); 1470 1471 // IRG does not allow immediate offset. getTaggedBasePointerOffset should 1472 // almost always point to SP-after-prologue; if not, emit a longer 1473 // instruction sequence. 1474 int BaseOffset = -AFI->getTaggedBasePointerOffset(); 1475 Register FrameReg; 1476 StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference( 1477 MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg, 1478 /*PreferFP=*/false, 1479 /*ForSimm=*/true); 1480 Register SrcReg = FrameReg; 1481 if (FrameRegOffset) { 1482 // Use output register as temporary. 1483 SrcReg = MI.getOperand(0).getReg(); 1484 emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg, 1485 FrameRegOffset, TII); 1486 } 1487 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG)) 1488 .add(MI.getOperand(0)) 1489 .addUse(SrcReg) 1490 .add(MI.getOperand(2)); 1491 MI.eraseFromParent(); 1492 return true; 1493 } 1494 case AArch64::TAGPstack: { 1495 int64_t Offset = MI.getOperand(2).getImm(); 1496 BuildMI(MBB, MBBI, MI.getDebugLoc(), 1497 TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG)) 1498 .add(MI.getOperand(0)) 1499 .add(MI.getOperand(1)) 1500 .addImm(std::abs(Offset)) 1501 .add(MI.getOperand(4)); 1502 MI.eraseFromParent(); 1503 return true; 1504 } 1505 case AArch64::STGloop_wback: 1506 case AArch64::STZGloop_wback: 1507 return expandSetTagLoop(MBB, MBBI, NextMBBI); 1508 case AArch64::STGloop: 1509 case AArch64::STZGloop: 1510 report_fatal_error( 1511 "Non-writeback variants of STGloop / STZGloop should not " 1512 "survive past PrologEpilogInserter."); 1513 case AArch64::STR_ZZZZXI: 1514 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4); 1515 case AArch64::STR_ZZZXI: 1516 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3); 1517 case AArch64::STR_ZZXI: 1518 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2); 1519 case AArch64::STR_PPXI: 1520 return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2); 1521 case AArch64::LDR_ZZZZXI: 1522 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4); 1523 case AArch64::LDR_ZZZXI: 1524 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3); 1525 case AArch64::LDR_ZZXI: 1526 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2); 1527 case AArch64::LDR_PPXI: 1528 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2); 1529 case AArch64::BLR_RVMARKER: 1530 return expandCALL_RVMARKER(MBB, MBBI); 1531 case AArch64::BLR_BTI: 1532 return expandCALL_BTI(MBB, MBBI); 1533 case AArch64::StoreSwiftAsyncContext: 1534 return expandStoreSwiftAsyncContext(MBB, MBBI); 1535 case AArch64::RestoreZAPseudo: { 1536 auto *NewMBB = expandRestoreZA(MBB, MBBI); 1537 if (NewMBB != &MBB) 1538 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. 1539 return true; 1540 } 1541 case AArch64::MSRpstatePseudo: { 1542 auto *NewMBB = expandCondSMToggle(MBB, MBBI); 1543 if (NewMBB != &MBB) 1544 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. 1545 return true; 1546 } 1547 case AArch64::COALESCER_BARRIER_FPR16: 1548 case AArch64::COALESCER_BARRIER_FPR32: 1549 case AArch64::COALESCER_BARRIER_FPR64: 1550 case AArch64::COALESCER_BARRIER_FPR128: 1551 MI.eraseFromParent(); 1552 return true; 1553 case AArch64::LD1B_2Z_IMM_PSEUDO: 1554 return expandMultiVecPseudo( 1555 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1556 AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM); 1557 case AArch64::LD1H_2Z_IMM_PSEUDO: 1558 return expandMultiVecPseudo( 1559 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1560 AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM); 1561 case AArch64::LD1W_2Z_IMM_PSEUDO: 1562 return expandMultiVecPseudo( 1563 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1564 AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM); 1565 case AArch64::LD1D_2Z_IMM_PSEUDO: 1566 return expandMultiVecPseudo( 1567 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1568 AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM); 1569 case AArch64::LDNT1B_2Z_IMM_PSEUDO: 1570 return expandMultiVecPseudo( 1571 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1572 AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM); 1573 case AArch64::LDNT1H_2Z_IMM_PSEUDO: 1574 return expandMultiVecPseudo( 1575 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1576 AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM); 1577 case AArch64::LDNT1W_2Z_IMM_PSEUDO: 1578 return expandMultiVecPseudo( 1579 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1580 AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM); 1581 case AArch64::LDNT1D_2Z_IMM_PSEUDO: 1582 return expandMultiVecPseudo( 1583 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1584 AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM); 1585 case AArch64::LD1B_2Z_PSEUDO: 1586 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass, 1587 AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z, 1588 AArch64::LD1B_2Z_STRIDED); 1589 case AArch64::LD1H_2Z_PSEUDO: 1590 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass, 1591 AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z, 1592 AArch64::LD1H_2Z_STRIDED); 1593 case AArch64::LD1W_2Z_PSEUDO: 1594 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass, 1595 AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z, 1596 AArch64::LD1W_2Z_STRIDED); 1597 case AArch64::LD1D_2Z_PSEUDO: 1598 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass, 1599 AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z, 1600 AArch64::LD1D_2Z_STRIDED); 1601 case AArch64::LDNT1B_2Z_PSEUDO: 1602 return expandMultiVecPseudo( 1603 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1604 AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED); 1605 case AArch64::LDNT1H_2Z_PSEUDO: 1606 return expandMultiVecPseudo( 1607 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1608 AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED); 1609 case AArch64::LDNT1W_2Z_PSEUDO: 1610 return expandMultiVecPseudo( 1611 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1612 AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED); 1613 case AArch64::LDNT1D_2Z_PSEUDO: 1614 return expandMultiVecPseudo( 1615 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, 1616 AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED); 1617 case AArch64::LD1B_4Z_IMM_PSEUDO: 1618 return expandMultiVecPseudo( 1619 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1620 AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM); 1621 case AArch64::LD1H_4Z_IMM_PSEUDO: 1622 return expandMultiVecPseudo( 1623 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1624 AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM); 1625 case AArch64::LD1W_4Z_IMM_PSEUDO: 1626 return expandMultiVecPseudo( 1627 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1628 AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM); 1629 case AArch64::LD1D_4Z_IMM_PSEUDO: 1630 return expandMultiVecPseudo( 1631 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1632 AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM); 1633 case AArch64::LDNT1B_4Z_IMM_PSEUDO: 1634 return expandMultiVecPseudo( 1635 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1636 AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM); 1637 case AArch64::LDNT1H_4Z_IMM_PSEUDO: 1638 return expandMultiVecPseudo( 1639 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1640 AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM); 1641 case AArch64::LDNT1W_4Z_IMM_PSEUDO: 1642 return expandMultiVecPseudo( 1643 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1644 AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM); 1645 case AArch64::LDNT1D_4Z_IMM_PSEUDO: 1646 return expandMultiVecPseudo( 1647 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1648 AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM); 1649 case AArch64::LD1B_4Z_PSEUDO: 1650 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass, 1651 AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z, 1652 AArch64::LD1B_4Z_STRIDED); 1653 case AArch64::LD1H_4Z_PSEUDO: 1654 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass, 1655 AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z, 1656 AArch64::LD1H_4Z_STRIDED); 1657 case AArch64::LD1W_4Z_PSEUDO: 1658 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass, 1659 AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z, 1660 AArch64::LD1W_4Z_STRIDED); 1661 case AArch64::LD1D_4Z_PSEUDO: 1662 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass, 1663 AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z, 1664 AArch64::LD1D_4Z_STRIDED); 1665 case AArch64::LDNT1B_4Z_PSEUDO: 1666 return expandMultiVecPseudo( 1667 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1668 AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED); 1669 case AArch64::LDNT1H_4Z_PSEUDO: 1670 return expandMultiVecPseudo( 1671 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1672 AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED); 1673 case AArch64::LDNT1W_4Z_PSEUDO: 1674 return expandMultiVecPseudo( 1675 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1676 AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED); 1677 case AArch64::LDNT1D_4Z_PSEUDO: 1678 return expandMultiVecPseudo( 1679 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass, 1680 AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED); 1681 } 1682 return false; 1683 } 1684 1685 /// Iterate over the instructions in basic block MBB and expand any 1686 /// pseudo instructions. Return true if anything was modified. 1687 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { 1688 bool Modified = false; 1689 1690 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 1691 while (MBBI != E) { 1692 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 1693 Modified |= expandMI(MBB, MBBI, NMBBI); 1694 MBBI = NMBBI; 1695 } 1696 1697 return Modified; 1698 } 1699 1700 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { 1701 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 1702 1703 bool Modified = false; 1704 for (auto &MBB : MF) 1705 Modified |= expandMBB(MBB); 1706 return Modified; 1707 } 1708 1709 /// Returns an instance of the pseudo instruction expansion pass. 1710 FunctionPass *llvm::createAArch64ExpandPseudoPass() { 1711 return new AArch64ExpandPseudo(); 1712 } 1713